Friendica Communications Platform (please note that this is a clone of the repository at github, issues are handled there) https://friendi.ca
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

349 lines
8.9 KiB

11 years ago
11 years ago
11 years ago
11 years ago
  1. <?php
  2. /* To-Do
  3. https://developers.google.com/+/plugins/snippet/
  4. <meta itemprop="name" content="Toller Titel">
  5. <meta itemprop="description" content="Eine tolle Beschreibung">
  6. <meta itemprop="image" content="http://maple.libertreeproject.org/images/tree-icon.png">
  7. <body itemscope itemtype="http://schema.org/Product">
  8. <h1 itemprop="name">Shiny Trinket</h1>
  9. <img itemprop="image" src="{image-url}" />
  10. <p itemprop="description">Shiny trinkets are shiny.</p>
  11. </body>
  12. */
  if (!function_exists('deletenode')) {
      /**
       * Remove every element with the given tag name from a DOM document.
       *
       * The document is modified in place.
       *
       * @param DOMDocument $doc  Document to strip the elements from.
       * @param string      $node Element name; it is appended to "//" to build the XPath query.
       */
      function deletenode(&$doc, $node)
      {
          $finder = new DomXPath($doc);
          $matches = $finder->query("//" . $node);

          foreach ($matches as $element) {
              $parent = $element->parentNode;
              $parent->removeChild($element);
          }
      }
  }
  /**
   * Turn a possibly relative URL into an absolute one.
   *
   * A URL that already has a scheme is returned unchanged. Otherwise the scheme
   * (and, unless the URL names its own host, the host and port) are taken from
   * the base URL. The path is always anchored at the root of the host: a path
   * without a leading slash gets one prepended; it is NOT resolved against the
   * directory of the base URL.
   *
   * @param string $url    URL to complete (absolute, protocol-relative or relative).
   * @param string $scheme Absolute base URL that supplies scheme, host and port.
   * @return string The completed URL, or $url unchanged when either argument
   *                cannot be parsed or the base lacks a scheme or host.
   */
  function completeurl($url, $scheme) {
      $urlarr = parse_url($url);

      // parse_url() returns false for seriously malformed input.
      if (!is_array($urlarr))
          return($url);

      if (isset($urlarr["scheme"]))
          return($url);

      $schemearr = parse_url($scheme);

      // Without a usable base there is nothing sensible to prepend.
      if (!is_array($schemearr) || !isset($schemearr["scheme"]) || !isset($schemearr["host"]))
          return($url);

      $complete = $schemearr["scheme"]."://";

      if (isset($urlarr["host"])) {
          // Protocol-relative URL ("//host/path"): keep its own host and port.
          $complete .= $urlarr["host"];
          if (isset($urlarr["port"]))
              $complete .= ":".$urlarr["port"];
      } else {
          $complete .= $schemearr["host"];
          if (isset($schemearr["port"]))
              $complete .= ":".$schemearr["port"];
      }

      // parse_url() omits keys for missing components, so test before reading.
      $path = isset($urlarr["path"]) ? $urlarr["path"] : "";

      if (strpos($path, '/') !== 0)
          $complete .= '/';

      $complete .= $path;

      if (isset($urlarr["query"]) && $urlarr["query"] != "")
          $complete .= "?".$urlarr["query"];

      if (isset($urlarr["fragment"]) && $urlarr["fragment"] != "")
          $complete .= "#".$urlarr["fragment"];

      return($complete);
  }
  /**
   * Fetch a web page and extract preview information from it.
   *
   * The page is downloaded with cURL (3 second timeout), converted to UTF-8
   * and parsed as HTML. Title and description come from <title>, <meta name>
   * and <meta property> (Open Graph) elements; later sources overwrite earlier
   * ones. If no description is found, text is collected from div.article,
   * div.content or <p> elements and truncated to 350 characters.
   *
   * @param string $url Address of the page to inspect.
   * @return array May contain the keys "title" (string), "text" (string) and
   *               "images" (list of arrays with "src", "width", "height").
   *               Keys are only present when a value was found; the array is
   *               empty when the page could not be fetched.
   */
  function parseurl_getsiteinfo($url) {
      $siteinfo = array();

      $ch = curl_init();
      curl_setopt($ch, CURLOPT_URL, $url);
      curl_setopt($ch, CURLOPT_HEADER, 1);
      curl_setopt($ch, CURLOPT_NOBODY, 0);
      curl_setopt($ch, CURLOPT_TIMEOUT, 3);
      curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
      curl_setopt($ch,CURLOPT_USERAGENT,'Opera/9.64(Windows NT 5.1; U; de) Presto/2.1.1');

      // Because CURLOPT_HEADER is set, $header holds the response headers AND the body.
      $header = curl_exec($ch);
      curl_close($ch);

      // curl_exec() returns false on failure; there is nothing to parse then.
      if (!is_string($header) || $header === "")
          return($siteinfo);

      // Fetch the first mentioned charset. Can be in body or header.
      // The optional quote lets charset="utf-8" yield "utf-8" and not an empty string.
      if (preg_match('/charset=["\']?([^"\'\s>;\/]+)/i', $header, $matches))
          $charset = trim($matches[1]);
      else
          $charset = "utf-8";

      // Everything after the first blank line is treated as the body.
      $pos = strpos($header, "\r\n\r\n");

      if ($pos)
          $body = trim(substr($header, $pos));
      else
          $body = $header;

      $body = mb_convert_encoding($body, "UTF-8", $charset);
      $body = mb_convert_encoding($body, 'HTML-ENTITIES', "UTF-8");

      // DOMDocument::loadHTML() cannot parse an empty document.
      if (!is_string($body) || trim($body) === "")
          return($siteinfo);

      $doc = new DOMDocument();
      // Real-world HTML is rarely valid; parser warnings are deliberately suppressed.
      @$doc->loadHTML($body);

      // Drop elements whose content should not end up in the extracted text.
      deletenode($doc, 'style');
      deletenode($doc, 'script');
      deletenode($doc, 'option');
      deletenode($doc, 'h1');
      deletenode($doc, 'h2');
      deletenode($doc, 'h3');
      deletenode($doc, 'h4');
      deletenode($doc, 'h5');
      deletenode($doc, 'h6');
      deletenode($doc, 'ol');
      deletenode($doc, 'ul');

      $xpath = new DomXPath($doc);

      //$list = $xpath->query("head/title");
      $list = $xpath->query("//title");
      foreach ($list as $node)
          $siteinfo["title"] = html_entity_decode($node->nodeValue, ENT_QUOTES, "UTF-8");

      //$list = $xpath->query("head/meta[@name]");
      $list = $xpath->query("//meta[@name]");
      foreach ($list as $node) {
          $attr = array();
          if ($node->attributes->length)
              foreach ($node->attributes as $attribute)
                  $attr[$attribute->name] = $attribute->value;

          // A <meta name> element is not required to carry a content attribute.
          $attr["content"] = html_entity_decode(isset($attr["content"]) ? $attr["content"] : "", ENT_QUOTES, "UTF-8");

          switch (strtolower($attr["name"])) {
              case "fulltitle":
                  $siteinfo["title"] = $attr["content"];
                  break;
              case "description":
                  $siteinfo["text"] = $attr["content"];
                  break;
              case "dc.title":
                  $siteinfo["title"] = $attr["content"];
                  break;
              case "dc.description":
                  $siteinfo["text"] = $attr["content"];
                  break;
          }
      }

      //$list = $xpath->query("head/meta[@property]");
      $list = $xpath->query("//meta[@property]");
      foreach ($list as $node) {
          $attr = array();
          if ($node->attributes->length)
              foreach ($node->attributes as $attribute)
                  $attr[$attribute->name] = $attribute->value;

          $attr["content"] = html_entity_decode(isset($attr["content"]) ? $attr["content"] : "", ENT_QUOTES, "UTF-8");

          switch (strtolower($attr["property"])) {
              case "og:image":
                  $siteinfo["image"] = $attr["content"];
                  break;
              case "og:title":
                  $siteinfo["title"] = $attr["content"];
                  break;
              case "og:description":
                  $siteinfo["text"] = $attr["content"];
                  break;
          }
      }

      if (!isset($siteinfo["image"]) || $siteinfo["image"] == "") {
          // No Open Graph image: look for sufficiently large <img> elements.
          $list = $xpath->query("//img[@src]");
          foreach ($list as $node) {
              $src = completeurl($node->getAttribute("src"), $url);

              // getimagesize() fetches the remote image; failures are expected and ignored.
              $photodata = @getimagesize($src);

              if (($photodata) && ($photodata[0] > 150) and ($photodata[1] > 150)) {
                  // Scale down proportionally so that neither side exceeds 300 pixels.
                  if ($photodata[0] > 300) {
                      $photodata[1] = round($photodata[1] * (300 / $photodata[0]));
                      $photodata[0] = 300;
                  }
                  if ($photodata[1] > 300) {
                      $photodata[0] = round($photodata[0] * (300 / $photodata[1]));
                      $photodata[1] = 300;
                  }
                  $siteinfo["images"][] = array("src"=>$src,
                                  "width"=>$photodata[0],
                                  "height"=>$photodata[1]);
              }
          }
      } else {
          $src = completeurl($siteinfo["image"], $url);

          // The single "image" entry is replaced by an entry in the "images" list.
          unset($siteinfo["image"]);

          $photodata = @getimagesize($src);

          if (($photodata) && ($photodata[0] > 10) and ($photodata[1] > 10))
              $siteinfo["images"][] = array("src"=>$src,
                              "width"=>$photodata[0],
                              "height"=>$photodata[1]);
      }

      if (!isset($siteinfo["text"]) || $siteinfo["text"] == "") {
          $text = "";

          $list = $xpath->query("//div[@class='article']");
          foreach ($list as $node)
              if (strlen($node->nodeValue) > 40)
                  $text .= " ".trim($node->nodeValue);

          if ($text == "") {
              $list = $xpath->query("//div[@class='content']");
              foreach ($list as $node)
                  if (strlen($node->nodeValue) > 40)
                      $text .= " ".trim($node->nodeValue);
          }

          // If no text was found, fall back to the paragraph content.
          if ($text == "") {
              $list = $xpath->query("//p");
              foreach ($list as $node)
                  if (strlen($node->nodeValue) > 40)
                      $text .= " ".trim($node->nodeValue);
          }

          if ($text != "") {
              $text = trim(str_replace(array("\n", "\r"), array(" ", " "), $text));

              // Collapse runs of spaces in one pass. Replacing a single space
              // with a single space never changes the string and would loop forever.
              $text = trim(preg_replace('/ {2,}/', ' ', $text));

              // Cut on character boundaries so a multibyte character is never split.
              $siteinfo["text"] = html_entity_decode(mb_substr($text, 0, 350, "UTF-8"), ENT_QUOTES, "UTF-8").'...';
          }
      }

      return($siteinfo);
  }
  /**
   * array_walk() callback that prefixes an array element with "#".
   *
   * @param mixed $item Element to change; modified in place through the reference.
   * @param mixed $k    Key of the element; not used.
   */
  function arr_add_hashes(&$item,$k) {
      $item = "#{$item}";
  }
  /**
   * Build a bookmark snippet for a URL and echo it.
   *
   * Reads "binurl" (hex encoded) or "url", plus the optional "title",
   * "description" and "tags" (CSV) parameters from $_GET. The snippet is
   * BBCode when the local user has the system/plaintext setting enabled,
   * HTML otherwise. Sources are tried in this order:
   *   1. a "parse_link" hook that fills in 'text',
   *   2. title and description supplied in the request,
   *   3. information scraped from the page by parseurl_getsiteinfo().
   *
   * Every path ends with echo followed by killme(), which presumably
   * terminates the request - confirm against its definition.
   *
   * NOTE(review): in HTML mode the URL, the title, the description and the
   * image data are inserted into the markup without escaping. Confirm that
   * the consumer of this output sanitizes it.
   *
   * @param object $a Application object; not used by this function.
   */
  function parse_url_content(&$a) {
      $text = '';
      $title = '';
      $str_tags = '';
      $textmode = false;

      if(local_user() && intval(get_pconfig(local_user(),'system','plaintext')))
          $textmode = true;

      $br = (($textmode) ? "\n" : '<br />');

      if(x($_GET,'binurl'))
          $url = trim(hex2bin($_GET['binurl']));
      else
          $url = trim(isset($_GET['url']) ? $_GET['url'] : '');

      // All request parameters are optional, so test for presence before reading.
      if(!empty($_GET['title']))
          $title = strip_tags(trim($_GET['title']));

      if(!empty($_GET['description']))
          $text = strip_tags(trim($_GET['description']));

      if(!empty($_GET['tags'])) {
          $arr_tags = str_getcsv($_GET['tags']);
          if(count($arr_tags)) {
              array_walk($arr_tags,'arr_add_hashes');
              $str_tags = $br . implode(' ',$arr_tags) . $br;
          }
      }

      logger('parse_url: ' . $url);

      if($textmode)
          $template = $br . '[bookmark=%s]%s[/bookmark]%s' . $br;
      else
          $template = "<br /><a class=\"bookmark\" href=\"%s\" >%s</a>%s<br />";

      // Give hooks the chance to provide the complete result themselves.
      $arr = array('url' => $url, 'text' => '');

      call_hooks('parse_link', $arr);

      if(strlen($arr['text'])) {
          echo $arr['text'];
          killme();
      }

      // Title and description came with the request: no need to fetch the page.
      if($url && $title && $text) {

          if($textmode)
              $text = $br . '[quote]' . trim($text) . '[/quote]' . $br;
          else
              $text = '<br /><blockquote>' . trim($text) . '</blockquote><br />';

          $title = str_replace(array("\r","\n"),array('',''),$title);

          $result = sprintf($template,$url,($title) ? $title : $url,$text) . $str_tags;

          logger('parse_url (unparsed): returns: ' . $result);

          echo $result;
          killme();
      }

      $siteinfo = parseurl_getsiteinfo($url);

      // parseurl_getsiteinfo() only sets the keys it found values for.
      if(!isset($siteinfo["title"]) || $siteinfo["title"] == "") {
          echo sprintf($template,$url,$url,'') . $str_tags;
          killme();
      } else {
          $text = isset($siteinfo["text"]) ? $siteinfo["text"] : '';
          $title = $siteinfo["title"];
      }

      $image = "";

      if(!empty($siteinfo["images"])) {
          /* Execute below code only if image is present in siteinfo */

          $total_images = 0;
          $max_images = get_config('system','max_bookmark_images');
          if($max_images === false)
              $max_images = 2;
          else
              $max_images = intval($max_images);

          foreach ($siteinfo["images"] as $imagedata) {
              if($textmode)
                  $image .= '[img='.$imagedata["width"].'x'.$imagedata["height"].']'.$imagedata["src"].'[/img]' . "\n";
              else
                  $image .= '<img height="'.$imagedata["height"].'" width="'.$imagedata["width"].'" src="'.$imagedata["src"].'" alt="photo" /><br />';
              $total_images ++;

              // Stop once the configured number of images is reached; a limit of 0 means no limit.
              if($max_images && $total_images >= $max_images)
                  break;
          }
      }

      if(strlen($text)) {
          if($textmode)
              $text = $br.'[quote]'.trim($text).'[/quote]'.$br ;
          else
              $text = '<br /><blockquote>'.trim($text).'</blockquote><br />';
      }

      if($image) {
          $text = $br.$br.$image.$text;
      }

      $title = str_replace(array("\r","\n"),array('',''),$title);

      $result = sprintf($template,$url,($title) ? $title : $url,$text) . $str_tags;

      logger('parse_url: returns: ' . $result);

      echo trim($result);
      killme();
  }