Friendica Communications Platform (please note that this is a clone of the repository at GitHub; issues are handled there) https://friendi.ca
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

173 lines
3.4 KiB

  1. <?php
  2. require_once('library/HTML5/Parser.php');
  3. require_once('library/HTMLPurifier.auto.php');
  /**
   * array_walk() callback that turns a tag into a hashtag.
   *
   * The element is modified in place: a '#' is put in front of its
   * current value.
   *
   * @param string $item array element, received by reference and overwritten
   * @param mixed  $k    array key supplied by array_walk(); not used
   * @return void
   */
  function arr_add_hashes(&$item,$k) {
      $item = sprintf('#%s', $item);
  }
  /**
   * Picks a preview excerpt from a list of <p> nodes.
   *
   * The nodes are visited in order. The first paragraph that does not
   * contain a literal "<script" and whose tag-stripped text is at least
   * 100 bytes long wins; it is cut to 250 characters and '...' is appended.
   *
   * @param mixed $paragraphs traversable list of nodes exposing textContent,
   *                          e.g. the result of getElementsByTagName('p')
   * @return string the excerpt, or '' when no paragraph qualifies
   */
  function parse_url_paragraph_excerpt($paragraphs) {
      if(! $paragraphs)
          return '';

      foreach($paragraphs as $paragraph) {
          $candidate = $paragraph->textContent;

          // textContent is entity-decoded, so markup that was escaped in the
          // page shows up here as real markup. Such paragraphs are skipped
          // completely and never become the excerpt.
          if(stristr($candidate,'<script'))
              continue;

          $candidate = strip_tags($candidate);
          if(strlen($candidate) < 100)
              continue;

          // DOM text is UTF-8; cut on character boundaries when mbstring is
          // available so a multi-byte character is never split in half.
          if(function_exists('mb_substr'))
              return mb_substr($candidate,0,250,'UTF-8') . '...';

          return substr($candidate,0,250) . '...';
      }

      return '';
  }

  /**
   * Builds an HTML bookmark snippet for a link and echoes it.
   *
   * Request parameters read from $_GET:
   *  - binurl      : the link, passed through hex2bin(); preferred over url
   *  - url         : the link as plain text
   *  - title       : optional link title (tags stripped)
   *  - description : optional description (tags stripped)
   *  - tags        : optional comma separated list, emitted as hashtags
   *
   * The snippet is a link with class "bookmark", optionally followed by a
   * blockquote holding the description and by a line of hashtags. When title
   * or description are missing the page is fetched and they are derived from
   * its markup. Every code path finishes by calling killme(), which is
   * defined elsewhere (presumably it ends the request).
   *
   * @param mixed $a not used by this function
   *                 NOTE(review): presumably the application object - confirm
   * @return void
   */
  function parse_url_content(&$a) {

      $title    = '';
      $text     = '';
      $str_tags = '';
      $dom      = null;

      // x() is a project helper; it is used here as an "is this key set" test.
      if(x($_GET,'binurl'))
          $url = trim(hex2bin($_GET['binurl']));
      else
          $url = (isset($_GET['url']) ? trim($_GET['url']) : '');

      if(! empty($_GET['title']))
          $title = strip_tags(trim($_GET['title']));

      if(! empty($_GET['description']))
          $text = strip_tags(trim($_GET['description']));

      if(! empty($_GET['tags'])) {
          // Tags end up in the HTML output like title and description do,
          // so they get the same tag stripping.
          $arr_tags = str_getcsv(strip_tags($_GET['tags']));
          if(count($arr_tags)) {
              array_walk($arr_tags,'arr_add_hashes');
              $str_tags = '<br />' . implode(' ',$arr_tags) . '<br />';
          }
      }

      logger('parse_url: ' . $url);

      // The URL is request data and is written into an attribute value, so
      // quotes and ampersands must be escaped. The raw value is still used
      // for logging, the hook and the fetch.
      $escape_flags = ENT_QUOTES | (defined('ENT_SUBSTITUTE') ? ENT_SUBSTITUTE : 0);
      $safe_url = htmlspecialchars($url,$escape_flags,'UTF-8');

      $template = "<br /><a class=\"bookmark\" href=\"%s\" >%s</a>%s<br />";

      // Give 'parse_link' hook handlers the chance to provide the complete
      // result; whatever they put into 'text' is echoed unchanged.
      $arr = array('url' => $url, 'text' => '');

      call_hooks('parse_link', $arr);

      if(strlen($arr['text'])) {
          echo $arr['text'];
          killme();
      }

      // Everything was supplied by the caller: no need to fetch the page.
      if($url && $title && $text) {

          $text = '<br /><br /><blockquote>' . $text . '</blockquote><br />';
          $title = str_replace(array("\r","\n"),array('',''),$title);

          $result = sprintf($template,$safe_url,($title) ? $title : $safe_url,$text) . $str_tags;

          logger('parse_url (unparsed): returns: ' . $result);

          echo $result;
          killme();
      }

      if($url) {
          $s = fetch_url($url);
      } else {
          echo '';
          killme();
      }

      logger('parse_url: data: ' . $s, LOGGER_DATA);

      // Nothing could be fetched: fall back to a bare link.
      if(! $s) {
          echo sprintf($template,$safe_url,$safe_url,'') . $str_tags;
          killme();
      }

      // Take the title from the raw markup: at most 64 bytes following the
      // first "<title>", cut at the next "<".
      if(! $title) {
          $title_pos = strpos($s,'<title>');
          if($title_pos !== false) {
              $title = substr($s,$title_pos + 7,64);
              $tag_pos = strpos($title,'<');
              if($tag_pos !== false)
                  $title = strip_tags(substr($title,0,$tag_pos));
          }
      }

      // Sanitize the fetched markup before handing it to the parser.
      // A null Cache.DefinitionImpl turns off HTMLPurifier's definition cache.
      $config = HTMLPurifier_Config::createDefault();
      $config->set('Cache.DefinitionImpl', null);

      $purifier = new HTMLPurifier($config);
      $s = $purifier->purify($s);

      try {
          $dom = HTML5_Parser::parse($s);
      } catch (DOMException $e) {
          logger('parse_url: parse error: ' . $e);
      }

      if(! $dom) {
          echo sprintf($template,$safe_url,$safe_url,'') . $str_tags;
          killme();
      }

      // A <title> element found in the parsed document replaces any title
      // determined so far.
      $items = $dom->getElementsByTagName('title');

      if($items) {
          foreach($items as $item) {
              $title = trim($item->textContent);
              break;
          }
      }

      if(! $text) {
          // Prefer paragraphs inside a div whose class mentions "article"
          // or "content" ...
          $divs = $dom->getElementsByTagName('div');
          if($divs) {
              foreach($divs as $div) {
                  $class = $div->getAttribute('class');
                  if($class && (stristr($class,'article') || stristr($class,'content'))) {
                      $text = parse_url_paragraph_excerpt($div->getElementsByTagName('p'));
                      if($text)
                          break;
                  }
              }
          }

          // ... and fall back to any paragraph of the document.
          if(! $text)
              $text = parse_url_paragraph_excerpt($dom->getElementsByTagName('p'));
      }

      if(strlen($text)) {
          $text = '<br /><br /><blockquote>' . $text . '</blockquote><br />';
      }

      $title = str_replace(array("\r","\n"),array('',''),$title);

      $result = sprintf($template,$safe_url,($title) ? $title : $safe_url,$text) . $str_tags;

      logger('parse_url: returns: ' . $result);

      echo $result;
      killme();
  }