Friendica Communications Platform (please note that this is a clone of the repository at github, issues are handled there) https://friendi.ca
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

981 lines
29KB

  1. <?php
  2. /**
  3. * @file src/Content/Text/HTML.php
  4. */
  5. namespace Friendica\Content\Text;
  6. use DOMDocument;
  7. use DOMXPath;
  8. use Friendica\Content\Widget\ContactBlock;
  9. use Friendica\Core\Hook;
  10. use Friendica\Core\L10n;
  11. use Friendica\Core\Config;
  12. use Friendica\Core\Renderer;
  13. use Friendica\Model\Contact;
  14. use Friendica\Util\Network;
  15. use Friendica\Util\Proxy as ProxyUtils;
  16. use Friendica\Util\Strings;
  17. use Friendica\Util\XML;
  18. use League\HTMLToMarkdown\HtmlConverter;
  19. class HTML
  20. {
  /**
   * Reduces a CSS value to a conservative whitelist of characters.
   *
   * The input is lower-cased first; afterwards every byte that is neither a
   * lower-case ASCII letter nor one of ` #;:0123456789-_.%` is dropped.
   *
   * @param string $input Raw CSS text
   * @return string Lower-cased text made of whitelisted characters only
   */
  public static function sanitizeCSS($input)
  {
      return preg_replace('/[^a-z #;:0-9\-_.%]/', '', strtolower($input));
  }
  /**
   * Replaces the matching nodes of a specific HTML tag in the provided DOM document with BBCode text nodes.
   *
   * Runs replacement passes until HTML::tagToBBCodeSub() returns false.
   *
   * @see HTML::tagToBBCodeSub()
   */
  private static function tagToBBCode(DOMDocument $doc, string $tag, array $attributes, string $startbb, string $endbb, bool $ignoreChildren = false)
  {
      while (self::tagToBBCodeSub($doc, $tag, $attributes, $startbb, $endbb, $ignoreChildren)) {
          // Every pass mutates $doc; the loop condition does all the work.
      }
  }
  /**
   * Runs one replacement pass over all nodes of a specific HTML tag in the provided DOM document and
   * replaces every matching node with BBCode text nodes.
   *
   * For each entry of $attributes: if the opening BBCode still contains the placeholder belonging to
   * that entry ($1 for the first entry, $2 for the second, ...), the entry value is used as a regular
   * expression on the node's attribute value to build the opening BBCode. Otherwise the entry value
   * must equal the node's attribute value for the node to be replaced.
   *
   * @param DOMDocument $doc
   * @param string      $tag            HTML tag name
   * @param array       $attributes     Array of attributes to match and optionally use the value from
   * @param string      $startbb        BBCode tag opening
   * @param string      $endbb          BBCode tag closing
   * @param bool        $ignoreChildren If set to false, the HTML tag children will be appended as text inside the BBCode tag
   *                                    Otherwise, they will be entirely ignored. Useful for simple BBCode that draw their
   *                                    inner value from an attribute value and disregard the tag children.
   * @return bool Whether the last visited node was replaced (false when no node with this tag exists)
   */
  private static function tagToBBCodeSub(DOMDocument $doc, string $tag, array $attributes, string $startbb, string $endbb, bool $ignoreChildren = false)
  {
      // Mask every "$" so that only the placeholder of the attribute currently being processed is
      // turned back into "$1" below. '\x01' is single-quoted, i.e. the literal four characters.
      $savestart = str_replace('$', '\x01', $startbb);
      $replace = false;

      $xpath = new DOMXPath($doc);

      /** @var \DOMNode[] $list */
      $list = $xpath->query("//" . $tag);
      foreach ($list as $node) {
          $attr = [];
          if ($node->attributes->length) {
              foreach ($node->attributes as $nodeAttribute) {
                  $attr[$nodeAttribute->name] = $nodeAttribute->value;
              }
          }

          $replace = true;
          $startbb = $savestart;

          $i = 0;
          foreach ($attributes as $attribute => $value) {
              $startbb = str_replace('\x01' . ++$i, '$1', $startbb);

              // A missing attribute yields null instead of relying on "@" to hide the notice
              $current = $attr[$attribute] ?? null;

              if (strpos($startbb, '$1') !== false) {
                  if ($replace && ($current != '')) {
                      $startbb = preg_replace($value, $startbb, $current, -1, $count);

                      // If nothing could be changed
                      if ($count == 0) {
                          $replace = false;
                      }
                  } else {
                      $replace = false;
                  }
              } elseif ($current != $value) {
                  $replace = false;
              }
          }

          if ($replace) {
              $StartCode = $doc->createTextNode($startbb);
              $EndCode = $doc->createTextNode($endbb);

              $node->parentNode->insertBefore($StartCode, $node);

              if (!$ignoreChildren && $node->hasChildNodes()) {
                  /** @var \DOMNode $child */
                  foreach ($node->childNodes as $key => $child) {
                      /* Remove empty text nodes at the start or at the end of the children list */
                      if ($key > 0 && $key < $node->childNodes->length - 1 || $child->nodeName != '#text' || trim($child->nodeValue)) {
                          $newNode = $child->cloneNode(true);
                          $node->parentNode->insertBefore($newNode, $node);
                      }
                  }
              }

              $node->parentNode->insertBefore($EndCode, $node);
              $node->parentNode->removeChild($node);
          }
      }

      return $replace;
  }
  /**
   * Converts HTML to BBCode.
   *
   * Made by: ike@piratenpartei.de
   * Originally made for the syncom project: http://wiki.piratenpartei.de/Syncom
   * https://github.com/annando/Syncom
   *
   * Processing order: code blocks are swapped for placeholders, the markup is normalised through
   * DOMDocument, HTML tags are replaced with BBCode one tag type at a time, remaining tags are
   * stripped, whitespace is collapsed and finally the code blocks are restored.
   *
   * @brief Converter for HTML to BBCode
   * @param string $message  HTML to convert
   * @param string $basepath If not empty, URLs in the resulting BBCode are completed against it
   * @return string
   * @throws \Friendica\Network\HTTPException\InternalServerErrorException
   */
  public static function toBBCode($message, $basepath = '')
  {
      $message = str_replace("\r", "", $message);

      // Removing code blocks before the whitespace removal processing below
      $codeblocks = [];
      $message = preg_replace_callback(
          '#<pre><code(?: class="language-([^"]*)")?>(.*)</code></pre>#iUs',
          function ($matches) use (&$codeblocks) {
              $return = '[codeblock-' . count($codeblocks) . ']';

              $prefix = '[code]';
              if ($matches[1] != '') {
                  $prefix = '[code=' . $matches[1] . ']';
              }

              $codeblocks[] = $prefix . PHP_EOL . trim($matches[2]) . PHP_EOL . '[/code]';
              return $return;
          },
          $message
      );

      $message = str_replace(
          [
              "<li><p>",
              "</p></li>",
          ],
          [
              "<li>",
              "</li>",
          ],
          $message
      );

      // remove namespaces
      $message = preg_replace('=<(\w+):(.+?)>=', '<removeme>', $message);
      $message = preg_replace('=</(\w+):(.+?)>=', '</removeme>', $message);

      $doc = new DOMDocument();
      $doc->preserveWhiteSpace = false;

      $message = mb_convert_encoding($message, 'HTML-ENTITIES', "UTF-8");

      @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD);

      XML::deleteNode($doc, 'style');
      XML::deleteNode($doc, 'head');
      XML::deleteNode($doc, 'title');
      XML::deleteNode($doc, 'meta');
      XML::deleteNode($doc, 'xml');
      XML::deleteNode($doc, 'removeme');

      $xpath = new DomXPath($doc);
      $list = $xpath->query("//pre");
      foreach ($list as $node) {
          // Ensure to escape unescaped & - they will otherwise raise a warning
          $safe_value = preg_replace('/&(?!\w+;)/', '&amp;', $node->nodeValue);
          // Line breaks inside <pre> are carried as "\r" so the next step can turn them into <br />
          $node->nodeValue = str_replace("\n", "\r", $safe_value);
      }

      $message = $doc->saveHTML();
      $message = str_replace(["\n<", ">\n", "\r", "\n", "\xC3\x82\xC2\xA0"], ["<", ">", "<br />", " ", ""], $message);
      $message = preg_replace('= [\s]*=i', " ", $message);
      @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD);

      self::tagToBBCode($doc, 'html', [], "", "");
      self::tagToBBCode($doc, 'body', [], "", "");

      // Outlook-Quote - Variant 1
      self::tagToBBCode($doc, 'p', ['class' => 'MsoNormal', 'style' => 'margin-left:35.4pt'], '[quote]', '[/quote]');

      // Outlook-Quote - Variant 2
      self::tagToBBCode(
          $doc,
          'div',
          ['style' => 'border:none;border-left:solid blue 1.5pt;padding:0cm 0cm 0cm 4.0pt'],
          '[quote]',
          '[/quote]'
      );

      // MyBB-Stuff
      self::tagToBBCode($doc, 'span', ['style' => 'text-decoration: underline;'], '[u]', '[/u]');
      self::tagToBBCode($doc, 'span', ['style' => 'font-style: italic;'], '[i]', '[/i]');
      self::tagToBBCode($doc, 'span', ['style' => 'font-weight: bold;'], '[b]', '[/b]');

      // Untested
      self::tagToBBCode($doc, 'span', ['style' => '/.*color:\s*(.+?)[,;].*/'], '[color="$1"]', '[/color]');

      // Importing the classes - interesting for importing of posts from third party networks that were exported from friendica
      self::tagToBBCode($doc, 'span', ['class' => 'type-link'], '[class=type-link]', '[/class]');
      self::tagToBBCode($doc, 'span', ['class' => 'type-video'], '[class=type-video]', '[/class]');

      self::tagToBBCode($doc, 'strong', [], '[b]', '[/b]');
      self::tagToBBCode($doc, 'em', [], '[i]', '[/i]');
      self::tagToBBCode($doc, 'b', [], '[b]', '[/b]');
      self::tagToBBCode($doc, 'i', [], '[i]', '[/i]');
      self::tagToBBCode($doc, 'u', [], '[u]', '[/u]');
      self::tagToBBCode($doc, 's', [], '[s]', '[/s]');
      self::tagToBBCode($doc, 'del', [], '[s]', '[/s]');
      self::tagToBBCode($doc, 'strike', [], '[s]', '[/s]');

      self::tagToBBCode($doc, 'big', [], "[size=large]", "[/size]");
      self::tagToBBCode($doc, 'small', [], "[size=small]", "[/size]");

      self::tagToBBCode($doc, 'blockquote', [], '[quote]', '[/quote]');

      self::tagToBBCode($doc, 'br', [], "\n", '');

      self::tagToBBCode($doc, 'p', ['class' => 'MsoNormal'], "\n", "");
      self::tagToBBCode($doc, 'div', ['class' => 'MsoNormal'], "\r", "");

      // With an empty attribute list the call only returns once no <span> is left,
      // so a single call is sufficient.
      self::tagToBBCode($doc, 'span', [], "", "");

      self::tagToBBCode($doc, 'pre', [], "", "");

      self::tagToBBCode($doc, 'div', [], "\r", "\r");
      self::tagToBBCode($doc, 'p', [], "\n", "\n");

      self::tagToBBCode($doc, 'ul', [], "[list]", "[/list]");
      self::tagToBBCode($doc, 'ol', [], "[list=1]", "[/list]");
      self::tagToBBCode($doc, 'li', [], "[*]", "");

      self::tagToBBCode($doc, 'hr', [], "[hr]", "");

      // Tables are flattened: rows become lines, cells are separated by tabs
      self::tagToBBCode($doc, 'table', [], "", "");
      self::tagToBBCode($doc, 'tr', [], "\n", "");
      self::tagToBBCode($doc, 'td', [], "\t", "");

      self::tagToBBCode($doc, 'h1', [], "[h1]", "[/h1]");
      self::tagToBBCode($doc, 'h2', [], "[h2]", "[/h2]");
      self::tagToBBCode($doc, 'h3', [], "[h3]", "[/h3]");
      self::tagToBBCode($doc, 'h4', [], "[h4]", "[/h4]");
      self::tagToBBCode($doc, 'h5', [], "[h5]", "[/h5]");
      self::tagToBBCode($doc, 'h6', [], "[h6]", "[/h6]");

      self::tagToBBCode($doc, 'a', ['href' => '/mailto:(.+)/'], '[mail=$1]', '[/mail]');
      self::tagToBBCode($doc, 'a', ['href' => '/(.+)/'], '[url=$1]', '[/url]');

      self::tagToBBCode($doc, 'img', ['src' => '/(.+)/', 'alt' => '/(.+)/'], '[img=$1]$2', '[/img]', true);
      self::tagToBBCode($doc, 'img', ['src' => '/(.+)/', 'width' => '/(\d+)/', 'height' => '/(\d+)/'], '[img=$2x$3]$1', '[/img]', true);
      self::tagToBBCode($doc, 'img', ['src' => '/(.+)/'], '[img]$1', '[/img]', true);

      self::tagToBBCode($doc, 'video', ['src' => '/(.+)/'], '[video]$1', '[/video]', true);
      self::tagToBBCode($doc, 'audio', ['src' => '/(.+)/'], '[audio]$1', '[/audio]', true);
      self::tagToBBCode($doc, 'iframe', ['src' => '/(.+)/'], '[iframe]$1', '[/iframe]', true);

      self::tagToBBCode($doc, 'key', [], '[code]', '[/code]');
      self::tagToBBCode($doc, 'code', [], '[code]', '[/code]');

      $message = $doc->saveHTML();

      // Bytes 0xC2 0xA0 are the UTF-8 encoding of a non-breaking space (U+00A0)
      $message = str_replace(chr(194) . chr(160), ' ', $message);

      $message = str_replace("&nbsp;", " ", $message);

      // removing multiple DIVs
      $message = preg_replace('=\r *\r=i', "\n", $message);
      $message = str_replace("\r", "\n", $message);

      Hook::callAll('html2bbcode', $message);

      $message = strip_tags($message);

      $message = html_entity_decode($message, ENT_QUOTES, 'UTF-8');

      $message = str_replace(["<"], ["&lt;"], $message);

      // remove quotes if they don't make sense
      $message = preg_replace('=\[/quote\][\s]*\[quote\]=i', "\n", $message);

      $message = preg_replace('=\[quote\]\s*=i', "[quote]", $message);
      $message = preg_replace('=\s*\[/quote\]=i', "[/quote]", $message);

      do {
          $oldmessage = $message;
          $message = str_replace("\n \n", "\n\n", $message);
      } while ($oldmessage != $message);

      do {
          $oldmessage = $message;
          $message = str_replace("\n\n\n", "\n\n", $message);
      } while ($oldmessage != $message);

      do {
          $oldmessage = $message;
          $message = str_replace(
              [
                  "[/size]\n\n",
                  "\n[hr]",
                  "[hr]\n",
                  "\n[list",
                  "[/list]\n",
                  "\n[/",
                  "[list]\n",
                  "[list=1]\n",
                  "\n[*]"],
              [
                  "[/size]\n",
                  "[hr]",
                  "[hr]",
                  "[list",
                  "[/list]",
                  "[/",
                  "[list]",
                  "[list=1]",
                  "[*]"],
              $message
          );
      } while ($message != $oldmessage);

      $message = str_replace(
          ['[b][b]', '[/b][/b]', '[i][i]', '[/i][/i]'],
          ['[b]', '[/b]', '[i]', '[/i]'],
          $message
      );

      // Handling Yahoo style of mails
      $message = str_replace('[hr][b]From:[/b]', '[quote][b]From:[/b]', $message);

      // Restore code blocks
      $message = preg_replace_callback(
          '#\[codeblock-([0-9]+)\]#iU',
          function ($matches) use ($codeblocks) {
              // Look up with the same integer key that is checked, so that a placeholder such as
              // "[codeblock-007]" cannot pass the check and then miss the array entry.
              $index = intval($matches[1]);

              return $codeblocks[$index] ?? '';
          },
          $message
      );

      $message = trim($message);

      if ($basepath != '') {
          $message = self::qualifyURLs($message, $basepath);
      }

      return $message;
  }
  /**
   * @brief Callback helper that completes one incomplete URL
   *
   * The base path is parsed and stripped of its query and fragment; the parts of the
   * matched URL then override the remaining base parts before the URL is rebuilt.
   *
   * @param array  $matches  Result of preg_replace_callback ([0] whole match, [1] the URL)
   * @param string $basepath Basepath that is used to complete the URL
   *
   * @return string The whole match with the URL expanded, or the untouched match when the URL is empty or unparsable
   */
  private static function qualifyURLsSub($matches, $basepath)
  {
      $baseParts = parse_url($basepath);
      unset($baseParts['query'], $baseParts['fragment']);

      $wholeMatch = $matches[0];
      $target = $matches[1];

      $targetParts = empty($target) ? [] : parse_url($target);
      if (empty($targetParts)) {
          return $wholeMatch;
      }

      $absolute = Network::unparseURL(array_merge($baseParts, $targetParts));

      return str_replace($target, $absolute, $wholeMatch);
  }
  /**
   * @brief Complete incomplete URLs in BBCode
   *
   * Each supported BBCode form (url, img, zmg, video, audio) is handled in turn;
   * every hit is passed to HTML::qualifyURLsSub().
   *
   * @param string $body     Body with URLs
   * @param string $basepath Base path that is used to complete the URL
   *
   * @return string Body with expanded URLs
   */
  private static function qualifyURLs($body, $basepath)
  {
      $patterns = [
          '/\[url\=([^\[\]]*)\].*?\[\/url\]/ism',
          '/\[url\]([^\[\]]*)\[\/url\]/ism',
          '/\[img\=[0-9]*x[0-9]*\](.*?)\[\/img\]/ism',
          '/\[img\](.*?)\[\/img\]/ism',
          '/\[zmg\=[0-9]*x[0-9]*\](.*?)\[\/img\]/ism',
          '/\[zmg\](.*?)\[\/zmg\]/ism',
          '/\[video\](.*?)\[\/video\]/ism',
          '/\[audio\](.*?)\[\/audio\]/ism',
      ];

      $expand = function ($hit) use ($basepath) {
          return self::qualifyURLsSub($hit, $basepath);
      };

      foreach ($patterns as $pattern) {
          $body = preg_replace_callback($pattern, $expand, $body);
      }

      return $body;
  }
  /**
   * Wraps a single line at blanks and prefixes every resulting line with its quote level.
   *
   * Words are never split: if no blank is found inside the wrap width, the line is
   * broken at the first blank after it, or not at all if there is none.
   *
   * @param string $line       Line to wrap
   * @param int    $level      Quote level; each produced line is prefixed with that many ">" and a blank
   * @param int    $wraplength Maximum line length including the quote prefix, 0 disables wrapping
   * @return string The wrapped lines joined with "\n"; every line but the last ends with a blank
   */
  private static function breakLines($line, $level, $wraplength = 75)
  {
      if ($wraplength == 0) {
          $wraplength = 2000000;
      }

      $wraplen = $wraplength - $level;
      $prefix = ($level > 0) ? str_repeat(">", $level) . ' ' : '';

      $newlines = [];

      do {
          $oldline = $line;

          $subline = substr($line, 0, $wraplen);

          $pos = strrpos($subline, ' ');

          // No usable blank inside the wrap width: fall back to the first blank of the whole line
          if ($pos === false || $pos === 0) {
              $pos = strpos($line, ' ');
          }

          if (($pos > 0) && strlen($line) > $wraplen) {
              $newlines[] = $prefix . trim(substr($line, 0, $pos)) . " ";
              $line = substr($line, $pos + 1);
          }
          // Stop as soon as the rest fits or no further progress can be made
      } while ((strlen($line) > $wraplen) && !($oldline == $line));

      $newlines[] = $prefix . $line;

      // Separator first: the legacy (array, string) argument order was removed in PHP 8
      return implode("\n", $newlines);
  }
  /**
   * Replaces [quote] / [/quote] markers with ">" line prefixes and wraps the lines.
   *
   * The message is processed line by line. Every [quote] raises the quote level, every
   * [/quote] lowers it (never below zero). A line is emitted with the level that was in
   * effect after its opening markers. Lines that consisted only of quote markers, at
   * least one of them opening, are dropped.
   *
   * @param string $message    Text containing [quote] markers
   * @param int    $wraplength Maximum line length passed on to HTML::breakLines()
   * @return string
   */
  private static function quoteLevel($message, $wraplength = 75)
  {
      $lines = explode("\n", $message);

      $newlines = [];
      $level = 0;
      foreach ($lines as $line) {
          $line = trim($line);
          $startquote = false;
          while (($pos = strpos($line, '[quote]')) !== false) {
              $level++;
              $line = substr($line, 0, $pos) . substr($line, $pos + 7);
              $startquote = true;
          }

          $currlevel = $level;

          while (($pos = strpos($line, '[/quote]')) !== false) {
              $level = max(0, $level - 1);
              $line = substr($line, 0, $pos) . substr($line, $pos + 8);
          }

          if (!$startquote || ($line != '')) {
              $newlines[] = self::breakLines($line, $currlevel, $wraplength);
          }
      }

      // Separator first: the legacy (array, string) argument order was removed in PHP 8
      return implode("\n", $newlines);
  }
  /**
   * Collects the link targets of all anchors with a double-quoted href in the HTML.
   *
   * Targets containing one of a fixed list of fragments are skipped, except for
   * Twitter status links and Google+ post/photo links. Targets starting with "#"
   * are always skipped.
   *
   * @param string $message HTML
   * @return array Collected URLs, keyed by themselves
   */
  private static function collectURLs($message)
  {
      // A list of some links that should be ignored
      $ignoredFragments = ["/user/", "/tag/", "/group/", "/profile/", "/search?search=", "/search?tag=", "mailto:", "/u/", "/node/",
          "//plus.google.com/", "//twitter.com/"];

      preg_match_all('/<a.*?href="(.*?)".*?>(.*?)<\/a>/is', $message, $anchors, PREG_SET_ORDER);

      $collected = [];
      foreach ($anchors as $anchor) {
          $href = $anchor[1];

          $skip = false;
          foreach ($ignoredFragments as $fragment) {
              if (strpos($href, $fragment) !== false) {
                  $skip = true;
                  break;
              }
          }

          $onTwitter = strpos($href, "//twitter.com/") !== false;
          $onGooglePlus = strpos($href, "//plus.google.com/") !== false;

          if ($onTwitter && (strpos($href, "/status/") !== false)) {
              $skip = false;
          }

          if ($onGooglePlus && ((strpos($href, "/posts") !== false) || (strpos($href, "/photos") !== false))) {
              $skip = false;
          }

          if (strpos($href, '#') === 0) {
              $skip = true;
          }

          if ($skip) {
              continue;
          }

          $collected[$href] = $href;
      }

      return $collected;
  }
  /**
   * Converts HTML to plain text.
   *
   * @param string $html
   * @param int    $wraplength Ensures individual lines aren't longer than this many characters. Doesn't break words.
   * @param bool   $compact    True: Completely strips image tags; False: Keeps image URLs
   * @return string
   */
  public static function toPlaintext(string $html, $wraplength = 75, $compact = false)
  {
      $doc = new DOMDocument();
      $doc->preserveWhiteSpace = false;

      // Round-trip the markup through the DOM parser once to normalise it
      $message = mb_convert_encoding(str_replace("\r", "", $html), 'HTML-ENTITIES', "UTF-8");
      @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD);

      // Drop the byte sequence C3 82 C2 A0 from the serialised markup
      $message = str_replace("\xC3\x82\xC2\xA0", "", $doc->saveHTML());

      // Collecting all links
      $urls = self::collectURLs($message);

      @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD);

      self::tagToBBCode($doc, 'html', [], '', '');
      self::tagToBBCode($doc, 'body', [], '', '');

      if ($compact) {
          self::tagToBBCode($doc, 'blockquote', [], "»", "«");
      } else {
          self::tagToBBCode($doc, 'blockquote', [], '[quote]', "[/quote]\n");
      }

      // Tag name, opening text, closing text - applied strictly in this order
      $rules = [
          ['br', "\n", ''],
          ['span', "", ""],
          ['pre', "", ""],
          ['div', "\r", "\r"],
          ['p', "\n", "\n"],
          ['li', "\n* ", "\n"],
          ['hr', "\n" . str_repeat("-", 70) . "\n", ""],
          ['tr', "\n", ""],
          ['td', "\t", ""],
      ];
      for ($headingLevel = 1; $headingLevel <= 6; $headingLevel++) {
          $rules[] = ['h' . $headingLevel, "\n\n*", "*\n"];
      }

      foreach ($rules as $rule) {
          self::tagToBBCode($doc, $rule[0], [], $rule[1], $rule[2]);
      }

      $imageOpen = $compact ? ' ' : ' [img]$1';
      $imageClose = $compact ? ' ' : '[/img] ';
      self::tagToBBCode($doc, 'img', ['src' => '/(.+)/'], $imageOpen, $imageClose);

      self::tagToBBCode($doc, 'iframe', ['src' => '/(.+)/'], ' $1 ', '');

      $message = $doc->saveHTML();

      if (!$compact) {
          $message = str_replace(["[img]", "[/img]"], "", $message);
      }

      // Bytes 0xC2 0xA0 (UTF-8 non-breaking space) and its HTML entity become plain blanks
      $message = str_replace([chr(194) . chr(160), "&nbsp;"], " ", $message);

      // Consecutive DIVs
      $message = preg_replace('=\r *\r=i', "\n", $message);
      $message = str_replace("\r", "\n", $message);

      $message = html_entity_decode(strip_tags($message), ENT_QUOTES, 'UTF-8');

      // Append every collected link that is not already visible in the text
      if (!$compact && ($message != '')) {
          foreach ($urls as $url) {
              if ($url != '' && strpos($message, $url) === false) {
                  $message .= "\n" . $url . ' ';
              }
          }
      }

      // Keep the compact quote marks attached to the quoted text
      $message = str_replace("\n«", "«\n", $message);
      $message = str_replace("»\n", "\n»", $message);

      while (strpos($message, "\n\n\n") !== false) {
          $message = str_replace("\n\n\n", "\n\n", $message);
      }

      return trim(self::quoteLevel(trim($message), $wraplength));
  }
  /**
   * Converts provided HTML code to Markdown. The converter is created with the
   * `hard_break` option enabled, which maximizes compatibility with Diaspora in
   * spite of the Markdown standards.
   *
   * @param string $html
   * @return string
   */
  public static function toMarkdown($html)
  {
      return (new HtmlConverter(['hard_break' => true]))->convert($html);
  }
  /**
   * @brief Convert video HTML to BBCode tags
   *
   * Handles YouTube <object> embeds, YouTube <iframe> embeds and Vimeo <iframe> embeds, in that order.
   *
   * @param string $s
   * @return string
   */
  public static function toBBCodeVideo($s)
  {
      $conversions = [
          '#<object[^>]+>(.*?)https?://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+)(.*?)</object>#ism' => '[youtube]$2[/youtube]',
          '#<iframe[^>](.*?)https?://www.youtube.com/embed/([A-Za-z0-9\-_=]+)(.*?)</iframe>#ism' => '[youtube]$2[/youtube]',
          '#<iframe[^>](.*?)https?://player.vimeo.com/video/([0-9]+)(.*?)</iframe>#ism' => '[vimeo]$2[/vimeo]',
      ];

      foreach ($conversions as $pattern => $bbcode) {
          $s = preg_replace($pattern, $bbcode, $s);
      }

      return $s;
  }
  /**
   * transform link href and img src from relative to absolute
   *
   * For both `<a href>` and `<img src>`: values starting with neither "http" nor "/"
   * are prefixed with the base plus a slash, then remaining values not starting
   * with "http" are prefixed with the base without trailing slash.
   *
   * @param string $text
   * @param string $base base url
   * @return string
   */
  public static function relToAbs($text, $base)
  {
      if (empty($base)) {
          return $text;
      }

      $base = rtrim($base, '/');
      $baseWithSlash = $base . "/";

      // Links first, images second
      foreach (['a' => 'href', 'img' => 'src'] as $tag => $attribute) {
          $head = '/<' . $tag . '([^>]*) ' . $attribute . '="';
          $opening = '<' . $tag . '${1} ' . $attribute . '="';

          // Path-relative values
          $text = preg_replace($head . '(?!http|https|\/)([^"]*)"/', $opening . $baseWithSlash . '${2}"', $text);

          // Everything else that does not start with "http"
          $text = preg_replace($head . '(?!http|https)([^"]*)"/', $opening . $base . '${2}"', $text);
      }

      return $text;
  }
  /**
   * Returns the markup of an empty div element carrying the class 'clear'.
   *
   * @return string
   * @deprecated
   */
  public static function clearDiv()
  {
      $markup = '<div class="clear"></div>';

      return $markup;
  }
  /**
   * Loader for infinite scrolling
   *
   * Renders the "scroll_loader.tpl" template with the translated "wait" and "end" labels.
   *
   * @return string html for loader
   * @throws \Friendica\Network\HTTPException\InternalServerErrorException
   */
  public static function scrollLoader()
  {
      $template = Renderer::getMarkupTemplate("scroll_loader.tpl");

      $labels = [];
      $labels['wait'] = L10n::t('Loading more entries...');
      $labels['end'] = L10n::t('The end');

      return Renderer::replaceMacros($template, $labels);
  }
  /**
   * Get html for contact block.
   *
   * Delegates to ContactBlock::getHTML() with the profile of the current app instance.
   *
   * @deprecated since version 2019.03
   * @see ContactBlock::getHTML()
   * @return string
   * @throws \Friendica\Network\HTTPException\InternalServerErrorException
   * @throws \ImagickException
   */
  public static function contactBlock()
  {
      return ContactBlock::getHTML(\get_app()->profile);
  }
  /**
   * @brief Format contacts as picture links or as text links
   *
   * @param array   $contact  Array with contacts which contains an array with
   *                          int 'id' => The ID of the contact
   *                          int 'uid' => The user ID of the user who owns this data
   *                          string 'name' => The name of the contact
   *                          string 'url' => The url to the profile page of the contact
   *                          string 'addr' => The webbie of the contact (e.g.) username@friendica.com
   *                          string 'network' => The network to which the contact belongs to
   *                          string 'thumb' => The contact picture
   *                          string 'click' => js code which is performed when clicking on the contact
   * @param boolean $redirect If true try to use the redir url if it's possible
   * @param string  $class    CSS class passed to the template
   * @param boolean $textmode If true display the contacts as text links
   *                          if false display the contacts as picture links
   * @return string Formatted html
   * @throws \Friendica\Network\HTTPException\InternalServerErrorException
   * @throws \ImagickException
   */
  public static function micropro($contact, $redirect = false, $class = '', $textmode = false)
  {
      // Use the contact URL if no address is available
      if (empty($contact['addr'])) {
          $contact["addr"] = $contact["url"];
      }

      $url = $contact['url'];
      $sparkle = '';
      $redir = false;

      if ($redirect) {
          $url = Contact::magicLink($contact['url']);
          if (strpos($url, 'redir/') === 0) {
              $sparkle = ' sparkle';
          }
      }

      // If there is some js available we don't need the url
      if (!empty($contact['click'])) {
          $url = '';
      }

      $template = Renderer::getMarkupTemplate($textmode ? 'micropro_txt.tpl' : 'micropro_img.tpl');

      return Renderer::replaceMacros($template, [
          '$click' => defaults($contact, 'click', ''),
          '$class' => $class,
          '$url' => $url,
          '$photo' => ProxyUtils::proxifyUrl($contact['thumb'], false, ProxyUtils::SIZE_THUMB),
          '$name' => $contact['name'],
          // NOTE(review): unlike its siblings this key has no "$" prefix - confirm the renderer treats both forms alike
          'title' => $contact['name'] . ' [' . $contact['addr'] . ']',
          // Was '$parkle', which never matched the name of the value computed above
          '$sparkle' => $sparkle,
          '$redir' => $redir
      ]);
  }
  /**
   * Search box.
   *
   * A query starting with "#" switches the box to tag mode, which changes the
   * label of the save button from "Save" to "Follow".
   *
   * @param string $s     Search query.
   * @param string $id    HTML id
   * @param bool   $aside Display the search widget aside. If false, the search options
   *                      (full text, tags, contacts and - if enabled - forums) are added.
   *
   * @return string Formatted HTML.
   * @throws \Exception
   */
  public static function search($s, $id = 'search-box', $aside = true)
  {
      $mode = 'text';

      if (strpos($s, '#') === 0) {
          $mode = 'tag';
      }
      $save_label = $mode === 'text' ? L10n::t('Save') : L10n::t('Follow');

      $values = [
          '$s' => $s,
          '$q' => urlencode($s),
          '$id' => $id,
          '$search_label' => L10n::t('Search'),
          '$save_label' => $save_label,
          '$search_hint' => L10n::t('@name, !forum, #tags, content'),
          '$mode' => $mode,
          '$return_url' => urlencode('search?q=' . $s),
      ];

      if (!$aside) {
          $values['$search_options'] = [
              'fulltext' => L10n::t('Full Text'),
              'tags' => L10n::t('Tags'),
              'contacts' => L10n::t('Contacts')
          ];

          if (Config::get('system', 'poco_local_search')) {
              // Must extend the option list created above; this used to be written to the
              // separate key '$searchoption', which nothing else in this method fills.
              $values['$search_options']['forums'] = L10n::t('Forums');
          }
      }

      return Renderer::replaceMacros(Renderer::getMarkupTemplate('searchbox.tpl'), $values);
  }
  /**
   * Replace naked text hyperlink with HTML formatted hyperlink
   *
   * Afterwards one "&amp;" inside each tag carrying a src or href attribute is turned back into "&".
   *
   * @param string $s
   * @return string
   */
  public static function toLink($s)
  {
      $nakedUrl = "/(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\'\%\$\!\+]*)/";
      $linked = preg_replace($nakedUrl, ' <a href="$1" target="_blank">$1</a>', $s);

      $escapedAmpInTag = "/\<(.*?)(src|href)=(.*?)\&amp\;(.*?)\>/ism";

      return preg_replace($escapedAmpInTag, '<$1$2=$3&$4>', $linked);
  }
  /**
   * Given a HTML text and a set of filtering reasons, adds a content hiding header with the provided reasons
   *
   * Reasons are expected to have been translated already. Without any reason the
   * HTML is returned untouched.
   *
   * @param string $html
   * @param array  $reasons
   * @return string
   * @throws \Friendica\Network\HTTPException\InternalServerErrorException
   */
  public static function applyContentFilter($html, array $reasons)
  {
      if (!count($reasons)) {
          return $html;
      }

      $template = Renderer::getMarkupTemplate('wall/content_filter.tpl');

      return Renderer::replaceMacros($template, [
          '$reasons' => $reasons,
          '$rnd' => Strings::getRandomHex(8),
          '$openclose' => L10n::t('Click to open/close'),
          '$html' => $html
      ]);
  }
  /**
   * Replaces every HTML ampersand entity ("&amp;") with a plain ampersand character.
   *
   * @param string $s
   * @return string
   */
  public static function unamp($s)
  {
      $entity = '&amp;';
      $character = '&';

      return str_replace($entity, $character, $s);
  }
  803. }