Friendica Communications Platform (please note that this is a clone of the repository at github, issues are handled there) https://friendi.ca
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

671 lines
16 KiB

  1. <?php
  2. /**
  3. * @file include/text.php
  4. */
  5. use Friendica\App;
  6. use Friendica\Content\ContactSelector;
  7. use Friendica\Content\Feature;
  8. use Friendica\Content\Smilies;
  9. use Friendica\Content\Text\BBCode;
  10. use Friendica\Core\Addon;
  11. use Friendica\Core\Config;
  12. use Friendica\Core\L10n;
  13. use Friendica\Core\PConfig;
  14. use Friendica\Core\Protocol;
  15. use Friendica\Core\System;
  16. use Friendica\Database\DBA;
  17. use Friendica\Model\Contact;
  18. use Friendica\Model\Event;
  19. use Friendica\Model\Item;
  20. use Friendica\Render\FriendicaSmarty;
  21. use Friendica\Util\DateTimeFormat;
  22. use Friendica\Util\Map;
  23. use Friendica\Util\Proxy as ProxyUtils;
  24. use Friendica\Core\Logger;
  25. use Friendica\Core\Renderer;
  26. use Friendica\Model\FileTag;
  27. use Friendica\Util\XML;
  28. use Friendica\Content\Text\HTML;
  29. require_once "include/conversation.php";
  /**
   * @brief Generates a random string of hexadecimal characters
   *
   * The characters are the hex encoding of bytes obtained from random_bytes(),
   * cut to the requested length.
   *
   * @param int $size Number of hexadecimal characters wanted (default 64)
   * @return string Hex string of exactly $size characters, or '' when $size is not positive
   */
  function random_string($size = 64)
  {
      $size = (int) $size;

      // random_bytes() rejects a length below 1, so answer the trivial case directly.
      if ($size <= 0) {
          return '';
      }

      // Every byte yields two hex characters; round up for odd sizes and trim the surplus.
      $byte_size = intdiv($size + 1, 2);

      return substr(bin2hex(random_bytes($byte_size)), 0, $size);
  }
  /**
   * Primary input filter: swaps angle brackets for square brackets.
   *
   * Use this on any text input where angle characters are neither valid nor
   * permitted. The result may be filtered further by the caller if square
   * brackets are not acceptable either.
   *
   * An older revision additionally removed the bytes 0xBA, 0xBC and 0xBE (a
   * "high bit" workaround for an old browser XSS vector). That filter is no
   * longer applied because it interfered with legitimate unicode sequences.
   *
   * @param string $string Input string
   * @return string Filtered string
   */
  function notags($string) {
      $angle_chars = ["<", ">"];
      $safe_chars = ['[', ']'];

      return str_replace($angle_chars, $safe_chars, $string);
  }
  /**
   * Escape markup for safe display without removing it.
   *
   * Intended for "body" or "content" input where angle characters must be
   * kept. Entities that are already present are not encoded a second time.
   *
   * @param string $string
   * @return string HTML-escaped string
   */
  function escape_tags($string) {
      $flags = ENT_COMPAT;      // double quotes are converted, single quotes are left alone
      $charset = 'UTF-8';
      $double_encode = false;   // leave existing entities such as &amp; untouched

      return htmlspecialchars($string, $flags, $charset, $double_encode);
  }
  /**
   * Generate a string that is random, but usually pronounceable.
   *
   * Used to generate initial passwords, which is why the random choices are
   * drawn with random_int() rather than mt_rand().
   *
   * The word is built by alternating between a vowel table and a consonant
   * table, cut to $len characters, and regenerated when it would end in a
   * consonant cluster that is awkward at the end of a word.
   *
   * @param int $len Length of the word to generate
   * @return string Lowercase word of $len characters, or '' when $len <= 0
   */
  function autoname($len) {
      if ($len <= 0) {
          return '';
      }

      $vowels = ['a','a','ai','au','e','e','e','ee','ea','i','ie','o','ou','u'];
      // Occasionally allow 'y' to act as a vowel.
      if (random_int(0, 5) === 4) {
          $vowels[] = 'y';
      }

      $cons = [
          'b','bl','br',
          'c','ch','cl','cr',
          'd','dr',
          'f','fl','fr',
          'g','gh','gl','gr',
          'h',
          'j',
          'k','kh','kl','kr',
          'l',
          'm',
          'n',
          'p','ph','pl','pr',
          'qu',
          'r','rh',
          's','sc','sh','sm','sp','st',
          't','th','tr',
          'v',
          'w','wh',
          'x',
          'z','zh'
      ];

      // Clusters that are only used after the first syllable.
      $midcons = ['ck','ct','gn','ld','lf','lm','lt','mb','mm', 'mn','mp',
          'nd','ng','nk','nt','rn','rp','rt'];

      // Endings that trigger a regeneration of the whole word.
      $noend = ['bl', 'br', 'cl','cr','dr','fl','fr','gl','gr',
          'kh', 'kl','kr','mn','pl','pr','rh','tr','qu','wh','q'];

      // One start in three is a vowel; the first consonant never comes from $midcons.
      $use_vowels = (random_int(0, 2) === 0);
      $table = $use_vowels ? $vowels : $cons;
      $inner_cons = array_merge($cons, $midcons);

      $word = '';
      // Each pick adds at least one character, so $len picks always suffice.
      for ($x = 0; $x < $len; $x++) {
          $word .= $table[random_int(0, count($table) - 1)];

          // Alternate between vowels and consonants.
          $use_vowels = !$use_vowels;
          $table = $use_vowels ? $vowels : $inner_cons;
      }

      $word = substr($word, 0, $len);

      foreach ($noend as $noe) {
          $noelen = strlen($noe);
          // Only words strictly longer than the ending are rejected.
          if ((strlen($word) > $noelen) && (substr($word, -$noelen) == $noe)) {
              $word = autoname($len);
              break;
          }
      }

      return $word;
  }
  /**
   * Turn user/group ACLs stored as angle bracketed text into arrays
   *
   * Example: "<1><2><3>" becomes [1, 2, 3]. Elements whose integer value is
   * zero (including non-numeric ones) are dropped.
   *
   * @param string $s Stored ACL string
   * @return array List of integer ids
   */
  function expand_acl($s) {
      if (!strlen($s)) {
          return [];
      }

      // Drop the opening brackets and split on the closing ones.
      $pieces = explode('>', str_replace('<', '', $s));

      // intval() maps unusable pieces to 0, which array_filter() then removes.
      $ids = array_filter(array_map('intval', $pieces));

      return array_values($ids);
  }
  /**
   * Wrap an ACL element in angle brackets for storage
   *
   * The element is rewritten in place: a value with a non-zero integer
   * reading becomes "<id>", anything else becomes an empty string.
   *
   * Clearing the value (instead of unset()) matters: unset() on a by-reference
   * parameter only removes the local alias, so the invalid entry would stay in
   * the caller's array and end up in the stored ACL string.
   *
   * @param string $item ACL element, modified by reference
   */
  function sanitise_acl(&$item) {
      // intval() already skips leading whitespace and stops at the first
      // non-digit, so no extra trimming or tag filtering is needed.
      $id = is_scalar($item) ? intval($item) : 0;

      $item = $id ? '<' . $id . '>' : '';
  }
  /**
   * Convert an ACL array to a storable string
   *
   * ACL permissions normally arrive as an array; a comma-separated string is
   * accepted as well. Each entry is passed through sanitise_acl() and the
   * results are concatenated.
   *
   * @param string|array $p
   * @return string
   */
  function perms2str($p) {
      $entries = is_array($p) ? $p : explode(',', $p);

      // Nothing usable to work with: the result is the empty string.
      if (!is_array($entries)) {
          return '';
      }

      foreach ($entries as &$entry) {
          sanitise_acl($entry);
      }
      unset($entry); // drop the dangling reference left by the loop

      return implode('', $entries);
  }
  /**
   * Check whether a space separated attribute value contains a given token.
   *
   * For html/xml parsing: given an attribute foobar="class1 class2 class3",
   * find out whether it contains 'class3'. A plain substring search would
   * also match 'notclass3', so the value is split on spaces and the tokens
   * are compared one by one.
   *
   * Tokens are compared strictly as strings, so numeric-looking tokens such
   * as '1e1' and '10' are not treated as equal.
   *
   * @param string $attr attribute value
   * @param string $s string to search
   * @return boolean True if found, False otherwise
   */
  function attribute_contains($attr, $s) {
      $tokens = explode(' ', $attr);

      return in_array((string) $s, $tokens, true);
  }
  /**
   * Compare activity uri. Knows about activity namespace.
   *
   * Matches when both values are identical, or when $haystack is the last
   * path component of $needle and $needle contains NAMESPACE_ACTIVITY_SCHEMA.
   *
   * @param string $haystack
   * @param string $needle
   * @return boolean
   */
  function activity_match($haystack,$needle) {
      if ($haystack === $needle) {
          return true;
      }

      if (basename($needle) !== $haystack) {
          return false;
      }

      return (bool) strstr($needle, NAMESPACE_ACTIVITY_SCHEMA);
  }
  /**
   * @brief Pull out all #hashtags and @person tags from $string.
   *
   * We also get @person@domain.com - which would make the regex quite
   * complicated as tags can also end a sentence. So we run through the
   * results and strip the period from any tag which ends with one.
   *
   * Two passes are made over the text (after code blocks and bbcode tags
   * have been removed):
   *  1. "@first last" style mentions including one space,
   *  2. single word tags starting with '!', '#' or '@'.
   * A mention can therefore be reported by both passes.
   *
   * @param string $string Post content
   * @return array List of tag and person names, or empty array
   */
  function get_tags($string) {
      $ret = [];

      // Convert hashtag links to hashtags
      $string = preg_replace('/#\[url\=([^\[\]]*)\](.*?)\[\/url\]/ism', '#$2', $string);

      // ignore anything in a code block
      $string = preg_replace('/\[code\](.*?)\[\/code\]/sm', '', $string);

      // Force line feeds at bbtags
      $string = str_replace(['[', ']'], ["\n[", "]\n"], $string);

      // ignore anything in a bbtag
      $string = preg_replace('/\[(.*?)\]/sm', '', $string);

      // Match full names against @tags including the space between first and last
      // We will look these up afterward to see if they are full names or not recognisable.
      if (preg_match_all('/(@[^ \x0D\x0A,:?]+ [^ \x0D\x0A@,:?]+)([ \x0D\x0A@,:?]|$)/', $string, $matches)) {
          foreach ($matches[1] as $match) {
              if (strstr($match, ']')) {
                  // we might be inside a bbcode color tag - leave it alone
                  continue;
              }

              $ret[] = (substr($match, -1, 1) === '.') ? substr($match, 0, -1) : $match;
          }
      }

      // Otherwise pull out single word tags. These can be @nickname, @first_last
      // and #hash tags. The byte offset of every match is captured so that the
      // url fragment test below looks at the matched occurrence itself and not
      // at the first place where the same text happens to appear.
      if (preg_match_all('/([!#@][^\^ \x0D\x0A,;:?]+)([ \x0D\x0A,;:?]|$)/', $string, $matches, PREG_OFFSET_CAPTURE)) {
          foreach ($matches[1] as list($match, $offset)) {
              if (strstr($match, ']')) {
                  // we might be inside a bbcode color tag - leave it alone
                  continue;
              }

              if (substr($match, -1, 1) === '.') {
                  $match = substr($match, 0, -1);
              }

              // ignore strictly numeric tags like #1
              if ((strpos($match, '#') === 0) && ctype_digit(substr($match, 1))) {
                  continue;
              }

              // try not to catch url fragments: skip tags glued to a preceding
              // word or path character. NOTE(review): the range "A-z" also covers
              // [ \ ] ^ _ and `; it is kept as is to preserve existing behaviour.
              if ($offset > 0 && preg_match('/[a-zA-z0-9\/]/', $string[$offset - 1])) {
                  continue;
              }

              $ret[] = $match;
          }
      }

      return $ret;
  }
  /**
   * quick and dirty quoted_printable encoding
   *
   * The string is percent-encoded with rawurlencode() and every '%' is then
   * turned into '='.
   *
   * @param string $s
   * @return string
   */
  function qp($s) {
      $percent_encoded = rawurlencode($s);

      return strtr($percent_encoded, '%', '=');
  }
  /**
   * @brief Check for a valid email string
   *
   * The address must consist of dot separated runs of letters, digits, '_',
   * '-' and '+', followed by '@' and a host name with at least one dot.
   *
   * @param string $email_address
   * @return int|false Result of preg_match(): 1 on match, 0 otherwise, false on error
   */
  function valid_email($email_address)
  {
      $pattern = '/^[_a-zA-Z0-9\-\+]+(\.[_a-zA-Z0-9\-\+]+)*@[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)+$/';

      return preg_match($pattern, $email_address);
  }
  /**
   * Normalize url
   *
   * Replaces "https:" with "http:", removes a "www." that directly follows
   * "//" and strips trailing slashes.
   *
   * @param string $url
   * @return string
   */
  function normalise_link($url) {
      $without_tls = str_replace('https:', 'http:', $url);
      $without_www = str_replace('//www.', '//', $without_tls);

      return rtrim($without_www, '/');
  }
  /**
   * Compare two URLs to see if they are the same, ignoring slight but
   * hopefully insignificant differences: https versus http, a leading
   * "www." in the host name, trailing slashes and letter case.
   *
   * @param string $a first url
   * @param string $b second url
   * @return boolean True if the URLs match, otherwise False
   */
  function link_compare($a, $b) {
      $left = normalise_link($a);
      $right = normalise_link($b);

      return 0 === strcasecmp($left, $right);
  }
  /**
   * @brief Find any non-embedded images in private items and add redir links to them
   *
   * Looks for [img] tags whose URL contains "/photo/<hex id>" and, when the
   * item is a private DFRN item of the local user that was not written by the
   * user's own contact, rewrites the image URL into a "redir" URL. URLs that
   * already contain "/redir" are left alone.
   *
   * @param App $a
   * @param array &$item The field array of an item row; 'body' is rewritten in place
   */
  function redir_private_images($a, &$item)
  {
      $found = [];
      $pattern = '|\[img\](http[^\[]*?/photo/[a-fA-F0-9]+?(-[0-9]\.[\w]+?)?)\[\/img\]|';

      if (!preg_match_all($pattern, $item['body'], $found, PREG_SET_ORDER)) {
          return;
      }

      foreach ($found as $hit) {
          $photo_url = $hit[1];

          // Already routed through redir
          if (strpos($photo_url, '/redir') !== false) {
              continue;
          }

          $needs_redir = (local_user() == $item['uid'])
              && ($item['private'] == 1)
              && ($item['contact-id'] != $a->contact['id'])
              && ($item['network'] == Protocol::DFRN);

          if (!$needs_redir) {
              continue;
          }

          $img_url = 'redir?f=1&quiet=1&url=' . urlencode($photo_url) . '&conurl=' . urlencode($item['author-link']);
          $item['body'] = str_replace($hit[0], '[img]' . $img_url . '[/img]', $item['body']);
      }
  }
  /**
   * @brief Given a text string, convert from bbcode to html and add smilie icons.
   *
   * Smilie replacement is skipped when the text contains "[nosmile]"
   * (case-insensitive).
   *
   * @param string $text String with bbcode.
   * @return string Formatted HTML, trimmed.
   */
  function prepare_text($text) {
      $smilies_wanted = !stristr($text, '[nosmile]');

      $html = BBCode::convert($text);
      if ($smilies_wanted) {
          $html = Smilies::replace($html);
      }

      return trim($html);
  }
  /**
   * Return an array with details for the categories and folders of an item.
   *
   * Categories are the "<...>" entries of $item['file'], folders the "[...]"
   * entries. Folders are only collected when the local user owns the item.
   *
   * @param array $item Item row; reads the 'file', 'uid' and 'id' fields
   * @return array Two element list:
   *
   * [
   *      [ // categories array
   *          {
   *               'name': 'category name',
   *               'url': '#',
   *               'removeurl': 'url to remove this category ("" if not the owner)',
   *               'first': 'is the first in this array? true/false',
   *               'last': 'is the last in this array? true/false',
   *           } ,
   *           ....
   *       ],
   *       [ // folders array
   *           {
   *               'name': 'folder name',
   *               'url': '#',
   *               'removeurl': 'url to remove this folder',
   *               'first': 'is the first in this array? true/false',
   *               'last': 'is the last in this array? true/false',
   *           } ,
   *           ....
   *       ]
   *  ]
   */
  function get_cats_and_terms($item)
  {
      $categories = [];
      $folders = [];

      // Categories are stored as <name>
      $found = [];
      if (preg_match_all('/<(.*?)>/', $item['file'], $found, PREG_SET_ORDER)) {
          foreach ($found as $hit) {
              $categories[] = [
                  'name' => XML::escape(FileTag::decode($hit[1])),
                  'url' => "#",
                  'removeurl' => (local_user() == $item['uid'])
                      ? 'filerm/' . $item['id'] . '?f=&cat=' . XML::escape(FileTag::decode($hit[1]))
                      : "",
                  'first' => !count($categories),
                  'last' => false
              ];
          }
      }

      if (count($categories)) {
          $categories[count($categories) - 1]['last'] = true;
      }

      // Folders are stored as [name] and are only shown to the owner
      if (local_user() == $item['uid']) {
          $found = [];
          if (preg_match_all('/\[(.*?)\]/', $item['file'], $found, PREG_SET_ORDER)) {
              foreach ($found as $hit) {
                  $folders[] = [
                      'name' => XML::escape(FileTag::decode($hit[1])),
                      'url' => "#",
                      'removeurl' => (local_user() == $item['uid'])
                          ? 'filerm/' . $item['id'] . '?f=&term=' . XML::escape(FileTag::decode($hit[1]))
                          : "",
                      'first' => !count($folders),
                      'last' => false
                  ];
              }
          }
      }

      if (count($folders)) {
          $folders[count($folders) - 1]['last'] = true;
      }

      return [$categories, $folders];
  }
  /**
   * Convert a size with an optional K, M or G suffix into a number of bytes.
   *
   * The suffix is case-insensitive and uses powers of 1024. A value without
   * a recognised suffix is returned as its integer value, so the result is
   * always a number and never the raw input string.
   *
   * @param string $size_str Size such as "512", "64K", "8M" or "2G"
   * @return number Size in bytes
   */
  function return_bytes($size_str) {
      $multipliers = [
          'K' => 1024,
          'M' => 1048576,
          'G' => 1073741824,
      ];

      $suffix = strtoupper((string) substr($size_str, -1));
      $factor = $multipliers[$suffix] ?? 1;

      // The integer cast reads the leading digits and ignores the suffix.
      return (int) $size_str * $factor;
  }
  /**
   * Encode a string with the URL-safe base64 alphabet ('-' and '_' instead of
   * '+' and '/').
   *
   * @param string $s Data to encode
   * @param boolean $strip_padding Remove the '=' padding characters when true
   * @return string
   */
  function base64url_encode($s, $strip_padding = false) {
      $encoded = strtr(base64_encode($s), '+/', '-_');

      return $strip_padding ? str_replace('=', '', $encoded) : $encoded;
  }
  /**
   * Decode a string written in the URL-safe base64 alphabet.
   *
   * Array input is illegal: it is logged together with a backtrace and handed
   * back unchanged.
   *
   * Input without '=' padding (as produced by newer salmon revisions) needs
   * no special treatment here, base64_decode() copes with it.
   *
   * @param string $s
   * @return string
   */
  function base64url_decode($s) {
      if (is_array($s)) {
          Logger::log('base64url_decode: illegal input: ' . print_r(debug_backtrace(), true));
          return $s;
      }

      // Map the URL-safe characters back to the standard alphabet first.
      $standard = strtr($s, '-_', '+/');

      return base64_decode($standard);
  }
  /**
   * Rewrite generic [video] bbcode tags into provider specific ones.
   *
   * A [video] element whose content mentions "youtube" or "youtu.be" becomes
   * a [youtube] element, one mentioning "vimeo" becomes a [vimeo] element.
   * Everything else is left as it is.
   *
   * @param string $s Text with bbcode
   * @return string
   */
  function bb_translate_video($s) {
      $found = null;
      if (!preg_match_all("/\[video\](.*?)\[\/video\]/ism", $s, $found, PREG_SET_ORDER)) {
          return $s;
      }

      foreach ($found as $hit) {
          list($element, $target) = $hit;

          if (stristr($target, 'youtube') || stristr($target, 'youtu.be')) {
              $replacement = '[youtube]' . $target . '[/youtube]';
          } elseif (stristr($target, 'vimeo')) {
              $replacement = '[vimeo]' . $target . '[/vimeo]';
          } else {
              continue;
          }

          $s = str_replace($element, $replacement, $s);
      }

      return $s;
  }
  /**
   * Normalise an OpenID identifier: remove every "http://" and "https://"
   * and trim slashes from both ends.
   *
   * @param string $s
   * @return string
   */
  function normalise_openid($s) {
      $without_scheme = str_replace(['http://', 'https://'], '', $s);

      return trim($without_scheme, '/');
  }
  /**
   * Turn linked tags back into their plain text form.
   *
   * "#[url=...]label[/url]" becomes "#label". For '!' and '@' tags the
   * contact is looked up by the linked URL and its address is used as text;
   * when no address is known the URL itself is used.
   *
   * @param string $s Text with bbcode
   * @return string
   */
  function undo_post_tagging($s) {
      $found = null;
      if (!preg_match_all('/([!#@])\[url=(.*?)\](.*?)\[\/url\]/ism', $s, $found, PREG_SET_ORDER)) {
          return $s;
      }

      foreach ($found as $hit) {
          list($element, $sigil, $url, $label) = $hit;

          // Mentions are written with the contact address instead of the link text
          if ($sigil === '!' || $sigil === '@') {
              $contact = Contact::getDetailsByURL($url);
              $label = empty($contact['addr']) ? $url : $contact['addr'];
          }

          $s = str_replace($element, $sigil . $label, $s);
      }

      return $s;
  }
  /**
   * Double every percent sign so the string can be used inside a sprintf()
   * format without being read as a conversion specification.
   *
   * @param string $s
   * @return string
   */
  function protect_sprintf($s) {
      $literal_percent = '%%';

      return str_replace('%', $literal_percent, $s);
  }
  /**
   * Check whether a string looks like a "YYYY-MM" date argument.
   *
   * The year must be later than 1900 and not later than next year, the dash
   * must sit right after the four digit year, and the month must be 1..12.
   *
   * @TODO Rewrite this
   *
   * @param string $s
   * @return boolean
   */
  function is_a_date_arg($s) {
      $year = intval($s);
      if ($year <= 1900) {
          return false;
      }

      $latest_year = date('Y') + 1;
      if ($year > $latest_year || strpos($s, '-') != 4) {
          return false;
      }

      $month = intval(substr($s, 5));

      return ($month > 0 && $month <= 12);
  }
  /**
   * Remove indentation from a text
   *
   * When $count is not given, the indentation width is taken from the first
   * non-empty line. That many indentation characters are then removed from
   * the start of every line; lines indented less than that are left as is.
   * Text without any non-empty line is returned unchanged.
   *
   * @param string   $text  Text to process, lines separated by "\n"
   * @param string   $chr   Regular expression fragment matching one indentation character
   * @param int|null $count Number of indentation characters to remove, null to detect it
   * @return string
   */
  function deindent($text, $chr = "[\t ]", $count = NULL) {
      $lines = explode("\n", $text);

      if (is_null($count)) {
          // Stays 0 when every line is empty; there is nothing to measure then.
          $count = 0;

          foreach ($lines as $line) {
              if (strlen($line) == 0) {
                  continue;
              }

              $m = [];
              if (preg_match("|^" . $chr . "*|", $line, $m)) {
                  $count = strlen($m[0]);
              }
              break; // only the first non-empty line decides
          }
      }

      $pattern = "|^" . $chr . "{" . $count . "}|";
      foreach ($lines as $k => $line) {
          $lines[$k] = preg_replace($pattern, "", $line);
      }

      return implode("\n", $lines);
  }
  /**
   * Format a byte count as a human readable string such as "1.5 KB".
   *
   * Units go up in steps of 1024 from B to TB. Negative input is treated as
   * zero and values below one byte are reported in bytes.
   *
   * @param int|float $bytes     Number of bytes
   * @param int       $precision Number of decimal places to round to
   * @return string Rounded value, a space and the unit
   */
  function formatBytes($bytes, $precision = 2) {
      $units = ['B', 'KB', 'MB', 'GB', 'TB'];

      $bytes = max($bytes, 0);

      // The logarithm is negative below one byte (and undefined at zero), so
      // the exponent is only computed from 1 upwards.
      $pow = ($bytes >= 1) ? (int) floor(log($bytes) / log(1024)) : 0;
      $pow = min($pow, count($units) - 1);

      $bytes /= pow(1024, $pow);

      return round($bytes, $precision) . ' ' . $units[$pow];
  }
  /**
   * @brief translate and format the networkname of a contact
   *
   * Without a URL the plain network name is returned; with a URL the name is
   * wrapped in a link to it. The URL is HTML-escaped before it is placed in
   * the href attribute.
   *
   * @param string $network
   *     Networkname of the contact (e.g. dfrn, rss and so on)
   * @param string $url
   *     The contact url; leave empty (or at its default) for no link
   * @return string Formatted network name, '' when no network was given
   */
  function format_network_name($network, $url = 0) {
      if ($network == "") {
          return '';
      }

      // empty() also covers the integer default 0, which a loose comparison
      // with "" no longer treats as empty on PHP 8.
      if (empty($url)) {
          return ContactSelector::networkToName($network);
      }

      // Existing entities in the URL are not encoded twice.
      $href = htmlspecialchars($url, ENT_QUOTES, 'UTF-8', false);

      return '<a href="' . $href . '">' . ContactSelector::networkToName($network, $url) . "</a>";
  }