Friendica Communications Platform (please note that this is a clone of the repository at github, issues are handled there) https://friendi.ca
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

435 lines
13KB

  1. <?php
  2. /**
  3. * @file include/plaintext.php
  4. */
  5. use Friendica\App;
  6. use Friendica\ParseUrl;
  7. require_once("include/Photo.php");
  8. require_once("include/bbcode.php");
  9. require_once("include/html2plain.php");
  10. require_once("include/network.php");
  /**
   * @brief Fetches attachment data that was generated the old way
   *
   * "The old way" means that the attachment is wrapped in a
   * [class=type-link], [class=type-video] or [class=type-photo] element.
   * Every matching element is evaluated in order; values found in a later
   * element overwrite the ones of an earlier element.
   *
   * @param string $body Message body
   * @return array Empty when no attachment element is found, otherwise a subset of
   * 'type' -> Message type ("link", "video", "photo")
   * 'text' -> Text before the shared message
   * 'after' -> Text after the shared message (only set when text precedes the attachment)
   * 'image' -> Picture of the attachment that is at least 500 pixels wide and not higher than wide
   * 'preview' -> Picture of the attachment that doesn't fulfil the 'image' conditions
   * 'url' -> Url to the attached message
   * 'title' -> Title of the attachment
   * 'description' -> Description of the attachment
   */
  function get_old_attachment_data($body) {
      $post = array();

      // Simplify image codes: drop the size information of [img=WxH] elements
      $body = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $body);

      if (!preg_match_all("(\[class=(.*?)\](.*?)\[\/class\])ism", $body, $attached, PREG_SET_ORDER)) {
          return $post;
      }

      // Character class content: everything except square brackets
      $URLSearchString = "^\[\]";

      foreach ($attached as $data) {
          if (!in_array($data[1], array("type-link", "type-video", "type-photo"), true)) {
              continue;
          }

          // Strip the leading "type-"
          $post["type"] = substr($data[1], 5);

          $pos = strpos($body, $data[0]);
          if ($pos > 0) {
              $post["text"] = trim(substr($body, 0, $pos));
              $post["after"] = trim(substr($body, $pos + strlen($data[0])));
          } else {
              // The attachment starts the body, so all remaining content counts as text
              $post["text"] = trim(str_replace($data[0], "", $body));
          }

          $attacheddata = $data[2];

          if (preg_match("/\[img\]([$URLSearchString]*)\[\/img\]/ism", $attacheddata, $matches)) {
              $picturedata = get_photo_info($matches[1]);

              // NOTE(review): get_photo_info() is assumed to deliver width and height at index 0 and 1.
              // A result without these values is treated like a small picture.
              $is_large = is_array($picturedata) && isset($picturedata[0], $picturedata[1])
                  && ($picturedata[0] >= 500) && ($picturedata[0] >= $picturedata[1]);

              if ($is_large) {
                  $post["image"] = $matches[1];
              } else {
                  $post["preview"] = $matches[1];
              }
          }

          if (preg_match("/\[bookmark\=([$URLSearchString]*)\](.*?)\[\/bookmark\]/ism", $attacheddata, $matches)) {
              $post["url"] = $matches[1];
              $post["title"] = $matches[2];
          }

          // Links and videos without a bookmark fall back to the first [url=...] element.
          // The key is checked with isset() since it only exists when a bookmark was found.
          $has_url = isset($post["url"]) && ($post["url"] != "");
          if (!$has_url && in_array($post["type"], array("link", "video"), true)
              && preg_match("/\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", $attacheddata, $matches)) {
              $post["url"] = $matches[1];
          }

          // Search for description
          if (preg_match("/\[quote\](.*?)\[\/quote\]/ism", $attacheddata, $matches)) {
              $post["description"] = $matches[1];
          }
      }

      return $post;
  }
  /**
   * @brief Fetches attachment data that was generated with the "attachment" element
   *
   * When the body contains no [attachment] element, the old style detection
   * (get_old_attachment_data) is used instead.
   *
   * Attribute values may be enclosed in single or double quotes. When an
   * attribute is present in both notations, the double quoted value is used.
   *
   * @param string $body Message body
   * @return array Empty when the attachment has no or an unknown type, otherwise a subset of
   * 'text' -> Text before the shared message
   * 'type' -> Message type ("link", "audio", "photo", "video")
   * 'url' -> Url to the attached message
   * 'title' -> Title of the attachment
   * 'image' -> Image of the attachment
   * 'preview' -> Preview image of the attachment
   * 'description' -> Content of the attachment element
   * 'after' -> Text after the shared message
   */
  function get_attachment_data($body) {
      if (!preg_match("/(.*)\[attachment(.*?)\](.*?)\[\/attachment\](.*)/ism", $body, $match)) {
          return get_old_attachment_data($body);
      }

      $attributes = $match[2];

      // Reads a single attribute of the attachment element, "" when it is missing or empty.
      // The match result is only accessed when the expression really matched.
      $attribute = function ($name) use ($attributes) {
          $value = "";
          foreach (array("'", '"') as $quote) {
              $pattern = "/" . $name . "=" . $quote . "(.*?)" . $quote . "/ism";
              if (preg_match($pattern, $attributes, $matches) && ($matches[1] != "")) {
                  $value = $matches[1];
              }
          }
          return $value;
      };

      $data = array();
      $data["text"] = trim($match[1]);

      // A missing type is rejected as well, since "" is not part of the list
      $type = strtolower($attribute("type"));
      if (!in_array($type, array("link", "audio", "photo", "video"), true)) {
          return array();
      }
      $data["type"] = $type;

      $url = $attribute("url");
      if ($url != "") {
          $data["url"] = html_entity_decode($url, ENT_QUOTES, 'UTF-8');
      }

      $title = $attribute("title");
      if ($title != "") {
          $title = bbcode(html_entity_decode($title, ENT_QUOTES, 'UTF-8'), false, false, true);
          $title = html_entity_decode($title, ENT_QUOTES, 'UTF-8');
          // Square brackets are encoded so that the title can't be read as BBCode
          $title = str_replace(array("[", "]"), array("&#91;", "&#93;"), $title);
          $data["title"] = $title;
      }

      $image = $attribute("image");
      if ($image != "") {
          $data["image"] = html_entity_decode($image, ENT_QUOTES, 'UTF-8');
      }

      $preview = $attribute("preview");
      if ($preview != "") {
          $data["preview"] = html_entity_decode($preview, ENT_QUOTES, 'UTF-8');
      }

      $data["description"] = trim($match[3]);
      $data["after"] = trim($match[4]);

      return $data;
  }
  /**
   * @brief Collects the attachment data of a message body
   *
   * At first the body is searched for an attachment (get_attachment_data).
   * When none is found, the body is searched for pictures and links:
   * - A single linked picture results in a "photo" post when the link target
   *   is recognized as a photo or as an image file.
   * - A single unlinked picture results in a "photo" post.
   * - Several pictures result in a "link" post with the first picture as image.
   * - Exactly one [url]...[/url] element sets the type to "text" and the url to that link.
   * - Everything else is a "text" post.
   *
   * @param string $body Message body
   * @param array $item Optional item array. Its "plink" value is used as url
   *                    for posts with several pictures. Without it that url is null.
   * @return array A subset of
   * 'text' -> Text of the post
   * 'type' -> "link", "video", "photo", "audio" or "text"
   * 'title' -> Title of the attachment
   * 'url' -> Url of the attachment
   * 'image' -> Image of the attachment
   * 'preview' -> Preview image
   * 'description' -> Description of the attachment
   * 'after' -> Text after the attachment
   */
  function get_attached_data($body, $item = array()) {
      $post = get_attachment_data($body);

      if (isset($post["type"])) {
          // An attachment was found. For videos the image is taken from the site info of the video url.
          if (isset($post["url"]) && ($post["type"] == "video")) {
              $data = ParseUrl::getSiteinfoCached($post["url"], true);
              if (isset($data["images"][0])) {
                  $post["image"] = $data["images"][0]["src"];
              }
          }
          return $post;
      }

      // This function has no access to the item unless the caller provides it.
      // The url key is set in any case (possibly to null) to stay compatible with callers without item.
      $plink = isset($item["plink"]) ? $item["plink"] : null;

      // if nothing is found, it maybe having an image.
      $URLSearchString = "^\[\]";

      if (preg_match_all("(\[url=([$URLSearchString]*)\]\s*\[img\]([$URLSearchString]*)\[\/img\]\s*\[\/url\])ism", $body, $pictures, PREG_SET_ORDER)) {
          if (count($pictures) == 1) {
              $target = $pictures[0][1];

              // Checking, if the link goes to a picture
              $data = ParseUrl::getSiteinfoCached($target, true);
              $is_photo = isset($data["type"]) && ($data["type"] == "photo");

              // Workaround:
              // Sometimes photo posts to the own album are not detected at the start.
              // So the data is fetched again without the cache for these cases.
              if (!$is_photo && strstr($target, "/photos/")) {
                  $data = ParseUrl::getSiteinfo($target, true);
                  $is_photo = isset($data["type"]) && ($data["type"] == "photo");
              }

              if ($is_photo) {
                  $post["type"] = "photo";
                  if (isset($data["images"][0])) {
                      $post["image"] = $data["images"][0]["src"];
                      $post["url"] = $data["url"];
                  } else {
                      $post["image"] = $data["url"];
                  }
                  $post["preview"] = $pictures[0][2];
                  $post["text"] = str_replace($pictures[0][0], "", $body);
              } else {
                  // The site info doesn't report a photo, so the link target itself is checked
                  $imgdata = get_photo_info($target);
                  if (is_array($imgdata) && isset($imgdata["mime"]) && (substr($imgdata["mime"], 0, 6) == "image/")) {
                      $post["type"] = "photo";
                      $post["image"] = $target;
                      $post["preview"] = $pictures[0][2];
                      $post["text"] = str_replace($pictures[0][0], "", $body);
                  }
              }
          } elseif (count($pictures) > 1) {
              $post["type"] = "link";
              $post["url"] = $plink;
              $post["image"] = $pictures[0][2];
              $post["text"] = $body;
          }
      } elseif (preg_match_all("(\[img\]([$URLSearchString]*)\[\/img\])ism", $body, $pictures, PREG_SET_ORDER)) {
          if (count($pictures) == 1) {
              $post["type"] = "photo";
              $post["image"] = $pictures[0][1];
              $post["text"] = str_replace($pictures[0][0], "", $body);
          } elseif (count($pictures) > 1) {
              $post["type"] = "link";
              $post["url"] = $plink;
              $post["image"] = $pictures[0][1];
              $post["text"] = $body;
          }
      }

      // Exactly one plain link: it becomes the url of the post.
      // This replaces a type that was detected by the picture checks above.
      if (preg_match_all("(\[url\]([$URLSearchString]*)\[\/url\])ism", $body, $links, PREG_SET_ORDER)) {
          if (count($links) == 1) {
              $post["type"] = "text";
              $post["url"] = $links[0][1];
              $post["text"] = $body;
          }
      }

      if (!isset($post["type"])) {
          $post["type"] = "text";
          $post["text"] = trim($body);
      }

      return $post;
  }
  /**
   * @brief Shortens a message line by line so that it fits into the given limit
   *
   * Lines are appended as long as the result stays within the limit. The
   * first line that doesn't fit ends the message. It is only included in a
   * truncated form (ending with an ellipsis) when the message would be empty
   * otherwise, or when it is the second line of a message that starts with
   * the recycle symbol followed by a space.
   *
   * @param string $msg The message, lines are separated by "\n"
   * @param int $limit Maximum number of UTF-8 characters
   * @param bool $twitter Currently unused
   *
   * @return string The shortened message
   */
  function shortenmsg($msg, $limit, $twitter = false) {
      /// @TODO
      /// For Twitter URLs aren't shortened, but they have to be calculated as if.
      $lines = explode("\n", $msg);
      $msg = "";

      $recycle = html_entity_decode("&#x2672; ", ENT_QUOTES, 'UTF-8');
      $ellipsis = html_entity_decode("&#x2026;", ENT_QUOTES, 'UTF-8');

      foreach ($lines as $row => $line) {
          $candidate = trim($msg."\n".$line);

          if (iconv_strlen($candidate, "UTF-8") <= $limit) {
              $msg = $candidate;
              continue;
          }

          // Is the new message empty by now or is it a reshared message?
          // The recycle symbol plus the space are four bytes, so the byte wise substr() is sufficient here.
          if (($msg == "") || (($row == 1) && (substr($msg, 0, 4) == $recycle))) {
              $msg = iconv_substr(iconv_substr($candidate, 0, $limit, "UTF-8"), 0, -3, "UTF-8").$ellipsis;
          }

          // The first line that doesn't fit ends the message.
          // Otherwise a short following line could be appended behind the ellipsis.
          break;
      }

      return $msg;
  }
  /**
   * @brief Convert a message into plaintext for connectors to other networks
   *
   * @param App $a The application class (not used by this function)
   * @param array $b The message array that is about to be posted.
   *                 The keys "body", "title", "plink" and "uid" are read.
   * @param int $limit The maximum number of characters when posting to that network (0 = no limit)
   * @param bool $includedlinks Has an attached link to be included into the message?
   * @param int $htmlmode This triggers the behaviour of the bbcode conversion
   * @param string $target_network Name of the network where the post should go to.
   *
   * @return array The attachment data of the post (see get_attached_data).
   *               The key "text" contains the converted and possibly shortened message.
   */
  function plaintext(App $a, $b, $limit = 0, $includedlinks = false, $htmlmode = 2, $target_network = "") {
      // Character class content: everything except square brackets
      $URLSearchString = "^\[\]";

      // Linked hashtags and mentions are reduced to the prefix character and the link text
      $body = preg_replace("/([#@])\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", '$1$3', $b["body"]);

      // Add an URL element if the text contains a raw link
      $body = preg_replace("/([^\]\='".'"'."]|^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism", '$1[url]$2[/url]', $body);

      // Remove the abstract
      $body = remove_abstract($body);

      // At first look at data that is attached via "type-..." stuff
      // This will hopefully replaced with a dedicated bbcode later
      $post = get_attached_data($body);

      // The title is placed in front of the text
      $title = isset($b["title"]) ? $b["title"] : "";
      if ($title != "") {
          if (isset($post["text"]) && ($post["text"] != "")) {
              $post["text"] = trim($title."\n\n".$post["text"]);
          } else {
              $post["text"] = trim($title);
          }
      }

      $abstract = "";

      if ($target_network != "") {
          // Fetch the abstract from the given target network
          $default_abstract = fetch_abstract($b["body"]);
          $abstract = fetch_abstract($b["body"], $target_network);

          // If we post to a network with no limit we only fetch
          // an abstract exactly for this network
          if (($limit == 0) && ($abstract == $default_abstract)) {
              $abstract = "";
          }
      } else {
          // Try to guess the correct target network
          switch ($htmlmode) {
              case 8:
                  $abstract = fetch_abstract($b["body"], NETWORK_TWITTER);
                  break;
              case 7:
                  $abstract = fetch_abstract($b["body"], NETWORK_STATUSNET);
                  break;
              case 6:
                  $abstract = fetch_abstract($b["body"], NETWORK_APPNET);
                  break;
              default:
                  // We don't know the exact target.
                  // We fetch an abstract since there is a posting limit.
                  if ($limit > 0) {
                      $abstract = fetch_abstract($b["body"]);
                  }
          }
      }

      if ($abstract != "") {
          $post["text"] = $abstract;

          if (isset($post["type"]) && ($post["type"] == "text")) {
              $post["type"] = "link";
              $post["url"] = $b["plink"];
          }
      }

      // The type isn't changed anymore from here on.
      // "text" and "after" are not part of every attachment result, so they are read with a default.
      $type = isset($post["type"]) ? $post["type"] : "";
      $text = isset($post["text"]) ? $post["text"] : "";
      $after = isset($post["after"]) ? $post["after"] : "";

      $html = bbcode($text.$after, false, false, $htmlmode);
      $msg = html2plain($html, 0, true);
      $msg = trim(html_entity_decode($msg, ENT_QUOTES, 'UTF-8'));

      $link = "";
      if ($includedlinks) {
          // Photos are linked by their image, all other types by their url (when present)
          if (in_array($type, array("link", "text", "video"), true)) {
              $link = isset($post["url"]) ? (string)$post["url"] : "";
          } elseif ($type == "photo") {
              $link = isset($post["image"]) ? (string)$post["image"] : "";
          }

          if (($msg == "") && isset($post["title"])) {
              $msg = trim($post["title"]);
          }

          if (($msg == "") && isset($post["description"])) {
              $msg = trim($post["description"]);
          }

          // If the link is already contained in the post, then it needn't to be added again
          // But: if the link is beyond the limit, then it has to be added.
          $bytepos = ($link != "") ? strpos($msg, $link) : false;
          if ($bytepos !== false) {
              // The limit counts characters, so position and lengths are counted in characters as well.
              // The byte position is the fallback when the character position can't be determined.
              $pos = iconv_strpos($msg, $link, 0, "UTF-8");
              if ($pos === false) {
                  $pos = $bytepos;
              }
              $link_length = iconv_strlen($link, "UTF-8");
              $msg_length = iconv_strlen($msg, "UTF-8");

              // Will the text be shortened in the link?
              // Or is the link the last item in the post?
              if (($limit > 0) && ($pos < $limit) && (($pos + 23 > $limit) || ($pos + $link_length == $msg_length))) {
                  $msg = trim(str_replace($link, "", $msg));
              } elseif (($limit == 0) || ($pos < $limit)) {
                  // The limit has to be increased since it will be shortened - but not now
                  // Only do it with Twitter (htmlmode = 8)
                  if (($limit > 0) && ($link_length > 23) && ($htmlmode == 8)) {
                      $limit = $limit - 23 + $link_length;
                  }

                  $link = "";

                  if ($type == "text") {
                      unset($post["url"]);
                  }
              }
          }
      }

      if ($limit > 0) {
          // Reduce multiple spaces
          // When posted to a network with limited space, we try to gain space where possible
          $msg = preg_replace("/ {2,}/", " ", $msg);

          // Twitter is using its own limiter, so we always assume that shortened links will have this length
          if (iconv_strlen($link, "UTF-8") > 0) {
              $limit = $limit - 23;
          }

          if (iconv_strlen($msg, "UTF-8") > $limit) {
              // The message will be shortened, so the url has to point to the complete post in most cases
              if (($type == "text") && isset($post["url"])) {
                  $post["url"] = $b["plink"];
              } elseif (!isset($post["url"])) {
                  // Space for the additional link has to be reserved
                  $limit = $limit - 23;
                  $post["url"] = $b["plink"];
              } elseif (strpos($b["body"], "[share") !== false) {
                  $post["url"] = $b["plink"];
              } elseif (get_pconfig($b["uid"], "system", "no_intelligent_shortening")) {
                  $post["url"] = $b["plink"];
              }

              $msg = shortenmsg($msg, $limit);
          }
      }

      $post["text"] = trim($msg);

      return $post;
  }