diff --git a/mod/babel.php b/mod/babel.php index cafd06556..3352366bd 100644 --- a/mod/babel.php +++ b/mod/babel.php @@ -23,6 +23,12 @@ function babel_content() 'content' => visible_lf($bbcode) ]; + $plain = Text\BBCode::toPlaintext($bbcode, false); + $results[] = [ + 'title' => L10n::t('BBCode::toPlaintext'), + 'content' => visible_lf($plain) + ]; + $html = Text\BBCode::convert($bbcode); $results[] = [ 'title' => L10n::t("BBCode::convert \x28raw HTML\x29"), diff --git a/src/Content/Text/BBCode.php b/src/Content/Text/BBCode.php index 375559346..6c441bac6 100644 --- a/src/Content/Text/BBCode.php +++ b/src/Content/Text/BBCode.php @@ -343,159 +343,20 @@ class BBCode extends BaseObject } /** - * @brief Convert a message into plaintext for connectors to other networks + * @brief Converts a BBCode text into plaintext * - * @param array $b The message array that is about to be posted - * @param int $limit The maximum number of characters when posting to that network - * @param bool $includedlinks Has an attached link to be included into the message? - * @param int $htmlmode This triggers the behaviour of the bbcode conversion - * @param string $target_network Name of the network where the post should go to. + * @param bool $keep_urls Whether to keep URLs in the resulting plaintext * - * @return string The converted message + * @return string */ - public static function toPlaintext($b, $limit = 0, $includedlinks = false, $htmlmode = 2, $target_network = "") + public static function toPlaintext($text, $keep_urls = true) { - // Remove the hash tags - $URLSearchString = "^\[\]"; - $body = preg_replace("/([#@])\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", '$1$3', $b["body"]); - - // Add an URL element if the text contains a raw link - $body = preg_replace("/([^\]\='".'"'."]|^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism", '$1[url]$2[/url]', $body); - - // Remove the abstract - $body = self::stripAbstract($body); - - // At first look at data that is attached via "type-..." stuff - // This will hopefully replaced with a dedicated bbcode later - //$post = self::getAttachedData($b["body"]); - $post = self::getAttachedData($body, $b); - - if (($b["title"] != "") && ($post["text"] != "")) { - $post["text"] = trim($b["title"]."\n\n".$post["text"]); - } elseif ($b["title"] != "") { - $post["text"] = trim($b["title"]); + $naked_text = preg_replace('/\[(.+?)\]/','', $text); + if (!$keep_urls) { + $naked_text = preg_replace('#https?\://[^\s<]+[^\s\.\)]#i', '', $naked_text); } - $abstract = ""; - - // Fetch the abstract from the given target network - if ($target_network != "") { - $default_abstract = self::getAbstract($b["body"]); - $abstract = self::getAbstract($b["body"], $target_network); - - // If we post to a network with no limit we only fetch - // an abstract exactly for this network - if (($limit == 0) && ($abstract == $default_abstract)) { - $abstract = ""; - } - } else {// Try to guess the correct target network - switch ($htmlmode) { - case 8: - $abstract = self::getAbstract($b["body"], NETWORK_TWITTER); - break; - case 7: - $abstract = self::getAbstract($b["body"], NETWORK_STATUSNET); - break; - case 6: - $abstract = self::getAbstract($b["body"], NETWORK_APPNET); - break; - default: // We don't know the exact target. - // We fetch an abstract since there is a posting limit. - if ($limit > 0) { - $abstract = self::getAbstract($b["body"]); - } - } - } - - if ($abstract != "") { - $post["text"] = $abstract; - - if ($post["type"] == "text") { - $post["type"] = "link"; - $post["url"] = $b["plink"]; - } - } - - $html = self::convert($post["text"].$post["after"], false, $htmlmode); - $msg = HTML::toPlaintext($html, 0, true); - $msg = trim(html_entity_decode($msg, ENT_QUOTES, 'UTF-8')); - - $link = ""; - if ($includedlinks) { - if ($post["type"] == "link") { - $link = $post["url"]; - } elseif ($post["type"] == "text") { - $link = $post["url"]; - } elseif ($post["type"] == "video") { - $link = $post["url"]; - } elseif ($post["type"] == "photo") { - $link = $post["image"]; - } - - if (($msg == "") && isset($post["title"])) { - $msg = trim($post["title"]); - } - - if (($msg == "") && isset($post["description"])) { - $msg = trim($post["description"]); - } - - // If the link is already contained in the post, then it neeedn't to be added again - // But: if the link is beyond the limit, then it has to be added. - if (($link != "") && strstr($msg, $link)) { - $pos = strpos($msg, $link); - - // Will the text be shortened in the link? - // Or is the link the last item in the post? - if (($limit > 0) && ($pos < $limit) && (($pos + 23 > $limit) || ($pos + strlen($link) == strlen($msg)))) { - $msg = trim(str_replace($link, "", $msg)); - } elseif (($limit == 0) || ($pos < $limit)) { - // The limit has to be increased since it will be shortened - but not now - // Only do it with Twitter (htmlmode = 8) - if (($limit > 0) && (strlen($link) > 23) && ($htmlmode == 8)) { - $limit = $limit - 23 + strlen($link); - } - - $link = ""; - - if ($post["type"] == "text") { - unset($post["url"]); - } - } - } - } - - if ($limit > 0) { - // Reduce multiple spaces - // When posted to a network with limited space, we try to gain space where possible - while (strpos($msg, " ") !== false) { - $msg = str_replace(" ", " ", $msg); - } - - // Twitter is using its own limiter, so we always assume that shortened links will have this length - if (iconv_strlen($link, "UTF-8") > 0) { - $limit = $limit - 23; - } - - if (iconv_strlen($msg, "UTF-8") > $limit) { - if (($post["type"] == "text") && isset($post["url"])) { - $post["url"] = $b["plink"]; - } elseif (!isset($post["url"])) { - $limit = $limit - 23; - $post["url"] = $b["plink"]; - // Which purpose has this line? It is now uncommented, but left as a reminder - //} elseif (strpos($b["body"], "[share") !== false) { - // $post["url"] = $b["plink"]; - } elseif (PConfig::get($b["uid"], "system", "no_intelligent_shortening")) { - $post["url"] = $b["plink"]; - } - $msg = Plaintext::shorten($msg, $limit); - } - } - - $post["text"] = trim($msg); - - return($post); + return $naked_text; } public static function scaleExternalImages($srctext, $include_link = true, $scale_replace = false) @@ -1947,7 +1808,7 @@ class BBCode extends BaseObject * @param string $addon The addon for which the abstract is meant for * @return string The abstract */ - private static function getAbstract($text, $addon = "") + public static function getAbstract($text, $addon = "") { $abstract = ""; $abstracts = []; diff --git a/src/Model/Item.php b/src/Model/Item.php index bdb85af44..499ec25c1 100644 --- a/src/Model/Item.php +++ b/src/Model/Item.php @@ -7,6 +7,7 @@ namespace Friendica\Model; use Friendica\BaseObject; +use Friendica\Content\Text; use Friendica\Core\Addon; use Friendica\Core\Config; use Friendica\Core\L10n; @@ -977,35 +978,35 @@ class Item extends BaseObject * if possible and not already present. * Expects "body" element to exist in $arr. */ - private static function addLanguageInPostopts(&$arr) + private static function addLanguageInPostopts(&$item) { - if (x($arr, 'postopts')) { - if (strstr($arr['postopts'], 'lang=')) { + if (!empty($item['postopts'])) { + if (strstr($item['postopts'], 'lang=')) { // do not override return; } - $postopts = $arr['postopts']; + $postopts = $item['postopts']; } else { $postopts = ""; } - $naked_body = preg_replace('/\[(.+?)\]/','', $arr['body']); - $l = new Text_LanguageDetect(); - $lng = $l->detect($naked_body, 3); + $naked_body = Text\BBCode::toPlaintext($item['body'], false); - if (sizeof($lng) > 0) { - if ($postopts != "") { + $languages = (new Text_LanguageDetect())->detect($naked_body, 3); + + if (sizeof($languages) > 0) { + if ($postopts != '') { $postopts .= '&'; // arbitrary separator, to be reviewed } $postopts .= 'lang='; $sep = ""; - foreach ($lng as $language => $score) { + foreach ($languages as $language => $score) { $postopts .= $sep . $language . ";" . $score; $sep = ':'; } - $arr['postopts'] = $postopts; + $item['postopts'] = $postopts; } } diff --git a/src/Model/ItemContent.php b/src/Model/ItemContent.php new file mode 100644 index 000000000..21dbd34d0 --- /dev/null +++ b/src/Model/ItemContent.php @@ -0,0 +1,176 @@ + 0) { + $abstract = Text\BBCode::getAbstract($item['body']); + } + } + } + + if ($abstract != '') { + $post['text'] = $abstract; + + if ($post['type'] == 'text') { + $post['type'] = 'link'; + $post['url'] = $item['plink']; + } + } + + $html = Text\BBCode::convert($post['text'] . $post['after'], false, $htmlmode); + $msg = Text\HTML::toPlaintext($html, 0, true); + $msg = trim(html_entity_decode($msg, ENT_QUOTES, 'UTF-8')); + + $link = ''; + if ($includedlinks) { + if ($post['type'] == 'link') { + $link = $post['url']; + } elseif ($post['type'] == 'text') { + $link = $post['url']; + } elseif ($post['type'] == 'video') { + $link = $post['url']; + } elseif ($post['type'] == 'photo') { + $link = $post['image']; + } + + if (($msg == '') && isset($post['title'])) { + $msg = trim($post['title']); + } + + if (($msg == '') && isset($post['description'])) { + $msg = trim($post['description']); + } + + // If the link is already contained in the post, then it neeedn't to be added again + // But: if the link is beyond the limit, then it has to be added. + if (($link != '') && strstr($msg, $link)) { + $pos = strpos($msg, $link); + + // Will the text be shortened in the link? + // Or is the link the last item in the post? + if (($limit > 0) && ($pos < $limit) && (($pos + 23 > $limit) || ($pos + strlen($link) == strlen($msg)))) { + $msg = trim(str_replace($link, '', $msg)); + } elseif (($limit == 0) || ($pos < $limit)) { + // The limit has to be increased since it will be shortened - but not now + // Only do it with Twitter (htmlmode = 8) + if (($limit > 0) && (strlen($link) > 23) && ($htmlmode == 8)) { + $limit = $limit - 23 + strlen($link); + } + + $link = ''; + + if ($post['type'] == 'text') { + unset($post['url']); + } + } + } + } + + if ($limit > 0) { + // Reduce multiple spaces + // When posted to a network with limited space, we try to gain space where possible + while (strpos($msg, ' ') !== false) { + $msg = str_replace(' ', ' ', $msg); + } + + // Twitter is using its own limiter, so we always assume that shortened links will have this length + if (iconv_strlen($link, 'UTF-8') > 0) { + $limit = $limit - 23; + } + + if (iconv_strlen($msg, 'UTF-8') > $limit) { + if (($post['type'] == 'text') && isset($post['url'])) { + $post['url'] = $item['plink']; + } elseif (!isset($post['url'])) { + $limit = $limit - 23; + $post['url'] = $item['plink']; + } elseif (strpos($item['body'], '[share') !== false) { + $post['url'] = $item['plink']; + } elseif (PConfig::get($item['uid'], 'system', 'no_intelligent_shortening')) { + $post['url'] = $item['plink']; + } + $msg = Text\Plaintext::shorten($msg, $limit); + } + } + + $post['text'] = trim($msg); + + return $post; + } +}