From 54d25ebc91dd7b66ffbbf176c0a11fa17fbae3b3 Mon Sep 17 00:00:00 2001 From: Michael Date: Sat, 1 Oct 2022 22:26:37 +0000 Subject: [PATCH 1/3] Fix picture link removal in "getAttachedData" --- src/Content/Text/BBCode.php | 28 ++++++++++++++++------------ src/Content/Text/Plaintext.php | 25 +++++++++++++++++-------- 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/src/Content/Text/BBCode.php b/src/Content/Text/BBCode.php index 69f5fa6e8..0a07e18f4 100644 --- a/src/Content/Text/BBCode.php +++ b/src/Content/Text/BBCode.php @@ -287,14 +287,17 @@ class BBCode } } + if (!isset($post['type'])) { + $post['text'] = $body; + } + + // Simplify image codes + $post['text'] = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $post['text']); + $post['text'] = preg_replace("/\[img\=(.*?)\](.*?)\[\/img\]/ism", '[img]$1[/img]', $post['text']); + // if nothing is found, it maybe having an image. if (!isset($post['type'])) { - // Simplify image codes - $body = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $body); - $body = preg_replace("/\[img\=(.*?)\](.*?)\[\/img\]/ism", '[img]$1[/img]', $body); - $post['text'] = $body; - - if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) { + if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]#ism", $post['text'], $pictures, PREG_SET_ORDER)) { if ((count($pictures) == 1) && !$has_title) { if (!empty($item['object-type']) && ($item['object-type'] == Activity\ObjectType::IMAGE)) { // Replace the preview picture with the real picture @@ -322,14 +325,14 @@ class BBCode } $post['preview'] = $pictures[0][2]; - $post['text'] = trim(str_replace($pictures[0][0], '', $body)); + $post['text'] = trim(str_replace($pictures[0][0], '', $post['text'])); } else { $imgdata = Images::getInfoFromURLCached($pictures[0][1]); if (($imgdata) && substr($imgdata['mime'], 0, 6) == 'image/') { $post['type'] = 
'photo'; $post['image'] = $pictures[0][1]; $post['preview'] = $pictures[0][2]; - $post['text'] = trim(str_replace($pictures[0][0], '', $body)); + $post['text'] = trim(str_replace($pictures[0][0], '', $post['text'])); } } } elseif (count($pictures) > 0) { @@ -341,13 +344,12 @@ class BBCode } $post['image'] = $pictures[0][2]; - $post['text'] = $body; foreach ($pictures as $picture) { $post['text'] = trim(str_replace($picture[0], '', $post['text'])); } } - } elseif (preg_match_all("(\[img\](.*?)\[\/img\])ism", $body, $pictures, PREG_SET_ORDER)) { + } elseif (preg_match_all("(\[img\](.*?)\[\/img\])ism", $post['text'], $pictures, PREG_SET_ORDER)) { if ($has_title) { $post['type'] = 'link'; $post['url'] = $plink; @@ -356,7 +358,6 @@ class BBCode } $post['image'] = $pictures[0][1]; - $post['text'] = $body; foreach ($pictures as $picture) { $post['text'] = trim(str_replace($picture[0], '', $post['text'])); } @@ -396,7 +397,6 @@ class BBCode if (!isset($post['type'])) { $post['type'] = 'text'; - $post['text'] = trim($body); } if (($post['type'] == 'photo') && empty($post['images']) && !empty($post['remote_images'])) { @@ -413,6 +413,10 @@ class BBCode if (isset($data['images'][0])) { $post['image'] = $data['images'][0]['src']; } + } elseif (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]#ism", $post['text'], $pictures, PREG_SET_ORDER)) { + foreach ($pictures as $picture) { + $post['text'] = trim(str_replace($picture[0], '', $post['text'])); + } } DI::profiler()->stopRecording(); diff --git a/src/Content/Text/Plaintext.php b/src/Content/Text/Plaintext.php index 1e76a557b..5fe779704 100644 --- a/src/Content/Text/Plaintext.php +++ b/src/Content/Text/Plaintext.php @@ -126,8 +126,6 @@ class Plaintext $body = BBCode::stripAbstract($body); // At first look at data that is attached via "type-..." 
stuff - // This will hopefully replaced with a dedicated bbcode later - //$post = self::getAttachedData($b['body']); $post = BBCode::getAttachedData($body, $item); if (($item['title'] != '') && ($post['text'] != '')) { @@ -238,11 +236,13 @@ class Plaintext $limit = $limit - 23; } - if (!in_array($link, ['', $item['plink']]) && ($post['type'] != 'photo')) { - $complete_msg .= "\n" . $link; + if (!in_array($link, ['', $item['plink']]) && ($post['type'] != 'photo') && (strpos($complete_msg, $link) === false)) { + $complete_link = $link; + } else { + $complete_link = ''; } - $post['parts'] = self::getParts(trim($complete_msg), $limit); + $post['parts'] = self::getParts(trim($complete_msg), $limit, $complete_link); if (iconv_strlen($msg, 'UTF-8') > $limit) { if (($post['type'] == 'text') && isset($post['url'])) { @@ -271,12 +271,18 @@ class Plaintext * @param integer $limit * @return array */ - private static function getParts(string $message, int $limit): array + private static function getParts(string $message, int $limit, string $link): array { $parts = []; $part = ''; - while (trim($message)) { + if (($link != '') && (strlen($message) <= $limit - 24)) { + return [$message. "\n" . $link]; + } elseif (($link == '') && (strlen($message) <= $limit)) { + return [$message]; + } + + while ($message) { $pos1 = strpos($message, ' '); $pos2 = strpos($message, "\n"); @@ -299,10 +305,13 @@ class Plaintext if (strlen($part . $word) > ($limit - 8)) { $parts[] = trim($part); $part = ''; + if (strlen($message) <= ($limit - 8)) { + $limit -= 23; + } } $part .= $word; } - $parts[] = $part; + $parts[] = trim($part . "\n" . 
$link); if (count($parts) > 1) { foreach ($parts as $key => $part) { From bf4c5ba84f4aa54d67b019619838d2b356dbeca6 Mon Sep 17 00:00:00 2001 From: Michael Date: Sun, 2 Oct 2022 20:42:21 +0000 Subject: [PATCH 2/3] Improved URL handling --- src/Content/Text/Plaintext.php | 59 +++++++++++++++++----------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/src/Content/Text/Plaintext.php b/src/Content/Text/Plaintext.php index 5fe779704..5c8629f62 100644 --- a/src/Content/Text/Plaintext.php +++ b/src/Content/Text/Plaintext.php @@ -23,9 +23,12 @@ namespace Friendica\Content\Text; use Friendica\Core\Protocol; use Friendica\DI; +use Friendica\Util\Network; class Plaintext { + const URL_LENGTH = 23; + /** * Shortens message * @@ -41,18 +44,18 @@ class Plaintext $ellipsis = html_entity_decode("…", ENT_QUOTES, 'UTF-8'); if (!empty($uid) && DI::pConfig()->get($uid, 'system', 'simple_shortening')) { - return iconv_substr(iconv_substr(trim($msg), 0, $limit, "UTF-8"), 0, -3, "UTF-8") . $ellipsis; + return mb_substr(mb_substr(trim($msg), 0, $limit), 0, -3) . $ellipsis; } $lines = explode("\n", $msg); $msg = ""; $recycle = html_entity_decode("♲ ", ENT_QUOTES, 'UTF-8'); foreach ($lines as $row => $line) { - if (iconv_strlen(trim($msg . "\n" . $line), "UTF-8") <= $limit) { + if (mb_strlen(trim($msg . "\n" . $line)) <= $limit) { $msg = trim($msg . "\n" . $line); } elseif (($msg == "") || (($row == 1) && (substr($msg, 0, 4) == $recycle))) { // Is the new message empty by now or is it a reshared message? - $msg = iconv_substr(iconv_substr(trim($msg . "\n" . $line), 0, $limit, "UTF-8"), 0, -3, "UTF-8") . $ellipsis; + $msg = mb_substr(mb_substr(trim($msg . "\n" . $line), 0, $limit), 0, -3) . $ellipsis; } else { break; } @@ -206,13 +209,13 @@ class Plaintext // Will the text be shortened in the link? // Or is the link the last item in the post? 
- if (($limit > 0) && ($pos < $limit) && (($pos + 23 > $limit) || ($pos + strlen($link) == strlen($msg)))) { + if (($limit > 0) && ($pos < $limit) && (($pos + self::URL_LENGTH > $limit) || ($pos + mb_strlen($link) == mb_strlen($msg)))) { $msg = trim(str_replace($link, '', $msg)); } elseif (($limit == 0) || ($pos < $limit)) { // The limit has to be increased since it will be shortened - but not now // Only do it with Twitter - if (($limit > 0) && (strlen($link) > 23) && ($htmlmode == BBCode::TWITTER)) { - $limit = $limit - 23 + strlen($link); + if (($limit > 0) && (mb_strlen($link) > self::URL_LENGTH) && ($htmlmode == BBCode::TWITTER)) { + $limit = $limit - self::URL_LENGTH + mb_strlen($link); } $link = ''; @@ -231,24 +234,22 @@ class Plaintext $msg = str_replace(' ', ' ', $msg); } - // Twitter is using its own limiter, so we always assume that shortened links will have this length - if (iconv_strlen($link, 'UTF-8') > 0) { - $limit = $limit - 23; - } - if (!in_array($link, ['', $item['plink']]) && ($post['type'] != 'photo') && (strpos($complete_msg, $link) === false)) { - $complete_link = $link; - } else { - $complete_link = ''; + $complete_msg .= "\n" . 
$link; } - $post['parts'] = self::getParts(trim($complete_msg), $limit, $complete_link); + $post['parts'] = self::getParts(trim($complete_msg), $limit); - if (iconv_strlen($msg, 'UTF-8') > $limit) { + // Twitter is using its own limiter, so we always assume that shortened links will have this length + if (mb_strlen($link) > 0) { + $limit = $limit - self::URL_LENGTH; + } + + if (mb_strlen($msg) > $limit) { if (($post['type'] == 'text') && isset($post['url'])) { $post['url'] = $item['plink']; } elseif (!isset($post['url'])) { - $limit = $limit - 23; + $limit = $limit - self::URL_LENGTH; $post['url'] = $item['plink']; } elseif (strpos($item['body'], '[share') !== false) { $post['url'] = $item['plink']; @@ -268,19 +269,15 @@ class Plaintext * Split the message in parts * * @param string $message - * @param integer $limit + * @param integer $baselimit * @return array */ - private static function getParts(string $message, int $limit, string $link): array + private static function getParts(string $message, int $baselimit): array { $parts = []; $part = ''; - if (($link != '') && (strlen($message) <= $limit - 24)) { - return [$message. "\n" . $link]; - } elseif (($link == '') && (strlen($message) <= $limit)) { - return [$message]; - } + $limit = $baselimit; while ($message) { $pos1 = strpos($message, ' '); @@ -302,16 +299,18 @@ class Plaintext $message = trim(substr($message, $pos)); } - if (strlen($part . $word) > ($limit - 8)) { + if (Network::isValidHttpUrl(trim($word))) { + $limit += mb_strlen(trim($word)) - self::URL_LENGTH; + } + + if ((mb_strlen($part . $word) > $limit - 8) && (mb_strlen($part . $word . $message) > $limit)) { $parts[] = trim($part); - $part = ''; - if (strlen($message) <= ($limit - 8)) { - $limit -= 23; - } + $part = ''; + $limit = $baselimit; } $part .= $word; } - $parts[] = trim($part . "\n" . 
$link); + $parts[] = trim($part); if (count($parts) > 1) { foreach ($parts as $key => $part) { From 1afbcb248634b119fba8e07193f9d5ec411f9702 Mon Sep 17 00:00:00 2001 From: Michael Date: Sun, 2 Oct 2022 23:55:33 +0000 Subject: [PATCH 3/3] Added description --- src/Content/Text/Plaintext.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Content/Text/Plaintext.php b/src/Content/Text/Plaintext.php index 5c8629f62..6e8d0daff 100644 --- a/src/Content/Text/Plaintext.php +++ b/src/Content/Text/Plaintext.php @@ -27,6 +27,7 @@ use Friendica\Util\Network; class Plaintext { + // Assumed length of a URL when shortened via the network's own URL shortener (e.g. Twitter) const URL_LENGTH = 23; /**