From 2a752b37b1a67c7f199db4330054341a19c64455 Mon Sep 17 00:00:00 2001 From: Michael Date: Mon, 13 Mar 2023 05:04:55 +0000 Subject: [PATCH] The function "getAttachedData" is replaced by a simplified functionality --- src/Content/Text/BBCode.php | 177 --------------------------------- src/Content/Text/Plaintext.php | 144 +++++++++++++++++++-------- src/Model/Post/Media.php | 59 +++++++---- 3 files changed, 143 insertions(+), 237 deletions(-) diff --git a/src/Content/Text/BBCode.php b/src/Content/Text/BBCode.php index 12fef305b1..33f91b43d1 100644 --- a/src/Content/Text/BBCode.php +++ b/src/Content/Text/BBCode.php @@ -209,183 +209,6 @@ class BBCode ); } - public static function getAttachedData(string $body, array $item = []): array - { - /* - - text: - - type: link, video, photo - - title: - - url: - - image: - - description: - - (thumbnail) - */ - - DI::profiler()->startRecording('rendering'); - $has_title = !empty($item['title']); - $plink = $item['plink'] ?? ''; - $post = self::getAttachmentData($body); - - // Get all linked images with alternative image description - if (preg_match_all("/\[img=(http[^\[\]]*)\]([^\[\]]*)\[\/img\]/Usi", $body, $pictures, PREG_SET_ORDER)) { - foreach ($pictures as $picture) { - if ($id = Photo::getIdForName($picture[1])) { - $post['images'][] = ['url' => str_replace('-1.', '-0.', $picture[1]), 'description' => $picture[2], 'id' => $id]; - } else { - $post['remote_images'][] = ['url' => $picture[1], 'description' => $picture[2]]; - } - } - if (!empty($post['images']) && !empty($post['images'][0]['description'])) { - $post['image_description'] = $post['images'][0]['description']; - } - } - - if (preg_match_all("/\[img\]([^\[\]]*)\[\/img\]/Usi", $body, $pictures, PREG_SET_ORDER)) { - foreach ($pictures as $picture) { - if ($id = Photo::getIdForName($picture[1])) { - $post['images'][] = ['url' => str_replace('-1.', '-0.', $picture[1]), 'description' => '', 'id' => $id]; - } else { - $post['remote_images'][] = ['url' => $picture[1], 'description' => '']; - } - } - } - - if (!isset($post['type'])) { - $post['text'] = $body; - } - - // Simplify image codes - $post['text'] = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $post['text']); - $post['text'] = preg_replace("/\[img\=(.*?)\](.*?)\[\/img\]/ism", '[img]$1[/img]', $post['text']); - - // if nothing is found, it maybe having an image. - if (!isset($post['type'])) { - if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]#ism", $post['text'], $pictures, PREG_SET_ORDER)) { - if ((count($pictures) == 1) && !$has_title) { - if (!empty($item['object-type']) && ($item['object-type'] == Activity\ObjectType::IMAGE)) { - // Replace the preview picture with the real picture - $url = str_replace('-1.', '-0.', $pictures[0][2]); - $data = ['url' => $url, 'type' => 'photo']; - } else { - // Checking, if the link goes to a picture - $data = ParseUrl::getSiteinfoCached($pictures[0][1]); - } - - // Workaround: - // Sometimes photo posts to the own album are not detected at the start. - // So we seem to cannot use the cache for these cases. That's strange. - if (($data['type'] != 'photo') && strstr($pictures[0][1], '/photos/')) { - $data = ParseUrl::getSiteinfo($pictures[0][1]); - } - - if ($data['type'] == 'photo') { - $post['type'] = 'photo'; - if (isset($data['images'][0])) { - $post['image'] = $data['images'][0]['src']; - $post['url'] = $data['url']; - } else { - $post['image'] = $data['url']; - } - - $post['preview'] = $pictures[0][2]; - $post['text'] = trim(str_replace($pictures[0][0], '', $post['text'])); - } else { - $imgdata = Images::getInfoFromURLCached($pictures[0][1]); - if (($imgdata) && substr($imgdata['mime'], 0, 6) == 'image/') { - $post['type'] = 'photo'; - $post['image'] = $pictures[0][1]; - $post['preview'] = $pictures[0][2]; - $post['text'] = trim(str_replace($pictures[0][0], '', $post['text'])); - } - } - } elseif (count($pictures) > 0) { - if (count($pictures) > 4) { - $post['type'] = 'link'; - $post['url'] = $plink; - } else { - $post['type'] = 'photo'; - } - - $post['image'] = $pictures[0][2]; - - foreach ($pictures as $picture) { - $post['text'] = trim(str_replace($picture[0], '', $post['text'])); - } - } - } elseif (preg_match_all("(\[img\](.*?)\[\/img\])ism", $post['text'], $pictures, PREG_SET_ORDER)) { - if ($has_title) { - $post['type'] = 'link'; - $post['url'] = $plink; - } else { - $post['type'] = 'photo'; - } - - $post['image'] = $pictures[0][1]; - foreach ($pictures as $picture) { - $post['text'] = trim(str_replace($picture[0], '', $post['text'])); - } - } - - // Test for the external links - preg_match_all("(\[url\](.*?)\[\/url\])ism", $post['text'], $links1, PREG_SET_ORDER); - preg_match_all("(\[url\=(.*?)\].*?\[\/url\])ism", $post['text'], $links2, PREG_SET_ORDER); - - $links = array_merge($links1, $links2); - - // If there is only a single one, then use it. - // This should cover link posts via API. - if ((count($links) == 1) && !isset($post['preview']) && !$has_title) { - $post['type'] = 'link'; - $post['url'] = $links[0][1]; - } - - // Simplify "video" element - $post['text'] = preg_replace('(\[video.*?\ssrc\s?=\s?([^\s\]]+).*?\].*?\[/video\])ism', '[video]$1[/video]', $post['text']); - - // Now count the number of external media links - preg_match_all("(\[vimeo\](.*?)\[\/vimeo\])ism", $post['text'], $links1, PREG_SET_ORDER); - preg_match_all("(\[youtube\\](.*?)\[\/youtube\\])ism", $post['text'], $links2, PREG_SET_ORDER); - preg_match_all("(\[video\\](.*?)\[\/video\\])ism", $post['text'], $links3, PREG_SET_ORDER); - preg_match_all("(\[audio\\](.*?)\[\/audio\\])ism", $post['text'], $links4, PREG_SET_ORDER); - - // Add them to the other external links - $links = array_merge($links, $links1, $links2, $links3, $links4); - - // Are there more than one? - if (count($links) > 1) { - // The post will be the type "text", which means a blog post - unset($post['type']); - $post['url'] = $plink; - } - - if (!isset($post['type'])) { - $post['type'] = 'text'; - } - - if (($post['type'] == 'photo') && empty($post['images']) && !empty($post['remote_images'])) { - $post['images'] = $post['remote_images']; - $post['image'] = $post['images'][0]['url']; - if (!empty($post['images']) && !empty($post['images'][0]['description'])) { - $post['image_description'] = $post['images'][0]['description']; - } - } - unset($post['remote_images']); - } elseif (isset($post['url']) && ($post['type'] == 'video')) { - $data = ParseUrl::getSiteinfoCached($post['url']); - - if (isset($data['images'][0])) { - $post['image'] = $data['images'][0]['src']; - } - } elseif (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]#ism", $post['text'], $pictures, PREG_SET_ORDER)) { - foreach ($pictures as $picture) { - $post['text'] = trim(str_replace($picture[0], '', $post['text'])); - } - } - - DI::profiler()->stopRecording(); - return $post; - } - /** * Remove [attachment] BBCode * diff --git a/src/Content/Text/Plaintext.php b/src/Content/Text/Plaintext.php index c72dad2be5..ce0a305fe7 100644 --- a/src/Content/Text/Plaintext.php +++ b/src/Content/Text/Plaintext.php @@ -23,7 +23,10 @@ namespace Friendica\Content\Text; use Friendica\Core\Protocol; use Friendica\DI; +use Friendica\Model\Photo; +use Friendica\Model\Post; use Friendica\Util\Network; +use Friendica\Util\Strings; class Plaintext { @@ -109,30 +112,15 @@ class Plaintext * @param int $limit The maximum number of characters when posting to that network * @param bool $includedlinks Has an attached link to be included into the message? * @param int $htmlmode This controls the behavior of the BBCode conversion - * @param string $target_network Name of the network where the post should go to. * * @return array Same array structure than \Friendica\Content\Text\BBCode::getAttachedData * @throws \Friendica\Network\HTTPException\InternalServerErrorException * @see \Friendica\Content\Text\BBCode::getAttachedData */ - public static function getPost(array $item, int $limit = 0, bool $includedlinks = false, int $htmlmode = BBCode::MASTODON_API, string $target_network = '') + public static function getPost(array $item, int $limit = 0, bool $includedlinks = false, int $htmlmode = BBCode::MASTODON_API) { - // Remove hashtags - $URLSearchString = '^\[\]'; - $body = preg_replace("/([#@])\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", '$1$3', $item['body']); - - // Add an URL element if the text contains a raw link - $body = preg_replace( - '/([^\]\=\'"]|^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism', - '$1[url]$2[/url]', - $body - ); - - // Remove the abstract - $body = BBCode::stripAbstract($body); - - // At first look at data that is attached via "type-..." stuff - $post = BBCode::getAttachedData($body, $item); + // Fetch attached media information + $post = self::getPostMedia($item); if (($item['title'] != '') && ($post['text'] != '')) { $post['text'] = trim($item['title'] . "\n\n" . $post['text']); @@ -140,34 +128,21 @@ class Plaintext $post['text'] = trim($item['title']); } - $abstract = ''; - // Fetch the abstract from the given target network - if ($target_network != '') { - $default_abstract = BBCode::getAbstract($item['body']); - $abstract = BBCode::getAbstract($item['body'], $target_network); + switch ($htmlmode) { + case BBCode::TWITTER: + $abstract = BBCode::getAbstract($item['body'], Protocol::TWITTER); + break; - // If we post to a network with no limit we only fetch - // an abstract exactly for this network - if (($limit == 0) && ($abstract == $default_abstract)) { - $abstract = ''; - } - } else { // Try to guess the correct target network - switch ($htmlmode) { - case BBCode::TWITTER: - $abstract = BBCode::getAbstract($item['body'], Protocol::TWITTER); - break; + case BBCode::OSTATUS: + $abstract = BBCode::getAbstract($item['body'], Protocol::STATUSNET); + break; - case BBCode::OSTATUS: - $abstract = BBCode::getAbstract($item['body'], Protocol::STATUSNET); - break; - - default: // We don't know the exact target. - // We fetch an abstract since there is a posting limit. - if ($limit > 0) { - $abstract = BBCode::getAbstract($item['body']); - } - } + default: // We don't know the exact target. + // We fetch an abstract since there is a posting limit. + if ($limit > 0) { + $abstract = BBCode::getAbstract($item['body']); + } } if ($abstract != '') { @@ -323,4 +298,87 @@ class Plaintext return $parts; } + + /** + * Fetch attached media to the post and simplify the body. + * + * @param array $item + * @return array + */ + private static function getPostMedia(array $item): array + { + $post = ['type' => 'text', 'images' => [], 'remote_images' => []]; + + // Remove mentions and hashtag links + $URLSearchString = '^\[\]'; + $post['text'] = preg_replace("/([#!@])\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", '$1$3', $item['body']); + + // Remove abstract + $post['text'] = BBCode::stripAbstract($post['text']); + // Remove attached links + $post['text'] = BBCode::removeAttachment($post['text']); + // Remove any links + $post['text'] = Post\Media::removeFromBody($post['text']); + + $images = Post\Media::getByURIId($item['uri-id'], [Post\Media::IMAGE]); + if (!empty($item['quote-uri-id'])) { + $images = array_merge($images, Post\Media::getByURIId($item['quote-uri-id'], [Post\Media::IMAGE])); + } + foreach ($images as $image) { + if ($id = Photo::getIdForName($image['url'])) { + $post['images'][] = ['url' => $image['url'], 'description' => $image['description'], 'id' => $id]; + } else { + $post['remote_images'][] = ['url' => $image['url'], 'description' => $image['description']]; + } + } + + if (empty($post['images'])) { + unset($post['images']); + } + + if (empty($post['remote_images'])) { + unset($post['remote_images']); + } + + if (!empty($post['images'])) { + $post['type'] = 'photo'; + $post['image'] = $post['images'][0]['url']; + $post['image_description'] = $post['images'][0]['description']; + } elseif (!empty($post['remote_images'])) { + $post['type'] = 'photo'; + $post['image'] = $post['remote_images'][0]['url']; + $post['image_description'] = $post['remote_images'][0]['description']; + } + + // Look for audio or video links + $media = Post\Media::getByURIId($item['uri-id'], [Post\Media::AUDIO, Post\Media::VIDEO]); + if (!empty($item['quote-uri-id'])) { + $media = array_merge($media, Post\Media::getByURIId($item['quote-uri-id'], [Post\Media::AUDIO, Post\Media::VIDEO])); + } + + foreach ($media as $medium) { + if (in_array($medium['type'], [Post\Media::AUDIO, Post\Media::VIDEO])) { + $post['type'] = 'link'; + $post['url'] = $medium['url']; + } + } + + // Look for an attached link + $page = Post\Media::getByURIId($item['uri-id'], [Post\Media::HTML]); + if (!empty($item['quote-uri-id']) && empty($page)) { + $page = Post\Media::getByURIId($item['quote-uri-id'], [Post\Media::HTML]); + } + if (!empty($page)) { + $post['type'] = 'link'; + $post['url'] = $page[0]['url']; + $post['description'] = $page[0]['description']; + $post['title'] = $page[0]['name']; + + if (empty($post['image']) && !empty($page[0]['preview'])) { + $post['image'] = $page[0]['preview']; + } + } + + return $post; + } } diff --git a/src/Model/Post/Media.php b/src/Model/Post/Media.php index ab17789724..dd3f9f36e7 100644 --- a/src/Model/Post/Media.php +++ b/src/Model/Post/Media.php @@ -463,7 +463,7 @@ class Media */ private static function isPictureLink(string $page, string $preview): bool { - return preg_match('#/photos/.*/image/#ism', $page) && preg_match('#/photo/.*-1\.#ism', $preview); + return (preg_match('#/photo/.*-0\.#ism', $page) || preg_match('#/photos/.*/image/#ism', $page)) && preg_match('#/photo/.*-[01]\.#ism', $preview); } /** @@ -482,15 +482,20 @@ class Media $attachments = []; if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]\s*\[/url\]$endmatchpattern#ism", $body, $pictures, PREG_SET_ORDER)) { foreach ($pictures as $picture) { - if (!self::isPictureLink($picture[1], $picture[2])) { - continue; + if (self::isPictureLink($picture[1], $picture[2])) { + $body = str_replace($picture[0], '', $body); + $image = str_replace('-1.', '-0.', $picture[2]); + $attachments[$image] = [ + 'uri-id' => $uriid, 'type' => self::IMAGE, 'url' => $image, + 'preview' => $picture[2], 'description' => $picture[3] + ]; + } else { + $body = str_replace($picture[0], '', $body); + $attachments[$picture[1]] = [ + 'uri-id' => $uriid, 'type' => self::UNKNOWN, 'url' => $picture[1], + 'preview' => $picture[2], 'description' => $picture[3] + ]; } - $body = str_replace($picture[0], '', $body); - $image = str_replace('-1.', '-0.', $picture[2]); - $attachments[$image] = [ - 'uri-id' => $uriid, 'type' => self::IMAGE, 'url' => $image, - 'preview' => $picture[2], 'description' => $picture[3] - ]; } } @@ -503,15 +508,20 @@ class Media if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]$endmatchpattern#ism", $body, $pictures, PREG_SET_ORDER)) { foreach ($pictures as $picture) { - if (!self::isPictureLink($picture[1], $picture[2])) { - continue; + if (self::isPictureLink($picture[1], $picture[2])) { + $body = str_replace($picture[0], '', $body); + $image = str_replace('-1.', '-0.', $picture[2]); + $attachments[$image] = [ + 'uri-id' => $uriid, 'type' => self::IMAGE, 'url' => $image, + 'preview' => $picture[2], 'description' => null + ]; + } else { + $body = str_replace($picture[0], '', $body); + $attachments[$picture[1]] = [ + 'uri-id' => $uriid, 'type' => self::UNKNOWN, 'url' => $picture[1], + 'preview' => $picture[2], 'description' => null + ]; } - $body = str_replace($picture[0], '', $body); - $image = str_replace('-1.', '-0.', $picture[2]); - $attachments[$image] = [ - 'uri-id' => $uriid, 'type' => self::IMAGE, 'url' => $image, - 'preview' => $picture[2], 'description' => null - ]; } } @@ -567,6 +577,21 @@ class Media return $body; } + /** + * Remove media from the body + * + * @param string $body + * @return string + */ + public static function removeFromBody(string $body): string + { + do { + $prebody = $body; + $body = self::insertFromBody(0, $body); + } while ($prebody != $body); + return $body; + } + /** * Add media links from a relevant url in the body *