From 821a135033bd6b873e8836c66c50fb9fd0a1994d Mon Sep 17 00:00:00 2001 From: Michael Date: Sat, 24 Feb 2024 10:58:18 +0000 Subject: [PATCH] Unused OEmbed functionality is removed --- src/Content/OEmbed.php | 191 ++++++++---------------------------- src/Content/Text/BBCode.php | 3 - src/Module/Oembed.php | 74 -------------- src/Util/ParseUrl.php | 49 ++------- static/routes.config.php | 5 - 5 files changed, 48 insertions(+), 274 deletions(-) delete mode 100644 src/Module/Oembed.php diff --git a/src/Content/OEmbed.php b/src/Content/OEmbed.php index 445c52ced0..8274bc03c8 100644 --- a/src/Content/OEmbed.php +++ b/src/Content/OEmbed.php @@ -58,7 +58,7 @@ class OEmbed public static function replaceCallback(array $matches): string { $embedurl = $matches[1]; - $j = self::fetchURL($embedurl, !self::isAllowedURL($embedurl)); + $j = self::fetchURL($embedurl); $s = self::formatObject($j); return $s; @@ -67,14 +67,12 @@ class OEmbed /** * Get data from an URL to embed its content. * - * @param string $embedurl The URL from which the data should be fetched. - * @param bool $no_rich_type If set to true rich type content won't be fetched. - * @param bool $use_parseurl Use the "ParseUrl" functionality to add additional data + * @param string $embedurl The URL from which the data should be fetched. * * @return \Friendica\Object\OEmbed * @throws \Friendica\Network\HTTPException\InternalServerErrorException */ - public static function fetchURL(string $embedurl, bool $no_rich_type = false, bool $use_parseurl = true): \Friendica\Object\OEmbed + private static function fetchURL(string $embedurl): \Friendica\Object\OEmbed { $embedurl = trim($embedurl, '\'"'); @@ -119,7 +117,7 @@ class OEmbed $href = str_replace(['http://www.youtube.com/', 'http://player.vimeo.com/'], ['https://www.youtube.com/', 'https://player.vimeo.com/'], $href); $result = DI::httpClient()->fetchFull($href . '&maxwidth=' . 
$a->getThemeInfoValue('videowidth')); - if ($result->getReturnCode() === 200) { + if ($result->isSuccess()) { $json_string = $result->getBodyString(); break; } @@ -157,57 +155,55 @@ class OEmbed } // Improve the OEmbed data with data from OpenGraph, Twitter cards and other sources - if ($use_parseurl) { - $data = ParseUrl::getSiteinfoCached($embedurl, false); + $data = ParseUrl::getSiteinfoCached($embedurl); - if (($oembed->type == 'error') && empty($data['title']) && empty($data['text'])) { - return $oembed; - } + if (($oembed->type == 'error') && empty($data['title']) && empty($data['text'])) { + return $oembed; + } - if ($no_rich_type || ($oembed->type == 'error')) { - $oembed->html = ''; - $oembed->type = $data['type']; + if (!self::isAllowedURL($oembed->url) || ($oembed->type == 'error')) { + $oembed->html = ''; + $oembed->type = $data['type']; - if ($oembed->type == 'photo') { - if (!empty($data['images'])) { - $oembed->url = $data['images'][0]['src']; - $oembed->width = $data['images'][0]['width']; - $oembed->height = $data['images'][0]['height']; - } else { - $oembed->type = 'link'; - } + if ($oembed->type == 'photo') { + if (!empty($data['images'])) { + $oembed->url = $data['images'][0]['src']; + $oembed->width = $data['images'][0]['width']; + $oembed->height = $data['images'][0]['height']; + } else { + $oembed->type = 'link'; } } + } - if (!empty($data['title'])) { - $oembed->title = $data['title']; - } + if (!empty($data['title'])) { + $oembed->title = $data['title']; + } - if (!empty($data['text'])) { - $oembed->description = $data['text']; - } + if (!empty($data['text'])) { + $oembed->description = $data['text']; + } - if (!empty($data['publisher_name'])) { - $oembed->provider_name = $data['publisher_name']; - } + if (!empty($data['publisher_name'])) { + $oembed->provider_name = $data['publisher_name']; + } - if (!empty($data['publisher_url'])) { - $oembed->provider_url = $data['publisher_url']; - } + if (!empty($data['publisher_url'])) { + 
$oembed->provider_url = $data['publisher_url']; + } - if (!empty($data['author_name'])) { - $oembed->author_name = $data['author_name']; - } + if (!empty($data['author_name'])) { + $oembed->author_name = $data['author_name']; + } - if (!empty($data['author_url'])) { - $oembed->author_url = $data['author_url']; - } + if (!empty($data['author_url'])) { + $oembed->author_url = $data['author_url']; + } - if (!empty($data['images']) && ($oembed->type != 'photo')) { - $oembed->thumbnail_url = $data['images'][0]['src']; - $oembed->thumbnail_width = $data['images'][0]['width']; - $oembed->thumbnail_height = $data['images'][0]['height']; - } + if (!empty($data['images']) && ($oembed->type != 'photo')) { + $oembed->thumbnail_url = $data['images'][0]['src']; + $oembed->thumbnail_width = $data['images'][0]['width']; + $oembed->thumbnail_height = $data['images'][0]['height']; } Hook::callAll('oembed_fetch_url', $embedurl, $oembed); @@ -318,43 +314,6 @@ class OEmbed return preg_replace_callback("/\[embed\](.+?)\[\/embed\]/is", [self::class, 'replaceCallback'], $text); } - /** - * Find .... - * and replace it with [embed]url[/embed] - * - * @param string $text - * @return string - */ - public static function HTML2BBCode(string $text): string - { - // start parser only if 'oembed' is in text - if (strpos($text, 'oembed')) { - // convert non ascii chars to html entities - $html_text = mb_convert_encoding($text, 'HTML-ENTITIES', mb_detect_encoding($text)); - - // If it doesn't parse at all, just return the text. - $dom = new DOMDocument(); - if (!@$dom->loadHTML($html_text)) { - return $text; - } - $xpath = new DOMXPath($dom); - - $xattr = self::buildXPath('class', 'oembed'); - $entries = $xpath->query("//div[$xattr]"); - - $xattr = "@rel='oembed'"; //oe_build_xpath("rel","oembed"); - foreach ($entries as $e) { - $href = $xpath->evaluate("a[$xattr]/@href", $e)->item(0)->nodeValue; - if (!is_null($href)) { - $e->parentNode->replaceChild(new DOMText('[embed]' . $href . 
'[/embed]'), $e); - } - } - return self::getInnerHTML($dom->getElementsByTagName('body')->item(0)); - } else { - return $text; - } - } - /** * Determines if rich content OEmbed is allowed for the provided URL * @@ -392,7 +351,7 @@ class OEmbed */ public static function getHTML(string $url, string $title = ''): string { - $o = self::fetchURL($url, !self::isAllowedURL($url)); + $o = self::fetchURL($url); if (!is_object($o) || property_exists($o, 'type') && $o->type == 'error') { throw new Exception('OEmbed failed for URL: ' . $url); @@ -406,70 +365,4 @@ class OEmbed return $html; } - - /** - * Generates the iframe HTML for an oembed attachment. - * - * Width and height are given by the remote, and are regularly too small for - * the generated iframe. - * - * The width is entirely discarded for the actual width of the post, while fixed - * height is used as a starting point before the inevitable resizing. - * - * Since the iframe is automatically resized on load, there are no need for ugly - * and impractical scrollbars. - * - * @todo This function is currently unused until someone™ adds support for a separate OEmbed domain - * - * @param string $src Original remote URL to embed - * @param string $width - * @param string $height - * @return string Formatted HTML - * - * @throws \Friendica\Network\HTTPException\InternalServerErrorException - * @see oembed_format_object() - */ - private static function iframe(string $src, string $width, string $height): string - { - if (!$height || strstr($height, '%')) { - $height = '200'; - } - $width = '100%'; - - $src = DI::baseUrl() . '/oembed/' . Strings::base64UrlEncode($src); - return ''; - } - - /** - * Generates attribute search XPath string - * - * Generates an XPath query to select elements whose provided attribute contains - * the provided value in a space-separated list. 
- * - * @param string $attr Name of the attribute to search - * @param string $value Value to search in a space-separated list - * @return string - */ - private static function buildXPath(string $attr, $value): string - { - // https://www.westhoffswelt.de/blog/2009/6/9/select-html-elements-with-more-than-one-css-class-using-xpath - return "contains(normalize-space(@$attr), ' $value ') or substring(normalize-space(@$attr), 1, string-length('$value') + 1) = '$value ' or substring(normalize-space(@$attr), string-length(@$attr) - string-length('$value')) = ' $value' or @$attr = '$value'"; - } - - /** - * Returns the inner XML string of a provided DOMNode - * - * @param DOMNode $node - * @return string - */ - private static function getInnerHTML(DOMNode $node): string - { - $innerHTML = ''; - $children = $node->childNodes; - foreach ($children as $child) { - $innerHTML .= $child->ownerDocument->saveXML($child); - } - return $innerHTML; - } - } diff --git a/src/Content/Text/BBCode.php b/src/Content/Text/BBCode.php index f5bba8ce42..23f02dd26b 100644 --- a/src/Content/Text/BBCode.php +++ b/src/Content/Text/BBCode.php @@ -2058,9 +2058,6 @@ class BBCode // Default iframe allowed domains/path $allowedIframeDomains = [ - DI::baseUrl()->getHost() - . (DI::baseUrl()->getPath() ? '/' . DI::baseUrl()->getPath() : '') - . '/oembed/', # The path part has to change with the source in Content\Oembed::iframe 'www.youtube.com/embed/', 'player.vimeo.com/video/', ]; diff --git a/src/Module/Oembed.php b/src/Module/Oembed.php deleted file mode 100644 index 68e13a2e86..0000000000 --- a/src/Module/Oembed.php +++ /dev/null @@ -1,74 +0,0 @@ -. - * - */ - -namespace Friendica\Module; - -use Friendica\BaseModule; -use Friendica\Content; -use Friendica\Core\System; -use Friendica\DI; -use Friendica\Util\Strings; - -/** - * Oembed module - * - * Displays stored embed content based on a base64 hash of a remote URL - * - * Example: /oembed/aHR0cHM6Ly9... 
- * - * @author Hypolite Petovan - */ -class Oembed extends BaseModule -{ - protected function content(array $request = []): string - { - // Unused form: /oembed/b2h?url=... - if (DI::args()->getArgv()[1] == 'b2h') { - $url = ["", trim(hex2bin($_REQUEST['url']))]; - echo Content\OEmbed::replaceCallback($url); - System::exit(); - } - - // Unused form: /oembed/h2b?text=... - if (DI::args()->getArgv()[1] == 'h2b') { - $text = trim(hex2bin($_REQUEST['text'])); - echo Content\OEmbed::HTML2BBCode($text); - System::exit(); - } - - // @TODO: Replace with parameter from router - if (DI::args()->getArgc() == 2) { - echo ''; - $url = Strings::base64UrlDecode(DI::args()->getArgv()[1]); - $j = Content\OEmbed::fetchURL($url); - - // workaround for media.ccc.de (and any other endpoint that return size 0) - if (substr($j->html, 0, 7) == "html, 'width="0"')) { - $j->html = '' . $j->html; - $j->html = str_replace('width="0"', '', $j->html); - $j->html = str_replace('height="0"', '', $j->html); - } - echo $j->html; - echo ''; - } - System::exit(); - } -} diff --git a/src/Util/ParseUrl.php b/src/Util/ParseUrl.php index 1287160576..7c9a058b43 100644 --- a/src/Util/ParseUrl.php +++ b/src/Util/ParseUrl.php @@ -99,8 +99,6 @@ class ParseUrl * Search for cached embeddable data of an url otherwise fetch it * * @param string $url The url of the page which should be scraped - * @param bool $do_oembed The false option is used by the function fetch_oembed() - * to avoid endless loops * * @return array which contains needed data for embedding * string 'url' => The url of the parsed page @@ -115,7 +113,7 @@ class ParseUrl * @see ParseUrl::getSiteinfo() for more information about scraping * embeddable content */ - public static function getSiteinfoCached(string $url, bool $do_oembed = true): array + public static function getSiteinfoCached(string $url): array { if (empty($url)) { return [ @@ -127,14 +125,14 @@ class ParseUrl $urlHash = hash('sha256', $url); $parsed_url = 
DBA::selectFirst('parsed_url', ['content'], - ['url_hash' => $urlHash, 'oembed' => $do_oembed] + ['url_hash' => $urlHash, 'oembed' => false] ); if (!empty($parsed_url['content'])) { $data = unserialize($parsed_url['content']); return $data; } - $data = self::getSiteinfo($url, $do_oembed); + $data = self::getSiteinfo($url); $expires = $data['expires']; @@ -144,7 +142,7 @@ class ParseUrl 'parsed_url', [ 'url_hash' => $urlHash, - 'oembed' => $do_oembed, + 'oembed' => false, 'url' => $url, 'content' => serialize($data), 'created' => DateTimeFormat::utcNow(), @@ -194,7 +192,7 @@ class ParseUrl * * @endverbatim */ - public static function getSiteinfo(string $url, bool $do_oembed = true, int $count = 1): array + public static function getSiteinfo(string $url, int $count = 1): array { if (empty($url)) { return [ @@ -254,41 +252,6 @@ class ParseUrl $body = $curlResult->getBodyString(); - if ($do_oembed) { - $oembed_data = OEmbed::fetchURL($url, false, false); - - if (!empty($oembed_data->type)) { - if (!in_array($oembed_data->type, ['error', 'rich', 'image', 'video', 'audio', ''])) { - $siteinfo['type'] = $oembed_data->type; - } - - // See https://github.com/friendica/friendica/pull/5763#discussion_r217913178 - if ($siteinfo['type'] != 'photo') { - if (!empty($oembed_data->title)) { - $siteinfo['title'] = trim($oembed_data->title); - } - if (!empty($oembed_data->description)) { - $siteinfo['text'] = trim($oembed_data->description); - } - if (!empty($oembed_data->author_name)) { - $siteinfo['author_name'] = trim($oembed_data->author_name); - } - if (!empty($oembed_data->author_url)) { - $siteinfo['author_url'] = Network::sanitizeUrl($oembed_data->author_url); - } - if (!empty($oembed_data->provider_name)) { - $siteinfo['publisher_name'] = trim($oembed_data->provider_name); - } - if (!empty($oembed_data->provider_url)) { - $siteinfo['publisher_url'] = Network::sanitizeUrl($oembed_data->provider_url); - } - if (!empty($oembed_data->thumbnail_url)) { - $siteinfo['image'] = 
$oembed_data->thumbnail_url; - } - } - } - } - $charset = ''; try { // Look for a charset, first in headers @@ -351,7 +314,7 @@ class ParseUrl } } if ($content != '') { - $siteinfo = self::getSiteinfo($content, $do_oembed, ++$count); + $siteinfo = self::getSiteinfo($content, ++$count); return $siteinfo; } } diff --git a/static/routes.config.php b/static/routes.config.php index 146255adc6..77d04f92c1 100644 --- a/static/routes.config.php +++ b/static/routes.config.php @@ -557,11 +557,6 @@ return [ '/objects/{guid}[/{activity}]' => [Module\ActivityPub\Objects::class, [R::GET]], - '/oembed' => [ - '/b2h' => [Module\Oembed::class, [R::GET]], - '/h2b' => [Module\Oembed::class, [R::GET]], - '/{hash}' => [Module\Oembed::class, [R::GET]], - ], '/outbox/{nickname}' => [Module\ActivityPub\Outbox::class, [R::GET, R::POST]], '/owa' => [Module\Owa::class, [R::GET]], '/openid' => [Module\Security\OpenID::class, [R::GET]],