diff --git a/src/Content/PageInfo.php b/src/Content/PageInfo.php index b80a551cb9..d59cf5f320 100644 --- a/src/Content/PageInfo.php +++ b/src/Content/PageInfo.php @@ -170,7 +170,7 @@ class PageInfo foreach ($data['keywords'] as $keyword) { /// @TODO make a positive list of allowed characters $hashtag = str_replace([' ', '+', '/', '.', '#', '@', "'", '"', '’', '`', '(', ')', '„', '“'], '', $keyword); - $hashtags .= '#[url=' . DI::baseUrl() . '/search?tag=' . $hashtag . ']' . $hashtag . '[/url] '; + $hashtags .= '#[url=' . DI::baseUrl() . '/search?tag=' . urlencode($hashtag) . ']' . $hashtag . '[/url] '; } } diff --git a/src/Content/Text/BBCode.php b/src/Content/Text/BBCode.php index 3f5f31b138..4c2a3a6b46 100644 --- a/src/Content/Text/BBCode.php +++ b/src/Content/Text/BBCode.php @@ -41,6 +41,7 @@ use Friendica\Model\Tag; use Friendica\Network\HTTPClient\Client\HttpClientAccept; use Friendica\Network\HTTPClient\Client\HttpClientOptions; use Friendica\Util\Map; +use Friendica\Util\Network; use Friendica\Util\ParseUrl; use Friendica\Util\Proxy; use Friendica\Util\Strings; @@ -124,7 +125,7 @@ class BBCode break; case 'publisher_url': - $data['provider_url'] = html_entity_decode($value, ENT_QUOTES, 'UTF-8'); + $data['provider_url'] = Network::sanitizeUrl(html_entity_decode($value, ENT_QUOTES, 'UTF-8')); break; case 'author_name': @@ -135,7 +136,7 @@ class BBCode break; case 'author_url': - $data['author_url'] = html_entity_decode($value, ENT_QUOTES, 'UTF-8'); + $data['author_url'] = Network::sanitizeUrl(html_entity_decode($value, ENT_QUOTES, 'UTF-8')); if ($data['provider_url'] == $data['author_url']) { $data['author_url'] = ''; } @@ -434,6 +435,8 @@ class BBCode return $text; } + $data['url'] = Network::sanitizeUrl($data['url']); + if (isset($data['title'])) { $data['title'] = strip_tags($data['title']); $data['title'] = str_replace(['http://', 'https://'], '', $data['title']); @@ -485,6 +488,7 @@ class BBCode } if (!empty($data['provider_url']) && !empty($data['provider_name'])) { + $data['provider_url'] = Network::sanitizeUrl($data['provider_url']); if (!empty($data['author_name'])) { $return .= sprintf('%s (%s)', $data['provider_url'], $data['author_name'], $data['provider_name']); } else { @@ -1064,6 +1068,21 @@ class BBCode return $text; } + /** + * Callback: Sanitize links from given $match array + * + * @param array $match Array with link match + * @return string BBCode + */ + private static function sanitizeLinksCallback(array $match): string + { + if (count($match) == 3) { + return '[' . $match[1] . ']' . Network::sanitizeUrl($match[2]) . '[/' . $match[1] . ']'; + } else { + return '[' . $match[1] . '=' . Network::sanitizeUrl($match[2]) . ']' . $match[3] . '[/' . $match[1] . ']'; + } + } + /** * Callback: Expands links from given $match array * @@ -1455,7 +1474,7 @@ class BBCode // Replace non graphical smilies for external posts if (!$nosmile) { - $text = self::performWithEscapedTags($text, ['img'], function ($text) use ($simple_html, $for_plaintext) { + $text = self::performWithEscapedTags($text, ['url', 'img', 'audio', 'video', 'youtube', 'vimeo', 'share', 'attachment', 'iframe', 'bookmark'], function ($text) use ($simple_html, $for_plaintext) { return Smilies::replace($text, ($simple_html != self::INTERNAL) || $for_plaintext); }); } @@ -1717,6 +1736,9 @@ class BBCode // Simplify "video" element $text = preg_replace('(\[video[^\]]*?\ssrc\s?=\s?([^\s\]]+)[^\]]*?\].*?\[/video\])ism', '[video]$1[/video]', $text); + $text = preg_replace_callback("/\[(video)\](.*?)\[\/video\]/ism", [self::class, 'sanitizeLinksCallback'], $text); + $text = preg_replace_callback("/\[(audio)\](.*?)\[\/audio\]/ism", [self::class, 'sanitizeLinksCallback'], $text); + if ($simple_html == self::NPF) { $text = preg_replace( "/\[video\](.*?)\[\/video\]/ism", @@ -1759,6 +1781,7 @@ class BBCode } // Backward compatibility, [iframe] support has been removed in version 2020.12 + $text = preg_replace_callback("/\[(iframe)\](.*?)\[\/iframe\]/ism", [self::class, 'sanitizeLinksCallback'], $text); $text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '$1', $text); $text = self::normalizeVideoLinks($text); @@ -1811,6 +1834,9 @@ class BBCode $text = '' . $text . ''; } + $text = preg_replace_callback("/\[(url)\](.*?)\[\/url\]/ism", [self::class, 'sanitizeLinksCallback'], $text); + $text = preg_replace_callback("/\[(url)\=(.*?)\](.*?)\[\/url\]/ism", [self::class, 'sanitizeLinksCallback'], $text); + // Handle mentions and hashtag links if ($simple_html == self::DIASPORA) { // The ! is converted to @ since Diaspora only understands the @ @@ -1913,11 +1939,11 @@ class BBCode self::performWithEscapedTags($text, ['url', 'share'], function ($text) use ($simple_html) { $text = preg_replace_callback("/(?:#\[url\=[^\[\]]*\]|\[url\=[^\[\]]*\]#)(.*?)\[\/url\]/ism", function ($matches) use ($simple_html) { if ($simple_html == self::ACTIVITYPUB) { - return '#' . XML::escape($matches[1]) . ''; } else { - return '#'; } @@ -1944,6 +1970,7 @@ class BBCode $text = preg_replace('/acct:([^@]+)@((?!\-)(?:[a-zA-Z\d\-]{0,62}[a-zA-Z\d]\.){1,126}(?!\d+)[a-zA-Z\d]{1,63})/', 'acct:$1@$2', $text); // Perform MAIL Search + $text = preg_replace_callback("/\[(mail)\](.*?)\[\/mail\]/ism", [self::class, 'sanitizeLinksCallback'], $text); $text = preg_replace("/\[mail\](.*?)\[\/mail\]/", '$1', $text); $text = preg_replace("/\[mail\=(.*?)\](.*?)\[\/mail\]/", '$2', $text); @@ -2304,7 +2331,7 @@ class BBCode case '#': default: - return $match[1] . '[url=' . DI::baseUrl() . '/search?tag=' . $match[2] . ']' . $match[2] . '[/url]'; + return $match[1] . '[url=' . DI::baseUrl() . '/search?tag=' . urlencode($match[2]) . ']' . $match[2] . '[/url]'; } }, $body diff --git a/src/Model/Item.php b/src/Model/Item.php index aa3a8bc08b..8b43df849b 100644 --- a/src/Model/Item.php +++ b/src/Model/Item.php @@ -2491,7 +2491,7 @@ class Item } $basetag = str_replace('_', ' ', substr($tag, 1)); - $newtag = '#[url=' . DI::baseUrl() . '/search?tag=' . $basetag . ']' . $basetag . '[/url]'; + $newtag = '#[url=' . DI::baseUrl() . '/search?tag=' . urlencode($basetag) . ']' . $basetag . '[/url]'; $body = str_replace($tag, $newtag, $body); } diff --git a/src/Model/Post/Link.php b/src/Model/Post/Link.php index 4146efe761..be2f7fd2da 100644 --- a/src/Model/Post/Link.php +++ b/src/Model/Post/Link.php @@ -31,6 +31,7 @@ use Friendica\Util\HTTPSignature; use Friendica\Util\Images; use Friendica\Util\Proxy; use Friendica\Object\Image; +use Friendica\Util\Network; /** * Class Link @@ -77,7 +78,7 @@ class Link } else { $fields = self::fetchMimeType($url); $fields['uri-id'] = $uriId; - $fields['url'] = $url; + $fields['url'] = Network::sanitizeUrl($url); DBA::insert('post-link', $fields, Database::INSERT_IGNORE); $id = DBA::lastInsertId(); diff --git a/src/Model/Post/Media.php b/src/Model/Post/Media.php index afd6ca8383..cbbfdb97ec 100644 --- a/src/Model/Post/Media.php +++ b/src/Model/Post/Media.php @@ -96,6 +96,7 @@ class Media return false; } + $media['url'] = Network::sanitizeUrl($media['url']); $media = self::unsetEmptyFields($media); $media = DI::dbaDefinition()->truncateFieldsForTable('post-media', $media); diff --git a/src/Model/Tag.php b/src/Model/Tag.php index e63a272a75..dd9ca079e3 100644 --- a/src/Model/Tag.php +++ b/src/Model/Tag.php @@ -558,7 +558,7 @@ class Tag ); while ($tag = DBA::fetch($taglist)) { if ($tag['url'] == '') { - $tag['url'] = $searchpath . rawurlencode($tag['name']); + $tag['url'] = $searchpath . urlencode($tag['name']); } $orig_tag = $tag['url']; diff --git a/src/Module/Profile/Profile.php b/src/Module/Profile/Profile.php index aeab1eb758..03ed3e3acf 100644 --- a/src/Module/Profile/Profile.php +++ b/src/Module/Profile/Profile.php @@ -226,7 +226,7 @@ class Profile extends BaseProfile // Separator is defined in Module\Settings\Profile\Index::cleanKeywords foreach (explode(', ', $profile['pub_keywords']) as $tag_label) { $tags[] = [ - 'url' => '/search?tag=' . $tag_label, + 'url' => '/search?tag=' . urlencode($tag_label), 'label' => Tag::TAG_CHARACTER[Tag::HASHTAG] . $tag_label, ]; } diff --git a/src/Util/Network.php b/src/Util/Network.php index 415a20c763..02124a4e71 100644 --- a/src/Util/Network.php +++ b/src/Util/Network.php @@ -659,6 +659,29 @@ class Network return !empty($scheme) && in_array($scheme, ['http', 'https']) && parse_url($url, PHP_URL_HOST); } + /** + * Remove invalid parts from an URL + * + * @param string $url + * @return string sanitized URL + */ + public static function sanitizeUrl(string $url): string + { + $sanitized = $url = trim($url); + + foreach (['"', ' '] as $character) { + $pos = strpos($sanitized, $character); + if ($pos !== false) { + $sanitized = trim(substr($sanitized, 0, $pos)); + } + } + + if ($sanitized != $url) { + Logger::debug('Link got sanitized', ['url' => $url, 'sanitzed' => $sanitized]); + } + return $sanitized; + } + /** * Creates an Uri object out of a given Uri string * diff --git a/src/Util/ParseUrl.php b/src/Util/ParseUrl.php index 184a02ae75..cd9669624a 100644 --- a/src/Util/ParseUrl.php +++ b/src/Util/ParseUrl.php @@ -274,13 +274,13 @@ class ParseUrl $siteinfo['author_name'] = trim($oembed_data->author_name); } if (!empty($oembed_data->author_url)) { - $siteinfo['author_url'] = trim($oembed_data->author_url); + $siteinfo['author_url'] = Network::sanitizeUrl($oembed_data->author_url); } if (!empty($oembed_data->provider_name)) { $siteinfo['publisher_name'] = trim($oembed_data->provider_name); } if (!empty($oembed_data->provider_url)) { - $siteinfo['publisher_url'] = trim($oembed_data->provider_url); + $siteinfo['publisher_url'] = Network::sanitizeUrl($oembed_data->provider_url); } if (!empty($oembed_data->thumbnail_url)) { $siteinfo['image'] = $oembed_data->thumbnail_url; @@ -884,7 +884,7 @@ class ParseUrl $content = JsonLD::fetchElement($jsonld, 'publisher', 'url'); if (!empty($content) && is_string($content)) { - $jsonldinfo['publisher_url'] = trim($content); + $jsonldinfo['publisher_url'] = Network::sanitizeUrl($content); } $brand = JsonLD::fetchElement($jsonld, 'publisher', 'brand', '@type', 'Organization'); @@ -896,7 +896,7 @@ class ParseUrl $content = JsonLD::fetchElement($brand, 'url'); if (!empty($content) && is_string($content)) { - $jsonldinfo['publisher_url'] = trim($content); + $jsonldinfo['publisher_url'] = Network::sanitizeUrl($content); } $content = JsonLD::fetchElement($brand, 'logo', 'url'); @@ -924,12 +924,12 @@ class ParseUrl $content = JsonLD::fetchElement($jsonld, 'author', 'sameAs'); if (!empty($content) && is_string($content)) { - $jsonldinfo['author_url'] = trim($content); + $jsonldinfo['author_url'] = Network::sanitizeUrl($content); } $content = JsonLD::fetchElement($jsonld, 'author', 'url'); if (!empty($content) && is_string($content)) { - $jsonldinfo['author_url'] = trim($content); + $jsonldinfo['author_url'] = Network::sanitizeUrl($content); } $logo = JsonLD::fetchElement($jsonld, 'author', 'logo'); @@ -1084,7 +1084,7 @@ class ParseUrl $content = JsonLD::fetchElement($jsonld, 'url'); if (!empty($content) && is_string($content)) { - $jsonldinfo['publisher_url'] = trim($content); + $jsonldinfo['publisher_url'] = Network::sanitizeUrl($content); } $content = JsonLD::fetchElement($jsonld, 'thumbnailUrl'); @@ -1123,7 +1123,7 @@ class ParseUrl $content = JsonLD::fetchElement($jsonld, 'url'); if (!empty($content) && is_string($content)) { - $jsonldinfo['publisher_url'] = trim($content); + $jsonldinfo['publisher_url'] = Network::sanitizeUrl($content); } $content = JsonLD::fetchElement($jsonld, 'logo', 'url', '@type', 'ImageObject'); @@ -1140,7 +1140,7 @@ class ParseUrl $content = JsonLD::fetchElement($jsonld, 'brand', 'url', '@type', 'Organization'); if (!empty($content) && is_string($content)) { - $jsonldinfo['publisher_url'] = trim($content); + $jsonldinfo['publisher_url'] = Network::sanitizeUrl($content); } Logger::info('Fetched Organization information', ['url' => $siteinfo['url'], 'fetched' => $jsonldinfo]); @@ -1172,12 +1172,12 @@ class ParseUrl $content = JsonLD::fetchElement($jsonld, 'sameAs'); if (!empty($content) && is_string($content)) { - $jsonldinfo['author_url'] = trim($content); + $jsonldinfo['author_url'] = Network::sanitizeUrl($content); } $content = JsonLD::fetchElement($jsonld, 'url'); if (!empty($content) && is_string($content)) { - $jsonldinfo['author_url'] = trim($content); + $jsonldinfo['author_url'] = Network::sanitizeUrl($content); } $content = JsonLD::fetchElement($jsonld, 'image', 'url', '@type', 'ImageObject');