Merge pull request #13889 from annando/issue-13884

Issue 13884: Sanitization of links in BBCode parser
This commit is contained in:
Hypolite Petovan 2024-02-12 15:28:04 -05:00 committed by GitHub
commit 59c27a6cbb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 74 additions and 22 deletions

View file

@ -170,7 +170,7 @@ class PageInfo
foreach ($data['keywords'] as $keyword) { foreach ($data['keywords'] as $keyword) {
/// @TODO make a positive list of allowed characters /// @TODO make a positive list of allowed characters
$hashtag = str_replace([' ', '+', '/', '.', '#', '@', "'", '"', '', '`', '(', ')', '„', '“'], '', $keyword); $hashtag = str_replace([' ', '+', '/', '.', '#', '@', "'", '"', '', '`', '(', ')', '„', '“'], '', $keyword);
$hashtags .= '#[url=' . DI::baseUrl() . '/search?tag=' . $hashtag . ']' . $hashtag . '[/url] '; $hashtags .= '#[url=' . DI::baseUrl() . '/search?tag=' . urlencode($hashtag) . ']' . $hashtag . '[/url] ';
} }
} }

View file

@ -41,6 +41,7 @@ use Friendica\Model\Tag;
use Friendica\Network\HTTPClient\Client\HttpClientAccept; use Friendica\Network\HTTPClient\Client\HttpClientAccept;
use Friendica\Network\HTTPClient\Client\HttpClientOptions; use Friendica\Network\HTTPClient\Client\HttpClientOptions;
use Friendica\Util\Map; use Friendica\Util\Map;
use Friendica\Util\Network;
use Friendica\Util\ParseUrl; use Friendica\Util\ParseUrl;
use Friendica\Util\Proxy; use Friendica\Util\Proxy;
use Friendica\Util\Strings; use Friendica\Util\Strings;
@ -124,7 +125,7 @@ class BBCode
break; break;
case 'publisher_url': case 'publisher_url':
$data['provider_url'] = html_entity_decode($value, ENT_QUOTES, 'UTF-8'); $data['provider_url'] = Network::sanitizeUrl(html_entity_decode($value, ENT_QUOTES, 'UTF-8'));
break; break;
case 'author_name': case 'author_name':
@ -135,7 +136,7 @@ class BBCode
break; break;
case 'author_url': case 'author_url':
$data['author_url'] = html_entity_decode($value, ENT_QUOTES, 'UTF-8'); $data['author_url'] = Network::sanitizeUrl(html_entity_decode($value, ENT_QUOTES, 'UTF-8'));
if ($data['provider_url'] == $data['author_url']) { if ($data['provider_url'] == $data['author_url']) {
$data['author_url'] = ''; $data['author_url'] = '';
} }
@ -434,6 +435,8 @@ class BBCode
return $text; return $text;
} }
$data['url'] = Network::sanitizeUrl($data['url']);
if (isset($data['title'])) { if (isset($data['title'])) {
$data['title'] = strip_tags($data['title']); $data['title'] = strip_tags($data['title']);
$data['title'] = str_replace(['http://', 'https://'], '', $data['title']); $data['title'] = str_replace(['http://', 'https://'], '', $data['title']);
@ -485,6 +488,7 @@ class BBCode
} }
if (!empty($data['provider_url']) && !empty($data['provider_name'])) { if (!empty($data['provider_url']) && !empty($data['provider_name'])) {
$data['provider_url'] = Network::sanitizeUrl($data['provider_url']);
if (!empty($data['author_name'])) { if (!empty($data['author_name'])) {
$return .= sprintf('<sup><a href="%s" target="_blank" rel="noopener noreferrer">%s (%s)</a></sup>', $data['provider_url'], $data['author_name'], $data['provider_name']); $return .= sprintf('<sup><a href="%s" target="_blank" rel="noopener noreferrer">%s (%s)</a></sup>', $data['provider_url'], $data['author_name'], $data['provider_name']);
} else { } else {
@ -1064,6 +1068,21 @@ class BBCode
return $text; return $text;
} }
/**
 * Callback: Rebuilds a matched BBCode link element with a sanitized URL
 *
 * Works on the match array of a pattern with either two capture groups
 * ("[tag]url[/tag]") or three capture groups ("[tag=url]label[/tag]").
 * Only the URL part is passed through Network::sanitizeUrl(); the tag name
 * and the label are emitted as they were matched.
 *
 * @param array $match Match array: [0] whole match, [1] tag name, [2] URL, optional [3] label
 * @return string BBCode element with the sanitized URL
 */
private static function sanitizeLinksCallback(array $match): string
{
	$tag = $match[1];
	$url = Network::sanitizeUrl($match[2]);

	// Exactly three entries: the URL is the element content, there is no separate label
	if (count($match) == 3) {
		return '[' . $tag . ']' . $url . '[/' . $tag . ']';
	}

	// Otherwise the URL is the tag attribute and $match[3] carries the label
	return '[' . $tag . '=' . $url . ']' . $match[3] . '[/' . $tag . ']';
}
/** /**
* Callback: Expands links from given $match array * Callback: Expands links from given $match array
* *
@ -1455,7 +1474,7 @@ class BBCode
// Replace non graphical smilies for external posts // Replace non graphical smilies for external posts
if (!$nosmile) { if (!$nosmile) {
$text = self::performWithEscapedTags($text, ['img'], function ($text) use ($simple_html, $for_plaintext) { $text = self::performWithEscapedTags($text, ['url', 'img', 'audio', 'video', 'youtube', 'vimeo', 'share', 'attachment', 'iframe', 'bookmark'], function ($text) use ($simple_html, $for_plaintext) {
return Smilies::replace($text, ($simple_html != self::INTERNAL) || $for_plaintext); return Smilies::replace($text, ($simple_html != self::INTERNAL) || $for_plaintext);
}); });
} }
@ -1717,6 +1736,9 @@ class BBCode
// Simplify "video" element // Simplify "video" element
$text = preg_replace('(\[video[^\]]*?\ssrc\s?=\s?([^\s\]]+)[^\]]*?\].*?\[/video\])ism', '[video]$1[/video]', $text); $text = preg_replace('(\[video[^\]]*?\ssrc\s?=\s?([^\s\]]+)[^\]]*?\].*?\[/video\])ism', '[video]$1[/video]', $text);
$text = preg_replace_callback("/\[(video)\](.*?)\[\/video\]/ism", [self::class, 'sanitizeLinksCallback'], $text);
$text = preg_replace_callback("/\[(audio)\](.*?)\[\/audio\]/ism", [self::class, 'sanitizeLinksCallback'], $text);
if ($simple_html == self::NPF) { if ($simple_html == self::NPF) {
$text = preg_replace( $text = preg_replace(
"/\[video\](.*?)\[\/video\]/ism", "/\[video\](.*?)\[\/video\]/ism",
@ -1759,6 +1781,7 @@ class BBCode
} }
// Backward compatibility, [iframe] support has been removed in version 2020.12 // Backward compatibility, [iframe] support has been removed in version 2020.12
$text = preg_replace_callback("/\[(iframe)\](.*?)\[\/iframe\]/ism", [self::class, 'sanitizeLinksCallback'], $text);
$text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '<a href="$1">$1</a>', $text); $text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '<a href="$1">$1</a>', $text);
$text = self::normalizeVideoLinks($text); $text = self::normalizeVideoLinks($text);
@ -1811,6 +1834,9 @@ class BBCode
$text = '<span style="font-size: xx-large; line-height: normal;">' . $text . '</span>'; $text = '<span style="font-size: xx-large; line-height: normal;">' . $text . '</span>';
} }
$text = preg_replace_callback("/\[(url)\](.*?)\[\/url\]/ism", [self::class, 'sanitizeLinksCallback'], $text);
$text = preg_replace_callback("/\[(url)\=(.*?)\](.*?)\[\/url\]/ism", [self::class, 'sanitizeLinksCallback'], $text);
// Handle mentions and hashtag links // Handle mentions and hashtag links
if ($simple_html == self::DIASPORA) { if ($simple_html == self::DIASPORA) {
// The ! is converted to @ since Diaspora only understands the @ // The ! is converted to @ since Diaspora only understands the @
@ -1913,11 +1939,11 @@ class BBCode
self::performWithEscapedTags($text, ['url', 'share'], function ($text) use ($simple_html) { self::performWithEscapedTags($text, ['url', 'share'], function ($text) use ($simple_html) {
$text = preg_replace_callback("/(?:#\[url\=[^\[\]]*\]|\[url\=[^\[\]]*\]#)(.*?)\[\/url\]/ism", function ($matches) use ($simple_html) { $text = preg_replace_callback("/(?:#\[url\=[^\[\]]*\]|\[url\=[^\[\]]*\]#)(.*?)\[\/url\]/ism", function ($matches) use ($simple_html) {
if ($simple_html == self::ACTIVITYPUB) { if ($simple_html == self::ACTIVITYPUB) {
return '<a href="' . DI::baseUrl() . '/search?tag=' . rawurlencode($matches[1]) return '<a href="' . DI::baseUrl() . '/search?tag=' . urlencode($matches[1])
. '" data-tag="' . XML::escape($matches[1]) . '" rel="tag ugc">#' . '" data-tag="' . XML::escape($matches[1]) . '" rel="tag ugc">#'
. XML::escape($matches[1]) . '</a>'; . XML::escape($matches[1]) . '</a>';
} else { } else {
return '#<a href="' . DI::baseUrl() . '/search?tag=' . rawurlencode($matches[1]) return '#<a href="' . DI::baseUrl() . '/search?tag=' . urlencode($matches[1])
. '" class="tag" rel="tag" title="' . XML::escape($matches[1]) . '">' . '" class="tag" rel="tag" title="' . XML::escape($matches[1]) . '">'
. XML::escape($matches[1]) . '</a>'; . XML::escape($matches[1]) . '</a>';
} }
@ -1944,6 +1970,7 @@ class BBCode
$text = preg_replace('/acct:([^@]+)@((?!\-)(?:[a-zA-Z\d\-]{0,62}[a-zA-Z\d]\.){1,126}(?!\d+)[a-zA-Z\d]{1,63})/', '<a href="' . DI::baseUrl() . '/acctlink?addr=$1@$2" target="extlink">acct:$1@$2</a>', $text); $text = preg_replace('/acct:([^@]+)@((?!\-)(?:[a-zA-Z\d\-]{0,62}[a-zA-Z\d]\.){1,126}(?!\d+)[a-zA-Z\d]{1,63})/', '<a href="' . DI::baseUrl() . '/acctlink?addr=$1@$2" target="extlink">acct:$1@$2</a>', $text);
// Perform MAIL Search // Perform MAIL Search
$text = preg_replace_callback("/\[(mail)\](.*?)\[\/mail\]/ism", [self::class, 'sanitizeLinksCallback'], $text);
$text = preg_replace("/\[mail\](.*?)\[\/mail\]/", '<a href="mailto:$1">$1</a>', $text); $text = preg_replace("/\[mail\](.*?)\[\/mail\]/", '<a href="mailto:$1">$1</a>', $text);
$text = preg_replace("/\[mail\=(.*?)\](.*?)\[\/mail\]/", '<a href="mailto:$1">$2</a>', $text); $text = preg_replace("/\[mail\=(.*?)\](.*?)\[\/mail\]/", '<a href="mailto:$1">$2</a>', $text);
@ -2304,7 +2331,7 @@ class BBCode
case '#': case '#':
default: default:
return $match[1] . '[url=' . DI::baseUrl() . '/search?tag=' . $match[2] . ']' . $match[2] . '[/url]'; return $match[1] . '[url=' . DI::baseUrl() . '/search?tag=' . urlencode($match[2]) . ']' . $match[2] . '[/url]';
} }
}, },
$body $body

View file

@ -2491,7 +2491,7 @@ class Item
} }
$basetag = str_replace('_', ' ', substr($tag, 1)); $basetag = str_replace('_', ' ', substr($tag, 1));
$newtag = '#[url=' . DI::baseUrl() . '/search?tag=' . $basetag . ']' . $basetag . '[/url]'; $newtag = '#[url=' . DI::baseUrl() . '/search?tag=' . urlencode($basetag) . ']' . $basetag . '[/url]';
$body = str_replace($tag, $newtag, $body); $body = str_replace($tag, $newtag, $body);
} }

View file

@ -31,6 +31,7 @@ use Friendica\Util\HTTPSignature;
use Friendica\Util\Images; use Friendica\Util\Images;
use Friendica\Util\Proxy; use Friendica\Util\Proxy;
use Friendica\Object\Image; use Friendica\Object\Image;
use Friendica\Util\Network;
/** /**
* Class Link * Class Link
@ -77,7 +78,7 @@ class Link
} else { } else {
$fields = self::fetchMimeType($url); $fields = self::fetchMimeType($url);
$fields['uri-id'] = $uriId; $fields['uri-id'] = $uriId;
$fields['url'] = $url; $fields['url'] = Network::sanitizeUrl($url);
DBA::insert('post-link', $fields, Database::INSERT_IGNORE); DBA::insert('post-link', $fields, Database::INSERT_IGNORE);
$id = DBA::lastInsertId(); $id = DBA::lastInsertId();

View file

@ -96,6 +96,7 @@ class Media
return false; return false;
} }
$media['url'] = Network::sanitizeUrl($media['url']);
$media = self::unsetEmptyFields($media); $media = self::unsetEmptyFields($media);
$media = DI::dbaDefinition()->truncateFieldsForTable('post-media', $media); $media = DI::dbaDefinition()->truncateFieldsForTable('post-media', $media);

View file

@ -558,7 +558,7 @@ class Tag
); );
while ($tag = DBA::fetch($taglist)) { while ($tag = DBA::fetch($taglist)) {
if ($tag['url'] == '') { if ($tag['url'] == '') {
$tag['url'] = $searchpath . rawurlencode($tag['name']); $tag['url'] = $searchpath . urlencode($tag['name']);
} }
$orig_tag = $tag['url']; $orig_tag = $tag['url'];

View file

@ -226,7 +226,7 @@ class Profile extends BaseProfile
// Separator is defined in Module\Settings\Profile\Index::cleanKeywords // Separator is defined in Module\Settings\Profile\Index::cleanKeywords
foreach (explode(', ', $profile['pub_keywords']) as $tag_label) { foreach (explode(', ', $profile['pub_keywords']) as $tag_label) {
$tags[] = [ $tags[] = [
'url' => '/search?tag=' . $tag_label, 'url' => '/search?tag=' . urlencode($tag_label),
'label' => Tag::TAG_CHARACTER[Tag::HASHTAG] . $tag_label, 'label' => Tag::TAG_CHARACTER[Tag::HASHTAG] . $tag_label,
]; ];
} }

View file

@ -659,6 +659,29 @@ class Network
return !empty($scheme) && in_array($scheme, ['http', 'https']) && parse_url($url, PHP_URL_HOST); return !empty($scheme) && in_array($scheme, ['http', 'https']) && parse_url($url, PHP_URL_HOST);
} }
/**
 * Cut an URL at the first double quote or space character
 *
 * The given value is trimmed, then everything from the first '"' or ' '
 * onwards is dropped and the remaining prefix is trimmed again. A debug log
 * entry is written whenever this shortened the (trimmed) input.
 *
 * @param string $url
 * @return string sanitized URL
 */
public static function sanitizeUrl(string $url): string
{
	$url = trim($url);

	// Length of the leading part that contains neither a double quote nor a space
	$length    = strcspn($url, '" ');
	$sanitized = trim(substr($url, 0, $length));

	if ($sanitized !== $url) {
		// NOTE(review): the context key 'sanitzed' looks like a typo of 'sanitized';
		// kept unchanged here since log consumers may rely on it - confirm before renaming.
		Logger::debug('Link got sanitized', ['url' => $url, 'sanitzed' => $sanitized]);
	}

	return $sanitized;
}
/** /**
* Creates an Uri object out of a given Uri string * Creates an Uri object out of a given Uri string
* *

View file

@ -274,13 +274,13 @@ class ParseUrl
$siteinfo['author_name'] = trim($oembed_data->author_name); $siteinfo['author_name'] = trim($oembed_data->author_name);
} }
if (!empty($oembed_data->author_url)) { if (!empty($oembed_data->author_url)) {
$siteinfo['author_url'] = trim($oembed_data->author_url); $siteinfo['author_url'] = Network::sanitizeUrl($oembed_data->author_url);
} }
if (!empty($oembed_data->provider_name)) { if (!empty($oembed_data->provider_name)) {
$siteinfo['publisher_name'] = trim($oembed_data->provider_name); $siteinfo['publisher_name'] = trim($oembed_data->provider_name);
} }
if (!empty($oembed_data->provider_url)) { if (!empty($oembed_data->provider_url)) {
$siteinfo['publisher_url'] = trim($oembed_data->provider_url); $siteinfo['publisher_url'] = Network::sanitizeUrl($oembed_data->provider_url);
} }
if (!empty($oembed_data->thumbnail_url)) { if (!empty($oembed_data->thumbnail_url)) {
$siteinfo['image'] = $oembed_data->thumbnail_url; $siteinfo['image'] = $oembed_data->thumbnail_url;
@ -884,7 +884,7 @@ class ParseUrl
$content = JsonLD::fetchElement($jsonld, 'publisher', 'url'); $content = JsonLD::fetchElement($jsonld, 'publisher', 'url');
if (!empty($content) && is_string($content)) { if (!empty($content) && is_string($content)) {
$jsonldinfo['publisher_url'] = trim($content); $jsonldinfo['publisher_url'] = Network::sanitizeUrl($content);
} }
$brand = JsonLD::fetchElement($jsonld, 'publisher', 'brand', '@type', 'Organization'); $brand = JsonLD::fetchElement($jsonld, 'publisher', 'brand', '@type', 'Organization');
@ -896,7 +896,7 @@ class ParseUrl
$content = JsonLD::fetchElement($brand, 'url'); $content = JsonLD::fetchElement($brand, 'url');
if (!empty($content) && is_string($content)) { if (!empty($content) && is_string($content)) {
$jsonldinfo['publisher_url'] = trim($content); $jsonldinfo['publisher_url'] = Network::sanitizeUrl($content);
} }
$content = JsonLD::fetchElement($brand, 'logo', 'url'); $content = JsonLD::fetchElement($brand, 'logo', 'url');
@ -924,12 +924,12 @@ class ParseUrl
$content = JsonLD::fetchElement($jsonld, 'author', 'sameAs'); $content = JsonLD::fetchElement($jsonld, 'author', 'sameAs');
if (!empty($content) && is_string($content)) { if (!empty($content) && is_string($content)) {
$jsonldinfo['author_url'] = trim($content); $jsonldinfo['author_url'] = Network::sanitizeUrl($content);
} }
$content = JsonLD::fetchElement($jsonld, 'author', 'url'); $content = JsonLD::fetchElement($jsonld, 'author', 'url');
if (!empty($content) && is_string($content)) { if (!empty($content) && is_string($content)) {
$jsonldinfo['author_url'] = trim($content); $jsonldinfo['author_url'] = Network::sanitizeUrl($content);
} }
$logo = JsonLD::fetchElement($jsonld, 'author', 'logo'); $logo = JsonLD::fetchElement($jsonld, 'author', 'logo');
@ -1084,7 +1084,7 @@ class ParseUrl
$content = JsonLD::fetchElement($jsonld, 'url'); $content = JsonLD::fetchElement($jsonld, 'url');
if (!empty($content) && is_string($content)) { if (!empty($content) && is_string($content)) {
$jsonldinfo['publisher_url'] = trim($content); $jsonldinfo['publisher_url'] = Network::sanitizeUrl($content);
} }
$content = JsonLD::fetchElement($jsonld, 'thumbnailUrl'); $content = JsonLD::fetchElement($jsonld, 'thumbnailUrl');
@ -1123,7 +1123,7 @@ class ParseUrl
$content = JsonLD::fetchElement($jsonld, 'url'); $content = JsonLD::fetchElement($jsonld, 'url');
if (!empty($content) && is_string($content)) { if (!empty($content) && is_string($content)) {
$jsonldinfo['publisher_url'] = trim($content); $jsonldinfo['publisher_url'] = Network::sanitizeUrl($content);
} }
$content = JsonLD::fetchElement($jsonld, 'logo', 'url', '@type', 'ImageObject'); $content = JsonLD::fetchElement($jsonld, 'logo', 'url', '@type', 'ImageObject');
@ -1140,7 +1140,7 @@ class ParseUrl
$content = JsonLD::fetchElement($jsonld, 'brand', 'url', '@type', 'Organization'); $content = JsonLD::fetchElement($jsonld, 'brand', 'url', '@type', 'Organization');
if (!empty($content) && is_string($content)) { if (!empty($content) && is_string($content)) {
$jsonldinfo['publisher_url'] = trim($content); $jsonldinfo['publisher_url'] = Network::sanitizeUrl($content);
} }
Logger::info('Fetched Organization information', ['url' => $siteinfo['url'], 'fetched' => $jsonldinfo]); Logger::info('Fetched Organization information', ['url' => $siteinfo['url'], 'fetched' => $jsonldinfo]);
@ -1172,12 +1172,12 @@ class ParseUrl
$content = JsonLD::fetchElement($jsonld, 'sameAs'); $content = JsonLD::fetchElement($jsonld, 'sameAs');
if (!empty($content) && is_string($content)) { if (!empty($content) && is_string($content)) {
$jsonldinfo['author_url'] = trim($content); $jsonldinfo['author_url'] = Network::sanitizeUrl($content);
} }
$content = JsonLD::fetchElement($jsonld, 'url'); $content = JsonLD::fetchElement($jsonld, 'url');
if (!empty($content) && is_string($content)) { if (!empty($content) && is_string($content)) {
$jsonldinfo['author_url'] = trim($content); $jsonldinfo['author_url'] = Network::sanitizeUrl($content);
} }
$content = JsonLD::fetchElement($jsonld, 'image', 'url', '@type', 'ImageObject'); $content = JsonLD::fetchElement($jsonld, 'image', 'url', '@type', 'ImageObject');