Add tag escaping to Item::setHashtags

- Add return value to Item::setHashtags
This commit is contained in:
Hypolite Petovan 2020-06-04 20:56:50 -04:00
parent 472018191b
commit 76460ddd2d
3 changed files with 61 additions and 80 deletions

View File

@ -642,7 +642,7 @@ function item_post(App $a) {
// Check for hashtags in the body and repair or add hashtag links // Check for hashtags in the body and repair or add hashtag links
if ($preview || $orig_post) { if ($preview || $orig_post) {
Item::setHashtags($datarray); $datarray['body'] = Item::setHashtags($datarray['body']);
} }
// preview mode - prepare the body for display and send it via json // preview mode - prepare the body for display and send it via json

View File

@ -1780,7 +1780,7 @@ class Item
// Check for hashtags in the body and repair or add hashtag links // Check for hashtags in the body and repair or add hashtag links
self::setHashtags($item); $item['body'] = self::setHashtags($item['body']);
// Fill the cache field // Fill the cache field
self::putInCache($item); self::putInCache($item);
@ -2424,84 +2424,69 @@ class Item
} }
} }
public static function setHashtags(&$item) public static function setHashtags($body)
{ {
$tags = BBCode::getTags($item["body"]); $body = BBCode::performWithEscapedTags($body, ['noparse', 'pre', 'code'], function ($body) {
$tags = BBCode::getTags($body);
// No hashtags? // No hashtags?
if (!count($tags)) { if (!count($tags)) {
return false; return $body;
}
// What happens in [code], stays in [code]!
// escape the # and the [
// hint: we will also get in trouble with #tags, when we want markdown in posts -> ### Headline 3
$item["body"] = preg_replace_callback("/\[code(.*?)\](.*?)\[\/code\]/ism",
function ($match) {
// we truly ESCape all # and [ to prevent getting weird tags in [code] blocks
$find = ['#', '['];
$replace = [chr(27).'sharp', chr(27).'leftsquarebracket'];
return ("[code" . $match[1] . "]" . str_replace($find, $replace, $match[2]) . "[/code]");
}, $item["body"]);
// This sorting is important when there are hashtags that are part of other hashtags
// Otherwise there could be problems with hashtags like #test and #test2
// Because of this we are sorting from the longest to the shortest tag.
usort($tags, function($a, $b) {
return strlen($b) <=> strlen($a);
});
$URLSearchString = "^\[\]";
// All hashtags should point to the home server if "local_tags" is activated
if (DI::config()->get('system', 'local_tags')) {
$item["body"] = preg_replace("/#\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism",
"#[url=".DI::baseUrl()."/search?tag=$2]$2[/url]", $item["body"]);
}
// mask hashtags inside of url, bookmarks and attachments to avoid urls in urls
$item["body"] = preg_replace_callback("/\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism",
function ($match) {
return ("[url=" . str_replace("#", "&num;", $match[1]) . "]" . str_replace("#", "&num;", $match[2]) . "[/url]");
}, $item["body"]);
$item["body"] = preg_replace_callback("/\[bookmark\=([$URLSearchString]*)\](.*?)\[\/bookmark\]/ism",
function ($match) {
return ("[bookmark=" . str_replace("#", "&num;", $match[1]) . "]" . str_replace("#", "&num;", $match[2]) . "[/bookmark]");
}, $item["body"]);
$item["body"] = preg_replace_callback("/\[attachment (.*)\](.*?)\[\/attachment\]/ism",
function ($match) {
return ("[attachment " . str_replace("#", "&num;", $match[1]) . "]" . $match[2] . "[/attachment]");
}, $item["body"]);
// Repair recursive urls
$item["body"] = preg_replace("/&num;\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism",
"&num;$2", $item["body"]);
foreach ($tags as $tag) {
if ((strpos($tag, '#') !== 0) || strpos($tag, '[url=') || strlen($tag) < 2 || $tag[1] == '#') {
continue;
} }
$basetag = str_replace('_',' ',substr($tag,1)); // This sorting is important when there are hashtags that are part of other hashtags
$newtag = '#[url=' . DI::baseUrl() . '/search?tag=' . $basetag . ']' . $basetag . '[/url]'; // Otherwise there could be problems with hashtags like #test and #test2
// Because of this we are sorting from the longest to the shortest tag.
usort($tags, function ($a, $b) {
return strlen($b) <=> strlen($a);
});
$item["body"] = str_replace($tag, $newtag, $item["body"]); $URLSearchString = "^\[\]";
}
// Convert back the masked hashtags // All hashtags should point to the home server if "local_tags" is activated
$item["body"] = str_replace("&num;", "#", $item["body"]); if (DI::config()->get('system', 'local_tags')) {
$body = preg_replace("/#\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism",
"#[url=" . DI::baseUrl() . "/search?tag=$2]$2[/url]", $body);
}
// Remember! What happens in [code], stays in [code] // mask hashtags inside of url, bookmarks and attachments to avoid urls in urls
// roll back the # and [ $body = preg_replace_callback("/\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism",
$item["body"] = preg_replace_callback("/\[code(.*?)\](.*?)\[\/code\]/ism", function ($match) {
function ($match) { return ("[url=" . str_replace("#", "&num;", $match[1]) . "]" . str_replace("#", "&num;", $match[2]) . "[/url]");
// we truly unESCape all sharp and leftsquarebracket }, $body);
$find = [chr(27).'sharp', chr(27).'leftsquarebracket'];
$replace = ['#', '[']; $body = preg_replace_callback("/\[bookmark\=([$URLSearchString]*)\](.*?)\[\/bookmark\]/ism",
return ("[code" . $match[1] . "]" . str_replace($find, $replace, $match[2]) . "[/code]"); function ($match) {
}, $item["body"]); return ("[bookmark=" . str_replace("#", "&num;", $match[1]) . "]" . str_replace("#", "&num;", $match[2]) . "[/bookmark]");
}, $body);
$body = preg_replace_callback("/\[attachment (.*)\](.*?)\[\/attachment\]/ism",
function ($match) {
return ("[attachment " . str_replace("#", "&num;", $match[1]) . "]" . $match[2] . "[/attachment]");
}, $body);
// Repair recursive urls
$body = preg_replace("/&num;\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism",
"&num;$2", $body);
foreach ($tags as $tag) {
if ((strpos($tag, '#') !== 0) || strpos($tag, '[url=') || strlen($tag) < 2 || $tag[1] == '#') {
continue;
}
$basetag = str_replace('_', ' ', substr($tag, 1));
$newtag = '#[url=' . DI::baseUrl() . '/search?tag=' . $basetag . ']' . $basetag . '[/url]';
$body = str_replace($tag, $newtag, $body);
}
// Convert back the masked hashtags
$body = str_replace("&num;", "#", $body);
return $body;
});
return $body;
} }
/** /**

View File

@ -102,14 +102,12 @@ class Babel extends BaseModule
'content' => visible_whitespace($bbcode4) 'content' => visible_whitespace($bbcode4)
]; ];
$item = ['body' => $bbcode];
$tags = Text\BBCode::getTags($bbcode); $tags = Text\BBCode::getTags($bbcode);
Item::setHashtags($item); $body = Item::setHashtags($bbcode);
$results[] = [ $results[] = [
'title' => DI::l10n()->t('Item Body'), 'title' => DI::l10n()->t('Item Body'),
'content' => visible_whitespace($item['body']) 'content' => visible_whitespace($body)
]; ];
$results[] = [ $results[] = [
'title' => DI::l10n()->t('Item Tags'), 'title' => DI::l10n()->t('Item Tags'),
@ -125,9 +123,7 @@ class Babel extends BaseModule
$markdown = XML::unescape($diaspora); $markdown = XML::unescape($diaspora);
case 'markdown': case 'markdown':
if (!isset($markdown)) { $markdown = $markdown ?? trim($_REQUEST['text']);
$markdown = trim($_REQUEST['text']);
}
$results[] = [ $results[] = [
'title' => DI::l10n()->t('Source input (Markdown)'), 'title' => DI::l10n()->t('Source input (Markdown)'),