1
1
Fork 0

Merge pull request #9621 from MrPetovan/bug/9611-bbcode-convert-html-purify

Add HTML Purify to BBCode::convert
This commit is contained in:
Michael Vogel 2020-12-05 19:27:59 +01:00 committed by GitHub
commit f9994548c1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 181 additions and 131 deletions

View file

@ -34,6 +34,7 @@
"league/html-to-markdown": "^4.8", "league/html-to-markdown": "^4.8",
"level-2/dice": "^4", "level-2/dice": "^4",
"lightopenid/lightopenid": "dev-master", "lightopenid/lightopenid": "dev-master",
"matriphe/iso-639": "^1.2",
"michelf/php-markdown": "^1.7", "michelf/php-markdown": "^1.7",
"mobiledetect/mobiledetectlib": "^2.8", "mobiledetect/mobiledetectlib": "^2.8",
"monolog/monolog": "^1.25", "monolog/monolog": "^1.25",
@ -47,6 +48,7 @@
"psr/container": "^1.0", "psr/container": "^1.0",
"seld/cli-prompt": "^1.0", "seld/cli-prompt": "^1.0",
"smarty/smarty": "^3.1", "smarty/smarty": "^3.1",
"xemlock/htmlpurifier-html5": "^0.1.11",
"fxp/composer-asset-plugin": "^1.4", "fxp/composer-asset-plugin": "^1.4",
"bower-asset/base64": "^1.0", "bower-asset/base64": "^1.0",
"bower-asset/chart-js": "^2.8", "bower-asset/chart-js": "^2.8",
@ -64,8 +66,7 @@
"npm-asset/moment": "^2.24", "npm-asset/moment": "^2.24",
"npm-asset/perfect-scrollbar": "0.6.16", "npm-asset/perfect-scrollbar": "0.6.16",
"npm-asset/textcomplete": "^0.18.2", "npm-asset/textcomplete": "^0.18.2",
"npm-asset/typeahead.js": "^0.11.1", "npm-asset/typeahead.js": "^0.11.1"
"matriphe/iso-639": "^1.2"
}, },
"repositories": [ "repositories": [
{ {

53
composer.lock generated
View file

@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically" "This file is @generated automatically"
], ],
"content-hash": "fd22bd8c29dcea3d6b6eeb117d79af52", "content-hash": "7d8031c9b95fd94d8872804759a26509",
"packages": [ "packages": [
{ {
"name": "asika/simple-console", "name": "asika/simple-console",
@ -3431,6 +3431,57 @@
"shim" "shim"
], ],
"time": "2020-05-12T16:14:59+00:00" "time": "2020-05-12T16:14:59+00:00"
},
{
"name": "xemlock/htmlpurifier-html5",
"version": "v0.1.11",
"source": {
"type": "git",
"url": "https://github.com/xemlock/htmlpurifier-html5.git",
"reference": "f0d563f9fd4a82a3d759043483f9a94c0d8c2255"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/xemlock/htmlpurifier-html5/zipball/f0d563f9fd4a82a3d759043483f9a94c0d8c2255",
"reference": "f0d563f9fd4a82a3d759043483f9a94c0d8c2255",
"shasum": ""
},
"require": {
"ezyang/htmlpurifier": "^4.8",
"php": ">=5.2"
},
"require-dev": {
"php-coveralls/php-coveralls": "^1.1|^2.1",
"phpunit/phpunit": ">=4.7 <8.0"
},
"type": "library",
"autoload": {
"classmap": [
"library/HTMLPurifier/"
]
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "xemlock",
"email": "xemlock@gmail.com"
}
],
"description": "HTML5 element definitions for HTML Purifier",
"keywords": [
"HTML5",
"Purifier",
"html",
"htmlpurifier",
"security",
"tidy",
"validator",
"xss"
],
"time": "2019-08-07T17:19:21+00:00"
} }
], ],
"packages-dev": [ "packages-dev": [

View file

@ -265,7 +265,7 @@ class PageInfo
} }
if (!$matches && $searchNakedUrls) { if (!$matches && $searchNakedUrls) {
preg_match('~(?<=\W|^)(?<![=\]])(https?://.+)$~is', $body, $matches); preg_match(Strings::autoLinkRegEx(), $body, $matches);
if ($matches && !Strings::endsWith($body, $matches[1])) { if ($matches && !Strings::endsWith($body, $matches[1])) {
unset($matches); unset($matches);
} }

View file

@ -49,6 +49,9 @@ use Friendica\Util\XML;
class BBCode class BBCode
{ {
// Update this value to the current date whenever changes are made to BBCode::convert
const VERSION = '2020-12-03';
const INTERNAL = 0; const INTERNAL = 0;
const API = 2; const API = 2;
const DIASPORA = 3; const DIASPORA = 3;
@ -671,9 +674,9 @@ class BBCode
$return .= sprintf('<a href="%s" target="_blank" rel="noopener noreferrer"><img src="%s" alt="" title="%s" class="attachment-image" /></a>', $data['url'], self::proxyUrl($data['image'], $simplehtml), $data['title']); $return .= sprintf('<a href="%s" target="_blank" rel="noopener noreferrer"><img src="%s" alt="" title="%s" class="attachment-image" /></a>', $data['url'], self::proxyUrl($data['image'], $simplehtml), $data['title']);
} else { } else {
if (!empty($data['image'])) { if (!empty($data['image'])) {
$return .= sprintf('<a href="%s" target="_blank" rel="noopener noreferrer"><img src="%s" alt="" title="%s" class="attachment-image" /></a><br />', $data['url'], self::proxyUrl($data['image'], $simplehtml), $data['title']); $return .= sprintf('<a href="%s" target="_blank" rel="noopener noreferrer"><img src="%s" alt="" title="%s" class="attachment-image" /></a><br>', $data['url'], self::proxyUrl($data['image'], $simplehtml), $data['title']);
} elseif (!empty($data['preview'])) { } elseif (!empty($data['preview'])) {
$return .= sprintf('<a href="%s" target="_blank" rel="noopener noreferrer"><img src="%s" alt="" title="%s" class="attachment-preview" /></a><br />', $data['url'], self::proxyUrl($data['preview'], $simplehtml), $data['title']); $return .= sprintf('<a href="%s" target="_blank" rel="noopener noreferrer"><img src="%s" alt="" title="%s" class="attachment-preview" /></a><br>', $data['url'], self::proxyUrl($data['preview'], $simplehtml), $data['title']);
} }
$return .= sprintf('<h4><a href="%s">%s</a></h4>', $data['url'], $data['title']); $return .= sprintf('<h4><a href="%s">%s</a></h4>', $data['url'], $data['title']);
} }
@ -1033,7 +1036,7 @@ class BBCode
switch ($simplehtml) { switch ($simplehtml) {
case self::API: case self::API:
$text = ($is_quote_share? '<br />' : '') . '<p>' . html_entity_decode('&#x2672; ', ENT_QUOTES, 'UTF-8') . ' ' . $author_contact['addr'] . ': </p>' . "\n" . $content; $text = ($is_quote_share? '<br>' : '') . '<p>' . html_entity_decode('&#x2672; ', ENT_QUOTES, 'UTF-8') . ' ' . $author_contact['addr'] . ': </p>' . "\n" . $content;
break; break;
case self::DIASPORA: case self::DIASPORA:
if (stripos(Strings::normaliseLink($attributes['link']), 'http://twitter.com/') === 0) { if (stripos(Strings::normaliseLink($attributes['link']), 'http://twitter.com/') === 0) {
@ -1062,7 +1065,7 @@ class BBCode
break; break;
case self::OSTATUS: case self::OSTATUS:
$text = ($is_quote_share? '<br />' : '') . '<p>' . html_entity_decode('&#x2672; ', ENT_QUOTES, 'UTF-8') . ' @' . $author_contact['addr'] . ': ' . $content . '</p>' . "\n"; $text = ($is_quote_share? '<br>' : '') . '<p>' . html_entity_decode('&#x2672; ', ENT_QUOTES, 'UTF-8') . ' @' . $author_contact['addr'] . ': ' . $content . '</p>' . "\n";
break; break;
case self::ACTIVITYPUB: case self::ACTIVITYPUB:
$author = '@<span class="vcard"><a href="' . $author_contact['url'] . '" class="url u-url mention" title="' . $author_contact['addr'] . '"><span class="fn nickname mention">' . $author_contact['addr'] . '</span></a>:</span>'; $author = '@<span class="vcard"><a href="' . $author_contact['url'] . '" class="url u-url mention" title="' . $author_contact['addr'] . '"><span class="fn nickname mention">' . $author_contact['addr'] . '</span></a>:</span>';
@ -1275,6 +1278,8 @@ class BBCode
return ''; return '';
} }
Hook::callAll('bbcode', $text);
$a = DI::app(); $a = DI::app();
$text = self::performWithEscapedTags($text, ['code'], function ($text) use ($try_oembed, $simple_html, $for_plaintext, $a) { $text = self::performWithEscapedTags($text, ['code'], function ($text) use ($try_oembed, $simple_html, $for_plaintext, $a) {
@ -1300,11 +1305,12 @@ class BBCode
return $return; return $return;
}; };
// Remove the abstract element. It is a non visible element. // Remove the abstract element. It is a non visible element.
$text = self::stripAbstract($text); $text = self::stripAbstract($text);
// Line ending normalisation
$text = str_replace("\r\n", "\n", $text);
// Move new lines outside of tags // Move new lines outside of tags
$text = preg_replace("#\[(\w*)](\n*)#ism", '$2[$1]', $text); $text = preg_replace("#\[(\w*)](\n*)#ism", '$2[$1]', $text);
$text = preg_replace("#(\n*)\[/(\w*)]#ism", '[/$2]$1', $text); $text = preg_replace("#(\n*)\[/(\w*)]#ism", '[/$2]$1', $text);
@ -1338,16 +1344,6 @@ class BBCode
$text = preg_replace("/\[share(.*?)avatar\s?=\s?'.*?'\s?(.*?)\]\s?(.*?)\s?\[\/share\]\s?/ism", "\n[share$1$2]$3[/share]", $text); $text = preg_replace("/\[share(.*?)avatar\s?=\s?'.*?'\s?(.*?)\]\s?(.*?)\s?\[\/share\]\s?/ism", "\n[share$1$2]$3[/share]", $text);
} }
// Convert new line chars to html <br /> tags
// nlbr seems to be hopelessly messed up
// $Text = nl2br($Text);
// We'll emulate it.
$text = trim($text);
$text = str_replace("\r\n", "\n", $text);
// Remove linefeeds inside of the table elements. See issue #6799 // Remove linefeeds inside of the table elements. See issue #6799
$search = ["\n[th]", "[th]\n", " [th]", "\n[/th]", "[/th]\n", "[/th] ", $search = ["\n[th]", "[th]\n", " [th]", "\n[/th]", "[/th]\n", "[/th] ",
"\n[td]", "[td]\n", " [td]", "\n[/td]", "[/td]\n", "[/td] ", "\n[td]", "[td]\n", " [td]", "\n[/td]", "[/td]\n", "[/td] ",
@ -1367,11 +1363,14 @@ class BBCode
$replace = ["[table]", "[/table]"]; $replace = ["[table]", "[/table]"];
$text = str_replace($search, $replace, $text); $text = str_replace($search, $replace, $text);
// Trim new lines regardless of the system.remove_multiplicated_lines config value
$text = trim($text, "\n");
// removing multiplicated newlines // removing multiplicated newlines
if (DI::config()->get('system', 'remove_multiplicated_lines')) { if (DI::config()->get('system', 'remove_multiplicated_lines')) {
$search = ["\n\n\n", "\n ", " \n", "[/quote]\n\n", "\n[/quote]", "[/li]\n", "\n[li]", "\n[ul]", "[/ul]\n", "\n\n[share ", "[/attachment]\n", $search = ["\n\n\n", "\n ", " \n", "[/quote]\n\n", "\n[/quote]", "[/li]\n", "\n[li]", "\n[*]", "\n[ul]", "[/ul]\n", "\n\n[share ", "[/attachment]\n",
"\n[h1]", "[/h1]\n", "\n[h2]", "[/h2]\n", "\n[h3]", "[/h3]\n", "\n[h4]", "[/h4]\n", "\n[h5]", "[/h5]\n", "\n[h6]", "[/h6]\n"]; "\n[h1]", "[/h1]\n", "\n[h2]", "[/h2]\n", "\n[h3]", "[/h3]\n", "\n[h4]", "[/h4]\n", "\n[h5]", "[/h5]\n", "\n[h6]", "[/h6]\n"];
$replace = ["\n\n", "\n", "\n", "[/quote]\n", "[/quote]", "[/li]", "[li]", "[ul]", "[/ul]", "\n[share ", "[/attachment]", $replace = ["\n\n", "\n", "\n", "[/quote]\n", "[/quote]", "[/li]", "[li]", "[*]", "[ul]", "[/ul]", "\n[share ", "[/attachment]",
"[h1]", "[/h1]", "[h2]", "[/h2]", "[h3]", "[/h3]", "[h4]", "[/h4]", "[h5]", "[/h5]", "[h6]", "[/h6]"]; "[h1]", "[/h1]", "[h2]", "[/h2]", "[h3]", "[/h3]", "[h4]", "[/h4]", "[h5]", "[/h5]", "[h6]", "[/h6]"];
do { do {
$oldtext = $text; $oldtext = $text;
@ -1447,8 +1446,8 @@ class BBCode
// Check for sized text // Check for sized text
// [size=50] --> font-size: 50px (with the unit). // [size=50] --> font-size: 50px (with the unit).
if ($simple_html != self::DIASPORA) { if ($simple_html != self::DIASPORA) {
$text = preg_replace("(\[size=(\d*?)\](.*?)\[\/size\])ism", "<span style=\"font-size: $1px; line-height: initial;\">$2</span>", $text); $text = preg_replace("(\[size=(\d*?)\](.*?)\[\/size\])ism", '<span style="font-size:$1px;line-height:normal;">$2</span>', $text);
$text = preg_replace("(\[size=(.*?)\](.*?)\[\/size\])ism", "<span style=\"font-size: $1; line-height: initial;\">$2</span>", $text); $text = preg_replace("(\[size=(.*?)\](.*?)\[\/size\])ism", '<span style="font-size:$1;line-height:normal;">$2</span>', $text);
} else { } else {
// Issue 2199: Diaspora doesn't interpret the construct above, nor the <small> or <big> element // Issue 2199: Diaspora doesn't interpret the construct above, nor the <small> or <big> element
$text = preg_replace("(\[size=(.*?)\](.*?)\[\/size\])ism", "$2", $text); $text = preg_replace("(\[size=(.*?)\](.*?)\[\/size\])ism", "$2", $text);
@ -1456,28 +1455,16 @@ class BBCode
// Check for centered text // Check for centered text
$text = preg_replace("(\[center\](.*?)\[\/center\])ism", "<div style=\"text-align:center;\">$1</div>", $text); $text = preg_replace("(\[center\](.*?)\[\/center\])ism", '<div style="text-align:center;">$1</div>', $text);
// Check for list text // Check for list text
$text = str_replace("[*]", "<li>", $text); $text = str_replace("[*]", "<li>", $text);
// Check for style sheet commands // Check for style sheet commands
$text = preg_replace_callback( $text = preg_replace("(\[style=(.*?)\](.*?)\[\/style\])ism", '<span style="$1">$2</span>', $text);
"(\[style=(.*?)\](.*?)\[\/style\])ism",
function ($match) {
return "<span style=\"" . HTML::sanitizeCSS($match[1]) . ";\">" . $match[2] . "</span>";
},
$text
);
// Check for CSS classes // Check for CSS classes
$text = preg_replace_callback( $text = preg_replace("(\[class=(.*?)\](.*?)\[\/class\])ism", '<span style="$1">$2</span>', $text);
"(\[class=(.*?)\](.*?)\[\/class\])ism",
function ($match) {
return "<span class=\"" . HTML::sanitizeCSS($match[1]) . "\">" . $match[2] . "</span>";
},
$text
);
// handle nested lists // handle nested lists
$endlessloop = 0; $endlessloop = 0;
@ -1608,20 +1595,20 @@ class BBCode
$text = preg_replace("/\[img\](.*?)\[\/img\]/ism", '<img src="$1" alt="' . DI::l10n()->t('Image/photo') . '" />', $text); $text = preg_replace("/\[img\](.*?)\[\/img\]/ism", '<img src="$1" alt="' . DI::l10n()->t('Image/photo') . '" />', $text);
$text = preg_replace("/\[zmg\](.*?)\[\/zmg\]/ism", '<img src="$1" alt="' . DI::l10n()->t('Image/photo') . '" />', $text); $text = preg_replace("/\[zmg\](.*?)\[\/zmg\]/ism", '<img src="$1" alt="' . DI::l10n()->t('Image/photo') . '" />', $text);
$text = preg_replace("/\[crypt\](.*?)\[\/crypt\]/ism", '<br/><img src="' .DI::baseUrl() . '/images/lock_icon.gif" alt="' . DI::l10n()->t('Encrypted content') . '" title="' . DI::l10n()->t('Encrypted content') . '" /><br />', $text); $text = preg_replace("/\[crypt\](.*?)\[\/crypt\]/ism", '<br><img src="' .DI::baseUrl() . '/images/lock_icon.gif" alt="' . DI::l10n()->t('Encrypted content') . '" title="' . DI::l10n()->t('Encrypted content') . '" /><br>', $text);
$text = preg_replace("/\[crypt(.*?)\](.*?)\[\/crypt\]/ism", '<br/><img src="' .DI::baseUrl() . '/images/lock_icon.gif" alt="' . DI::l10n()->t('Encrypted content') . '" title="' . '$1' . ' ' . DI::l10n()->t('Encrypted content') . '" /><br />', $text); $text = preg_replace("/\[crypt(.*?)\](.*?)\[\/crypt\]/ism", '<br><img src="' .DI::baseUrl() . '/images/lock_icon.gif" alt="' . DI::l10n()->t('Encrypted content') . '" title="' . '$1' . ' ' . DI::l10n()->t('Encrypted content') . '" /><br>', $text);
//$Text = preg_replace("/\[crypt=(.*?)\](.*?)\[\/crypt\]/ism", '<br/><img src="' .DI::baseUrl() . '/images/lock_icon.gif" alt="' . DI::l10n()->t('Encrypted content') . '" title="' . '$1' . ' ' . DI::l10n()->t('Encrypted content') . '" /><br />', $Text); //$Text = preg_replace("/\[crypt=(.*?)\](.*?)\[\/crypt\]/ism", '<br><img src="' .DI::baseUrl() . '/images/lock_icon.gif" alt="' . DI::l10n()->t('Encrypted content') . '" title="' . '$1' . ' ' . DI::l10n()->t('Encrypted content') . '" /><br>', $Text);
// Simplify "video" element // Simplify "video" element
$text = preg_replace('(\[video.*?\ssrc\s?=\s?([^\s\]]+).*?\].*?\[/video\])ism', '[video]$1[/video]', $text); $text = preg_replace('(\[video.*?\ssrc\s?=\s?([^\s\]]+).*?\].*?\[/video\])ism', '[video]$1[/video]', $text);
// Try to Oembed
if ($try_oembed) { if ($try_oembed) {
// html5 video and audio
$text = preg_replace("/\[video\](.*?\.(ogg|ogv|oga|ogm|webm|mp4).*?)\[\/video\]/ism", $text = preg_replace("/\[video\](.*?\.(ogg|ogv|oga|ogm|webm|mp4).*?)\[\/video\]/ism",
'<video src="$1" controls="controls" width="' . $a->videowidth . '" height="' . $a->videoheight . '" loop="true"><a href="$1">$1</a></video>', $text); '<video src="$1" controls width="' . $a->videowidth . '" height="' . $a->videoheight . '" loop="true"><a href="$1">$1</a></video>', $text);
$text = preg_replace("/\[video\](.*?)\[\/video\]/ism", $text = preg_replace("/\[video\](.*?)\[\/video\]/ism",
'<a href="$1" target="_blank" rel="noopener noreferrer">$1</a>', $text); '<a href="$1" target="_blank" rel="noopener noreferrer">$1</a>', $text);
$text = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", '<audio src="$1" controls="controls"><a href="$1">$1</a></audio>', $text); $text = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", '<audio src="$1" controls><a href="$1">$1</a></audio>', $text);
$text = preg_replace_callback("/\[video\](.*?)\[\/video\]/ism", $try_oembed_callback, $text); $text = preg_replace_callback("/\[video\](.*?)\[\/video\]/ism", $try_oembed_callback, $text);
$text = preg_replace_callback("/\[audio\](.*?)\[\/audio\]/ism", $try_oembed_callback, $text); $text = preg_replace_callback("/\[audio\](.*?)\[\/audio\]/ism", $try_oembed_callback, $text);
@ -1632,9 +1619,6 @@ class BBCode
'<a href="$1" target="_blank" rel="noopener noreferrer">$1</a>', $text); '<a href="$1" target="_blank" rel="noopener noreferrer">$1</a>', $text);
} }
// html5 video and audio
if ($try_oembed) { if ($try_oembed) {
$text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '<iframe src="$1" width="' . $a->videowidth . '" height="' . $a->videoheight . '"><a href="$1">$1</a></iframe>', $text); $text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '<iframe src="$1" width="' . $a->videowidth . '" height="' . $a->videoheight . '"><a href="$1">$1</a></iframe>', $text);
} else { } else {
@ -1678,7 +1662,7 @@ class BBCode
$text = OEmbed::BBCode2HTML($text); $text = OEmbed::BBCode2HTML($text);
// Avoid triple linefeeds through oembed // Avoid triple linefeeds through oembed
$text = str_replace("<br style='clear:left'></span><br /><br />", "<br style='clear:left'></span><br />", $text); $text = str_replace("<br style='clear:left'></span><br><br>", "<br style='clear:left'></span><br>", $text);
// If we found an event earlier, strip out all the event code and replace with a reformatted version. // If we found an event earlier, strip out all the event code and replace with a reformatted version.
// Replace the event-start section with the entire formatted event. The other bbcode is stripped. // Replace the event-start section with the entire formatted event. The other bbcode is stripped.
@ -1708,7 +1692,7 @@ class BBCode
$conv = html_entity_decode(str_replace([' ', "\n", "\r"], '', $text)); $conv = html_entity_decode(str_replace([' ', "\n", "\r"], '', $text));
// Emojis are always 4 byte Unicode characters // Emojis are always 4 byte Unicode characters
if (!empty($conv) && (strlen($conv) / mb_strlen($conv) == 4)) { if (!empty($conv) && (strlen($conv) / mb_strlen($conv) == 4)) {
$text = '<span style="font-size: xx-large; line-height: initial;">' . $text . '</span>'; $text = '<span style="font-size: xx-large; line-height: normal;">' . $text . '</span>';
} }
} }
@ -1722,8 +1706,6 @@ class BBCode
$text = preg_replace_callback("&\[url=([^\[\]]*)\]\[img\](.*)\[\/img\]\[\/url\]&Usi", 'self::removePictureLinksCallback', $text); $text = preg_replace_callback("&\[url=([^\[\]]*)\]\[img\](.*)\[\/img\]\[\/url\]&Usi", 'self::removePictureLinksCallback', $text);
} }
$text = str_replace(["\r","\n"], ['<br />', '<br />'], $text);
// Remove all hashtag addresses // Remove all hashtag addresses
if ($simple_html && !in_array($simple_html, [self::DIASPORA, self::OSTATUS, self::ACTIVITYPUB])) { if ($simple_html && !in_array($simple_html, [self::DIASPORA, self::OSTATUS, self::ACTIVITYPUB])) {
$text = preg_replace("/([#@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", '$1$3', $text); $text = preg_replace("/([#@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", '$1$3', $text);
@ -1872,18 +1854,21 @@ class BBCode
// Additionally, [pre] tags preserve spaces // Additionally, [pre] tags preserve spaces
$text = preg_replace_callback("/\[pre\](.*?)\[\/pre\]/ism", function ($match) { $text = preg_replace_callback("/\[pre\](.*?)\[\/pre\]/ism", function ($match) {
return str_replace(' ', '&nbsp;', $match[1]); return str_replace(' ', '&nbsp;', htmlentities($match[1], ENT_NOQUOTES,'UTF-8'));
}, $text); }, $text);
// Add HTML new lines
$text = str_replace("\n", '<br>', $text);
return $text; return $text;
}); // Escaped code }); // Escaped code
$text = preg_replace_callback("#\[code(?:=([^\]]*))?\](.*?)\[\/code\]#ism", $text = preg_replace_callback("#\[code(?:=([^\]]*))?\](.*?)\[\/code\]#ism",
function ($matches) { function ($matches) {
if (strpos($matches[2], "\n") !== false) { if (strpos($matches[2], "\n") !== false) {
$return = '<pre><code class="language-' . trim($matches[1]) . '">' . htmlspecialchars(trim($matches[2], "\n\r"), ENT_NOQUOTES, 'UTF-8') . '</code></pre>'; $return = '<pre><code class="language-' . trim($matches[1]) . '">' . htmlentities(trim($matches[2], "\n\r"), ENT_NOQUOTES, 'UTF-8') . '</code></pre>';
} else { } else {
$return = '<code>' . htmlspecialchars($matches[2], ENT_NOQUOTES, 'UTF-8') . '</code>'; $return = '<code>' . htmlentities($matches[2], ENT_NOQUOTES, 'UTF-8') . '</code>';
} }
return $return; return $return;
@ -1891,37 +1876,20 @@ class BBCode
$text $text
); );
// Clean up the HTML by loading and saving the HTML with the DOM. $config = \HTMLPurifier_HTML5Config::createDefault();
// Bad structured html can break a whole page. $config->set('HTML.Doctype', 'HTML5');
// For performance reasons do it only with activated item cache or at export. $config->set('Attr.AllowedRel', [
if (!$try_oembed || (get_itemcachepath() != '')) { 'noreferrer' => true,
$doc = new DOMDocument(); 'noopener' => true,
$doc->preserveWhiteSpace = false; ]);
$config->set('Attr.AllowedFrameTargets', [
'_blank' => true,
]);
$text = mb_convert_encoding($text, 'HTML-ENTITIES', "UTF-8"); $HTMLPurifier = new \HTMLPurifier($config);
$text = $HTMLPurifier->purify($text);
$doctype = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">'; return $text;
$encoding = '<?xml encoding="UTF-8">';
@$doc->loadHTML($encoding . $doctype . '<html><body>' . $text . '</body></html>');
$doc->encoding = 'UTF-8';
$text = $doc->saveHTML();
$text = str_replace(['<html><body>', '</body></html>', $doctype, $encoding], ['', '', '', ''], $text);
$text = str_replace('<br></li>', '</li>', $text);
//$Text = mb_convert_encoding($Text, "UTF-8", 'HTML-ENTITIES');
}
// Clean up some useless linebreaks in lists
//$Text = str_replace('<br /><ul', '<ul ', $Text);
//$Text = str_replace('</ul><br />', '</ul>', $Text);
//$Text = str_replace('</li><br />', '</li>', $Text);
//$Text = str_replace('<br /><li>', '<li>', $Text);
//$Text = str_replace('<br /><ul', '<ul ', $Text);
Hook::callAll('bbcode', $text);
return trim($text);
} }
/** /**

View file

@ -36,27 +36,6 @@ use League\HTMLToMarkdown\HtmlConverter;
class HTML class HTML
{ {
/**
 * Lower-cases the input and drops every byte that is not a lowercase ASCII
 * letter or one of the characters " #;:0123456789-_.%".
 *
 * @param string $input Raw text to filter (presumably a CSS fragment, going by the method name)
 * @return string The lower-cased text with all other bytes removed
 */
public static function sanitizeCSS($input)
{
	// Byte-wise whitelist (no "u" modifier): multi-byte characters are removed entirely
	return preg_replace('/[^a-z #;:0-9_.%-]/', '', strtolower($input));
}
/** /**
* Search all instances of a specific HTML tag node in the provided DOM document and replaces them with BBCode text nodes. * Search all instances of a specific HTML tag node in the provided DOM document and replaces them with BBCode text nodes.
* *

View file

@ -3529,20 +3529,21 @@ class Item
*/ */
public static function putInCache(&$item, $update = false) public static function putInCache(&$item, $update = false)
{ {
$body = $item["body"]; // Save original body to prevent addons from modifying it
$body = $item['body'];
$rendered_hash = $item['rendered-hash'] ?? ''; $rendered_hash = $item['rendered-hash'] ?? '';
$rendered_html = $item['rendered-html'] ?? ''; $rendered_html = $item['rendered-html'] ?? '';
if ($rendered_hash == '' if ($rendered_hash == ''
|| $rendered_html == "" || $rendered_html == ''
|| $rendered_hash != hash("md5", $item["body"]) || $rendered_hash != hash('md5', BBCode::VERSION . '::' . $body)
|| DI::config()->get("system", "ignore_cache") || DI::config()->get('system', 'ignore_cache')
) { ) {
self::addRedirToImageTags($item); self::addRedirToImageTags($item);
$item["rendered-html"] = BBCode::convert($item["body"]); $item['rendered-html'] = BBCode::convert($item['body']);
$item["rendered-hash"] = hash("md5", $item["body"]); $item['rendered-hash'] = hash('md5', BBCode::VERSION . '::' . $body);
$hook_data = ['item' => $item, 'rendered-html' => $item['rendered-html'], 'rendered-hash' => $item['rendered-hash']]; $hook_data = ['item' => $item, 'rendered-html' => $item['rendered-html'], 'rendered-hash' => $item['rendered-hash']];
Hook::callAll('put_item_in_cache', $hook_data); Hook::callAll('put_item_in_cache', $hook_data);
@ -3551,27 +3552,27 @@ class Item
unset($hook_data); unset($hook_data);
// Force an update if the generated values differ from the existing ones // Force an update if the generated values differ from the existing ones
if ($rendered_hash != $item["rendered-hash"]) { if ($rendered_hash != $item['rendered-hash']) {
$update = true; $update = true;
} }
// Only compare the HTML when we forcefully ignore the cache // Only compare the HTML when we forcefully ignore the cache
if (DI::config()->get("system", "ignore_cache") && ($rendered_html != $item["rendered-html"])) { if (DI::config()->get('system', 'ignore_cache') && ($rendered_html != $item['rendered-html'])) {
$update = true; $update = true;
} }
if ($update && !empty($item["id"])) { if ($update && !empty($item['id'])) {
self::update( self::update(
[ [
'rendered-html' => $item["rendered-html"], 'rendered-html' => $item['rendered-html'],
'rendered-hash' => $item["rendered-hash"] 'rendered-hash' => $item['rendered-hash']
], ],
['id' => $item["id"]] ['id' => $item['id']]
); );
} }
} }
$item["body"] = $body; $item['body'] = $body;
} }
/** /**

View file

@ -49,7 +49,7 @@ class Babel extends BaseModule
if (!empty($_REQUEST['text'])) { if (!empty($_REQUEST['text'])) {
switch (($_REQUEST['type'] ?? '') ?: 'bbcode') { switch (($_REQUEST['type'] ?? '') ?: 'bbcode') {
case 'bbcode': case 'bbcode':
$bbcode = trim($_REQUEST['text']); $bbcode = $_REQUEST['text'];
$results[] = [ $results[] = [
'title' => DI::l10n()->t('Source input'), 'title' => DI::l10n()->t('Source input'),
'content' => visible_whitespace($bbcode) 'content' => visible_whitespace($bbcode)
@ -67,6 +67,11 @@ class Babel extends BaseModule
'content' => visible_whitespace($html) 'content' => visible_whitespace($html)
]; ];
$results[] = [
'title' => DI::l10n()->t('BBCode::convert (hex)'),
'content' => visible_whitespace(bin2hex($html)),
];
$results[] = [ $results[] = [
'title' => DI::l10n()->t('BBCode::convert'), 'title' => DI::l10n()->t('BBCode::convert'),
'content' => $html 'content' => $html
@ -178,6 +183,25 @@ class Babel extends BaseModule
'content' => $html 'content' => $html
]; ];
$config = \HTMLPurifier_Config::createDefault();
$HTMLPurifier = new \HTMLPurifier($config);
$purified = $HTMLPurifier->purify($html);
$results[] = [
'title' => DI::l10n()->t('HTML Purified (raw)'),
'content' => visible_whitespace($purified),
];
$results[] = [
'title' => DI::l10n()->t('HTML Purified (hex)'),
'content' => visible_whitespace(bin2hex($purified)),
];
$results[] = [
'title' => DI::l10n()->t('HTML Purified'),
'content' => $purified,
];
$bbcode = Text\HTML::toBBCode($html); $bbcode = Text\HTML::toBBCode($html);
$results[] = [ $results[] = [
'title' => DI::l10n()->t('HTML::toBBCode'), 'title' => DI::l10n()->t('HTML::toBBCode'),

View file

@ -75,6 +75,18 @@ class BBCodeTest extends MockedTest
$this->dice->shouldReceive('create') $this->dice->shouldReceive('create')
->with(BaseURL::class) ->with(BaseURL::class)
->andReturn($baseUrlMock); ->andReturn($baseUrlMock);
$config = \HTMLPurifier_HTML5Config::createDefault();
$config->set('HTML.Doctype', 'HTML5');
$config->set('Attr.AllowedRel', [
'noreferrer' => true,
'noopener' => true,
]);
$config->set('Attr.AllowedFrameTargets', [
'_blank' => true,
]);
$this->HTMLPurifier = new \HTMLPurifier($config);
} }
public function dataLinks() public function dataLinks()
@ -171,7 +183,7 @@ class BBCodeTest extends MockedTest
public function testAutoLinking(string $data, bool $assertHTML) public function testAutoLinking(string $data, bool $assertHTML)
{ {
$output = BBCode::convert($data); $output = BBCode::convert($data);
$assert = '<a href="' . $data . '" target="_blank" rel="noopener noreferrer">' . $data . '</a>'; $assert = $this->HTMLPurifier->purify('<a href="' . $data . '" target="_blank" rel="noopener noreferrer">' . $data . '</a>');
if ($assertHTML) { if ($assertHTML) {
self::assertEquals($assert, $output); self::assertEquals($assert, $output);
} else { } else {
@ -183,31 +195,31 @@ class BBCodeTest extends MockedTest
{ {
return [ return [
'bug-7271-condensed-space' => [ 'bug-7271-condensed-space' => [
'expectedHtml' => '<ul class="listdecimal" style="list-style-type: decimal;"><li> <a href="http://example.com/" target="_blank" rel="noopener noreferrer">http://example.com/</a></li></ul>', 'expectedHtml' => '<ul class="listdecimal" style="list-style-type:decimal;"><li> <a href="http://example.com/" target="_blank" rel="noopener noreferrer">http://example.com/</a></li></ul>',
'text' => '[ol][*] http://example.com/[/ol]', 'text' => '[ol][*] http://example.com/[/ol]',
], ],
'bug-7271-condensed-nospace' => [ 'bug-7271-condensed-nospace' => [
'expectedHtml' => '<ul class="listdecimal" style="list-style-type: decimal;"><li><a href="http://example.com/" target="_blank" rel="noopener noreferrer">http://example.com/</a></li></ul>', 'expectedHtml' => '<ul class="listdecimal" style="list-style-type:decimal;"><li><a href="http://example.com/" target="_blank" rel="noopener noreferrer">http://example.com/</a></li></ul>',
'text' => '[ol][*]http://example.com/[/ol]', 'text' => '[ol][*]http://example.com/[/ol]',
], ],
'bug-7271-indented-space' => [ 'bug-7271-indented-space' => [
'expectedHtml' => '<ul class="listbullet" style="list-style-type: circle;"><li> <a href="http://example.com/" target="_blank" rel="noopener noreferrer">http://example.com/</a></li></ul>', 'expectedHtml' => '<ul class="listbullet" style="list-style-type:circle;"><li> <a href="http://example.com/" target="_blank" rel="noopener noreferrer">http://example.com/</a></li></ul>',
'text' => '[ul] 'text' => '[ul]
[*] http://example.com/ [*] http://example.com/
[/ul]', [/ul]',
], ],
'bug-7271-indented-nospace' => [ 'bug-7271-indented-nospace' => [
'expectedHtml' => '<ul class="listbullet" style="list-style-type: circle;"><li><a href="http://example.com/" target="_blank" rel="noopener noreferrer">http://example.com/</a></li></ul>', 'expectedHtml' => '<ul class="listbullet" style="list-style-type:circle;"><li><a href="http://example.com/" target="_blank" rel="noopener noreferrer">http://example.com/</a></li></ul>',
'text' => '[ul] 'text' => '[ul]
[*]http://example.com/ [*]http://example.com/
[/ul]', [/ul]',
], ],
'bug-2199-named-size' => [ 'bug-2199-named-size' => [
'expectedHtml' => '<span style="font-size: xx-large; line-height: initial;">Test text</span>', 'expectedHtml' => '<span style="font-size:xx-large;line-height:normal;">Test text</span>',
'text' => '[size=xx-large]Test text[/size]', 'text' => '[size=xx-large]Test text[/size]',
], ],
'bug-2199-numeric-size' => [ 'bug-2199-numeric-size' => [
'expectedHtml' => '<span style="font-size: 24px; line-height: initial;">Test text</span>', 'expectedHtml' => '<span style="font-size:24px;line-height:normal;">Test text</span>',
'text' => '[size=24]Test text[/size]', 'text' => '[size=24]Test text[/size]',
], ],
'bug-2199-diaspora-no-named-size' => [ 'bug-2199-diaspora-no-named-size' => [
@ -225,7 +237,7 @@ class BBCodeTest extends MockedTest
'simpleHtml' => 3, 'simpleHtml' => 3,
], ],
'bug-7665-audio-tag' => [ 'bug-7665-audio-tag' => [
'expectedHtml' => '<audio src="http://www.cendrones.fr/colloque2017/jonathanbocquet.mp3" controls="controls"><a href="http://www.cendrones.fr/colloque2017/jonathanbocquet.mp3">http://www.cendrones.fr/colloque2017/jonathanbocquet.mp3</a></audio>', 'expectedHtml' => '<audio src="http://www.cendrones.fr/colloque2017/jonathanbocquet.mp3" controls><a href="http://www.cendrones.fr/colloque2017/jonathanbocquet.mp3">http://www.cendrones.fr/colloque2017/jonathanbocquet.mp3</a></audio>',
'text' => '[audio]http://www.cendrones.fr/colloque2017/jonathanbocquet.mp3[/audio]', 'text' => '[audio]http://www.cendrones.fr/colloque2017/jonathanbocquet.mp3[/audio]',
'try_oembed' => true, 'try_oembed' => true,
], ],
@ -246,9 +258,25 @@ class BBCodeTest extends MockedTest
'text' => '[test] Space', 'text' => '[test] Space',
], ],
'task-8800-pre-spaces' => [ 'task-8800-pre-spaces' => [
'expectedHtml' => '&nbsp;&nbsp;&nbsp;&nbsp;Spaces', 'expectedHtml' => '    Spaces',
'text' => '[pre] Spaces[/pre]', 'text' => '[pre] Spaces[/pre]',
], ],
'bug-9611-purify-xss-nobb' => [
'expectedHTML' => '<span>dare to move your mouse here</span>',
'text' => '[nobb]<span onmouseover="alert(0)">dare to move your mouse here</span>[/nobb]'
],
'bug-9611-purify-xss-noparse' => [
'expectedHTML' => '<span>dare to move your mouse here</span>',
'text' => '[noparse]<span onmouseover="alert(0)">dare to move your mouse here</span>[/noparse]'
],
'bug-9611-purify-xss-attributes' => [
'expectedHTML' => '<span>dare to move your mouse here</span>',
'text' => '[color="onmouseover=alert(0) style="]dare to move your mouse here[/color]'
],
'bug-9611-purify-attributes-correct' => [
'expectedHTML' => '<span style="color:#FFFFFF;">dare to move your mouse here</span>',
'text' => '[color=FFFFFF]dare to move your mouse here[/color]'
],
]; ];
} }

View file

@ -24,9 +24,7 @@
<div class="panel-heading"> <div class="panel-heading">
<h3 class="panel-title">{{$result.title}}</h3> <h3 class="panel-title">{{$result.title}}</h3>
</div> </div>
<div class="panel-body"> <div class="panel-body">{{$result.content nofilter}}</div>
{{$result.content nofilter}}
</div>
</div> </div>
{{/foreach}} {{/foreach}}
</div> </div>