diff --git a/src/Content/PageInfo.php b/src/Content/PageInfo.php
index 786385c3e..5396bc1bb 100644
--- a/src/Content/PageInfo.php
+++ b/src/Content/PageInfo.php
@@ -265,7 +265,7 @@ class PageInfo
}
if (!$matches && $searchNakedUrls) {
- preg_match('~(?<=\W|^)(?
', $data['url'], self::proxyUrl($data['image'], $simplehtml), $data['title']);
} else {
if (!empty($data['image'])) {
- $return .= sprintf('
', $data['url'], self::proxyUrl($data['image'], $simplehtml), $data['title']);
+ $return .= sprintf('
', $data['url'], self::proxyUrl($data['image'], $simplehtml), $data['title']);
} elseif (!empty($data['preview'])) {
- $return .= sprintf('
', $data['url'], self::proxyUrl($data['preview'], $simplehtml), $data['title']);
+ $return .= sprintf('
', $data['url'], self::proxyUrl($data['preview'], $simplehtml), $data['title']);
}
$return .= sprintf('
', $data['url'], $data['title']);
}
@@ -1033,7 +1033,7 @@ class BBCode
switch ($simplehtml) {
case self::API:
- $text = ($is_quote_share? '
' : '') . '' . html_entity_decode('♲ ', ENT_QUOTES, 'UTF-8') . ' ' . $author_contact['addr'] . ':
' . "\n" . $content;
+ $text = ($is_quote_share? '
' : '') . '' . html_entity_decode('♲ ', ENT_QUOTES, 'UTF-8') . ' ' . $author_contact['addr'] . ':
' . "\n" . $content;
break;
case self::DIASPORA:
if (stripos(Strings::normaliseLink($attributes['link']), 'http://twitter.com/') === 0) {
@@ -1062,7 +1062,7 @@ class BBCode
break;
case self::OSTATUS:
- $text = ($is_quote_share? '
' : '') . '' . html_entity_decode('♲ ', ENT_QUOTES, 'UTF-8') . ' @' . $author_contact['addr'] . ': ' . $content . '
' . "\n";
+ $text = ($is_quote_share? '
' : '') . '' . html_entity_decode('♲ ', ENT_QUOTES, 'UTF-8') . ' @' . $author_contact['addr'] . ': ' . $content . '
' . "\n";
break;
case self::ACTIVITYPUB:
$author = '@' . $author_contact['addr'] . ':';
@@ -1275,6 +1275,8 @@ class BBCode
return '';
}
+ Hook::callAll('bbcode', $text);
+
$a = DI::app();
$text = self::performWithEscapedTags($text, ['code'], function ($text) use ($try_oembed, $simple_html, $for_plaintext, $a) {
@@ -1300,11 +1302,12 @@ class BBCode
return $return;
};
-
-
// Remove the abstract element. It is a non visible element.
$text = self::stripAbstract($text);
+ // Line ending normalisation
+ $text = str_replace("\r\n", "\n", $text);
+
// Move new lines outside of tags
$text = preg_replace("#\[(\w*)](\n*)#ism", '$2[$1]', $text);
$text = preg_replace("#(\n*)\[/(\w*)]#ism", '[/$2]$1', $text);
@@ -1338,16 +1341,6 @@ class BBCode
$text = preg_replace("/\[share(.*?)avatar\s?=\s?'.*?'\s?(.*?)\]\s?(.*?)\s?\[\/share\]\s?/ism", "\n[share$1$2]$3[/share]", $text);
}
- // Convert new line chars to html
tags
-
- // nlbr seems to be hopelessly messed up
- // $Text = nl2br($Text);
-
- // We'll emulate it.
-
- $text = trim($text);
- $text = str_replace("\r\n", "\n", $text);
-
// Remove linefeeds inside of the table elements. See issue #6799
$search = ["\n[th]", "[th]\n", " [th]", "\n[/th]", "[/th]\n", "[/th] ",
"\n[td]", "[td]\n", " [td]", "\n[/td]", "[/td]\n", "[/td] ",
@@ -1367,11 +1360,14 @@ class BBCode
$replace = ["[table]", "[/table]"];
$text = str_replace($search, $replace, $text);
+ // Trim new lines regardless of the system.remove_multiplicated_lines config value
+ $text = trim($text, "\n");
+
// removing multiplicated newlines
if (DI::config()->get('system', 'remove_multiplicated_lines')) {
- $search = ["\n\n\n", "\n ", " \n", "[/quote]\n\n", "\n[/quote]", "[/li]\n", "\n[li]", "\n[ul]", "[/ul]\n", "\n\n[share ", "[/attachment]\n",
+ $search = ["\n\n\n", "\n ", " \n", "[/quote]\n\n", "\n[/quote]", "[/li]\n", "\n[li]", "\n[*]", "\n[ul]", "[/ul]\n", "\n\n[share ", "[/attachment]\n",
"\n[h1]", "[/h1]\n", "\n[h2]", "[/h2]\n", "\n[h3]", "[/h3]\n", "\n[h4]", "[/h4]\n", "\n[h5]", "[/h5]\n", "\n[h6]", "[/h6]\n"];
- $replace = ["\n\n", "\n", "\n", "[/quote]\n", "[/quote]", "[/li]", "[li]", "[ul]", "[/ul]", "\n[share ", "[/attachment]",
+ $replace = ["\n\n", "\n", "\n", "[/quote]\n", "[/quote]", "[/li]", "[li]", "[*]", "[ul]", "[/ul]", "\n[share ", "[/attachment]",
"[h1]", "[/h1]", "[h2]", "[/h2]", "[h3]", "[/h3]", "[h4]", "[/h4]", "[h5]", "[/h5]", "[h6]", "[/h6]"];
do {
$oldtext = $text;
@@ -1447,8 +1443,8 @@ class BBCode
// Check for sized text
// [size=50] --> font-size: 50px (with the unit).
if ($simple_html != self::DIASPORA) {
- $text = preg_replace("(\[size=(\d*?)\](.*?)\[\/size\])ism", "$2", $text);
- $text = preg_replace("(\[size=(.*?)\](.*?)\[\/size\])ism", "$2", $text);
+ $text = preg_replace("(\[size=(\d*?)\](.*?)\[\/size\])ism", '$2', $text);
+ $text = preg_replace("(\[size=(.*?)\](.*?)\[\/size\])ism", '$2', $text);
} else {
// Issue 2199: Diaspora doesn't interpret the construct above, nor the or element
$text = preg_replace("(\[size=(.*?)\](.*?)\[\/size\])ism", "$2", $text);
@@ -1456,28 +1452,16 @@ class BBCode
// Check for centered text
- $text = preg_replace("(\[center\](.*?)\[\/center\])ism", "$1
", $text);
+ $text = preg_replace("(\[center\](.*?)\[\/center\])ism", '$1
', $text);
// Check for list text
$text = str_replace("[*]", "", $text);
// Check for style sheet commands
- $text = preg_replace_callback(
- "(\[style=(.*?)\](.*?)\[\/style\])ism",
- function ($match) {
- return "" . $match[2] . "";
- },
- $text
- );
+ $text = preg_replace("(\[style=(.*?)\](.*?)\[\/style\])ism", '$2', $text);
// Check for CSS classes
- $text = preg_replace_callback(
- "(\[class=(.*?)\](.*?)\[\/class\])ism",
- function ($match) {
- return "" . $match[2] . "";
- },
- $text
- );
+ $text = preg_replace("(\[class=(.*?)\](.*?)\[\/class\])ism", '$2', $text);
// handle nested lists
$endlessloop = 0;
@@ -1608,20 +1592,20 @@ class BBCode
$text = preg_replace("/\[img\](.*?)\[\/img\]/ism", '
', $text);
$text = preg_replace("/\[zmg\](.*?)\[\/zmg\]/ism", '
', $text);
- $text = preg_replace("/\[crypt\](.*?)\[\/crypt\]/ism", '
 . '/images/lock_icon.gif)
', $text);
- $text = preg_replace("/\[crypt(.*?)\](.*?)\[\/crypt\]/ism", '
 . '/images/lock_icon.gif)
', $text);
- //$Text = preg_replace("/\[crypt=(.*?)\](.*?)\[\/crypt\]/ism", '
 . '/images/lock_icon.gif)
', $Text);
+ $text = preg_replace("/\[crypt\](.*?)\[\/crypt\]/ism", '
 . '/images/lock_icon.gif)
', $text);
+ $text = preg_replace("/\[crypt(.*?)\](.*?)\[\/crypt\]/ism", '
 . '/images/lock_icon.gif)
', $text);
+ //$Text = preg_replace("/\[crypt=(.*?)\](.*?)\[\/crypt\]/ism", '
 . '/images/lock_icon.gif)
', $Text);
// Simplify "video" element
$text = preg_replace('(\[video.*?\ssrc\s?=\s?([^\s\]]+).*?\].*?\[/video\])ism', '[video]$1[/video]', $text);
- // Try to Oembed
if ($try_oembed) {
+ // html5 video and audio
$text = preg_replace("/\[video\](.*?\.(ogg|ogv|oga|ogm|webm|mp4).*?)\[\/video\]/ism",
- '', $text);
+ '', $text);
$text = preg_replace("/\[video\](.*?)\[\/video\]/ism",
'$1', $text);
- $text = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", '', $text);
+ $text = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", '', $text);
$text = preg_replace_callback("/\[video\](.*?)\[\/video\]/ism", $try_oembed_callback, $text);
$text = preg_replace_callback("/\[audio\](.*?)\[\/audio\]/ism", $try_oembed_callback, $text);
@@ -1632,9 +1616,6 @@ class BBCode
'$1', $text);
}
- // html5 video and audio
-
-
if ($try_oembed) {
$text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '', $text);
} else {
@@ -1678,7 +1659,7 @@ class BBCode
$text = OEmbed::BBCode2HTML($text);
// Avoid triple linefeeds through oembed
- $text = str_replace("
", "
", $text);
+ $text = str_replace("
", "
", $text);
// If we found an event earlier, strip out all the event code and replace with a reformatted version.
// Replace the event-start section with the entire formatted event. The other bbcode is stripped.
@@ -1708,7 +1689,7 @@ class BBCode
$conv = html_entity_decode(str_replace([' ', "\n", "\r"], '', $text));
// Emojis are always 4 byte Unicode characters
if (!empty($conv) && (strlen($conv) / mb_strlen($conv) == 4)) {
- $text = '' . $text . '';
+ $text = '' . $text . '';
}
}
@@ -1722,8 +1703,6 @@ class BBCode
$text = preg_replace_callback("&\[url=([^\[\]]*)\]\[img\](.*)\[\/img\]\[\/url\]&Usi", 'self::removePictureLinksCallback', $text);
}
- $text = str_replace(["\r","\n"], ['
', '
'], $text);
-
// Remove all hashtag addresses
if ($simple_html && !in_array($simple_html, [self::DIASPORA, self::OSTATUS, self::ACTIVITYPUB])) {
$text = preg_replace("/([#@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", '$1$3', $text);
@@ -1872,18 +1851,21 @@ class BBCode
// Additionally, [pre] tags preserve spaces
$text = preg_replace_callback("/\[pre\](.*?)\[\/pre\]/ism", function ($match) {
- return str_replace(' ', ' ', $match[1]);
+ return str_replace(' ', ' ', htmlentities($match[1], ENT_NOQUOTES,'UTF-8'));
}, $text);
+ // Add HTML new lines
+ $text = str_replace("\n", '
', $text);
+
return $text;
}); // Escaped code
$text = preg_replace_callback("#\[code(?:=([^\]]*))?\](.*?)\[\/code\]#ism",
function ($matches) {
if (strpos($matches[2], "\n") !== false) {
- $return = '' . htmlspecialchars(trim($matches[2], "\n\r"), ENT_NOQUOTES, 'UTF-8') . '
';
+ $return = '' . htmlentities(trim($matches[2], "\n\r"), ENT_NOQUOTES, 'UTF-8') . '
';
} else {
- $return = '' . htmlspecialchars($matches[2], ENT_NOQUOTES, 'UTF-8') . '
';
+ $return = '' . htmlentities($matches[2], ENT_NOQUOTES, 'UTF-8') . '
';
}
return $return;
@@ -1891,37 +1873,20 @@ class BBCode
$text
);
- // Clean up the HTML by loading and saving the HTML with the DOM.
- // Bad structured html can break a whole page.
- // For performance reasons do it only with activated item cache or at export.
- if (!$try_oembed || (get_itemcachepath() != '')) {
- $doc = new DOMDocument();
- $doc->preserveWhiteSpace = false;
+ $config = \HTMLPurifier_HTML5Config::createDefault();
+ $config->set('HTML.Doctype', 'HTML5');
+ $config->set('Attr.AllowedRel', [
+ 'noreferrer' => true,
+ 'noopener' => true,
+ ]);
+ $config->set('Attr.AllowedFrameTargets', [
+ '_blank' => true,
+ ]);
- $text = mb_convert_encoding($text, 'HTML-ENTITIES', "UTF-8");
+ $HTMLPurifier = new \HTMLPurifier($config);
+ $text = $HTMLPurifier->purify($text);
- $doctype = '';
- $encoding = '';
- @$doc->loadHTML($encoding . $doctype . '' . $text . '');
- $doc->encoding = 'UTF-8';
- $text = $doc->saveHTML();
- $text = str_replace(['', '', $doctype, $encoding], ['', '', '', ''], $text);
-
- $text = str_replace('
', '', $text);
-
- //$Text = mb_convert_encoding($Text, "UTF-8", 'HTML-ENTITIES');
- }
-
- // Clean up some useless linebreaks in lists
- //$Text = str_replace('
', $Text);
- //$Text = str_replace('
', '', $Text);
- //$Text = str_replace('
', '', $Text);
- //$Text = str_replace('
= "a") && ($char <= "z")) {
- $cleaned .= $char;
- }
-
- if (!(strpos(" #;:0123456789-_.%", $char) === false)) {
- $cleaned .= $char;
- }
- }
-
- return $cleaned;
- }
-
/**
* Search all instances of a specific HTML tag node in the provided DOM document and replaces them with BBCode text nodes.
*
diff --git a/tests/src/Content/Text/BBCodeTest.php b/tests/src/Content/Text/BBCodeTest.php
index 1a1e05f45..1769e6fd7 100644
--- a/tests/src/Content/Text/BBCodeTest.php
+++ b/tests/src/Content/Text/BBCodeTest.php
@@ -75,6 +75,18 @@ class BBCodeTest extends MockedTest
$this->dice->shouldReceive('create')
->with(BaseURL::class)
->andReturn($baseUrlMock);
+
+ $config = \HTMLPurifier_HTML5Config::createDefault();
+ $config->set('HTML.Doctype', 'HTML5');
+ $config->set('Attr.AllowedRel', [
+ 'noreferrer' => true,
+ 'noopener' => true,
+ ]);
+ $config->set('Attr.AllowedFrameTargets', [
+ '_blank' => true,
+ ]);
+
+ $this->HTMLPurifier = new \HTMLPurifier($config);
}
public function dataLinks()
@@ -171,7 +183,7 @@ class BBCodeTest extends MockedTest
public function testAutoLinking(string $data, bool $assertHTML)
{
$output = BBCode::convert($data);
- $assert = '' . $data . '';
+ $assert = $this->HTMLPurifier->purify('' . $data . '');
if ($assertHTML) {
self::assertEquals($assert, $output);
} else {
@@ -183,31 +195,31 @@ class BBCodeTest extends MockedTest
{
return [
'bug-7271-condensed-space' => [
- 'expectedHtml' => '',
+ 'expectedHtml' => '',
'text' => '[ol][*] http://example.com/[/ol]',
],
'bug-7271-condensed-nospace' => [
- 'expectedHtml' => '',
+ 'expectedHtml' => '',
'text' => '[ol][*]http://example.com/[/ol]',
],
'bug-7271-indented-space' => [
- 'expectedHtml' => '',
+ 'expectedHtml' => '',
'text' => '[ul]
[*] http://example.com/
[/ul]',
],
'bug-7271-indented-nospace' => [
- 'expectedHtml' => '',
+ 'expectedHtml' => '',
'text' => '[ul]
[*]http://example.com/
[/ul]',
],
'bug-2199-named-size' => [
- 'expectedHtml' => 'Test text',
+ 'expectedHtml' => 'Test text',
'text' => '[size=xx-large]Test text[/size]',
],
'bug-2199-numeric-size' => [
- 'expectedHtml' => 'Test text',
+ 'expectedHtml' => 'Test text',
'text' => '[size=24]Test text[/size]',
],
'bug-2199-diaspora-no-named-size' => [
@@ -225,7 +237,7 @@ class BBCodeTest extends MockedTest
'simpleHtml' => 3,
],
'bug-7665-audio-tag' => [
- 'expectedHtml' => '',
+ 'expectedHtml' => '',
'text' => '[audio]http://www.cendrones.fr/colloque2017/jonathanbocquet.mp3[/audio]',
'try_oembed' => true,
],
@@ -246,9 +258,25 @@ class BBCodeTest extends MockedTest
'text' => '[test] Space',
],
'task-8800-pre-spaces' => [
- 'expectedHtml' => ' Spaces',
+ 'expectedHtml' => ' Spaces',
'text' => '[pre] Spaces[/pre]',
],
+ 'bug-9611-purify-xss-nobb' => [
+ 'expectedHTML' => 'dare to move your mouse here',
+ 'text' => '[nobb]dare to move your mouse here[/nobb]'
+ ],
+ 'bug-9611-purify-xss-noparse' => [
+ 'expectedHTML' => 'dare to move your mouse here',
+ 'text' => '[noparse]dare to move your mouse here[/noparse]'
+ ],
+ 'bug-9611-purify-xss-attributes' => [
+ 'expectedHTML' => 'dare to move your mouse here',
+ 'text' => '[color="onmouseover=alert(0) style="]dare to move your mouse here[/color]'
+ ],
+ 'bug-9611-purify-attributes-correct' => [
+ 'expectedHTML' => 'dare to move your mouse here',
+ 'text' => '[color=FFFFFF]dare to move your mouse here[/color]'
+ ],
];
}