diff --git a/include/api.php b/include/api.php index 543c01827..a7d25c83f 100644 --- a/include/api.php +++ b/include/api.php @@ -614,7 +614,7 @@ function api_get_user(App $a, $contact_id = null) 'name' => $contact["name"], 'screen_name' => (($contact['nick']) ? $contact['nick'] : $contact['name']), 'location' => ($contact["location"] != "") ? $contact["location"] : ContactSelector::networkToName($contact['network'], $contact['url']), - 'description' => HTML::toPlaintext(BBCode::toPlaintext($contact["about"])), + 'description' => BBCode::toPlaintext($contact["about"]), 'profile_image_url' => $contact["micro"], 'profile_image_url_https' => $contact["micro"], 'profile_image_url_profile_size' => $contact["thumb"], @@ -693,7 +693,7 @@ function api_get_user(App $a, $contact_id = null) 'name' => (($uinfo[0]['name']) ? $uinfo[0]['name'] : $uinfo[0]['nick']), 'screen_name' => (($uinfo[0]['nick']) ? $uinfo[0]['nick'] : $uinfo[0]['name']), 'location' => $location, - 'description' => HTML::toPlaintext(BBCode::toPlaintext($description)), + 'description' => BBCode::toPlaintext($description), 'profile_image_url' => $uinfo[0]['micro'], 'profile_image_url_https' => $uinfo[0]['micro'], 'profile_image_url_profile_size' => $uinfo[0]["thumb"], diff --git a/src/Content/Text/BBCode.php b/src/Content/Text/BBCode.php index b2d4ebb5d..b012e79fb 100644 --- a/src/Content/Text/BBCode.php +++ b/src/Content/Text/BBCode.php @@ -357,10 +357,7 @@ class BBCode extends BaseObject */ public static function toPlaintext($text, $keep_urls = true) { - $naked_text = preg_replace('/\[.+?\]/','', $text); - if (!$keep_urls) { - $naked_text = preg_replace('#https?\://[^\s<]+[^\s\.\)]#i', '', $naked_text); - } + $naked_text = HTML::toPlaintext(BBCode::convert($text, false, 0, true), 0, !$keep_urls); return $naked_text; } diff --git a/src/Content/Text/HTML.php b/src/Content/Text/HTML.php index 9f614a90c..a4829c8c3 100644 --- a/src/Content/Text/HTML.php +++ b/src/Content/Text/HTML.php @@ -56,6 +56,7 @@ class HTML $xpath = new DOMXPath($doc); + /** @var \DOMNode[] $list */ $list = $xpath->query("//" . $tag); foreach ($list as $node) { $attr = []; @@ -98,9 +99,12 @@ class HTML $node->parentNode->insertBefore($StartCode, $node); if ($node->hasChildNodes()) { + /** @var \DOMNode $child */ foreach ($node->childNodes as $child) { - $newNode = $child->cloneNode(true); - $node->parentNode->insertBefore($newNode, $node); + if (trim($child->nodeValue)) { + $newNode = $child->cloneNode(true); + $node->parentNode->insertBefore($newNode, $node); + } } } @@ -560,6 +564,8 @@ class HTML $ignore = false; } + $ignore = $ignore || strpos($treffer[1], '#') === 0; + if (!$ignore) { $urls[$treffer[1]] = $treffer[1]; } @@ -583,7 +589,7 @@ class HTML $message = mb_convert_encoding($message, 'HTML-ENTITIES', "UTF-8"); - @$doc->loadHTML($message); + @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD | LIBXML_NOBLANKS); $message = $doc->saveHTML(); // Remove eventual UTF-8 BOM @@ -592,7 +598,7 @@ class HTML // Collecting all links $urls = self::collectURLs($message); - @$doc->loadHTML($message); + @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD | LIBXML_NOBLANKS); self::tagToBBCode($doc, 'html', [], '', ''); self::tagToBBCode($doc, 'body', [], '', ''); diff --git a/src/Module/Debug/Babel.php b/src/Module/Debug/Babel.php index be10da7ea..b9b629f07 100644 --- a/src/Module/Debug/Babel.php +++ b/src/Module/Debug/Babel.php @@ -148,6 +148,12 @@ class Babel extends BaseModule 'content' => htmlspecialchars($html2) ]; + $bbcode2plain = Text\BBCode::toPlaintext($bbcode); + $results[] = [ + 'title' => L10n::t('HTML::toBBCode => BBCode::toPlaintext'), + 'content' => '
' . $bbcode2plain . '' + ]; + $markdown = Text\HTML::toMarkdown($html); $results[] = [ 'title' => L10n::t('HTML::toMarkdown'), @@ -162,7 +168,7 @@ class Babel extends BaseModule $text = Text\HTML::toPlaintext($html, 0, true); $results[] = [ - 'title' => L10n::t('HTML::toPlaintext'), + 'title' => L10n::t('HTML::toPlaintext (compact)'), 'content' => '
' . $text . '' ]; } diff --git a/tests/datasets/content/text/html/bug-7457.html b/tests/datasets/content/text/html/bug-7457.html new file mode 100644 index 000000000..4a2d4b33c --- /dev/null +++ b/tests/datasets/content/text/html/bug-7457.html @@ -0,0 +1 @@ +