From 596b5b0982029f1633315271d56b818d02bfd3b0 Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Mon, 27 May 2019 12:01:48 -0400 Subject: [PATCH 1/3] Simplify HTML::toPlaintext - Keep new lines in plain text output --- src/Content/Text/HTML.php | 38 +++++++++----------------------------- 1 file changed, 9 insertions(+), 29 deletions(-) diff --git a/src/Content/Text/HTML.php b/src/Content/Text/HTML.php index 47463bdd09..4be217b3e4 100644 --- a/src/Content/Text/HTML.php +++ b/src/Content/Text/HTML.php @@ -567,7 +567,13 @@ class HTML return $urls; } - public static function toPlaintext($html, $wraplength = 75, $compact = false) + /** + * @param string $html + * @param int $wraplength Ensures individual lines aren't longer than this many characters. Doesn't break words. + * @param bool $compact True: Completely strips image tags; False: Keeps image URLs + * @return string + */ + public static function toPlaintext(string $html, $wraplength = 75, $compact = false) { $message = str_replace("\r", "", $html); @@ -578,15 +584,9 @@ class HTML @$doc->loadHTML($message); - $xpath = new DOMXPath($doc); - $list = $xpath->query("//pre"); - foreach ($list as $node) { - $node->nodeValue = str_replace("\n", "\r", $node->nodeValue); - } - $message = $doc->saveHTML(); - $message = str_replace(["\n<", ">\n", "\r", "\n", "\xC3\x82\xC2\xA0"], ["<", ">", "<br>", " ", ""], $message); - $message = preg_replace('= [\s]*=i', " ", $message); + // Remove double-encoded non-breaking spaces (UTF-8 NBSP bytes C2 A0 mis-decoded and re-encoded as C3 82 C2 A0) + $message = str_replace("\xC3\x82\xC2\xA0", "", $message); // Collecting all links $urls = self::collectURLs($message); @@ -596,18 +596,6 @@ class HTML self::tagToBBCode($doc, 'html', [], '', ''); self::tagToBBCode($doc, 'body', [], '', ''); - // MyBB-Auszeichnungen - /* - self::node2BBCode($doc, 'span', array('style'=>'text-decoration: underline;'), '_', '_'); - self::node2BBCode($doc, 'span', array('style'=>'font-style: italic;'), '/', '/'); - self::node2BBCode($doc, 'span', array('style'=>'font-weight: bold;'), '*', '*'); - - self::node2BBCode($doc, 'strong', array(), '*', '*'); - self::node2BBCode($doc, 'b', array(), '*', '*'); - self::node2BBCode($doc, 'i', array(), '/', '/'); - self::node2BBCode($doc, 'u', array(), '_', '_'); - */ - if ($compact) { self::tagToBBCode($doc, 'blockquote', [], "»", "«"); } else { @@ -621,8 +609,6 @@ class HTML self::tagToBBCode($doc, 'div', [], "\r", "\r"); self::tagToBBCode($doc, 'p', [], "\n", "\n"); - //self::node2BBCode($doc, 'ul', array(), "\n[list]", "[/list]\n"); - //self::node2BBCode($doc, 'ol', array(), "\n[list=1]", "[/list]\n"); self::tagToBBCode($doc, 'li', [], "\n* ", "\n"); self::tagToBBCode($doc, 'hr', [], "\n" . str_repeat("-", 70) . 
"\n", ""); @@ -637,12 +623,6 @@ class HTML self::tagToBBCode($doc, 'h5', [], "\n\n*", "*\n"); self::tagToBBCode($doc, 'h6', [], "\n\n*", "*\n"); - // Problem: there is no reliable way to detect if it is a link to a tag or profile - //self::node2BBCode($doc, 'a', array('href'=>'/(.+)/'), ' $1 ', ' ', true); - //self::node2BBCode($doc, 'a', array('href'=>'/(.+)/', 'rel'=>'oembed'), ' $1 ', '', true); - //self::node2BBCode($doc, 'img', array('alt'=>'/(.+)/'), '$1', ''); - //self::node2BBCode($doc, 'img', array('title'=>'/(.+)/'), '$1', ''); - //self::node2BBCode($doc, 'img', array(), '', ''); if (!$compact) { self::tagToBBCode($doc, 'img', ['src' => '/(.+)/'], ' [img]$1', '[/img] '); } else { From 5f9fb1f14f47b666f8716ca0b2e095fa6242886e Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Mon, 27 May 2019 12:02:12 -0400 Subject: [PATCH 2/3] Simplify BBCode::toPlaintext - Keep white spaces after tags --- src/Content/Text/BBCode.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Content/Text/BBCode.php b/src/Content/Text/BBCode.php index e417b38a46..c4735b9e93 100644 --- a/src/Content/Text/BBCode.php +++ b/src/Content/Text/BBCode.php @@ -355,7 +355,7 @@ class BBCode extends BaseObject */ public static function toPlaintext($text, $keep_urls = true) { - $naked_text = preg_replace('/\[(.+?)\]\s*/','', $text); + $naked_text = preg_replace('/\[.+?\]/','', $text); if (!$keep_urls) { $naked_text = preg_replace('#https?\://[^\s<]+[^\s\.\)]#i', '', $naked_text); } From 5d2bf735178ae0fcea8ba6b5831e65ef698cec6d Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Mon, 27 May 2019 12:02:28 -0400 Subject: [PATCH 3/3] Add Babel result screen for compact HTML::toPlaintext --- src/Module/Debug/Babel.php | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Module/Debug/Babel.php b/src/Module/Debug/Babel.php index 1ce1ac4c6e..be10da7ea4 100644 --- a/src/Module/Debug/Babel.php +++ b/src/Module/Debug/Babel.php @@ -159,6 +159,12 @@ class Babel extends 
BaseModule 'title' => L10n::t('HTML::toPlaintext'), 'content' => '<pre>' . $text . '</pre>' ]; + + $text = Text\HTML::toPlaintext($html, 0, true); + $results[] = [ + 'title' => L10n::t('HTML::toPlaintext'), + 'content' => '<pre>' . $text . '</pre>' + ]; } }