Merge pull request #7474 from MrPetovan/bug/7457-html-bbcode-plaintext

Fix HTML/BBCode::toPlaintext
This commit is contained in:
Michael Vogel 2019-08-04 22:08:53 +02:00 committed by GitHub
commit a507963c7f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 131 additions and 63 deletions

View file

@ -614,7 +614,7 @@ function api_get_user(App $a, $contact_id = null)
'name' => $contact["name"], 'name' => $contact["name"],
'screen_name' => (($contact['nick']) ? $contact['nick'] : $contact['name']), 'screen_name' => (($contact['nick']) ? $contact['nick'] : $contact['name']),
'location' => ($contact["location"] != "") ? $contact["location"] : ContactSelector::networkToName($contact['network'], $contact['url']), 'location' => ($contact["location"] != "") ? $contact["location"] : ContactSelector::networkToName($contact['network'], $contact['url']),
'description' => HTML::toPlaintext(BBCode::toPlaintext($contact["about"])), 'description' => BBCode::toPlaintext($contact["about"]),
'profile_image_url' => $contact["micro"], 'profile_image_url' => $contact["micro"],
'profile_image_url_https' => $contact["micro"], 'profile_image_url_https' => $contact["micro"],
'profile_image_url_profile_size' => $contact["thumb"], 'profile_image_url_profile_size' => $contact["thumb"],
@ -693,7 +693,7 @@ function api_get_user(App $a, $contact_id = null)
'name' => (($uinfo[0]['name']) ? $uinfo[0]['name'] : $uinfo[0]['nick']), 'name' => (($uinfo[0]['name']) ? $uinfo[0]['name'] : $uinfo[0]['nick']),
'screen_name' => (($uinfo[0]['nick']) ? $uinfo[0]['nick'] : $uinfo[0]['name']), 'screen_name' => (($uinfo[0]['nick']) ? $uinfo[0]['nick'] : $uinfo[0]['name']),
'location' => $location, 'location' => $location,
'description' => HTML::toPlaintext(BBCode::toPlaintext($description)), 'description' => BBCode::toPlaintext($description),
'profile_image_url' => $uinfo[0]['micro'], 'profile_image_url' => $uinfo[0]['micro'],
'profile_image_url_https' => $uinfo[0]['micro'], 'profile_image_url_https' => $uinfo[0]['micro'],
'profile_image_url_profile_size' => $uinfo[0]["thumb"], 'profile_image_url_profile_size' => $uinfo[0]["thumb"],

View file

@ -357,10 +357,7 @@ class BBCode extends BaseObject
*/ */
public static function toPlaintext($text, $keep_urls = true) public static function toPlaintext($text, $keep_urls = true)
{ {
$naked_text = preg_replace('/\[.+?\]/','', $text); $naked_text = HTML::toPlaintext(BBCode::convert($text, false, 0, true), 0, !$keep_urls);
if (!$keep_urls) {
$naked_text = preg_replace('#https?\://[^\s<]+[^\s\.\)]#i', '', $naked_text);
}
return $naked_text; return $naked_text;
} }

View file

@ -56,6 +56,7 @@ class HTML
$xpath = new DOMXPath($doc); $xpath = new DOMXPath($doc);
/** @var \DOMNode[] $list */
$list = $xpath->query("//" . $tag); $list = $xpath->query("//" . $tag);
foreach ($list as $node) { foreach ($list as $node) {
$attr = []; $attr = [];
@ -98,11 +99,14 @@ class HTML
$node->parentNode->insertBefore($StartCode, $node); $node->parentNode->insertBefore($StartCode, $node);
if ($node->hasChildNodes()) { if ($node->hasChildNodes()) {
/** @var \DOMNode $child */
foreach ($node->childNodes as $child) { foreach ($node->childNodes as $child) {
if (trim($child->nodeValue)) {
$newNode = $child->cloneNode(true); $newNode = $child->cloneNode(true);
$node->parentNode->insertBefore($newNode, $node); $node->parentNode->insertBefore($newNode, $node);
} }
} }
}
$node->parentNode->insertBefore($EndCode, $node); $node->parentNode->insertBefore($EndCode, $node);
$node->parentNode->removeChild($node); $node->parentNode->removeChild($node);
@ -560,6 +564,8 @@ class HTML
$ignore = false; $ignore = false;
} }
$ignore = $ignore || strpos($treffer[1], '#') === 0;
if (!$ignore) { if (!$ignore) {
$urls[$treffer[1]] = $treffer[1]; $urls[$treffer[1]] = $treffer[1];
} }
@ -583,7 +589,7 @@ class HTML
$message = mb_convert_encoding($message, 'HTML-ENTITIES', "UTF-8"); $message = mb_convert_encoding($message, 'HTML-ENTITIES', "UTF-8");
@$doc->loadHTML($message); @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD | LIBXML_NOBLANKS);
$message = $doc->saveHTML(); $message = $doc->saveHTML();
// Remove eventual UTF-8 BOM // Remove eventual UTF-8 BOM
@ -592,7 +598,7 @@ class HTML
// Collecting all links // Collecting all links
$urls = self::collectURLs($message); $urls = self::collectURLs($message);
@$doc->loadHTML($message); @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD | LIBXML_NOBLANKS);
self::tagToBBCode($doc, 'html', [], '', ''); self::tagToBBCode($doc, 'html', [], '', '');
self::tagToBBCode($doc, 'body', [], '', ''); self::tagToBBCode($doc, 'body', [], '', '');

View file

@ -148,6 +148,12 @@ class Babel extends BaseModule
'content' => htmlspecialchars($html2) 'content' => htmlspecialchars($html2)
]; ];
$bbcode2plain = Text\BBCode::toPlaintext($bbcode);
$results[] = [
'title' => L10n::t('HTML::toBBCode => BBCode::toPlaintext'),
'content' => '<pre>' . $bbcode2plain . '</pre>'
];
$markdown = Text\HTML::toMarkdown($html); $markdown = Text\HTML::toMarkdown($html);
$results[] = [ $results[] = [
'title' => L10n::t('HTML::toMarkdown'), 'title' => L10n::t('HTML::toMarkdown'),
@ -162,7 +168,7 @@ class Babel extends BaseModule
$text = Text\HTML::toPlaintext($html, 0, true); $text = Text\HTML::toPlaintext($html, 0, true);
$results[] = [ $results[] = [
'title' => L10n::t('HTML::toPlaintext'), 'title' => L10n::t('HTML::toPlaintext (compact)'),
'content' => '<pre>' . $text . '</pre>' 'content' => '<pre>' . $text . '</pre>'
]; ];
} }

View file

@ -0,0 +1 @@
<h2 data-sourcepos="1:1-1:23" dir="auto">&#x000A;<a id="user-content-104-2019-08-01" class="anchor" href="#104-2019-08-01" aria-hidden="true"></a>[1.0.4] - 2019-08-01</h2>&#x000A;<h3 data-sourcepos="2:1-2:9" dir="auto">&#x000A;<a id="user-content-fixed" class="anchor" href="#fixed" aria-hidden="true"></a>Fixed</h3>&#x000A;<ul data-sourcepos="3:1-3:116" dir="auto">&#x000A;<li data-sourcepos="3:1-3:116">Invalid SemVer version generation, when the current branch does not have commits ahead of tag/checked out on a tag</li>&#x000A;</ul>

View file

@ -0,0 +1,5 @@
*[1.0.4] - 2019-08-01*
*Fixed*
* Invalid SemVer version generation, when the current branch does not have commits ahead of tag/checked out on a tag

View file

@ -0,0 +1,53 @@
<?php
namespace Friendica\Test\src\Content\Text;
use Friendica\Content\Text\HTML;
use Friendica\Test\MockedTest;
use Friendica\Test\Util\AppMockTrait;
use Friendica\Test\Util\VFSTrait;
/**
 * Dataset-driven tests for HTML::toPlaintext.
 *
 * Each "<name>.html" file in the dataset directory is paired with a sibling
 * "<name>.txt" file holding the plaintext the conversion is expected to produce.
 */
class HTMLTest extends MockedTest
{
	use VFSTrait;
	use AppMockTrait;

	/**
	 * Prepares the virtual file system and the mocked App before each test.
	 */
	protected function setUp()
	{
		parent::setUp();
		$this->setUpVfsDir();
		$this->mockApp($this->root);
	}

	/**
	 * Data provider: builds one data set per HTML fixture file.
	 *
	 * @return array[] Data sets keyed by the fixture path without its extension,
	 *                 each holding the 'input' HTML and the 'expected' plaintext
	 */
	public function dataHTML()
	{
		$extension = '.html';

		$inputFiles = glob(__DIR__ . '/../../../datasets/content/text/html/*' . $extension);

		$data = [];
		// glob() returns false on error; treat that like "no fixtures found"
		foreach ($inputFiles ?: [] as $file) {
			// Strip only the trailing extension: a blanket str_replace() would also
			// mangle any directory of the absolute path that contains ".html"
			$basePath = substr($file, 0, -strlen($extension));

			$data[$basePath] = [
				'input'    => file_get_contents($file),
				'expected' => file_get_contents($basePath . '.txt'),
			];
		}

		return $data;
	}

	/**
	 * Test converting different HTML inputs into plaintext
	 *
	 * @dataProvider dataHTML
	 *
	 * @param string $input    The HTML text to convert
	 * @param string $expected The expected plaintext output
	 * @throws \Exception
	 */
	public function testToPlaintext($input, $expected)
	{
		$output = HTML::toPlaintext($input, 0);

		// Strict comparison, so that an unreadable ".txt" fixture (file_get_contents()
		// returning false) can never be loosely equal to an empty output string
		$this->assertSame($expected, $output);
	}
}