Merge pull request #7474 from MrPetovan/bug/7457-html-bbcode-plaintext
Fix HTML/BBCode::toPlaintext
This commit is contained in:
commit
a507963c7f
8 changed files with 131 additions and 63 deletions
|
@ -614,7 +614,7 @@ function api_get_user(App $a, $contact_id = null)
|
|||
'name' => $contact["name"],
|
||||
'screen_name' => (($contact['nick']) ? $contact['nick'] : $contact['name']),
|
||||
'location' => ($contact["location"] != "") ? $contact["location"] : ContactSelector::networkToName($contact['network'], $contact['url']),
|
||||
'description' => HTML::toPlaintext(BBCode::toPlaintext($contact["about"])),
|
||||
'description' => BBCode::toPlaintext($contact["about"]),
|
||||
'profile_image_url' => $contact["micro"],
|
||||
'profile_image_url_https' => $contact["micro"],
|
||||
'profile_image_url_profile_size' => $contact["thumb"],
|
||||
|
@ -693,7 +693,7 @@ function api_get_user(App $a, $contact_id = null)
|
|||
'name' => (($uinfo[0]['name']) ? $uinfo[0]['name'] : $uinfo[0]['nick']),
|
||||
'screen_name' => (($uinfo[0]['nick']) ? $uinfo[0]['nick'] : $uinfo[0]['name']),
|
||||
'location' => $location,
|
||||
'description' => HTML::toPlaintext(BBCode::toPlaintext($description)),
|
||||
'description' => BBCode::toPlaintext($description),
|
||||
'profile_image_url' => $uinfo[0]['micro'],
|
||||
'profile_image_url_https' => $uinfo[0]['micro'],
|
||||
'profile_image_url_profile_size' => $uinfo[0]["thumb"],
|
||||
|
|
|
@ -357,10 +357,7 @@ class BBCode extends BaseObject
|
|||
*/
|
||||
public static function toPlaintext($text, $keep_urls = true)
|
||||
{
|
||||
$naked_text = preg_replace('/\[.+?\]/','', $text);
|
||||
if (!$keep_urls) {
|
||||
$naked_text = preg_replace('#https?\://[^\s<]+[^\s\.\)]#i', '', $naked_text);
|
||||
}
|
||||
$naked_text = HTML::toPlaintext(BBCode::convert($text, false, 0, true), 0, !$keep_urls);
|
||||
|
||||
return $naked_text;
|
||||
}
|
||||
|
|
|
@ -56,6 +56,7 @@ class HTML
|
|||
|
||||
$xpath = new DOMXPath($doc);
|
||||
|
||||
/** @var \DOMNode[] $list */
|
||||
$list = $xpath->query("//" . $tag);
|
||||
foreach ($list as $node) {
|
||||
$attr = [];
|
||||
|
@ -98,9 +99,12 @@ class HTML
|
|||
$node->parentNode->insertBefore($StartCode, $node);
|
||||
|
||||
if ($node->hasChildNodes()) {
|
||||
/** @var \DOMNode $child */
|
||||
foreach ($node->childNodes as $child) {
|
||||
$newNode = $child->cloneNode(true);
|
||||
$node->parentNode->insertBefore($newNode, $node);
|
||||
if (trim($child->nodeValue)) {
|
||||
$newNode = $child->cloneNode(true);
|
||||
$node->parentNode->insertBefore($newNode, $node);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -560,6 +564,8 @@ class HTML
|
|||
$ignore = false;
|
||||
}
|
||||
|
||||
$ignore = $ignore || strpos($treffer[1], '#') === 0;
|
||||
|
||||
if (!$ignore) {
|
||||
$urls[$treffer[1]] = $treffer[1];
|
||||
}
|
||||
|
@ -583,7 +589,7 @@ class HTML
|
|||
|
||||
$message = mb_convert_encoding($message, 'HTML-ENTITIES', "UTF-8");
|
||||
|
||||
@$doc->loadHTML($message);
|
||||
@$doc->loadHTML($message, LIBXML_HTML_NODEFDTD | LIBXML_NOBLANKS);
|
||||
|
||||
$message = $doc->saveHTML();
|
||||
// Remove eventual UTF-8 BOM
|
||||
|
@ -592,7 +598,7 @@ class HTML
|
|||
// Collecting all links
|
||||
$urls = self::collectURLs($message);
|
||||
|
||||
@$doc->loadHTML($message);
|
||||
@$doc->loadHTML($message, LIBXML_HTML_NODEFDTD | LIBXML_NOBLANKS);
|
||||
|
||||
self::tagToBBCode($doc, 'html', [], '', '');
|
||||
self::tagToBBCode($doc, 'body', [], '', '');
|
||||
|
|
|
@ -148,6 +148,12 @@ class Babel extends BaseModule
|
|||
'content' => htmlspecialchars($html2)
|
||||
];
|
||||
|
||||
$bbcode2plain = Text\BBCode::toPlaintext($bbcode);
|
||||
$results[] = [
|
||||
'title' => L10n::t('HTML::toBBCode => BBCode::toPlaintext'),
|
||||
'content' => '<pre>' . $bbcode2plain . '</pre>'
|
||||
];
|
||||
|
||||
$markdown = Text\HTML::toMarkdown($html);
|
||||
$results[] = [
|
||||
'title' => L10n::t('HTML::toMarkdown'),
|
||||
|
@ -162,7 +168,7 @@ class Babel extends BaseModule
|
|||
|
||||
$text = Text\HTML::toPlaintext($html, 0, true);
|
||||
$results[] = [
|
||||
'title' => L10n::t('HTML::toPlaintext'),
|
||||
'title' => L10n::t('HTML::toPlaintext (compact)'),
|
||||
'content' => '<pre>' . $text . '</pre>'
|
||||
];
|
||||
}
|
||||
|
|
1
tests/datasets/content/text/html/bug-7457.html
Normal file
1
tests/datasets/content/text/html/bug-7457.html
Normal file
|
@ -0,0 +1 @@
|
|||
<h2 data-sourcepos="1:1-1:23" dir="auto">
<a id="user-content-104-2019-08-01" class="anchor" href="#104-2019-08-01" aria-hidden="true"></a>[1.0.4] - 2019-08-01</h2>
<h3 data-sourcepos="2:1-2:9" dir="auto">
<a id="user-content-fixed" class="anchor" href="#fixed" aria-hidden="true"></a>Fixed</h3>
<ul data-sourcepos="3:1-3:116" dir="auto">
<li data-sourcepos="3:1-3:116">Invalid SemVer version generation, when the current branch does not have commits ahead of tag/checked out on a tag</li>
</ul>
|
5
tests/datasets/content/text/html/bug-7457.txt
Normal file
5
tests/datasets/content/text/html/bug-7457.txt
Normal file
|
@ -0,0 +1,5 @@
|
|||
*[1.0.4] - 2019-08-01*
|
||||
|
||||
*Fixed*
|
||||
|
||||
* Invalid SemVer version generation, when the current branch does not have commits ahead of tag/checked out on a tag
|
53
tests/src/Content/Text/HTMLTest.php
Normal file
53
tests/src/Content/Text/HTMLTest.php
Normal file
|
@ -0,0 +1,53 @@
|
|||
<?php
|
||||
|
||||
namespace Friendica\Test\src\Content\Text;
|
||||
|
||||
use Friendica\Content\Text\HTML;
|
||||
use Friendica\Test\MockedTest;
|
||||
use Friendica\Test\Util\AppMockTrait;
|
||||
use Friendica\Test\Util\VFSTrait;
|
||||
|
||||
class HTMLTest extends MockedTest
|
||||
{
|
||||
use VFSTrait;
|
||||
use AppMockTrait;
|
||||
|
||||
protected function setUp()
|
||||
{
|
||||
parent::setUp();
|
||||
$this->setUpVfsDir();
|
||||
$this->mockApp($this->root);
|
||||
}
|
||||
|
||||
public function dataHTML()
|
||||
{
|
||||
$inputFiles = glob(__DIR__ . '/../../../datasets/content/text/html/*.html');
|
||||
|
||||
$data = [];
|
||||
|
||||
foreach ($inputFiles as $file) {
|
||||
$data[str_replace('.html', '', $file)] = [
|
||||
'input' => file_get_contents($file),
|
||||
'expected' => file_get_contents(str_replace('.html', '.txt', $file))
|
||||
];
|
||||
}
|
||||
|
||||
return $data;
|
||||
}
|
||||
|
||||
/**
|
||||
* Test converting different HTML inputs into plain text
|
||||
*
|
||||
* @dataProvider dataHTML
|
||||
*
|
||||
* @param string $input    The HTML text to test
|
||||
* @param string $expected The expected plaintext output
|
||||
* @throws \Exception
|
||||
*/
|
||||
public function testToPlaintext($input, $expected)
|
||||
{
|
||||
$output = HTML::toPlaintext($input, 0);
|
||||
|
||||
$this->assertEquals($expected, $output);
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue