Merge pull request #7474 from MrPetovan/bug/7457-html-bbcode-plaintext
Fix HTML/BBCode::toPlaintext
This commit is contained in:
commit
a507963c7f
8 changed files with 131 additions and 63 deletions
|
@ -614,7 +614,7 @@ function api_get_user(App $a, $contact_id = null)
|
||||||
'name' => $contact["name"],
|
'name' => $contact["name"],
|
||||||
'screen_name' => (($contact['nick']) ? $contact['nick'] : $contact['name']),
|
'screen_name' => (($contact['nick']) ? $contact['nick'] : $contact['name']),
|
||||||
'location' => ($contact["location"] != "") ? $contact["location"] : ContactSelector::networkToName($contact['network'], $contact['url']),
|
'location' => ($contact["location"] != "") ? $contact["location"] : ContactSelector::networkToName($contact['network'], $contact['url']),
|
||||||
'description' => HTML::toPlaintext(BBCode::toPlaintext($contact["about"])),
|
'description' => BBCode::toPlaintext($contact["about"]),
|
||||||
'profile_image_url' => $contact["micro"],
|
'profile_image_url' => $contact["micro"],
|
||||||
'profile_image_url_https' => $contact["micro"],
|
'profile_image_url_https' => $contact["micro"],
|
||||||
'profile_image_url_profile_size' => $contact["thumb"],
|
'profile_image_url_profile_size' => $contact["thumb"],
|
||||||
|
@ -693,7 +693,7 @@ function api_get_user(App $a, $contact_id = null)
|
||||||
'name' => (($uinfo[0]['name']) ? $uinfo[0]['name'] : $uinfo[0]['nick']),
|
'name' => (($uinfo[0]['name']) ? $uinfo[0]['name'] : $uinfo[0]['nick']),
|
||||||
'screen_name' => (($uinfo[0]['nick']) ? $uinfo[0]['nick'] : $uinfo[0]['name']),
|
'screen_name' => (($uinfo[0]['nick']) ? $uinfo[0]['nick'] : $uinfo[0]['name']),
|
||||||
'location' => $location,
|
'location' => $location,
|
||||||
'description' => HTML::toPlaintext(BBCode::toPlaintext($description)),
|
'description' => BBCode::toPlaintext($description),
|
||||||
'profile_image_url' => $uinfo[0]['micro'],
|
'profile_image_url' => $uinfo[0]['micro'],
|
||||||
'profile_image_url_https' => $uinfo[0]['micro'],
|
'profile_image_url_https' => $uinfo[0]['micro'],
|
||||||
'profile_image_url_profile_size' => $uinfo[0]["thumb"],
|
'profile_image_url_profile_size' => $uinfo[0]["thumb"],
|
||||||
|
|
|
@ -357,10 +357,7 @@ class BBCode extends BaseObject
|
||||||
*/
|
*/
|
||||||
public static function toPlaintext($text, $keep_urls = true)
|
public static function toPlaintext($text, $keep_urls = true)
|
||||||
{
|
{
|
||||||
$naked_text = preg_replace('/\[.+?\]/','', $text);
|
$naked_text = HTML::toPlaintext(BBCode::convert($text, false, 0, true), 0, !$keep_urls);
|
||||||
if (!$keep_urls) {
|
|
||||||
$naked_text = preg_replace('#https?\://[^\s<]+[^\s\.\)]#i', '', $naked_text);
|
|
||||||
}
|
|
||||||
|
|
||||||
return $naked_text;
|
return $naked_text;
|
||||||
}
|
}
|
||||||
|
|
|
@ -56,6 +56,7 @@ class HTML
|
||||||
|
|
||||||
$xpath = new DOMXPath($doc);
|
$xpath = new DOMXPath($doc);
|
||||||
|
|
||||||
|
/** @var \DOMNode[] $list */
|
||||||
$list = $xpath->query("//" . $tag);
|
$list = $xpath->query("//" . $tag);
|
||||||
foreach ($list as $node) {
|
foreach ($list as $node) {
|
||||||
$attr = [];
|
$attr = [];
|
||||||
|
@ -98,11 +99,14 @@ class HTML
|
||||||
$node->parentNode->insertBefore($StartCode, $node);
|
$node->parentNode->insertBefore($StartCode, $node);
|
||||||
|
|
||||||
if ($node->hasChildNodes()) {
|
if ($node->hasChildNodes()) {
|
||||||
|
/** @var \DOMNode $child */
|
||||||
foreach ($node->childNodes as $child) {
|
foreach ($node->childNodes as $child) {
|
||||||
|
if (trim($child->nodeValue)) {
|
||||||
$newNode = $child->cloneNode(true);
|
$newNode = $child->cloneNode(true);
|
||||||
$node->parentNode->insertBefore($newNode, $node);
|
$node->parentNode->insertBefore($newNode, $node);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
$node->parentNode->insertBefore($EndCode, $node);
|
$node->parentNode->insertBefore($EndCode, $node);
|
||||||
$node->parentNode->removeChild($node);
|
$node->parentNode->removeChild($node);
|
||||||
|
@ -560,6 +564,8 @@ class HTML
|
||||||
$ignore = false;
|
$ignore = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$ignore = $ignore || strpos($treffer[1], '#') === 0;
|
||||||
|
|
||||||
if (!$ignore) {
|
if (!$ignore) {
|
||||||
$urls[$treffer[1]] = $treffer[1];
|
$urls[$treffer[1]] = $treffer[1];
|
||||||
}
|
}
|
||||||
|
@ -583,7 +589,7 @@ class HTML
|
||||||
|
|
||||||
$message = mb_convert_encoding($message, 'HTML-ENTITIES', "UTF-8");
|
$message = mb_convert_encoding($message, 'HTML-ENTITIES', "UTF-8");
|
||||||
|
|
||||||
@$doc->loadHTML($message);
|
@$doc->loadHTML($message, LIBXML_HTML_NODEFDTD | LIBXML_NOBLANKS);
|
||||||
|
|
||||||
$message = $doc->saveHTML();
|
$message = $doc->saveHTML();
|
||||||
// Remove eventual UTF-8 BOM
|
// Remove eventual UTF-8 BOM
|
||||||
|
@ -592,7 +598,7 @@ class HTML
|
||||||
// Collecting all links
|
// Collecting all links
|
||||||
$urls = self::collectURLs($message);
|
$urls = self::collectURLs($message);
|
||||||
|
|
||||||
@$doc->loadHTML($message);
|
@$doc->loadHTML($message, LIBXML_HTML_NODEFDTD | LIBXML_NOBLANKS);
|
||||||
|
|
||||||
self::tagToBBCode($doc, 'html', [], '', '');
|
self::tagToBBCode($doc, 'html', [], '', '');
|
||||||
self::tagToBBCode($doc, 'body', [], '', '');
|
self::tagToBBCode($doc, 'body', [], '', '');
|
||||||
|
|
|
@ -148,6 +148,12 @@ class Babel extends BaseModule
|
||||||
'content' => htmlspecialchars($html2)
|
'content' => htmlspecialchars($html2)
|
||||||
];
|
];
|
||||||
|
|
||||||
|
$bbcode2plain = Text\BBCode::toPlaintext($bbcode);
|
||||||
|
$results[] = [
|
||||||
|
'title' => L10n::t('HTML::toBBCode => BBCode::toPlaintext'),
|
||||||
|
'content' => '<pre>' . $bbcode2plain . '</pre>'
|
||||||
|
];
|
||||||
|
|
||||||
$markdown = Text\HTML::toMarkdown($html);
|
$markdown = Text\HTML::toMarkdown($html);
|
||||||
$results[] = [
|
$results[] = [
|
||||||
'title' => L10n::t('HTML::toMarkdown'),
|
'title' => L10n::t('HTML::toMarkdown'),
|
||||||
|
@ -162,7 +168,7 @@ class Babel extends BaseModule
|
||||||
|
|
||||||
$text = Text\HTML::toPlaintext($html, 0, true);
|
$text = Text\HTML::toPlaintext($html, 0, true);
|
||||||
$results[] = [
|
$results[] = [
|
||||||
'title' => L10n::t('HTML::toPlaintext'),
|
'title' => L10n::t('HTML::toPlaintext (compact)'),
|
||||||
'content' => '<pre>' . $text . '</pre>'
|
'content' => '<pre>' . $text . '</pre>'
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
1
tests/datasets/content/text/html/bug-7457.html
Normal file
1
tests/datasets/content/text/html/bug-7457.html
Normal file
|
@ -0,0 +1 @@
|
||||||
|
<h2 data-sourcepos="1:1-1:23" dir="auto">
<a id="user-content-104-2019-08-01" class="anchor" href="#104-2019-08-01" aria-hidden="true"></a>[1.0.4] - 2019-08-01</h2>
<h3 data-sourcepos="2:1-2:9" dir="auto">
<a id="user-content-fixed" class="anchor" href="#fixed" aria-hidden="true"></a>Fixed</h3>
<ul data-sourcepos="3:1-3:116" dir="auto">
<li data-sourcepos="3:1-3:116">Invalid SemVer version generation, when the current branch does not have commits ahead of tag/checked out on a tag</li>
</ul>
|
5
tests/datasets/content/text/html/bug-7457.txt
Normal file
5
tests/datasets/content/text/html/bug-7457.txt
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
*[1.0.4] - 2019-08-01*
|
||||||
|
|
||||||
|
*Fixed*
|
||||||
|
|
||||||
|
* Invalid SemVer version generation, when the current branch does not have commits ahead of tag/checked out on a tag
|
53
tests/src/Content/Text/HTMLTest.php
Normal file
53
tests/src/Content/Text/HTMLTest.php
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace Friendica\Test\src\Content\Text;
|
||||||
|
|
||||||
|
use Friendica\Content\Text\HTML;
|
||||||
|
use Friendica\Test\MockedTest;
|
||||||
|
use Friendica\Test\Util\AppMockTrait;
|
||||||
|
use Friendica\Test\Util\VFSTrait;
|
||||||
|
|
||||||
|
class HTMLTest extends MockedTest
|
||||||
|
{
|
||||||
|
use VFSTrait;
|
||||||
|
use AppMockTrait;
|
||||||
|
|
||||||
|
protected function setUp()
|
||||||
|
{
|
||||||
|
parent::setUp();
|
||||||
|
$this->setUpVfsDir();
|
||||||
|
$this->mockApp($this->root);
|
||||||
|
}
|
||||||
|
|
||||||
|
public function dataHTML()
|
||||||
|
{
|
||||||
|
$inputFiles = glob(__DIR__ . '/../../../datasets/content/text/html/*.html');
|
||||||
|
|
||||||
|
$data = [];
|
||||||
|
|
||||||
|
foreach ($inputFiles as $file) {
|
||||||
|
$data[str_replace('.html', '', $file)] = [
|
||||||
|
'input' => file_get_contents($file),
|
||||||
|
'expected' => file_get_contents(str_replace('.html', '.txt', $file))
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
|
return $data;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test converting different input HTML texts into plaintext
|
||||||
|
*
|
||||||
|
* @dataProvider dataHTML
|
||||||
|
*
|
||||||
|
* @param string $input    The HTML text to test
|
||||||
|
* @param string $expected The expected plaintext output
|
||||||
|
* @throws \Exception
|
||||||
|
*/
|
||||||
|
public function testToPlaintext($input, $expected)
|
||||||
|
{
|
||||||
|
$output = HTML::toPlaintext($input, 0);
|
||||||
|
|
||||||
|
$this->assertEquals($expected, $output);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in a new issue