Merge pull request #14147 from annando/feed-no-html

Support for non-HTML content for feed imports
This commit is contained in:
Hypolite Petovan 2024-05-11 18:13:40 -04:00 committed by GitHub
commit ec5e8a55b5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 22 additions and 3 deletions

View file

@ -253,8 +253,10 @@ class HTML
self::tagToBBCode($doc, 'span', ['class' => 'type-link'], '[class=type-link]', '[/class]');
self::tagToBBCode($doc, 'span', ['class' => 'type-video'], '[class=type-video]', '[/class]');
$elements = ['b', 'del', 'em', 'i', 'ins', 'kbd', 'mark',
's', 'samp', 'strong', 'sub', 'sup', 'u', 'var'];
$elements = [
'b', 'del', 'em', 'i', 'ins', 'kbd', 'mark',
's', 'samp', 'strong', 'sub', 'sup', 'u', 'var'
];
foreach ($elements as $element) {
self::tagToBBCode($doc, $element, [], '[' . $element . ']', '[/' . $element . ']');
}
@ -1059,4 +1061,15 @@ class HTML
return null;
}
/**
 * Check if a document contains HTML or entities
 *
 * The text is treated as HTML when either decoding HTML entities or
 * stripping tags would alter it, i.e. when it contains at least one
 * entity that html_entity_decode() recognises or at least one piece of
 * markup that strip_tags() removes.
 *
 * @param string $text The text to inspect
 * @return bool True if the text contains decodable entities or strippable tags, false otherwise
 */
public static function isHTML(string $text): bool
{
	// Strict comparison: both operands are strings, and we want a byte-wise
	// comparison without PHP's numeric-string type juggling.
	if ($text !== html_entity_decode($text)) {
		return true;
	}

	return $text !== strip_tags($text);
}
}

View file

@ -29,7 +29,6 @@ use Friendica\Contact\LocalRelationship\Entity\LocalRelationship;
use Friendica\Content\PageInfo;
use Friendica\Content\Text\BBCode;
use Friendica\Content\Text\HTML;
use Friendica\Core\Cache\Enum\Duration;
use Friendica\Core\Logger;
use Friendica\Core\Protocol;
use Friendica\Core\Worker;
@ -546,6 +545,13 @@ class Feed
if (self::titleIsBody($item['title'], $body)) {
$item['title'] = '';
}
if (!HTML::isHTML($body)) {
$original = $body;
$body = BBCode::convert($body, false, BBCode::EXTERNAL);
Logger::debug('Body contained no HTML', ['original' => $original, 'converted' => $body]);
}
$item['body'] = HTML::toBBCode($body, $basepath);
// Remove tracking pixels