Support for non-HTML content in feed imports

This commit is contained in:
Michael 2024-05-11 20:03:19 +00:00
parent ee9510e17d
commit bca86beda0
2 changed files with 22 additions and 3 deletions

View file

@ -253,8 +253,10 @@ class HTML
self::tagToBBCode($doc, 'span', ['class' => 'type-link'], '[class=type-link]', '[/class]');
self::tagToBBCode($doc, 'span', ['class' => 'type-video'], '[class=type-video]', '[/class]');
$elements = ['b', 'del', 'em', 'i', 'ins', 'kbd', 'mark',
's', 'samp', 'strong', 'sub', 'sup', 'u', 'var'];
$elements = [
'b', 'del', 'em', 'i', 'ins', 'kbd', 'mark',
's', 'samp', 'strong', 'sub', 'sup', 'u', 'var'
];
foreach ($elements as $element) {
self::tagToBBCode($doc, $element, [], '[' . $element . ']', '[/' . $element . ']');
}
@ -1059,4 +1061,15 @@ class HTML
return null;
}
/**
 * Check if a text contains HTML tags or HTML entities
 *
 * The text is regarded as HTML when decoding its entities with
 * html_entity_decode() or removing its tags with strip_tags() yields
 * a string that differs from the input.
 *
 * @param string $text Text to inspect
 * @return bool True if entity decoding or tag stripping changes the text, false otherwise
 */
public static function isHTML(string $text): bool
{
	// Both operands are always strings, so compare strictly and avoid
	// PHP's loose numeric-string comparison rules.
	return ($text !== html_entity_decode($text)) || ($text !== strip_tags($text));
}
}

View file

@ -29,7 +29,6 @@ use Friendica\Contact\LocalRelationship\Entity\LocalRelationship;
use Friendica\Content\PageInfo;
use Friendica\Content\Text\BBCode;
use Friendica\Content\Text\HTML;
use Friendica\Core\Cache\Enum\Duration;
use Friendica\Core\Logger;
use Friendica\Core\Protocol;
use Friendica\Core\Worker;
@ -546,6 +545,13 @@ class Feed
if (self::titleIsBody($item['title'], $body)) {
$item['title'] = '';
}
if (!HTML::isHTML($body)) {
$original = $body;
$body = BBCode::convert($body, false, BBCode::EXTERNAL);
Logger::debug('Body contained no HTML', ['original' => $original, 'converted' => $body]);
}
$item['body'] = HTML::toBBCode($body, $basepath);
// Remove tracking pixels