Added support for "Tumblr Neue Post Format"
This commit is contained in:
parent
08d1e484e3
commit
aca4b2eaae
465
src/Content/Text/NPF.php
Normal file
465
src/Content/Text/NPF.php
Normal file
|
@ -0,0 +1,465 @@
|
|||
<?php
|
||||
/**
|
||||
* @copyright Copyright (C) 2010-2023, the Friendica project
|
||||
*
|
||||
* @license GNU AGPL version 3 or any later version
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
namespace Friendica\Content\Text;
|
||||
|
||||
use DOMDocument;
|
||||
use DOMElement;
|
||||
use DOMNode;
|
||||
use Friendica\Model\Photo;
|
||||
use Friendica\Model\Post;
|
||||
|
||||
/**
|
||||
* Tumblr Neue Post Format
|
||||
* @see https://www.tumblr.com/docs/npf
|
||||
*/
|
||||
class NPF
|
||||
{
|
||||
static public function fromBBCode(string $bbcode, int $uri_id): array
|
||||
{
|
||||
$npf = [];
|
||||
|
||||
$bbcode = self::prepareBody($bbcode);
|
||||
|
||||
$html = BBCode::convert($bbcode, false, BBCode::CONNECTORS);
|
||||
if (empty($html)) {
|
||||
return [];
|
||||
}
|
||||
|
||||
$doc = new DOMDocument();
|
||||
if (!@$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'))) {
|
||||
return [];
|
||||
}
|
||||
|
||||
$node = $doc->getElementsByTagName('body')->item(0);
|
||||
foreach ($node->childNodes as $child) {
|
||||
$npf = self::routeElements($child, $uri_id, $npf);
|
||||
}
|
||||
|
||||
return self::addLinkBlock($uri_id, $npf);
|
||||
}
|
||||
|
||||
public static function prepareBody(string $body): string
|
||||
{
|
||||
$shared = BBCode::fetchShareAttributes($body);
|
||||
if (!empty($shared)) {
|
||||
$body = $shared['shared'];
|
||||
}
|
||||
|
||||
$body = BBCode::removeAttachment($body);
|
||||
|
||||
$body = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $body);
|
||||
|
||||
if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
|
||||
foreach ($pictures as $picture) {
|
||||
if (preg_match('#/photo/.*-[01]\.#ism', $picture[2]) && (preg_match('#/photo/.*-0\.#ism', $picture[1]) || preg_match('#/photos/.*/image/#ism', $picture[1]))) {
|
||||
$body = str_replace($picture[0], "\n\n[img=" . str_replace('-1.', '-0.', $picture[2]) . "]" . $picture[3] . "[/img]\n\n", $body);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$body = preg_replace("/\[img\=(.*?)\](.*?)\[\/img\]/ism", "\n\n[img=$1]$2[/img]\n\n", $body);
|
||||
|
||||
if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
|
||||
foreach ($pictures as $picture) {
|
||||
if (preg_match('#/photo/.*-[01]\.#ism', $picture[2]) && (preg_match('#/photo/.*-0\.#ism', $picture[1]) || preg_match('#/photos/.*/image/#ism', $picture[1]))) {
|
||||
$body = str_replace($picture[0], "\n\n[img]" . str_replace('-1.', '-0.', $picture[2]) . "[/img]\n\n", $body);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$body = preg_replace("/\[img\](.*?)\[\/img\]/ism", "\n\n[img]$1[/img]\n\n", $body);
|
||||
$body = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", "\n\n[audio]$1[/audio]\n\n", $body);
|
||||
$body = preg_replace("/\[video\](.*?)\[\/video\]/ism", "\n\n[video]$1[/video]\n\n", $body);
|
||||
|
||||
do {
|
||||
$oldbody = $body;
|
||||
$body = str_replace(["\n\n\n"], ["\n\n"], $body);
|
||||
} while ($oldbody != $body);
|
||||
|
||||
return trim($body);
|
||||
}
|
||||
|
||||
static private function routeElements(DOMElement|DOMNode $child, int $uri_id, array $npf): array
|
||||
{
|
||||
switch ($child->tagName ?? '') {
|
||||
case 'blockquote':
|
||||
$npf = self::addTextBlock($child, $uri_id, $npf, 'indented');
|
||||
break;
|
||||
|
||||
case 'h1':
|
||||
$npf = self::addTextBlock($child, $uri_id, $npf, 'heading1');
|
||||
break;
|
||||
|
||||
case 'h2':
|
||||
$npf = self::addTextBlock($child, $uri_id, $npf, 'heading1');
|
||||
break;
|
||||
|
||||
case 'h3':
|
||||
$npf = self::addTextBlock($child, $uri_id, $npf, 'heading1');
|
||||
break;
|
||||
|
||||
case 'h4':
|
||||
$npf = self::addTextBlock($child, $uri_id, $npf, 'heading2');
|
||||
break;
|
||||
|
||||
case 'h5':
|
||||
$npf = self::addTextBlock($child, $uri_id, $npf, 'heading2');
|
||||
break;
|
||||
|
||||
case 'h6':
|
||||
$npf = self::addTextBlock($child, $uri_id, $npf, 'heading2');
|
||||
break;
|
||||
|
||||
case 'ul':
|
||||
$npf = self::addListBlock($child, $uri_id, $npf, false, 0);
|
||||
break;
|
||||
|
||||
case 'ol':
|
||||
$npf = self::addListBlock($child, $uri_id, $npf, true, 0);
|
||||
break;
|
||||
|
||||
case 'hr':
|
||||
case 'br':
|
||||
break;
|
||||
|
||||
case 'pre':
|
||||
case 'code':
|
||||
$npf = self::addTextBlock($child, $uri_id, $npf, 'indented');
|
||||
break;
|
||||
|
||||
case 'a':
|
||||
$npf = self::addMediaBlock($child, $uri_id, $npf);
|
||||
break;
|
||||
|
||||
case 'table':
|
||||
// $child->ownerDocument->saveHTML($child)
|
||||
break;
|
||||
|
||||
case 'img':
|
||||
$npf = self::addImageBlock($child, $uri_id, $npf);
|
||||
break;
|
||||
|
||||
default:
|
||||
$npf = self::addTextBlock($child, $uri_id, $npf);
|
||||
break;
|
||||
}
|
||||
return $npf;
|
||||
}
|
||||
|
||||
static private function addImageBlock(DOMElement|DOMNode $child, int $uri_id, array $npf): array
|
||||
{
|
||||
$attributes = [];
|
||||
foreach ($child->attributes as $key => $attribute) {
|
||||
$attributes[$key] = $attribute->value;
|
||||
}
|
||||
if (empty($attributes['src'])) {
|
||||
return $npf;
|
||||
}
|
||||
|
||||
$entry = [
|
||||
'type' => 'image',
|
||||
'media' => [],
|
||||
];
|
||||
|
||||
if (!empty($attributes['alt'])) {
|
||||
$entry['alt_text'] = $attributes['alt'];
|
||||
}
|
||||
|
||||
if (!empty($attributes['title']) && ($attributes['alt'] ?? '' != $attributes['title'])) {
|
||||
$entry['caption'] = $attributes['title'];
|
||||
}
|
||||
|
||||
$rid = Photo::ridFromURI($attributes['src']);
|
||||
if (!empty($rid)) {
|
||||
$photos = Photo::selectToArray([], ['resource-id' => $rid]);
|
||||
foreach ($photos as $photo) {
|
||||
$entry['media'][] = [
|
||||
'type' => $photo['type'],
|
||||
'url' => str_replace('-0.', '-' . $photo['scale'] . '.', $attributes['src']),
|
||||
'width' => $photo['width'],
|
||||
'height' => $photo['height'],
|
||||
];
|
||||
}
|
||||
if (empty($attributes['alt']) && !empty($photos[0]['desc'])) {
|
||||
$entry['alt_text'] = $photos[0]['desc'];
|
||||
}
|
||||
} elseif ($media = Post\Media::getByURL($uri_id, $attributes['src'], [Post\Media::IMAGE])) {
|
||||
$entry['media'][] = [
|
||||
'type' => $media['mimetype'],
|
||||
'url' => $media['url'],
|
||||
'width' => $media['width'],
|
||||
'height' => $media['height'],
|
||||
];
|
||||
if (empty($attributes['alt']) && !empty($media['description'])) {
|
||||
$entry['alt_text'] = $media['description'];
|
||||
}
|
||||
} else {
|
||||
$entry['media'][] = ['url' => $attributes['src']];
|
||||
}
|
||||
|
||||
$npf[] = $entry;
|
||||
|
||||
return $npf;
|
||||
}
|
||||
|
||||
static private function addMediaBlock(DOMElement|DOMNode $child, int $uri_id, array $npf): array
|
||||
{
|
||||
$attributes = [];
|
||||
foreach ($child->attributes as $key => $attribute) {
|
||||
$attributes[$key] = $attribute->value;
|
||||
}
|
||||
if (empty($attributes['href'])) {
|
||||
return $npf;
|
||||
}
|
||||
|
||||
$media = Post\Media::getByURL($uri_id, $attributes['href'], [Post\Media::AUDIO, Post\Media::VIDEO]);
|
||||
if (!empty($media)) {
|
||||
switch ($media['type']) {
|
||||
case Post\Media::AUDIO:
|
||||
$entry = [
|
||||
'type' => 'audio',
|
||||
'media' => [
|
||||
'type' => $media['mimetype'],
|
||||
'url' => $media['url'],
|
||||
]
|
||||
];
|
||||
|
||||
if (!empty($media['name'])) {
|
||||
$entry['title'] = $media['name'];
|
||||
} elseif (!empty($media['description'])) {
|
||||
$entry['title'] = $media['description'];
|
||||
}
|
||||
|
||||
$npf[] = self::addPoster($media, $entry);
|
||||
break;
|
||||
|
||||
case Post\Media::VIDEO:
|
||||
$entry = [
|
||||
'type' => 'video',
|
||||
'media' => [
|
||||
'type' => $media['mimetype'],
|
||||
'url' => $media['url'],
|
||||
]
|
||||
];
|
||||
|
||||
$npf[] = self::addPoster($media, $entry);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
$npf[] = [
|
||||
'type' => 'text',
|
||||
'text' => $child->textContent,
|
||||
'formatting' => [
|
||||
'start' => 0,
|
||||
'end' => strlen($child->textContent),
|
||||
'type' => 'link',
|
||||
'url' => $attributes['href']
|
||||
]
|
||||
];
|
||||
}
|
||||
return $npf;
|
||||
}
|
||||
|
||||
static private function addPoster(array $media, array $entry): array
|
||||
{
|
||||
$poster = [];
|
||||
if (!empty($media['preview'])) {
|
||||
$poster['url'] = $media['preview'];
|
||||
}
|
||||
if (!empty($media['preview-width'])) {
|
||||
$poster['width'] = $media['preview-width'];
|
||||
}
|
||||
if (!empty($media['preview-height'])) {
|
||||
$poster['height'] = $media['preview-height'];
|
||||
}
|
||||
if (!empty($poster)) {
|
||||
$entry['poster'] = $poster;
|
||||
}
|
||||
return $entry;
|
||||
}
|
||||
|
||||
static private function fetchText(DOMElement|DOMNode $child, array $text = ['text' => '', 'formatting' => []]): array
|
||||
{
|
||||
foreach ($child->childNodes as $node) {
|
||||
$start = strlen($text['text']);
|
||||
|
||||
switch ($node->nodeName) {
|
||||
case 'b':
|
||||
case 'strong':
|
||||
$type = 'bold';
|
||||
break;
|
||||
|
||||
case 'i':
|
||||
case 'em':
|
||||
$type = 'italic';
|
||||
break;
|
||||
|
||||
case 's':
|
||||
$type = 'strikethrough';
|
||||
break;
|
||||
|
||||
default:
|
||||
$type = '';
|
||||
break;
|
||||
}
|
||||
if ($node->nodeName == 'br') {
|
||||
$text['text'] .= "\n";
|
||||
} else {
|
||||
$text['text'] .= $node->textContent;
|
||||
}
|
||||
if (!empty($type)) {
|
||||
$text['formatting'][] = ['start' => $start, 'end' => strlen($text['text']), 'type' => $type];
|
||||
}
|
||||
}
|
||||
return $text;
|
||||
}
|
||||
|
||||
static private function addTextBlock(DOMElement|DOMNode $child, int $uri_id, array $npf, string $subtype = ''): array
|
||||
{
|
||||
if (empty($subtype) && (($child->childElementCount) ?? 0 == 1) && ($child->textContent == $child->firstChild->textContent)) {
|
||||
return self::routeElements($child->firstChild, $uri_id, $npf);
|
||||
}
|
||||
|
||||
$element = ['type' => 'text'];
|
||||
|
||||
if (!empty($subtype)) {
|
||||
$element['subtype'] = $subtype;
|
||||
}
|
||||
|
||||
$text = self::fetchText($child);
|
||||
|
||||
$element['text'] = $text['text'];
|
||||
$element['formatting'] = $text['formatting'];
|
||||
|
||||
if (empty($subtype)) {
|
||||
switch ($child->tagName ?? '') {
|
||||
case 'b':
|
||||
case 'strong':
|
||||
$element['formatting'][] = ['start' => 0, 'end' => strlen($element['text']), 'type' => 'bold'];
|
||||
break;
|
||||
|
||||
case 'i':
|
||||
case 'em':
|
||||
$element['formatting'][] = ['start' => 0, 'end' => strlen($element['text']), 'type' => 'italic'];
|
||||
break;
|
||||
|
||||
case 's':
|
||||
$element['formatting'][] = ['start' => 0, 'end' => strlen($element['text']), 'type' => 'strikethrough'];
|
||||
break;
|
||||
|
||||
case 'span':
|
||||
case 'p':
|
||||
case 'div':
|
||||
case 'details';
|
||||
case '':
|
||||
break;
|
||||
default:
|
||||
print_r($element);
|
||||
die($child->tagName . "\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (empty($element['formatting'])) {
|
||||
unset($element['formatting']);
|
||||
}
|
||||
|
||||
$npf[] = $element;
|
||||
|
||||
return $npf;
|
||||
}
|
||||
|
||||
static private function addListBlock(DOMElement|DOMNode $child, int $uri_id, array $npf, bool $ordered, int $level): array
|
||||
{
|
||||
foreach ($child->childNodes as $node) {
|
||||
switch ($node->nodeName) {
|
||||
case 'ul':
|
||||
$npf = self::addListBlock($node, $uri_id, $npf, false, $level++);
|
||||
case 'ol':
|
||||
$npf = self::addListBlock($node, $uri_id, $npf, true, $level++);
|
||||
case 'li':
|
||||
$text = self::fetchText($node);
|
||||
|
||||
$entry = [
|
||||
'type' => 'text',
|
||||
'subtype' => $ordered ? 'ordered-list-item' : 'unordered-list-item',
|
||||
'text' => $text['text']
|
||||
];
|
||||
if ($level > 0) {
|
||||
$entry['indent_level'] = $level;
|
||||
}
|
||||
if (!empty($text['formatting'])) {
|
||||
$entry['formatting'] = $text['formatting'];
|
||||
}
|
||||
$npf[] = $entry;
|
||||
}
|
||||
}
|
||||
|
||||
return $npf;
|
||||
}
|
||||
|
||||
static private function addLinkBlock(int $uri_id, array $npf): array
|
||||
{
|
||||
foreach (Post\Media::getByURIId($uri_id, [Post\Media::HTML]) as $link) {
|
||||
$host = parse_url($link['url'], PHP_URL_HOST);
|
||||
if (in_array($host, ['www.youtube.com', 'youtu.be'])) {
|
||||
$entry = [
|
||||
'type' => 'video',
|
||||
'provider' => 'youtube',
|
||||
'url' => $link['url'],
|
||||
];
|
||||
} elseif (in_array($host, ['vimeo.com'])) {
|
||||
$entry = [
|
||||
'type' => 'video',
|
||||
'provider' => 'vimeo',
|
||||
'url' => $link['url'],
|
||||
];
|
||||
} elseif (in_array($host, ['open.spotify.com'])) {
|
||||
$entry = [
|
||||
'type' => 'audio',
|
||||
'provider' => 'spotify',
|
||||
'url' => $link['url'],
|
||||
];
|
||||
} else {
|
||||
$entry = [
|
||||
'type' => 'link',
|
||||
'url' => $link['url'],
|
||||
];
|
||||
if (!empty($link['name'])) {
|
||||
$entry['title'] = $link['name'];
|
||||
}
|
||||
if (!empty($link['description'])) {
|
||||
$entry['description'] = $link['description'];
|
||||
}
|
||||
if (!empty($link['author-name'])) {
|
||||
$entry['author'] = $link['author-name'];
|
||||
}
|
||||
if (!empty($link['publisher-name'])) {
|
||||
$entry['site_name'] = $link['publisher-name'];
|
||||
}
|
||||
}
|
||||
|
||||
$npf[] = self::addPoster($link, $entry);
|
||||
}
|
||||
return $npf;
|
||||
}
|
||||
}
|
|
@ -757,6 +757,17 @@ class Media
|
|||
return DBA::selectToArray('post-media', [], $condition, ['order' => ['id']]);
|
||||
}
|
||||
|
||||
public static function getByURL(int $uri_id, string $url, array $types = [])
|
||||
{
|
||||
$condition = ["`uri-id` = ? AND `url` = ? AND `type` != ?", $uri_id, $url, self::UNKNOWN];
|
||||
|
||||
if (!empty($types)) {
|
||||
$condition = DBA::mergeConditions($condition, ['type' => $types]);
|
||||
}
|
||||
|
||||
return DBA::selectFirst('post-media', [], $condition);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the media attachment with the provided media id.
|
||||
*
|
||||
|
|
Loading…
Reference in a new issue