Merge pull request #12976 from annando/npf2

NPF functionality added
This commit is contained in:
Hypolite Petovan 2023-04-06 17:17:22 -04:00 committed by GitHub
commit bb0cb0dc11
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 700 additions and 3 deletions

View file

@ -35,6 +35,7 @@
"friendica/json-ld": "^1.0",
"geekwright/po": "^2.0",
"guzzlehttp/guzzle": "^6.5",
"guzzlehttp/oauth-subscriber": "^0.6",
"kornrunner/blurhash": "^1.2",
"league/html-to-markdown": "^4.8",
"level-2/dice": "^4",

57
composer.lock generated
View file

@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "456d14e3ad9be265c5c9e6172a0d18d8",
"content-hash": "c208d7f5176358ea157f109c1c7d68dd",
"packages": [
{
"name": "asika/simple-console",
@ -967,6 +967,61 @@
],
"time": "2022-06-20T22:16:07+00:00"
},
{
"name": "guzzlehttp/oauth-subscriber",
"version": "0.6.0",
"source": {
"type": "git",
"url": "https://github.com/guzzle/oauth-subscriber.git",
"reference": "8d6cab29f8397e5712d00a383eeead36108a3c1f"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/guzzle/oauth-subscriber/zipball/8d6cab29f8397e5712d00a383eeead36108a3c1f",
"reference": "8d6cab29f8397e5712d00a383eeead36108a3c1f",
"shasum": ""
},
"require": {
"guzzlehttp/guzzle": "^6.5|^7.2",
"guzzlehttp/psr7": "^1.7|^2.0",
"php": ">=5.5.0"
},
"require-dev": {
"phpunit/phpunit": "~4.0|^9.3.3"
},
"suggest": {
"ext-openssl": "Required to sign using RSA-SHA1"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "0.6-dev"
}
},
"autoload": {
"psr-4": {
"GuzzleHttp\\Subscriber\\Oauth\\": "src"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Michael Dowling",
"email": "mtdowling@gmail.com",
"homepage": "https://github.com/mtdowling"
}
],
"description": "Guzzle OAuth 1.0 subscriber",
"homepage": "http://guzzlephp.org/",
"keywords": [
"Guzzle",
"oauth"
],
"time": "2021-07-13T12:01:32+00:00"
},
{
"name": "guzzlehttp/promises",
"version": "1.5.2",

View file

@ -57,6 +57,7 @@ class BBCode
const DIASPORA = 3;
const CONNECTORS = 4;
const TWITTER_API = 5;
const NPF = 6;
const OSTATUS = 7;
const TWITTER = 8;
const BACKLINK = 8;
@ -1355,7 +1356,9 @@ class BBCode
/// @todo Have a closer look at the different html modes
// Handle attached links or videos
if (in_array($simple_html, [self::MASTODON_API, self::TWITTER_API, self::ACTIVITYPUB])) {
if ($simple_html == self::NPF) {
$text = self::removeAttachment($text);
} elseif (in_array($simple_html, [self::MASTODON_API, self::TWITTER_API, self::ACTIVITYPUB])) {
$text = self::replaceAttachment($text);
} elseif (!in_array($simple_html, [self::INTERNAL, self::EXTERNAL, self::CONNECTORS])) {
$text = self::replaceAttachment($text, true);
@ -1605,7 +1608,18 @@ class BBCode
// Simplify "video" element
$text = preg_replace('(\[video[^\]]*?\ssrc\s?=\s?([^\s\]]+)[^\]]*?\].*?\[/video\])ism', '[video]$1[/video]', $text);
if ($try_oembed) {
if ($simple_html == self::NPF) {
$text = preg_replace(
"/\[video\](.*?)\[\/video\]/ism",
'</p><video src="$1" controls width="100%" height="auto">$1</video><p>',
$text
);
// Turn [audio]URL[/audio] into an HTML5 <audio> element. The fallback
// content of the element is the plain URL, mirroring the [video]
// replacement directly above; the replacement must not contain any
// additional `$1">` fragment, otherwise the element's text content
// becomes `URL">URL`.
$text = preg_replace(
    "/\[audio\](.*?)\[\/audio\]/ism",
    '</p><audio src="$1" controls>$1</audio><p>',
    $text
);
} elseif ($try_oembed) {
// html5 video and audio
$text = preg_replace(
"/\[video\](.*?\.(ogg|ogv|oga|ogm|webm|mp4).*?)\[\/video\]/ism",

616
src/Content/Text/NPF.php Normal file
View file

@ -0,0 +1,616 @@
<?php
/**
* @copyright Copyright (C) 2010-2023, the Friendica project
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
namespace Friendica\Content\Text;
use DOMDocument;
use DOMElement;
use Friendica\Model\Photo;
use Friendica\Model\Post;
/**
 * Tumblr Neue Post Format
 *
 * Converts BBCode into a list of NPF content blocks. The BBCode is rendered
 * to HTML first (BBCode::NPF mode); the resulting DOM tree is then walked
 * recursively and translated into text, image, audio, video and link blocks.
 *
 * @see https://www.tumblr.com/docs/npf
 */
class NPF
{
    /**
     * Heading tag name ('h1' … 'h6') => NPF heading subtype for the document
     * that is currently converted. Rebuilt on every call of fromBBCode().
     *
     * @var array
     */
    private static $heading_subtype = [];

    /**
     * Convert BBCode into NPF (Tumblr Neue Post Format)
     *
     * @param string  $bbcode
     * @param integer $uri_id
     * @return array NPF blocks, empty when the BBCode yields no parsable HTML
     */
    public static function fromBBCode(string $bbcode, int $uri_id): array
    {
        $bbcode = self::prepareBody($bbcode);

        $html = BBCode::convert($bbcode, false, BBCode::NPF);
        if (empty($html)) {
            return [];
        }

        $doc = new DOMDocument();
        $doc->formatOutput = true;

        // DOMDocument::loadHTML() does not assume UTF-8, so every non-ASCII
        // character is turned into a numeric entity first. This replaces the
        // 'HTML-ENTITIES' pseudo encoding of mb_convert_encoding(), which is
        // deprecated since PHP 8.2.
        $encoded = mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');

        // libxml complains about HTML5 elements like <video>; those warnings are not relevant here.
        if (!@$doc->loadHTML($encoded)) {
            return [];
        }

        $body = $doc->getElementsByTagName('body')->item(0);
        if (!$body instanceof DOMElement) {
            // Nothing to walk through when the parser didn't create a body element
            return [];
        }

        self::setHeadingSubStyles($doc);

        // Pending text and formatting are flushed by routeChildren() itself, only the blocks are needed
        [$npf] = self::routeChildren($body, $uri_id, true, []);

        return self::addLinkBlockForUriId($uri_id, 0, $npf);
    }

    /**
     * Fetch the heading types
     *
     * The highest heading level that is present in the document is mapped to
     * "heading1", every other present heading level is mapped to "heading2".
     *
     * @param DOMDocument $doc
     * @return void
     */
    private static function setHeadingSubStyles(DOMDocument $doc): void
    {
        self::$heading_subtype = [];

        foreach (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] as $tag) {
            if ($doc->getElementsByTagName($tag)->count() === 0) {
                continue;
            }

            self::$heading_subtype[$tag] = empty(self::$heading_subtype) ? 'heading1' : 'heading2';
        }
    }

    /**
     * Prepare the BBCode for the NPF conversion
     *
     * Replaces a share by the value that BBCode::fetchShareAttributes()
     * returns under "shared", strips dimensions from [img] elements, replaces
     * linked preview pictures by the picture itself and puts every picture
     * into a paragraph of its own.
     *
     * @param string $bbcode
     * @return string
     */
    private static function prepareBody(string $bbcode): string
    {
        $shared = BBCode::fetchShareAttributes($bbcode);
        if (!empty($shared)) {
            $bbcode = $shared['shared'];
        }

        // Remove the picture dimensions
        $bbcode = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $bbcode);

        // Linked pictures with a description: [url=link][img=picture]description[/img][/url]
        if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]\s*\[/url\]#ism", $bbcode, $pictures, PREG_SET_ORDER)) {
            foreach ($pictures as $picture) {
                if (self::isLinkedPhoto($picture[2], $picture[1])) {
                    $bbcode = str_replace($picture[0], "\n\n[img=" . str_replace('-1.', '-0.', $picture[2]) . "]" . $picture[3] . "[/img]\n\n", $bbcode);
                }
            }
        }

        $bbcode = preg_replace("/\[img\=(.*?)\](.*?)\[\/img\]/ism", "\n\n[img=$1]$2[/img]\n\n", $bbcode);

        // Linked pictures without a description: [url=link][img]picture[/img][/url]
        if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]#ism", $bbcode, $pictures, PREG_SET_ORDER)) {
            foreach ($pictures as $picture) {
                if (self::isLinkedPhoto($picture[2], $picture[1])) {
                    $bbcode = str_replace($picture[0], "\n\n[img]" . str_replace('-1.', '-0.', $picture[2]) . "[/img]\n\n", $bbcode);
                }
            }
        }

        $bbcode = preg_replace("/\[img\](.*?)\[\/img\]/ism", "\n\n[img]$1[/img]\n\n", $bbcode);

        // Collapse every run of three or more line feeds into exactly two
        $bbcode = preg_replace('/\n{3,}/', "\n\n", $bbcode);

        return trim($bbcode);
    }

    /**
     * Check if a picture URL and the URL it links to both match the photo URL patterns
     *
     * @param string $image URL of the embedded picture
     * @param string $link  URL the picture links to
     * @return boolean
     */
    private static function isLinkedPhoto(string $image, string $link): bool
    {
        if (!preg_match('#/photo/.*-[01]\.#ism', $image)) {
            return false;
        }

        return preg_match('#/photo/.*-0\.#ism', $link) || preg_match('#/photos/.*/image/#ism', $link);
    }

    /**
     * Walk recursively through the HTML
     *
     * @param DOMElement $element
     * @param integer    $uri_id
     * @param boolean    $parse_structure When true, pending text is flushed into a block before and after the children
     * @param array      $callstack       Names of the elements that enclose the given element
     * @param array      $npf             Blocks that had been created so far
     * @param string     $text            Pending text of the current block
     * @param array      $formatting      Pending formatting of the current block
     * @return array [blocks, pending text, pending formatting]
     */
    private static function routeChildren(DOMElement $element, int $uri_id, bool $parse_structure, array $callstack, array $npf = [], string $text = '', array $formatting = []): array
    {
        // Compare against the empty string: a text of "0" is valid content but evaluates to false
        if ($parse_structure && $text !== '') {
            [$npf, $text, $formatting] = self::addBlock($text, $formatting, $npf, $callstack);
        }

        $callstack[] = $element->nodeName;
        $level = self::getLevelByCallstack($callstack);

        foreach ($element->childNodes as $child) {
            switch ($child->nodeName) {
                case 'b':
                case 'strong':
                    [$npf, $text, $formatting] = self::addFormatting($child, $uri_id, 'bold', $callstack, $npf, $text, $formatting);
                    break;

                case 'i':
                case 'em':
                    [$npf, $text, $formatting] = self::addFormatting($child, $uri_id, 'italic', $callstack, $npf, $text, $formatting);
                    break;

                case 's':
                    [$npf, $text, $formatting] = self::addFormatting($child, $uri_id, 'strikethrough', $callstack, $npf, $text, $formatting);
                    break;

                case 'u':
                case 'span':
                    // No formatting entry is created for these, only their content is used
                    [$npf, $text, $formatting] = self::addFormatting($child, $uri_id, '', $callstack, $npf, $text, $formatting);
                    break;

                case 'hr':
                case 'br':
                    // No line feed at the beginning of a block
                    if ($text !== '') {
                        $text .= "\n";
                    }
                    break;

                case '#text':
                    $text .= $child->textContent;
                    break;

                case 'table':
                case 'summary':
                    // Ignore tables and spoilers
                    break;

                case 'a':
                    [$npf, $text, $formatting] = self::addInlineLink($child, $uri_id, $callstack, $npf, $text, $formatting);
                    break;

                case 'img':
                    $npf = self::addImageBlock($child, $uri_id, $level, $npf);
                    break;

                case 'audio':
                case 'video':
                    $npf = self::addMediaBlock($child, $uri_id, $level, $npf);
                    break;

                default:
                    // Every other element is treated as structure. Comments, CDATA sections and
                    // similar nodes are no elements and have no content that could be converted.
                    if ($child instanceof DOMElement) {
                        [$npf, $text, $formatting] = self::routeChildren($child, $uri_id, true, $callstack, $npf, $text, $formatting);
                    }
                    break;
            }
        }

        if ($parse_structure && $text !== '') {
            [$npf, $text, $formatting] = self::addBlock($text, $formatting, $npf, $callstack);
        }

        return [$npf, $text, $formatting];
    }

    /**
     * Return the correct indent level
     *
     * The level is the number of enclosing list and quote elements minus one.
     *
     * @param array $callstack
     * @return integer
     */
    private static function getLevelByCallstack(array $callstack): int
    {
        $level = 0;
        foreach ($callstack as $entry) {
            if (in_array($entry, ['ol', 'ul', 'blockquote'], true)) {
                ++$level;
            }
        }

        return max(0, $level - 1);
    }

    /**
     * Detect the subtype via the HTML element callstack
     *
     * The innermost (last) matching element of the callstack defines the subtype.
     *
     * @param array  $callstack
     * @param string $text
     * @return string Subtype, empty string when no element of the callstack defines one
     */
    private static function getSubTypeByCallstack(array $callstack, string $text): string
    {
        $subtype = '';

        foreach ($callstack as $entry) {
            switch ($entry) {
                case 'ol':
                    $subtype = 'ordered-list-item';
                    break;

                case 'ul':
                    $subtype = 'unordered-list-item';
                    break;

                case 'h1':
                case 'h2':
                case 'h3':
                case 'h4':
                case 'h5':
                case 'h6':
                    // The map is filled by setHeadingSubStyles(); the fallback only avoids
                    // returning null when it hadn't been called for the current document.
                    $subtype = self::$heading_subtype[$entry] ?? 'heading1';
                    break;

                case 'blockquote':
                    $subtype = mb_strlen($text) < 100 ? 'quote' : 'indented';
                    break;

                case 'pre':
                    $subtype = 'indented';
                    break;

                case 'code':
                    $subtype = 'chat';
                    break;
            }
        }

        return $subtype;
    }

    /**
     * Collect the trimmed attribute values of an element
     *
     * @param DOMElement $element
     * @return array attribute name => trimmed attribute value
     */
    private static function collectAttributes(DOMElement $element): array
    {
        $attributes = [];
        foreach ($element->attributes as $key => $attribute) {
            $attributes[$key] = trim($attribute->value);
        }

        return $attributes;
    }

    /**
     * Add formatting for a text block
     *
     * @param DOMElement $element
     * @param integer    $uri_id
     * @param string     $type       NPF formatting type, empty when only the content of the element is needed
     * @param array      $callstack
     * @param array      $npf
     * @param string     $text
     * @param array      $formatting
     * @return array [blocks, pending text, pending formatting]
     */
    private static function addFormatting(DOMElement $element, int $uri_id, string $type, array $callstack, array $npf, string $text, array $formatting): array
    {
        // The formatting covers the text that is added by the children of the element
        $start = mb_strlen($text);

        [$npf, $text, $formatting] = self::routeChildren($element, $uri_id, false, $callstack, $npf, $text, $formatting);

        if (!empty($type)) {
            $formatting[] = [
                'start' => $start,
                'end'   => mb_strlen($text),
                'type'  => $type,
            ];
        }

        return [$npf, $text, $formatting];
    }

    /**
     * Add an inline link for a text block
     *
     * @param DOMElement $element
     * @param integer    $uri_id
     * @param array      $callstack
     * @param array      $npf
     * @param string     $text
     * @param array      $formatting
     * @return array [blocks, pending text, pending formatting]
     */
    private static function addInlineLink(DOMElement $element, int $uri_id, array $callstack, array $npf, string $text, array $formatting): array
    {
        $start = mb_strlen($text);

        [$npf, $text, $formatting] = self::routeChildren($element, $uri_id, false, $callstack, $npf, $text, $formatting);

        $attributes = self::collectAttributes($element);

        // Anchors without a target only contribute their text
        if (!empty($attributes['href'])) {
            $formatting[] = [
                'start' => $start,
                'end'   => mb_strlen($text),
                'type'  => 'link',
                'url'   => $attributes['href'],
            ];
        }

        return [$npf, $text, $formatting];
    }

    /**
     * Add a text block
     *
     * @param string $text
     * @param array  $formatting
     * @param array  $npf
     * @param array  $callstack
     * @return array [blocks, empty text, empty formatting]
     */
    private static function addBlock(string $text, array $formatting, array $npf, array $callstack): array
    {
        $block = ['type' => 'text'];

        $subtype = self::getSubTypeByCallstack($callstack, $text);
        if ($subtype !== '') {
            $block['subtype'] = $subtype;
        }

        $block['text'] = $text;

        if (!empty($formatting)) {
            $block['formatting'] = $formatting;
        }

        $level = self::getLevelByCallstack($callstack);
        if ($level > 0) {
            $block['indent_level'] = $level;
        }

        $npf[] = $block;

        // Text and formatting are consumed by the new block
        return [$npf, '', []];
    }

    /**
     * Add a block for a preview picture
     *
     * @param array $media Media row, the fields "preview", "preview-width" and "preview-height" are used
     * @param array $block
     * @return array The block, extended by "poster" when at least one preview field is set
     */
    private static function addPoster(array $media, array $block): array
    {
        $poster = [];

        foreach (['url' => 'preview', 'width' => 'preview-width', 'height' => 'preview-height'] as $target => $field) {
            if (!empty($media[$field])) {
                $poster[$target] = $media[$field];
            }
        }

        if (!empty($poster)) {
            $block['poster'] = [$poster];
        }

        return $block;
    }

    /**
     * Add a link block from the HTML attachment of a given post uri-id
     *
     * @param integer $uri_id
     * @param integer $level
     * @param array   $npf
     * @return array
     */
    private static function addLinkBlockForUriId(int $uri_id, int $level, array $npf): array
    {
        // Hosts that are added as audio or video block: host => [block type, provider]
        $providers = [
            'www.youtube.com'  => ['video', 'youtube'],
            'youtu.be'         => ['video', 'youtube'],
            'vimeo.com'        => ['video', 'vimeo'],
            'open.spotify.com' => ['audio', 'spotify'],
        ];

        foreach (Post\Media::getByURIId($uri_id, [Post\Media::HTML]) as $link) {
            // Host names are case insensitive; parse_url() returns no string for URLs without a host
            $host = parse_url($link['url'], PHP_URL_HOST);
            $host = is_string($host) ? strtolower($host) : '';

            if (isset($providers[$host])) {
                $block = [
                    'type'     => $providers[$host][0],
                    'provider' => $providers[$host][1],
                    'url'      => $link['url'],
                ];
            } else {
                $block = [
                    'type' => 'link',
                    'url'  => $link['url'],
                ];

                if (!empty($link['name'])) {
                    $block['title'] = $link['name'];
                }

                if (!empty($link['description'])) {
                    $block['description'] = $link['description'];
                }

                if (!empty($link['author-name'])) {
                    $block['author'] = $link['author-name'];
                }

                if (!empty($link['publisher-name'])) {
                    $block['site_name'] = $link['publisher-name'];
                }
            }

            if ($level > 0) {
                $block['indent_level'] = $level;
            }

            $npf[] = self::addPoster($link, $block);
        }

        return $npf;
    }

    /**
     * Add an image block
     *
     * @param DOMElement $element
     * @param integer    $uri_id
     * @param integer    $level
     * @param array      $npf
     * @return array
     */
    private static function addImageBlock(DOMElement $element, int $uri_id, int $level, array $npf): array
    {
        $attributes = self::collectAttributes($element);
        if (empty($attributes['src'])) {
            return $npf;
        }

        $block = [
            'type'  => 'image',
            'media' => [],
        ];

        if (!empty($attributes['alt'])) {
            $block['alt_text'] = $attributes['alt'];
        }

        // A title that only repeats the alternative text is no caption
        if (!empty($attributes['title']) && (($attributes['alt'] ?? '') != $attributes['title'])) {
            $block['caption'] = $attributes['title'];
        }

        $rid = Photo::ridFromURI($attributes['src']);
        if (!empty($rid)) {
            // One media entry per stored photo row of the resource id
            $photos = Photo::selectToArray([], ['resource-id' => $rid]);
            foreach ($photos as $photo) {
                $block['media'][] = [
                    'type'   => $photo['type'],
                    'url'    => str_replace('-0.', '-' . $photo['scale'] . '.', $attributes['src']),
                    'width'  => $photo['width'],
                    'height' => $photo['height'],
                ];
            }

            if (empty($attributes['alt']) && !empty($photos[0]['desc'])) {
                $block['alt_text'] = $photos[0]['desc'];
            }
        }

        // Also reached when a resource id was detected but no photo row exists,
        // so that the block never ends up with an empty media list.
        if (empty($block['media'])) {
            $media = Post\Media::getByURL($uri_id, $attributes['src'], [Post\Media::IMAGE]);
            if (!empty($media)) {
                $block['media'][] = [
                    'type'   => $media['mimetype'],
                    'url'    => $media['url'],
                    'width'  => $media['width'],
                    'height' => $media['height'],
                ];

                if (empty($attributes['alt']) && !empty($media['description'])) {
                    $block['alt_text'] = $media['description'];
                }
            } else {
                $block['media'][] = ['url' => $attributes['src']];
            }
        }

        if ($level > 0) {
            $block['indent_level'] = $level;
        }

        $npf[] = $block;

        return $npf;
    }

    /**
     * Add an audio or video block
     *
     * When no matching audio or video row is stored for the post, the source
     * is added as a text block that links to the source URL instead.
     *
     * @param DOMElement $element
     * @param integer    $uri_id
     * @param integer    $level
     * @param array      $npf
     * @return array
     */
    private static function addMediaBlock(DOMElement $element, int $uri_id, int $level, array $npf): array
    {
        $attributes = self::collectAttributes($element);
        if (empty($attributes['src'])) {
            return $npf;
        }

        $block = [];

        $media = Post\Media::getByURL($uri_id, $attributes['src'], [Post\Media::AUDIO, Post\Media::VIDEO]);
        if (!empty($media)) {
            // The switch compares loosely on purpose: the stored type may not be an integer
            switch ($media['type']) {
                case Post\Media::AUDIO:
                    $block = [
                        'type'  => 'audio',
                        'media' => [
                            'type' => $media['mimetype'],
                            'url'  => $media['url'],
                        ],
                    ];

                    if (!empty($media['name'])) {
                        $block['title'] = $media['name'];
                    } elseif (!empty($media['description'])) {
                        $block['title'] = $media['description'];
                    }

                    $block = self::addPoster($media, $block);
                    break;

                case Post\Media::VIDEO:
                    $block = [
                        'type'  => 'video',
                        'media' => [
                            'type' => $media['mimetype'],
                            'url'  => $media['url'],
                        ],
                    ];

                    $block = self::addPoster($media, $block);
                    break;
            }
        }

        if (empty($block)) {
            // Unknown media: link to the source, labelled with the element content or the URL itself
            $label = $element->textContent;
            if ($label === '') {
                $label = $attributes['src'];
            }

            $block = [
                'type'       => 'text',
                'text'       => $label,
                'formatting' => [
                    [
                        'start' => 0,
                        'end'   => mb_strlen($label),
                        'type'  => 'link',
                        'url'   => $attributes['src'],
                    ],
                ],
            ];
        }

        if ($level > 0) {
            $block['indent_level'] = $level;
        }

        $npf[] = $block;

        return $npf;
    }
}

View file

@ -757,6 +757,17 @@ class Media
return DBA::selectToArray('post-media', [], $condition, ['order' => ['id']]);
}
/**
 * Fetch the first media row of a post that has got the given URL
 *
 * Rows with the type self::UNKNOWN are never returned.
 *
 * @param integer $uri_id URI id of the post
 * @param string  $url    URL that has to match the "url" field exactly
 * @param array   $types  Optional list of media types the result is restricted to
 * @return mixed Result of DBA::selectFirst() for the "post-media" table
 */
public static function getByURL(int $uri_id, string $url, array $types = [])
{
    $base = ["`uri-id` = ? AND `url` = ? AND `type` != ?", $uri_id, $url, self::UNKNOWN];

    // The type restriction is only added when the caller asked for specific types
    $condition = empty($types) ? $base : DBA::mergeConditions($base, ['type' => $types]);

    return DBA::selectFirst('post-media', [], $condition);
}
/**
* Retrieves the media attachment with the provided media id.
*