Nested blocks

This commit is contained in:
Michael 2023-03-29 14:54:43 +00:00
parent 459a7099ca
commit 5f683df711

View file

@ -48,22 +48,13 @@ class NPF
return []; return [];
} }
$node = $doc->getElementsByTagName('body')->item(0); $element = $doc->getElementsByTagName('body')->item(0);
foreach ($node->childNodes as $child) { $npf = self::routeChildren($element, $uri_id, 0, $npf);
if ($child->nodeName == '#text') {
$npf[] = [ return self::addLinkBlock($uri_id, 0, $npf);
'type' => 'text',
'text' => $child->textContent,
];
} else {
$npf = self::routeElements($child, $uri_id, $npf);
}
} }
return self::addLinkBlock($uri_id, $npf); static private function prepareBody(string $body): string
}
public static function prepareBody(string $body): string
{ {
$shared = BBCode::fetchShareAttributes($body); $shared = BBCode::fetchShareAttributes($body);
if (!empty($shared)) { if (!empty($shared)) {
@ -104,56 +95,72 @@ class NPF
return trim($body); return trim($body);
} }
/**
 * Converts the direct child nodes of an element into NPF blocks.
 *
 * Block-level children (quotes, headings, lists, code, links, images,
 * paragraphs) are handed to their dedicated block builders. Everything
 * else is treated as inline content: its text is collected and emitted as
 * a plain text block as soon as the next block-level child is reached, and
 * once more after the last child.
 *
 * @param DOMElement $element Element whose child nodes are converted
 * @param int        $uri_id  URI id of the post, passed through to the block builders
 * @param int        $level   Indent level passed through to the block builders
 * @param array      $npf     NPF blocks collected so far
 *
 * @return array $npf extended by the blocks created for the children
 */
static private function routeChildren(DOMElement $element, int $uri_id, int $level, array $npf): array
{
	// Tags that are rendered through addTextBlock(), mapped to their NPF subtype.
	$text_subtypes = [
		'h1'   => 'heading1',
		'h2'   => 'heading1',
		'h3'   => 'heading1',
		'h4'   => 'heading2',
		'h5'   => 'heading2',
		'h6'   => 'heading2',
		'pre'  => 'indented',
		'code' => 'indented',
		'p'    => '',
		'div'  => '',
	];

	$text       = '';
	$formatting = [];

	foreach ($element->childNodes as $child) {
		$name = $child->nodeName;

		if (($name == 'hr') || ($name == 'br')) {
			$text .= "\n";
			continue;
		}

		if ($name == 'table') {
			// Unsupported
			continue;
		}

		$is_block = isset($text_subtypes[$name]) || in_array($name, ['blockquote', 'ul', 'ol', 'a', 'img'], true);
		if (!$is_block) {
			// Inline content is only collected here and emitted later.
			$text .= $child->textContent;
			continue;
		}

		// Emit the collected inline text before the block and start over.
		// Without the reset the same text would be emitted again in front of
		// every following block.
		$npf        = self::addText($text, $formatting, $npf);
		$text       = '';
		$formatting = [];

		switch ($name) {
			case 'blockquote':
				$npf = self::addQuoteBlock($child, $uri_id, $level, $npf);
				break;

			case 'ul':
			case 'ol':
				$npf = self::addListBlock($child, $uri_id, $level, $npf, $name == 'ol');
				break;

			case 'a':
				$npf = self::addMediaBlock($child, $uri_id, $level, $npf);
				break;

			case 'img':
				$npf = self::addImageBlock($child, $uri_id, $level, $npf);
				break;

			default:
				$npf = self::addTextBlock($child, $uri_id, $level, $npf, $text_subtypes[$name]);
				break;
		}
	}

	// Inline text that follows the last block-level child (or a body without
	// any block-level child at all) would otherwise be lost.
	return self::addText($text, $formatting, $npf);
}
/**
 * Appends a plain text block to the list of NPF blocks.
 *
 * @param string $text       Text content of the block
 * @param array  $formatting Formatting entries; only stored in the block when not empty
 * @param array  $npf        NPF blocks collected so far
 *
 * @return array $npf with the text block appended, or unchanged when $text is the empty string
 */
static private function addText(string $text, array $formatting, array $npf): array
{
	// Compare with '' instead of calling empty(): empty('0') is true, so a
	// text that consists only of "0" would be dropped silently.
	if ($text === '') {
		return $npf;
	}

	$block = [
		'type' => 'text',
		'text' => $text,
	];

	if (!empty($formatting)) {
		$block['formatting'] = $formatting;
	}

	$npf[] = $block;

	return $npf;
}
/**
 * Converts a single element into NPF blocks, chosen by its tag name.
 *
 * @param DOMElement $element Element to convert
 * @param int        $uri_id  URI id of the post, passed through to the block builders
 * @param int        $level   Indent level passed through to the block builders
 * @param array      $npf     NPF blocks collected so far
 *
 * @return array $npf extended by the blocks created for the element
 */
static private function routeElement(DOMElement $element, int $uri_id, int $level, array $npf): array
{
	$tag = $element->nodeName;

	switch ($tag) {
		case 'hr':
		case 'br':
		case 'table':
			// Rules and line breaks create no block here, tables are unsupported.
			return $npf;

		case 'blockquote':
			return self::addQuoteBlock($element, $uri_id, $level, $npf);

		case 'ul':
		case 'ol':
			return self::addListBlock($element, $uri_id, $level, $npf, $tag == 'ol', 0);

		case 'a':
			return self::addMediaBlock($element, $uri_id, $level, $npf);

		case 'img':
			return self::addImageBlock($element, $uri_id, $level, $npf);

		case 'h1':
		case 'h2':
		case 'h3':
			$subtype = 'heading1';
			break;

		case 'h4':
		case 'h5':
		case 'h6':
			$subtype = 'heading2';
			break;

		case 'pre':
		case 'code':
			$subtype = 'indented';
			break;

		default:
			// Any other tag becomes a text block without a subtype.
			$subtype = '';
			break;
	}

	return self::addTextBlock($element, $uri_id, $level, $npf, $subtype);
}
static private function addImageBlock(DOMElement $child, int $uri_id, array $npf): array static private function addImageBlock(DOMElement $element, int $uri_id, int $level, array $npf): array
{ {
$attributes = []; $attributes = [];
foreach ($child->attributes as $key => $attribute) { foreach ($element->attributes as $key => $attribute) {
$attributes[$key] = $attribute->value; $attributes[$key] = $attribute->value;
} }
if (empty($attributes['src'])) { if (empty($attributes['src'])) {
return $npf; return $npf;
} }
$entry = [ $block = [
'type' => 'image', 'type' => 'image',
'media' => [], 'media' => [],
]; ];
if (!empty($attributes['alt'])) { if (!empty($attributes['alt'])) {
$entry['alt_text'] = $attributes['alt']; $block['alt_text'] = $attributes['alt'];
} }
if (!empty($attributes['title']) && ($attributes['alt'] ?? '' != $attributes['title'])) { if (!empty($attributes['title']) && ($attributes['alt'] ?? '' != $attributes['title'])) {
$entry['caption'] = $attributes['title']; $block['caption'] = $attributes['title'];
} }
$rid = Photo::ridFromURI($attributes['src']); $rid = Photo::ridFromURI($attributes['src']);
if (!empty($rid)) { if (!empty($rid)) {
$photos = Photo::selectToArray([], ['resource-id' => $rid]); $photos = Photo::selectToArray([], ['resource-id' => $rid]);
foreach ($photos as $photo) { foreach ($photos as $photo) {
$entry['media'][] = [ $block['media'][] = [
'type' => $photo['type'], 'type' => $photo['type'],
'url' => str_replace('-0.', '-' . $photo['scale'] . '.', $attributes['src']), 'url' => str_replace('-0.', '-' . $photo['scale'] . '.', $attributes['src']),
'width' => $photo['width'], 'width' => $photo['width'],
@ -207,31 +309,35 @@ class NPF
]; ];
} }
if (empty($attributes['alt']) && !empty($photos[0]['desc'])) { if (empty($attributes['alt']) && !empty($photos[0]['desc'])) {
$entry['alt_text'] = $photos[0]['desc']; $block['alt_text'] = $photos[0]['desc'];
} }
} elseif ($media = Post\Media::getByURL($uri_id, $attributes['src'], [Post\Media::IMAGE])) { } elseif ($media = Post\Media::getByURL($uri_id, $attributes['src'], [Post\Media::IMAGE])) {
$entry['media'][] = [ $block['media'][] = [
'type' => $media['mimetype'], 'type' => $media['mimetype'],
'url' => $media['url'], 'url' => $media['url'],
'width' => $media['width'], 'width' => $media['width'],
'height' => $media['height'], 'height' => $media['height'],
]; ];
if (empty($attributes['alt']) && !empty($media['description'])) { if (empty($attributes['alt']) && !empty($media['description'])) {
$entry['alt_text'] = $media['description']; $block['alt_text'] = $media['description'];
} }
} else { } else {
$entry['media'][] = ['url' => $attributes['src']]; $block['media'][] = ['url' => $attributes['src']];
} }
$npf[] = $entry; if ($level > 0) {
$block['indent_level'] = $level;
}
$npf[] = $block;
return $npf; return $npf;
} }
static private function addMediaBlock(DOMElement $child, int $uri_id, array $npf): array static private function addMediaBlock(DOMElement $element, int $uri_id, int $level, array $npf): array
{ {
$attributes = []; $attributes = [];
foreach ($child->attributes as $key => $attribute) { foreach ($element->attributes as $key => $attribute) {
$attributes[$key] = $attribute->value; $attributes[$key] = $attribute->value;
} }
if (empty($attributes['href'])) { if (empty($attributes['href'])) {
@ -242,7 +348,7 @@ class NPF
if (!empty($media)) { if (!empty($media)) {
switch ($media['type']) { switch ($media['type']) {
case Post\Media::AUDIO: case Post\Media::AUDIO:
$entry = [ $block = [
'type' => 'audio', 'type' => 'audio',
'media' => [ 'media' => [
'type' => $media['mimetype'], 'type' => $media['mimetype'],
@ -251,16 +357,16 @@ class NPF
]; ];
if (!empty($media['name'])) { if (!empty($media['name'])) {
$entry['title'] = $media['name']; $block['title'] = $media['name'];
} elseif (!empty($media['description'])) { } elseif (!empty($media['description'])) {
$entry['title'] = $media['description']; $block['title'] = $media['description'];
} }
$npf[] = self::addPoster($media, $entry); $block = self::addPoster($media, $block);
break; break;
case Post\Media::VIDEO: case Post\Media::VIDEO:
$entry = [ $block = [
'type' => 'video', 'type' => 'video',
'media' => [ 'media' => [
'type' => $media['mimetype'], 'type' => $media['mimetype'],
@ -268,25 +374,32 @@ class NPF
] ]
]; ];
$npf[] = self::addPoster($media, $entry); $block = self::addPoster($media, $block);
break; break;
} }
} else { } else {
$npf[] = [ $block = [
'type' => 'text', 'type' => 'text',
'text' => $child->textContent, 'text' => $element->textContent,
'formatting' => [ 'formatting' => [
'start' => 0, 'start' => 0,
'end' => strlen($child->textContent), 'end' => strlen($element->textContent),
'type' => 'link', 'type' => 'link',
'url' => $attributes['href'] 'url' => $attributes['href']
] ]
]; ];
} }
if ($level > 0) {
$block['indent_level'] = $level;
}
$npf[] = $block;
return $npf; return $npf;
} }
static private function addPoster(array $media, array $entry): array static private function addPoster(array $media, array $block): array
{ {
$poster = []; $poster = [];
if (!empty($media['preview'])) { if (!empty($media['preview'])) {
@ -299,9 +412,9 @@ class NPF
$poster['height'] = $media['preview-height']; $poster['height'] = $media['preview-height'];
} }
if (!empty($poster)) { if (!empty($poster)) {
$entry['poster'] = $poster; $block['poster'] = $poster;
} }
return $entry; return $block;
} }
static private function getTypeForNodeName(string $nodename): string static private function getTypeForNodeName(string $nodename): string
@ -321,20 +434,20 @@ class NPF
return ''; return '';
} }
static private function fetchText(DOMElement $child, array $text = ['text' => '', 'formatting' => []]): array static private function fetchText(DOMElement $element, array $text = ['text' => '', 'formatting' => []]): array
{ {
foreach ($child->childNodes as $node) { foreach ($element->childNodes as $child) {
$start = strlen($text['text']); $start = strlen($text['text']);
$type = self::getTypeForNodeName($node->nodeName); $type = self::getTypeForNodeName($child->nodeName);
if ($node->nodeName == 'br') { if ($child->nodeName == 'br') {
$text['text'] .= "\n"; $text['text'] .= "\n";
} elseif (($type != '') || in_array($node->nodeName, ['#text', 'code', 'a', 'p', 'span', 'u', 'img', 'summary', 'ul', 'blockquote', 'h3', 'ol'])) { } elseif (($type != '') || in_array($child->nodeName, ['#text', 'code', 'a', 'p', 'span', 'u', 'img', 'summary', 'ul', 'blockquote', 'h3', 'ol'])) {
$text['text'] .= $node->textContent; $text['text'] .= $child->textContent;
} else { } else {
echo $child->ownerDocument->saveHTML($child) . "\n"; echo $element->ownerDocument->saveHTML($element) . "\n";
die($node->nodeName . "\n"); die($child->nodeName . "\n");
} }
if (!empty($type)) { if (!empty($type)) {
$text['formatting'][] = ['start' => $start, 'end' => strlen($text['text']), 'type' => $type]; $text['formatting'][] = ['start' => $start, 'end' => strlen($text['text']), 'type' => $type];
@ -343,110 +456,133 @@ class NPF
return $text; return $text;
} }
/**
 * Adds the blocks for a quote element.
 *
 * First an "indented" text block is appended (carrying the indent level
 * when it is greater than zero), then the children of the element are
 * converted via routeChildren() with an indent level of 0.
 *
 * NOTE(review): the leading block is created without a 'text' key - confirm
 * that this is what the consumer of the NPF data expects.
 *
 * @param DOMElement $element Quote element
 * @param int        $uri_id  URI id of the post, passed through to routeChildren()
 * @param int        $level   Indent level of the quote
 * @param array      $npf     NPF blocks collected so far
 *
 * @return array $npf extended by the quote block and the blocks of its children
 */
static private function addQuoteBlock(DOMElement $element, int $uri_id, int $level, array $npf): array
{
	$quote = ($level > 0)
		? ['type' => 'text', 'subtype' => 'indented', 'indent_level' => $level]
		: ['type' => 'text', 'subtype' => 'indented'];

	$npf[] = $quote;

	return self::routeChildren($element, $uri_id, 0, $npf);
}
/**
 * Adds a text block for the given element.
 *
 * When no subtype is requested and the element is only a wrapper around a
 * single leading child element that holds all of its text, that child is
 * routed through routeElement() instead of creating a block for the wrapper.
 *
 * @param DOMElement $element Element to convert
 * @param int        $uri_id  URI id of the post, passed through to routeElement()
 * @param int        $level   Indent level; stored in the block when greater than zero
 * @param array      $npf     NPF blocks collected so far
 * @param string     $subtype Optional NPF subtype of the text block
 *
 * @return array $npf extended by the created block(s)
 */
static private function addTextBlock(DOMElement $element, int $uri_id, int $level, array $npf, string $subtype = ''): array
{
	$first = $element->firstChild;

	// firstChild is null for an element without children and may be a node
	// that is no element (text, comment, ...). routeElement() only accepts
	// a DOMElement, so check the type before dereferencing and delegating.
	if (empty($subtype) && ($first instanceof DOMElement) && ($element->textContent == $first->textContent)) {
		return self::routeElement($first, $uri_id, $level, $npf);
	}

	$block = ['type' => 'text'];

	if (!empty($subtype)) {
		$block['subtype'] = $subtype;
	}

	$text = self::fetchText($element);

	$block['text']       = $text['text'];
	$block['formatting'] = $text['formatting'];

	if (empty($subtype)) {
		// Without a subtype the tag itself may stand for a formatting that
		// then spans the whole text of the block.
		$type = self::getTypeForNodeName($element->nodeName);
		if (!empty($type)) {
			$block['formatting'][] = ['start' => 0, 'end' => strlen($block['text']), 'type' => $type];
		}
	}

	if (empty($block['formatting'])) {
		unset($block['formatting']);
	}

	if ($level > 0) {
		$block['indent_level'] = $level;
	}

	$npf[] = $block;

	return $npf;
}
/**
 * Adds one text block per list item of the given list element.
 *
 * Lists that are direct children of the list are processed recursively
 * with an indent level that is one higher than the current one.
 *
 * @param DOMElement $element List element ("ul" or "ol")
 * @param int        $uri_id  URI id of the post, passed through to nested lists
 * @param int        $level   Indent level of the items; stored when greater than zero
 * @param array      $npf     NPF blocks collected so far
 * @param bool       $ordered true for ordered list items, false for unordered ones
 *
 * @return array $npf extended by the list item blocks
 */
static private function addListBlock(DOMElement $element, int $uri_id, int $level, array $npf, bool $ordered): array
{
	foreach ($element->childNodes as $child) {
		switch ($child->nodeName) {
			case 'ul':
				// "$level + 1" instead of "$level++": the nested list has to be
				// one level deeper, the level of the following siblings must
				// stay untouched.
				$npf = self::addListBlock($child, $uri_id, $level + 1, $npf, false);
				break;

			case 'ol':
				$npf = self::addListBlock($child, $uri_id, $level + 1, $npf, true);
				break;

			case 'li':
				$text = self::fetchText($child);

				$block = [
					'type'    => 'text',
					'subtype' => $ordered ? 'ordered-list-item' : 'unordered-list-item',
					'text'    => $text['text']
				];

				if ($level > 0) {
					$block['indent_level'] = $level;
				}

				if (!empty($text['formatting'])) {
					$block['formatting'] = $text['formatting'];
				}

				$npf[] = $block;
				break;
		}
	}

	return $npf;
}
/**
 * Appends one block for every HTML media entry that is attached to the post.
 *
 * Links to known hosts become "video" or "audio" blocks that name their
 * provider, every other link becomes a "link" block with the available
 * meta data. Each block is passed through addPoster() before it is stored.
 *
 * @param int   $uri_id URI id of the post whose media entries are fetched
 * @param int   $level  Indent level; stored in the blocks when greater than zero
 * @param array $npf    NPF blocks collected so far
 *
 * @return array $npf extended by the link blocks
 */
static private function addLinkBlock(int $uri_id, int $level, array $npf): array
{
	// Host name => [block type, provider]
	$providers = [
		'www.youtube.com'  => ['video', 'youtube'],
		'youtu.be'         => ['video', 'youtube'],
		'vimeo.com'        => ['video', 'vimeo'],
		'open.spotify.com' => ['audio', 'spotify'],
	];

	// Field of the media entry => key in the "link" block
	$meta_fields = [
		'name'           => 'title',
		'description'    => 'description',
		'author-name'    => 'author',
		'publisher-name' => 'site_name',
	];

	$links = Post\Media::getByURIId($uri_id, [Post\Media::HTML]);

	foreach ($links as $link) {
		$host = parse_url($link['url'], PHP_URL_HOST);

		if (is_string($host) && isset($providers[$host])) {
			[$type, $provider] = $providers[$host];

			$block = [
				'type'     => $type,
				'provider' => $provider,
				'url'      => $link['url'],
			];
		} else {
			$block = [
				'type' => 'link',
				'url'  => $link['url'],
			];

			foreach ($meta_fields as $field => $key) {
				if (!empty($link[$field])) {
					$block[$key] = $link[$field];
				}
			}
		}

		if ($level > 0) {
			$block['indent_level'] = $level;
		}

		$npf[] = self::addPoster($link, $block);
	}

	return $npf;
}