Improved page info detection

This commit is contained in:
Michael 2021-12-04 19:54:33 +00:00
parent ba98242b9f
commit 34260fc619

View file

@ -344,9 +344,6 @@ class ParseUrl
$siteinfo['title'] = trim($list->item(0)->nodeValue); $siteinfo['title'] = trim($list->item(0)->nodeValue);
} }
$twitter_card = false;
$twitter_image = false;
$list = $xpath->query('//meta[@name]'); $list = $xpath->query('//meta[@name]');
foreach ($list as $node) { foreach ($list as $node) {
$meta_tag = []; $meta_tag = [];
@ -374,23 +371,28 @@ class ParseUrl
break; break;
case 'twitter:image': case 'twitter:image':
$siteinfo['image'] = $meta_tag['content']; $siteinfo['image'] = $meta_tag['content'];
$twitter_image = true;
break; break;
case 'twitter:image:src': case 'twitter:image:src':
$siteinfo['image'] = $meta_tag['content']; $siteinfo['image'] = $meta_tag['content'];
break; break;
case 'twitter:card':
// Detect photo pages
if ($meta_tag['content'] == 'summary_large_image') {
$twitter_card = true;
}
break;
case 'twitter:description': case 'twitter:description':
$siteinfo['text'] = trim($meta_tag['content']); $siteinfo['text'] = trim($meta_tag['content']);
break; break;
case 'twitter:title': case 'twitter:title':
$siteinfo['title'] = trim($meta_tag['content']); $siteinfo['title'] = trim($meta_tag['content']);
break; break;
case 'twitter:player':
$siteinfo['player']['embed'] = trim($meta_tag['content']);
break;
case 'twitter:player:stream':
$siteinfo['player']['stream'] = trim($meta_tag['content']);
break;
case 'twitter:player:width':
$siteinfo['player']['width'] = intval($meta_tag['content']);
break;
case 'twitter:player:height':
$siteinfo['player']['height'] = intval($meta_tag['content']);
break;
case 'dc.title': case 'dc.title':
$siteinfo['title'] = trim($meta_tag['content']); $siteinfo['title'] = trim($meta_tag['content']);
break; break;
@ -457,7 +459,6 @@ class ParseUrl
break; break;
case 'twitter:image': case 'twitter:image':
$siteinfo['image'] = $meta_tag['content']; $siteinfo['image'] = $meta_tag['content'];
$twitter_image = true;
break; break;
} }
} }
@ -472,11 +473,22 @@ class ParseUrl
} }
} }
// Currently deactivated, see https://github.com/friendica/friendica/pull/10148#issuecomment-821512658 if (!empty($siteinfo['player']['stream'])) {
// Prevent to have a photo type without an image // Only add player data to media arrays if there is no duplicate
// if ($twitter_card && $twitter_image && !empty($siteinfo['image'])) { $content_urls = array_merge(array_column($siteinfo['audio'] ?? [], 'content'), array_column($siteinfo['video'] ?? [], 'content'));
// $siteinfo['type'] = 'photo'; if (!in_array($siteinfo['player']['stream'], $content_urls)) {
// } $contenttype = self::getContentType($siteinfo['player']['stream']);
if (!empty($contenttype[0]) && in_array($contenttype[0], ['audio', 'video'])) {
$media = ['content' => $siteinfo['player']['stream']];
if (!empty($siteinfo['player']['embed'])) {
$media['embed'] = $siteinfo['player']['embed'];
}
$siteinfo[$contenttype[0]][] = $media;
}
}
}
if (!empty($siteinfo['image'])) { if (!empty($siteinfo['image'])) {
$siteinfo['images'] = $siteinfo['images'] ?? []; $siteinfo['images'] = $siteinfo['images'] ?? [];
@ -498,6 +510,8 @@ class ParseUrl
Hook::callAll('getsiteinfo', $siteinfo); Hook::callAll('getsiteinfo', $siteinfo);
ksort($siteinfo);
return $siteinfo; return $siteinfo;
} }
@ -509,7 +523,7 @@ class ParseUrl
* @param array $siteinfo * @param array $siteinfo
* @return array * @return array
*/ */
private static function checkMedia(string $page_url, array $siteinfo) private static function checkMedia(string $page_url, array $siteinfo) : array
{ {
if (!empty($siteinfo['images'])) { if (!empty($siteinfo['images'])) {
array_walk($siteinfo['images'], function (&$image) use ($page_url) { array_walk($siteinfo['images'], function (&$image) use ($page_url) {
@ -572,8 +586,8 @@ class ParseUrl
} }
if (!empty($embed)) { if (!empty($embed)) {
$media['embed'] = $embed; $media['embed'] = $embed;
if (!empty($media['main'])) { if (empty($siteinfo['player']['embed'])) {
$siteinfo['embed'] = $embed; $siteinfo['player']['embed'] = $embed;
} }
} }
if (!empty($content)) { if (!empty($content)) {