Merge pull request #10038 from annando/issue-10019

Issue 10019: Fix embedding of media objects
This commit is contained in:
Hypolite Petovan 2021-03-13 08:26:56 -05:00 committed by GitHub
commit 7b41866410
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 34 additions and 18 deletions

View file

@ -96,8 +96,7 @@ class ParseUrl extends BaseModule
if ($format == 'json') { if ($format == 'json') {
$siteinfo = Util\ParseUrl::getSiteinfoCached($url); $siteinfo = Util\ParseUrl::getSiteinfoCached($url);
if (empty($siteinfo['title']) && empty($siteinfo['text']) && empty($siteinfo['image']) if (in_array($siteinfo['type'], ['image', 'video', 'audio'])) {
&& in_array($siteinfo['type'], ['image', 'video', 'audio'])) {
switch ($siteinfo['type']) { switch ($siteinfo['type']) {
case 'video': case 'video':
$content_type = 'video'; $content_type = 'video';

View file

@ -51,6 +51,26 @@ class ParseUrl
*/ */
const MIN_DESC_COUNT = 100; const MIN_DESC_COUNT = 100;
/**
 * Fetch the content type of the given url
 *
 * Sends a HEAD request to the URL and reads the "Content-Type" response
 * header. Any parameters after the first ";" (for example a charset) are
 * dropped, and the remaining media type is trimmed, lower-cased and split
 * on "/".
 *
 * @param string $url URL of the page
 * @return array The normalised media type split on "/" (e.g. ['text', 'html']),
 *               or an empty array when the request was not successful or no
 *               Content-Type header was returned
 */
public static function getContentType(string $url)
{
    $curlResult = DI::httpRequest()->head($url);
    if (!$curlResult->isSuccess()) {
        return [];
    }

    $contenttype = $curlResult->getHeader('Content-Type');
    if (empty($contenttype)) {
        return [];
    }

    // Media types are case-insensitive and may be followed by whitespace
    // before the parameter separator ("text/HTML ; charset=utf-8"), so
    // normalise the value before callers compare it against lower-case
    // literals such as 'image' or 'html'.
    $mediatype = strtolower(trim(current(explode(';', $contenttype))));

    return explode('/', $mediatype);
}
/** /**
* Search for cached embeddable data of an url otherwise fetch it * Search for cached embeddable data of an url otherwise fetch it
* *
@ -186,6 +206,18 @@ class ParseUrl
return $siteinfo; return $siteinfo;
} }
$type = self::getContentType($url);
Logger::info('Got content-type', ['content-type' => $type, 'url' => $url]);
if (!empty($type) && in_array($type[0], ['image', 'video', 'audio'])) {
$siteinfo['type'] = $type[0];
return $siteinfo;
}
if ((count($type) >= 2) && (($type[0] != 'text') || ($type[1] != 'html'))) {
Logger::info('Unparseable content-type, quitting here, ', ['content-type' => $type, 'url' => $url]);
return $siteinfo;
}
$curlResult = DI::httpRequest()->get($url); $curlResult = DI::httpRequest()->get($url);
if (!$curlResult->isSuccess()) { if (!$curlResult->isSuccess()) {
return $siteinfo; return $siteinfo;
@ -198,21 +230,6 @@ class ParseUrl
return $siteinfo; return $siteinfo;
} }
// Native media type, no need for HTML parsing
$type = $curlResult->getHeader('Content-Type');
if ($type) {
preg_match('#(image|video|audio)/#i', $type, $matches);
if ($matches) {
$siteinfo['type'] = array_pop($matches);
return $siteinfo;
}
}
// If it isn't a HTML file then exit
if (($curlResult->getContentType() != '') && !strstr(strtolower($curlResult->getContentType()), 'html')) {
return $siteinfo;
}
if ($cacheControlHeader = $curlResult->getHeader('Cache-Control')) { if ($cacheControlHeader = $curlResult->getHeader('Cache-Control')) {
if (preg_match('/max-age=([0-9]+)/i', $cacheControlHeader, $matches)) { if (preg_match('/max-age=([0-9]+)/i', $cacheControlHeader, $matches)) {
$maxAge = max(86400, (int)array_pop($matches)); $maxAge = max(86400, (int)array_pop($matches));
@ -227,7 +244,7 @@ class ParseUrl
$oembed_data = OEmbed::fetchURL($url); $oembed_data = OEmbed::fetchURL($url);
if (!empty($oembed_data->type)) { if (!empty($oembed_data->type)) {
if (!in_array($oembed_data->type, ['error', 'rich', ''])) { if (!in_array($oembed_data->type, ['error', 'rich', 'image', 'video', 'audio', ''])) {
$siteinfo['type'] = $oembed_data->type; $siteinfo['type'] = $oembed_data->type;
} }