More general content type detection
This commit is contained in:
parent
acffafe6b9
commit
7adbd73eca
1 changed file with 12 additions and 25 deletions
|
@ -54,25 +54,21 @@ class ParseUrl
|
||||||
/**
|
/**
|
||||||
* Fetch the content type of the given url
|
* Fetch the content type of the given url
|
||||||
* @param string $url URL of the page
|
* @param string $url URL of the page
|
||||||
* @return string content type
|
* @return array content type
|
||||||
*/
|
*/
|
||||||
public static function getContentType(string $url)
|
public static function getContentType(string $url)
|
||||||
{
|
{
|
||||||
$curlResult = DI::httpRequest()->head($url);
|
$curlResult = DI::httpRequest()->head($url);
|
||||||
if (!$curlResult->isSuccess()) {
|
if (!$curlResult->isSuccess()) {
|
||||||
return '';
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
$contenttype = $curlResult->getHeader('Content-Type');
|
$contenttype = $curlResult->getHeader('Content-Type');
|
||||||
if (empty($contenttype)) {
|
if (empty($contenttype)) {
|
||||||
return '';
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!preg_match('#(image|video|audio)/#i', $contenttype, $matches)) {
|
return explode('/', current(explode(';', $contenttype)));
|
||||||
return '';
|
|
||||||
}
|
|
||||||
|
|
||||||
return array_pop($matches);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -211,8 +207,14 @@ class ParseUrl
|
||||||
}
|
}
|
||||||
|
|
||||||
$type = self::getContentType($url);
|
$type = self::getContentType($url);
|
||||||
if (in_array($type, ['image', 'video', 'audio'])) {
|
Logger::info('Got content-type', ['content-type' => $type, 'url' => $url]);
|
||||||
$siteinfo['type'] = $type;
|
if (!empty($type) && in_array($type[0], ['image', 'video', 'audio'])) {
|
||||||
|
$siteinfo['type'] = $type[0];
|
||||||
|
return $siteinfo;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((count($type) >= 2) && (($type[0] != 'text') || ($type[1] != 'html'))) {
|
||||||
|
Logger::info('Unparseable content-type, quitting here, ', ['content-type' => $type, 'url' => $url]);
|
||||||
return $siteinfo;
|
return $siteinfo;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -228,21 +230,6 @@ class ParseUrl
|
||||||
return $siteinfo;
|
return $siteinfo;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Native media type, no need for HTML parsing
|
|
||||||
$type = $curlResult->getHeader('Content-Type');
|
|
||||||
if ($type) {
|
|
||||||
preg_match('#(image|video|audio)/#i', $type, $matches);
|
|
||||||
if ($matches) {
|
|
||||||
$siteinfo['type'] = array_pop($matches);
|
|
||||||
return $siteinfo;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If it isn't a HTML file then exit
|
|
||||||
if (($curlResult->getContentType() != '') && !strstr(strtolower($curlResult->getContentType()), 'html')) {
|
|
||||||
return $siteinfo;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($cacheControlHeader = $curlResult->getHeader('Cache-Control')) {
|
if ($cacheControlHeader = $curlResult->getHeader('Cache-Control')) {
|
||||||
if (preg_match('/max-age=([0-9]+)/i', $cacheControlHeader, $matches)) {
|
if (preg_match('/max-age=([0-9]+)/i', $cacheControlHeader, $matches)) {
|
||||||
$maxAge = max(86400, (int)array_pop($matches));
|
$maxAge = max(86400, (int)array_pop($matches));
|
||||||
|
|
Loading…
Reference in a new issue