From b65f73e3483d49ea09163d5d0255693ade135976 Mon Sep 17 00:00:00 2001 From: Michael Date: Wed, 14 Apr 2021 19:12:01 +0000 Subject: [PATCH] Improved mimeType detection and setting of the "type" field --- src/Model/Item.php | 7 +- src/Model/Post/Media.php | 119 ++++++++++++++++++++----- src/Protocol/ActivityPub/Processor.php | 16 +--- 3 files changed, 104 insertions(+), 38 deletions(-) diff --git a/src/Model/Item.php b/src/Model/Item.php index 1d50dba2e7..863d89790e 100644 --- a/src/Model/Item.php +++ b/src/Model/Item.php @@ -2684,7 +2684,12 @@ class Item { $leading = ''; $trailing = ''; - foreach (Post\Media::getByURIId($item['uri-id'], [Post\Media::DOCUMENT, Post\Media::TORRENT, Post\Media::UNKNOWN]) as $attachment) { + foreach (Post\Media::getByURIId($item['uri-id'], [Post\Media::AUDIO, Post\Media::VIDEO, + Post\Media::DOCUMENT, Post\Media::TORRENT, Post\Media::UNKNOWN]) as $attachment) { + if (in_array($attachment['type'], [Post\Media::AUDIO, Post\Media::VIDEO]) && strpos($item['body'], $attachment['url'])) { + continue; + } + $mime = $attachment['mimetype']; $author = ['uid' => 0, 'id' => $item['author-id'], diff --git a/src/Model/Post/Media.php b/src/Model/Post/Media.php index 5dc5a0daa2..8601be02a3 100644 --- a/src/Model/Post/Media.php +++ b/src/Model/Post/Media.php @@ -36,12 +36,17 @@ use Friendica\Util\Images; */ class Media { - const UNKNOWN = 0; - const IMAGE = 1; - const VIDEO = 2; - const AUDIO = 3; - const TORRENT = 16; - const DOCUMENT = 128; + const UNKNOWN = 0; + const IMAGE = 1; + const VIDEO = 2; + const AUDIO = 3; + const TEXT = 4; + const APPLICATION = 5; + const TORRENT = 16; + const HTML = 17; + const XML = 18; + const PLAIN = 19; + const DOCUMENT = 128; /** * Insert a post-media record @@ -51,7 +56,7 @@ class Media */ public static function insert(array $media, bool $force = false) { - if (empty($media['url']) || empty($media['uri-id']) || empty($media['type'])) { + if (empty($media['url']) || empty($media['uri-id']) || !isset($media['type'])) { Logger::warning('Incomplete media data', ['media' => $media]); return; } @@ -64,12 +69,7 @@ class Media return; } - $fields = ['mimetype', 'height', 'width', 'size', 'preview', 'preview-height', 'preview-width', 'description']; - foreach ($fields as $field) { - if (empty($media[$field])) { - unset($media[$field]); - } - } + $media = self::unsetEmptyFields($media); // We are storing as fast as possible to avoid duplicated network requests // when fetching additional information for pictures and other content. @@ -78,6 +78,7 @@ class Media $stored = $media; $media = self::fetchAdditionalData($media); + $media = self::unsetEmptyFields($media); if (array_diff_assoc($media, $stored)) { $result = DBA::insert('post-media', $media, Database::INSERT_UPDATE); @@ -87,6 +88,23 @@ class Media } } + /** + * Remove empty media fields + * + * @param array $media + * @return array cleaned media array + */ + private static function unsetEmptyFields(array $media) + { + $fields = ['mimetype', 'height', 'width', 'size', 'preview', 'preview-height', 'preview-width', 'description']; + foreach ($fields as $field) { + if (empty($media[$field])) { + unset($media[$field]); + } + } + return $media; + } + /** * Copy attachments from one uri-id to another * @@ -130,23 +148,22 @@ class Media public static function fetchAdditionalData(array $media) { // Fetch the mimetype or size if missing. - // We don't do it for torrent links since they need special treatment. - // We don't do this for images, since we are fetching their details some lines later anyway. - if (!in_array($media['type'], [self::TORRENT, self::IMAGE]) && (empty($media['mimetype']) || empty($media['size']))) { + if (empty($media['mimetype']) || empty($media['size'])) { $timeout = DI::config()->get('system', 'xrd_timeout'); $curlResult = DI::httpRequest()->head($media['url'], ['timeout' => $timeout]); if ($curlResult->isSuccess()) { - $header = $curlResult->getHeaderArray(); - if (empty($media['mimetype']) && !empty($header['content-type'])) { - $media['mimetype'] = $header['content-type']; + if (empty($media['mimetype'])) { + $media['mimetype'] = $curlResult->getHeader('Content-Type'); } - if (empty($media['size']) && !empty($header['content-length'])) { - $media['size'] = $header['content-length']; + if (empty($media['size'])) { + $media['size'] = (int)$curlResult->getHeader('Content-Length'); } + } else { + Logger::notice('Could not fetch head', ['media' => $media]); } } - $filetype = !empty($media['mimetype']) ? strtolower(substr($media['mimetype'], 0, strpos($media['mimetype'], '/'))) : ''; + $filetype = !empty($media['mimetype']) ? strtolower(current(explode('/', $media['mimetype']))) : ''; if (($media['type'] == self::IMAGE) || ($filetype == 'image')) { $imagedata = Images::getInfoFromURLCached($media['url']); @@ -155,6 +172,8 @@ class Media $media['size'] = $imagedata['size']; $media['width'] = $imagedata[0]; $media['height'] = $imagedata[1]; + } else { + Logger::notice('No image data', ['media' => $media]); } if (!empty($media['preview'])) { $imagedata = Images::getInfoFromURLCached($media['preview']); @@ -164,9 +183,65 @@ class Media } } } + + if ($media['type'] != self::DOCUMENT) { + $media = self::addType($media); + } + return $media; } + /** + * Add the detected type to the media array + * + * @param array $data + * @return array data array with the detected type + */ + public static function addType(array $data) + { + if (empty($data['mimetype'])) { + Logger::info('No MimeType provided', ['media' => $data]); + return $data; + } + + $type = explode('/', current(explode(';', $data['mimetype']))); + if (count($type) < 2) { + Logger::info('Unknown MimeType', ['type' => $type, 'media' => $data]); + $data['type'] = self::UNKNOWN; + return $data; + } + + $filetype = strtolower($type[0]); + $subtype = strtolower($type[1]); + + if ($filetype == 'image') { + $data['type'] = self::IMAGE; + } elseif ($filetype == 'video') { + $data['type'] = self::VIDEO; + } elseif ($filetype == 'audio') { + $data['type'] = self::AUDIO; + } elseif (($filetype == 'text') && ($subtype == 'html')) { + $data['type'] = self::HTML; + } elseif (($filetype == 'text') && ($subtype == 'xml')) { + $data['type'] = self::XML; + } elseif (($filetype == 'text') && ($subtype == 'plain')) { + $data['type'] = self::PLAIN; + } elseif ($filetype == 'text') { + $data['type'] = self::TEXT; + } elseif (($filetype == 'application') && ($subtype == 'x-bittorrent')) { + $data['type'] = self::TORRENT; + } elseif ($filetype == 'application') { + $data['type'] = self::APPLICATION; + } else { + $data['type'] = self::UNKNOWN; + Logger::info('Unknown type', ['filetype' => $filetype, 'subtype' => $subtype, 'media' => $data]); + return $data; + } + + Logger::debug('Detected type', ['filetype' => $filetype, 'subtype' => $subtype, 'media' => $data]); + return $data; + } + /** * Tests for path patterns that are usef for picture links in Friendica * diff --git a/src/Protocol/ActivityPub/Processor.php b/src/Protocol/ActivityPub/Processor.php index 66aedf65b0..fdb97337fd 100644 --- a/src/Protocol/ActivityPub/Processor.php +++ b/src/Protocol/ActivityPub/Processor.php @@ -123,21 +123,7 @@ class Processor } $data = ['uri-id' => $uriid]; - - $filetype = strtolower(substr($attachment['mediaType'], 0, strpos($attachment['mediaType'], '/'))); - if ($filetype == 'image') { - $data['type'] = Post\Media::IMAGE; - } elseif ($filetype == 'video') { - $data['type'] = Post\Media::VIDEO; - } elseif ($filetype == 'audio') { - $data['type'] = Post\Media::AUDIO; - } elseif (in_array($attachment['mediaType'], ['application/x-bittorrent', 'application/x-bittorrent;x-scheme-handler/magnet'])) { - $data['type'] = Post\Media::TORRENT; - } else { - Logger::info('Unknown type', ['attachment' => $attachment]); - return; - } - + $data['type'] = Post\Media::UNKNOWN; $data['url'] = $attachment['url']; $data['mimetype'] = $attachment['mediaType']; $data['height'] = $attachment['height'] ?? null;