Merge pull request #13827 from annando/ocr

Use OCR to fetch text in images
This commit is contained in:
Hypolite Petovan 2024-01-15 01:06:50 -05:00 committed by GitHub
commit faa1cbeaf6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 20 additions and 5 deletions

View file

@ -208,13 +208,17 @@ class Media
$filetype = !empty($media['mimetype']) ? strtolower(current(explode('/', $media['mimetype']))) : ''; $filetype = !empty($media['mimetype']) ? strtolower(current(explode('/', $media['mimetype']))) : '';
if (($media['type'] == self::IMAGE) || ($filetype == 'image')) { if (($media['type'] == self::IMAGE) || ($filetype == 'image')) {
$imagedata = Images::getInfoFromURLCached($media['url']); $imagedata = Images::getInfoFromURLCached($media['url'], empty($media['description']));
if ($imagedata) { if ($imagedata) {
$media['mimetype'] = $imagedata['mime']; $media['mimetype'] = $imagedata['mime'];
$media['size'] = $imagedata['size']; $media['size'] = $imagedata['size'];
$media['width'] = $imagedata[0]; $media['width'] = $imagedata[0];
$media['height'] = $imagedata[1]; $media['height'] = $imagedata[1];
$media['blurhash'] = $imagedata['blurhash'] ?? null; $media['blurhash'] = $imagedata['blurhash'] ?? null;
if (!empty($imagedata['description']) && empty($media['description'])) {
$media['description'] = $imagedata['description'];
Logger::debug('Detected text for image', $media);
}
} else { } else {
Logger::notice('No image data', ['media' => $media]); Logger::notice('No image data', ['media' => $media]);
} }

View file

@ -21,6 +21,7 @@
namespace Friendica\Util; namespace Friendica\Util;
use Friendica\Core\Hook;
use Friendica\Core\Logger; use Friendica\Core\Logger;
use Friendica\DI; use Friendica\DI;
use Friendica\Model\Photo; use Friendica\Model\Photo;
@ -181,10 +182,11 @@ class Images
* Gets info array from given URL, cached data has priority * Gets info array from given URL, cached data has priority
* *
* @param string $url * @param string $url
* @param bool $ocr
* @return array Info * @return array Info
* @throws \Friendica\Network\HTTPException\InternalServerErrorException * @throws \Friendica\Network\HTTPException\InternalServerErrorException
*/ */
public static function getInfoFromURLCached(string $url): array public static function getInfoFromURLCached(string $url, bool $ocr = false): array
{ {
$data = []; $data = [];
@ -192,12 +194,12 @@ class Images
return $data; return $data;
} }
$cacheKey = 'getInfoFromURL:' . sha1($url); $cacheKey = 'getInfoFromURL:' . sha1($url . $ocr);
$data = DI::cache()->get($cacheKey); $data = DI::cache()->get($cacheKey);
if (empty($data) || !is_array($data)) { if (empty($data) || !is_array($data)) {
$data = self::getInfoFromURL($url); $data = self::getInfoFromURL($url, $ocr);
DI::cache()->set($cacheKey, $data); DI::cache()->set($cacheKey, $data);
} }
@ -209,10 +211,11 @@ class Images
* Gets info from URL uncached * Gets info from URL uncached
* *
* @param string $url * @param string $url
* @param bool $ocr
* @return array Info array * @return array Info array
* @throws \Friendica\Network\HTTPException\InternalServerErrorException * @throws \Friendica\Network\HTTPException\InternalServerErrorException
*/ */
public static function getInfoFromURL(string $url): array public static function getInfoFromURL(string $url, bool $ocr = false): array
{ {
$data = []; $data = [];
@ -257,6 +260,14 @@ class Images
if ($image->isValid()) { if ($image->isValid()) {
$data['blurhash'] = $image->getBlurHash(); $data['blurhash'] = $image->getBlurHash();
if ($ocr) {
$media = ['img_str' => $img_str];
Hook::callAll('ocr-detection', $media);
if (!empty($media['description'])) {
$data['description'] = $media['description'];
}
}
} }
$data['size'] = $filesize; $data['size'] = $filesize;