Summary of this patch to cld/cld.php:
- cld_install() registers the 'detect_languages' hook with __FILE__ instead of
  the hard-coded 'addon/cld/cld.php' path.
- cld_detect_languages() calls setPlainText(true) on the detector before
  detect(), remaps more detected language codes (az, bs, ht, iw, jw, ms, sr),
  returns early for codes listed as unsupported by the base language
  detection, and adds $data['uri-id'] to the three debug log contexts.

Review notes:
- 'ht' is rewritten to 'fr' by the elseif chain before the unsupported-code
  check runs, so it could never match there; it is dropped from that list.
  Runtime behaviour is unchanged.
- The unsupported-code check now uses in_array(..., true) (strict comparison).
- Line breaks and tab indentation were reconstructed from the hunk line
  counts. TODO confirm the placement of the blank context lines in the second
  and fourth hunks against the target file.
- NOTE(review): the post-image id on the index line was presumably generated
  before the two amendments above; regenerate the patch if that id matters.

diff --git a/cld/cld.php b/cld/cld.php
index 54e7a73e4..fc8fa8145 100644
--- a/cld/cld.php
+++ b/cld/cld.php
@@ -12,7 +12,7 @@ use Friendica\DI;
 
 function cld_install()
 {
-	Hook::register('detect_languages', 'addon/cld/cld.php', 'cld_detect_languages');
+	Hook::register('detect_languages', __FILE__, 'cld_detect_languages');
 }
 
 function cld_detect_languages(array &$data)
@@ -25,6 +25,7 @@ function cld_detect_languages(array &$data)
 	$cld2 = new \CLD2Detector();
 
 	$cld2->setEncodingHint(CLD2Encoding::UTF8); // optional, hints about text encoding
+	$cld2->setPlainText(true);
 
 	$result = $cld2->detect($data['text']);
 
@@ -37,18 +38,37 @@ function cld_detect_languages(array &$data)
 	$detected = $result['language_code'];
 	if ($detected == 'pt') {
 		$detected = 'pt-PT';
+	} elseif ($detected == 'az') {
+		$detected = 'az-Latn';
+	} elseif ($detected == 'bs') {
+		$detected = 'bs-Latn';
 	} elseif ($detected == 'el') {
 		$detected = 'el-monoton';
+	} elseif ($detected == 'ht') {
+		$detected = 'fr';
+	} elseif ($detected == 'iw') {
+		$detected = 'he';
+	} elseif ($detected == 'jw') {
+		$detected = 'jv';
+	} elseif ($detected == 'ms') {
+		$detected = 'ms-Latn';
 	} elseif ($detected == 'no') {
 		$detected = 'nb';
+	} elseif ($detected == 'sr') {
+		$detected = 'sr-Cyrl';
 	} elseif ($detected == 'zh') {
 		$detected = 'zh-Hans';
 	} elseif ($detected == 'zh-Hant') {
 		$detected = 'zh-hant';
 	}
 
+	// languages that aren't supported via the base language detection
+	if (in_array($detected, ['ceb', 'hmn', 'kk', 'ky', 'mg', 'mk', 'ml', 'ny', 'or', 'pa', 'rw', 'su', 'st', 'tg', 'ts', 'xx-Qaai'], true)) {
+		return;
+	}
+
 	if (!$result['is_reliable']) {
-		Logger::debug('Unreliable detection', ['original' => $original, 'detected' => $detected, 'name' => $result['language_name'], 'probability' => $result['language_probability'], 'text' => $data['text']]);
+		Logger::debug('Unreliable detection', ['uri-id' => $data['uri-id'], 'original' => $original, 'detected' => $detected, 'name' => $result['language_name'], 'probability' => $result['language_probability'], 'text' => $data['text']]);
 		return;
 	}
 
@@ -59,10 +79,10 @@ function cld_detect_languages(array &$data)
 	$available = array_keys(DI::l10n()->convertForLanguageDetection(DI::l10n()->getAvailableLanguages(true)));
 
 	if (!in_array($detected, $available)) {
-		Logger::debug('Unsupported language', ['original' => $original, 'detected' => $detected, 'name' => $result['language_name'], 'probability' => $result['language_probability'], 'text' => $data['text']]);
+		Logger::debug('Unsupported language', ['uri-id' => $data['uri-id'], 'original' => $original, 'detected' => $detected, 'name' => $result['language_name'], 'probability' => $result['language_probability'], 'text' => $data['text']]);
 		return;
 	}
 
-	Logger::debug('Detected different language', ['original' => $original, 'detected' => $detected, 'name' => $result['language_name'], 'probability' => $result['language_probability'], 'text' => $data['text']]);
+	Logger::debug('Detected different language', ['uri-id' => $data['uri-id'], 'original' => $original, 'detected' => $detected, 'name' => $result['language_name'], 'probability' => $result['language_probability'], 'text' => $data['text']]);
 	$data['detected'] = [$detected => $result['language_probability'] / 100];
 }