forked from friendica/friendica-addons
Changed hook parameter / more languages added
This commit is contained in:
parent
80ce855189
commit
18266ea6ef
1 changed file with 24 additions and 4 deletions
28
cld/cld.php
28
cld/cld.php
|
@ -12,7 +12,7 @@ use Friendica\DI;
|
||||||
|
|
||||||
function cld_install()
|
function cld_install()
|
||||||
{
|
{
|
||||||
Hook::register('detect_languages', 'addon/cld/cld.php', 'cld_detect_languages');
|
Hook::register('detect_languages', __FILE__, 'cld_detect_languages');
|
||||||
}
|
}
|
||||||
|
|
||||||
function cld_detect_languages(array &$data)
|
function cld_detect_languages(array &$data)
|
||||||
|
@ -25,6 +25,7 @@ function cld_detect_languages(array &$data)
|
||||||
$cld2 = new \CLD2Detector();
|
$cld2 = new \CLD2Detector();
|
||||||
|
|
||||||
$cld2->setEncodingHint(CLD2Encoding::UTF8); // optional, hints about text encoding
|
$cld2->setEncodingHint(CLD2Encoding::UTF8); // optional, hints about text encoding
|
||||||
|
$cld2->setPlainText(true);
|
||||||
|
|
||||||
$result = $cld2->detect($data['text']);
|
$result = $cld2->detect($data['text']);
|
||||||
|
|
||||||
|
@ -37,18 +38,37 @@ function cld_detect_languages(array &$data)
|
||||||
$detected = $result['language_code'];
|
$detected = $result['language_code'];
|
||||||
if ($detected == 'pt') {
|
if ($detected == 'pt') {
|
||||||
$detected = 'pt-PT';
|
$detected = 'pt-PT';
|
||||||
|
} elseif ($detected == 'az') {
|
||||||
|
$detected = 'az-Latn';
|
||||||
|
} elseif ($detected == 'bs') {
|
||||||
|
$detected = 'bs-Latn';
|
||||||
} elseif ($detected == 'el') {
|
} elseif ($detected == 'el') {
|
||||||
$detected = 'el-monoton';
|
$detected = 'el-monoton';
|
||||||
|
} elseif ($detected == 'ht') {
|
||||||
|
$detected = 'fr';
|
||||||
|
} elseif ($detected == 'iw') {
|
||||||
|
$detected = 'he';
|
||||||
|
} elseif ($detected == 'jw') {
|
||||||
|
$detected = 'jv';
|
||||||
|
} elseif ($detected == 'ms') {
|
||||||
|
$detected = 'ms-Latn';
|
||||||
} elseif ($detected == 'no') {
|
} elseif ($detected == 'no') {
|
||||||
$detected = 'nb';
|
$detected = 'nb';
|
||||||
|
} elseif ($detected == 'sr') {
|
||||||
|
$detected = 'sr-Cyrl';
|
||||||
} elseif ($detected == 'zh') {
|
} elseif ($detected == 'zh') {
|
||||||
$detected = 'zh-Hans';
|
$detected = 'zh-Hans';
|
||||||
} elseif ($detected == 'zh-Hant') {
|
} elseif ($detected == 'zh-Hant') {
|
||||||
$detected = 'zh-hant';
|
$detected = 'zh-hant';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// languages that aren't supported via the base language detection
|
||||||
|
if (in_array($detected, ['ceb', 'hmn', 'ht', 'kk', 'ky', 'mg', 'mk', 'ml', 'ny', 'or', 'pa', 'rw', 'su', 'st', 'tg', 'ts', 'xx-Qaai'])) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (!$result['is_reliable']) {
|
if (!$result['is_reliable']) {
|
||||||
Logger::debug('Unreliable detection', ['original' => $original, 'detected' => $detected, 'name' => $result['language_name'], 'probability' => $result['language_probability'], 'text' => $data['text']]);
|
Logger::debug('Unreliable detection', ['uri-id' => $data['uri-id'], 'original' => $original, 'detected' => $detected, 'name' => $result['language_name'], 'probability' => $result['language_probability'], 'text' => $data['text']]);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -59,10 +79,10 @@ function cld_detect_languages(array &$data)
|
||||||
$available = array_keys(DI::l10n()->convertForLanguageDetection(DI::l10n()->getAvailableLanguages(true)));
|
$available = array_keys(DI::l10n()->convertForLanguageDetection(DI::l10n()->getAvailableLanguages(true)));
|
||||||
|
|
||||||
if (!in_array($detected, $available)) {
|
if (!in_array($detected, $available)) {
|
||||||
Logger::debug('Unsupported language', ['original' => $original, 'detected' => $detected, 'name' => $result['language_name'], 'probability' => $result['language_probability'], 'text' => $data['text']]);
|
Logger::debug('Unsupported language', ['uri-id' => $data['uri-id'], 'original' => $original, 'detected' => $detected, 'name' => $result['language_name'], 'probability' => $result['language_probability'], 'text' => $data['text']]);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
Logger::debug('Detected different language', ['original' => $original, 'detected' => $detected, 'name' => $result['language_name'], 'probability' => $result['language_probability'], 'text' => $data['text']]);
|
Logger::debug('Detected different language', ['uri-id' => $data['uri-id'], 'original' => $original, 'detected' => $detected, 'name' => $result['language_name'], 'probability' => $result['language_probability'], 'text' => $data['text']]);
|
||||||
$data['detected'] = [$detected => $result['language_probability'] / 100];
|
$data['detected'] = [$detected => $result['language_probability'] / 100];
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue