Additional languages / new hook for additional language detection

This commit is contained in:
Michael 2023-09-30 15:51:53 +00:00
parent 6057146a41
commit e84a7e8c2a
3 changed files with 44 additions and 8 deletions

View file

@ -221,6 +221,13 @@ Please note: body contents are bbcode - not HTML
Called when receiving a post from another source. This may also be used to post local activity or system generated messages.
`$b` is the item array of information to be stored in the database and the item body is bbcode.
### get_language
Called after the built-in language detection has run. Addons can use this hook to apply an alternative language detection method.
`$data` is an array:
- **text**: The text that is analyzed.
- **detected**: The array holding the result of the original language detection. An addon can overwrite it; the overwritten value is then used as the detection result.
### addon_settings
Called when generating the HTML for the addon settings page.
`$data` is an array containing:
@ -800,6 +807,7 @@ Here is a complete list of all hook callbacks with file locations (as of 24-Sep-
### src/Model/Item.php
Hook::callAll('get_language', $data);
Hook::callAll('post_local', $item);
Hook::callAll('post_remote', $item);
Hook::callAll('post_local_end', $posted_item);

View file

@ -397,13 +397,31 @@ class L10n
// See https://github.com/friendica/friendica/issues/10511
// Persian is manually added to language detection until a persian translation is provided for the interface, at
// which point it will be automatically available through `getAvailableLanguages()` and this should be removed.
// Additionally Portuguese, Ukrainian, traditional Chinese and Welsh are added to that list.
// Additionally, more languages that are used in the Fediverse are added to that list.
$additional_langs = [
'cy' => 'Cymraeg',
'uk' => 'Українська',
'pt-PT' => 'Português',
'zh-hant' => '繁體',
'fa' => 'فارسی'
'af' => 'Afrikaans',
'cy' => 'Cymraeg',
'el-monoton' => 'Ελληνικά',
'eu' => 'euskara',
'fa' => 'فارسی',
'gl' => 'Galego',
'hi' => 'हिन्दी',
'hr' => 'Hrvatski',
'id' => 'bahasa Indonesia',
'ko' => '한국인',
'lt' => 'lietuvių',
'lv' => 'latviešu',
'sk' => 'slovenský',
'sl' => 'Slovenščina',
'sw' => 'Kiswahili',
'th' => 'แบบไทย',
'tl' => 'Wikang Tagalog',
'tr' => 'Türkçe',
'pt-PT' => 'Português',
'uk' => 'Українська',
'uz' => 'Ўзбек',
'vi' => 'Tiếng Việt',
'zh-hant' => '繁體',
];
$langs = array_merge($additional_langs, $langs);
ksort($langs);

View file

@ -2017,7 +2017,7 @@ class Item
$naked_body = BBCode::toPlaintext($naked_body);
// Remove possibly remaining links
$naked_body = preg_replace(Strings::autoLinkRegEx(), '', $naked_body);
$naked_body = trim(preg_replace(Strings::autoLinkRegEx(), '', $naked_body));
if (empty($naked_body)) {
return [];
@ -2029,7 +2029,17 @@ class Item
$availableLanguages = DI::l10n()->convertForLanguageDetection($availableLanguages);
$ld = new Language(array_keys($availableLanguages));
return $ld->detect($naked_body)->limit(0, $count)->close() ?: [];
$languages = $ld->detect($naked_body)->limit(0, $count)->close() ?: [];
$data = [
'text' => $naked_body,
'detected' => $languages,
];
Hook::callAll('get_language', $data);
$languages = $data['detected'];
return $languages;
}
/**