From e84a7e8c2a9a26040d77f5e33382cfa470cbaf0a Mon Sep 17 00:00:00 2001 From: Michael Date: Sat, 30 Sep 2023 15:51:53 +0000 Subject: [PATCH 1/2] Additional languages / new hook for additional language detection --- doc/Addons.md | 8 ++++++++ src/Core/L10n.php | 30 ++++++++++++++++++++++++------ src/Model/Item.php | 14 ++++++++++++-- 3 files changed, 44 insertions(+), 8 deletions(-) diff --git a/doc/Addons.md b/doc/Addons.md index 567e5b2756..497717e6f7 100644 --- a/doc/Addons.md +++ b/doc/Addons.md @@ -221,6 +221,13 @@ Please note: body contents are bbcode - not HTML Called when receiving a post from another source. This may also be used to post local activity or system generated messages. `$b` is the item array of information to be stored in the database and the item body is bbcode. +### get_language +Called after the language detection. This can be used for alternative language detection methods. +`$data` is an array: + +- **text**: The text that is analyzed. +- **detected**: The array with the original language detection. Can be overwritten via an addon. + ### addon_settings Called when generating the HTML for the addon settings page. `$data` is an array containing: @@ -800,6 +807,7 @@ Here is a complete list of all hook callbacks with file locations (as of 24-Sep- ### src/Model/Item.php + Hook::callAll('get_language', $item); Hook::callAll('post_local', $item); Hook::callAll('post_remote', $item); Hook::callAll('post_local_end', $posted_item); diff --git a/src/Core/L10n.php b/src/Core/L10n.php index 636cb11908..7fd7fc4e87 100644 --- a/src/Core/L10n.php +++ b/src/Core/L10n.php @@ -397,13 +397,31 @@ class L10n // See https://github.com/friendica/friendica/issues/10511 // Persian is manually added to language detection until a persian translation is provided for the interface, at // which point it will be automatically available through `getAvailableLanguages()` and this should be removed. 
- // Additionally Portuguese, Ukrainian, traditional Chinese and Welsh are added to that list. + // Additionally some more languages are added to that list that are used in the Fediverse. $additional_langs = [ - 'cy' => 'Cymraeg', - 'uk' => 'Українська', - 'pt-PT' => 'Português', - 'zh-hant' => '繁體', - 'fa' => 'فارسی' + 'af' => 'Afrikaans', + 'cy' => 'Cymraeg', + 'el-monoton' => 'Ελληνικά', + 'eu' => 'euskara', + 'fa' => 'فارسی', + 'gl' => 'Galego', + 'hi' => 'हिन्दी', + 'hr' => 'Hrvatski', + 'id' => 'bahasa Indonesia', + 'ko' => '한국인', + 'lt' => 'lietuvių', + 'lv' => 'latviešu', + 'sk' => 'slovenský', + 'sl' => 'Slovenščina', + 'sw' => 'Kiswahili', + 'th' => 'แบบไทย', + 'tl' => 'Wikang Tagalog', + 'tr' => 'Türkçe', + 'pt-PT' => 'Português', + 'uk' => 'Українська', + 'uz' => 'Ўзбек', + 'vi' => 'Tiếng Việt', + 'zh-hant' => '繁體', ]; $langs = array_merge($additional_langs, $langs); ksort($langs); diff --git a/src/Model/Item.php b/src/Model/Item.php index 41aae04134..c529c2c166 100644 --- a/src/Model/Item.php +++ b/src/Model/Item.php @@ -2017,7 +2017,7 @@ class Item $naked_body = BBCode::toPlaintext($naked_body); // Remove possibly remaining links - $naked_body = preg_replace(Strings::autoLinkRegEx(), '', $naked_body); + $naked_body = trim(preg_replace(Strings::autoLinkRegEx(), '', $naked_body)); if (empty($naked_body)) { return []; @@ -2029,7 +2029,17 @@ class Item $availableLanguages = DI::l10n()->convertForLanguageDetection($availableLanguages); $ld = new Language(array_keys($availableLanguages)); - return $ld->detect($naked_body)->limit(0, $count)->close() ?: []; + $languages = $ld->detect($naked_body)->limit(0, $count)->close() ?: []; + + $data = [ + 'text' => $naked_body, + 'detected' => $languages, + ]; + + Hook::callAll('get_language', $data); + $languages = $data['detected']; + + return $languages; } /** From 0840086a0a686be3d6de667669a43999b9b45068 Mon Sep 17 00:00:00 2001 From: Michael Date: Sun, 1 Oct 2023 04:10:24 +0000 Subject: [PATCH 2/2] Renamed hook 
--- doc/Addons.md | 6 +++--- doc/de/Addons.md | 8 ++++++++ src/Model/Item.php | 2 +- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/doc/Addons.md b/doc/Addons.md index 497717e6f7..a0b1226796 100644 --- a/doc/Addons.md +++ b/doc/Addons.md @@ -221,12 +221,12 @@ Please note: body contents are bbcode - not HTML Called when receiving a post from another source. This may also be used to post local activity or system generated messages. `$b` is the item array of information to be stored in the database and the item body is bbcode. -### get_language +### detect_languages Called after the language detection. This can be used for alternative language detection methods. `$data` is an array: - **text**: The text that is analyzed. -- **detected**: The array with the original language detection. Can be overwritten via an addon. +- **detected**: (input/output) Array of language codes detected in the related text. ### addon_settings Called when generating the HTML for the addon settings page. @@ -807,7 +807,7 @@ Here is a complete list of all hook callbacks with file locations (as of 24-Sep- ### src/Model/Item.php - Hook::callAll('get_language', $item); + Hook::callAll('detect_languages', $data); Hook::callAll('post_local', $item); Hook::callAll('post_remote', $item); Hook::callAll('post_local_end', $posted_item); diff --git a/doc/de/Addons.md b/doc/de/Addons.md index 0caf3f2545..bd13f6334c 100644 --- a/doc/de/Addons.md +++ b/doc/de/Addons.md @@ -103,6 +103,13 @@ Derzeitige Hooks $b ist das Item-Array einer Information, die in der Datenbank und im Item gespeichert ist. {Bitte beachte: der Seiteninhalt ist bbcode - nicht HTML) +**'detect_languages'** +Wird nach der Sprachenerkennung aufgerufen. +Dieser Hook kann dafür verwendet werden, alternative Erkennungsfunktionen einzubinden. +`$data` ist ein Array: + 'text' => Der analysierte Text. + 'detected' => (Eingabe/Ausgabe) Das Array mit den erkannten Sprachen. 
+ **'addon_settings'** - wird aufgerufen, wenn die HTML-Ausgabe der Addon-Einstellungsseite generiert wird. $b ist die HTML-Ausgabe (String) der Addon-Einstellungsseite vor dem finalen ""-Tag. @@ -316,6 +323,7 @@ Eine komplette Liste aller Hook-Callbacks mit den zugehörigen Dateien (am 01-Ap ### src/Model/Item.php + Hook::callAll('detect_languages', $data); Hook::callAll('post_local', $item); Hook::callAll('post_remote', $item); Hook::callAll('post_local_end', $posted_item); diff --git a/src/Model/Item.php b/src/Model/Item.php index c529c2c166..fbb608e5c8 100644 --- a/src/Model/Item.php +++ b/src/Model/Item.php @@ -2036,7 +2036,7 @@ class Item 'detected' => $languages, ]; - Hook::callAll('get_language', $data); + Hook::callAll('detect_languages', $data); $languages = $data['detected']; return $languages;