Use ISO-639-1 for the language detection
This commit is contained in:
parent
74b4eddcf7
commit
3f2b0b9422
7 changed files with 113 additions and 114 deletions
|
@ -35,13 +35,13 @@ final class Channel extends Timeline
|
||||||
*/
|
*/
|
||||||
public function getTimelines(int $uid): Timelines
|
public function getTimelines(int $uid): Timelines
|
||||||
{
|
{
|
||||||
$language = User::getLanguageCode($uid);
|
$iso639 = new \Matriphe\ISO639\ISO639;
|
||||||
$languages = $this->l10n->getAvailableLanguages(true);
|
$native = $iso639->nativeByCode1(User::getLanguageCode($uid));
|
||||||
|
|
||||||
$tabs = [
|
$tabs = [
|
||||||
new ChannelEntity(ChannelEntity::FORYOU, $this->l10n->t('For you'), $this->l10n->t('Posts from contacts you interact with and who interact with you'), 'y'),
|
new ChannelEntity(ChannelEntity::FORYOU, $this->l10n->t('For you'), $this->l10n->t('Posts from contacts you interact with and who interact with you'), 'y'),
|
||||||
new ChannelEntity(ChannelEntity::WHATSHOT, $this->l10n->t('What\'s Hot'), $this->l10n->t('Posts with a lot of interactions'), 'h'),
|
new ChannelEntity(ChannelEntity::WHATSHOT, $this->l10n->t('What\'s Hot'), $this->l10n->t('Posts with a lot of interactions'), 'h'),
|
||||||
new ChannelEntity(ChannelEntity::LANGUAGE, $languages[$language], $this->l10n->t('Posts in %s', $languages[$language]), 'g'),
|
new ChannelEntity(ChannelEntity::LANGUAGE, $native, $this->l10n->t('Posts in %s', $native), 'g'),
|
||||||
new ChannelEntity(ChannelEntity::FOLLOWERS, $this->l10n->t('Followers'), $this->l10n->t('Posts from your followers that you don\'t follow'), 'f'),
|
new ChannelEntity(ChannelEntity::FOLLOWERS, $this->l10n->t('Followers'), $this->l10n->t('Posts from your followers that you don\'t follow'), 'f'),
|
||||||
new ChannelEntity(ChannelEntity::SHARERSOFSHARERS, $this->l10n->t('Sharers of sharers'), $this->l10n->t('Posts from accounts that are followed by accounts that you follow'), 'r'),
|
new ChannelEntity(ChannelEntity::SHARERSOFSHARERS, $this->l10n->t('Sharers of sharers'), $this->l10n->t('Posts from accounts that are followed by accounts that you follow'), 'r'),
|
||||||
new ChannelEntity(ChannelEntity::IMAGE, $this->l10n->t('Images'), $this->l10n->t('Posts with images'), 'i'),
|
new ChannelEntity(ChannelEntity::IMAGE, $this->l10n->t('Images'), $this->l10n->t('Posts with images'), 'i'),
|
||||||
|
|
|
@ -378,7 +378,7 @@ class L10n
|
||||||
*
|
*
|
||||||
* @return array
|
* @return array
|
||||||
*/
|
*/
|
||||||
public function getAvailableLanguages(bool $additional = false): array
|
public function getAvailableLanguages(): array
|
||||||
{
|
{
|
||||||
$langs = [];
|
$langs = [];
|
||||||
$strings_file_paths = glob('view/lang/*/strings.php');
|
$strings_file_paths = glob('view/lang/*/strings.php');
|
||||||
|
@ -392,107 +392,94 @@ class L10n
|
||||||
$path_array = explode('/', $strings_file_path);
|
$path_array = explode('/', $strings_file_path);
|
||||||
$langs[$path_array[2]] = self::LANG_NAMES[$path_array[2]] ?? $path_array[2];
|
$langs[$path_array[2]] = self::LANG_NAMES[$path_array[2]] ?? $path_array[2];
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($additional) {
|
|
||||||
// See https://github.com/friendica/friendica/issues/10511
|
|
||||||
// Persian is manually added to language detection until a persian translation is provided for the interface, at
|
|
||||||
// which point it will be automatically available through `getAvailableLanguages()` and this should be removed.
|
|
||||||
// Additionally some more languages are added to that list that are used in the Fediverse.
|
|
||||||
$additional_langs = [
|
|
||||||
'af' => 'Afrikaans',
|
|
||||||
'az-Latn' => 'azərbaycan dili',
|
|
||||||
'bs-Latn' => 'bosanski jezik',
|
|
||||||
'be' => 'беларуская мова',
|
|
||||||
'bn' => 'বাংলা',
|
|
||||||
'cy' => 'Cymraeg',
|
|
||||||
'el-monoton' => 'ελληνικά',
|
|
||||||
'eu' => 'euskara, euskera',
|
|
||||||
'fa' => 'فارسی',
|
|
||||||
'ga' => 'Gaeilge',
|
|
||||||
'gl' => 'galego',
|
|
||||||
'he' => 'עברית',
|
|
||||||
'hi' => 'हिन्दी, हिंदी',
|
|
||||||
'hr' => 'hrvatski jezik',
|
|
||||||
'hy' => 'Հայերեն',
|
|
||||||
'id' => 'Bahasa Indonesia',
|
|
||||||
'jv' => 'basa Jawa',
|
|
||||||
'ka' => 'ქართული',
|
|
||||||
'ko' => '한국어, 조선어',
|
|
||||||
'lt' => 'lietuvių kalba',
|
|
||||||
'lv' => 'latviešu valoda',
|
|
||||||
'ms-Latn' => 'bahasa Melayu, بهاس ملايو',
|
|
||||||
'sr-Cyrl' => 'српски језик',
|
|
||||||
'sk' => 'slovenčina, slovenský jazyk',
|
|
||||||
'sl' => 'slovenski jezik, slovenščina',
|
|
||||||
'sq' => 'Shqip',
|
|
||||||
'sw' => 'Kiswahili',
|
|
||||||
'ta' => 'தமிழ்',
|
|
||||||
'th' => 'ไทย',
|
|
||||||
'tl' => 'Wikang Tagalog, ᜏᜒᜃᜅ᜔ ᜆᜄᜎᜓᜄ᜔',
|
|
||||||
'tr' => 'Türkçe',
|
|
||||||
'pt-PT' => 'português',
|
|
||||||
'uk' => 'українська мова',
|
|
||||||
'uz' => 'Oʻzbek, Ўзбек, أۇزبېك',
|
|
||||||
'vi' => 'Việt Nam',
|
|
||||||
'zh-hant' => '繁體',
|
|
||||||
];
|
|
||||||
$langs = array_merge($additional_langs, $langs);
|
|
||||||
ksort($langs);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return $langs;
|
return $langs;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The language detection routine uses some slightly different language codes.
|
* Get language codes that are detectable by our language detection routines.
|
||||||
* This function changes the language array accordingly.
|
* Languages that are rarely used and that tend to cause false detections are excluded.
|
||||||
|
* The listed codes are a collection of both the official ISO 639-1 codes and
|
||||||
|
* the codes that are used by our built-in language detection routine.
|
||||||
|
* When the detection is done, the result only consists of the official ISO 639-1 codes.
|
||||||
*
|
*
|
||||||
* @param array $languages
|
|
||||||
* @return array
|
* @return array
|
||||||
*/
|
*/
|
||||||
public function convertForLanguageDetection(array $languages): array
|
public function getDetectableLanguages(): array
|
||||||
{
|
{
|
||||||
foreach ($languages as $key => $language) {
|
$additional_langs = [
|
||||||
$newkey = $this->convertCodeForLanguageDetection($key);
|
'af', 'az', 'az-Cyrl', 'az-Latn', 'be', 'bn', 'bs', 'bs-Cyrl', 'bs-Latn',
|
||||||
if ($newkey != $key) {
|
'cy', 'da', 'el', 'el-monoton', 'el-polyton', 'en', 'eu',
|
||||||
if (!isset($languages[$newkey])) {
|
'fa', 'fi', 'ga', 'gl', 'he', 'hi', 'hr', 'hy', 'id', 'in', 'iw', 'jv', 'jw',
|
||||||
$languages[$newkey] = $language;
|
'ka', 'ko', 'lt', 'lv', 'mo', 'ms', 'ms-Arab', 'ms-Latn', 'nb', 'nn', 'no',
|
||||||
}
|
'pt', 'pt-PT', 'pt-BR', 'ro', 'sa', 'sk', 'sl', 'sq', 'sr', 'sr-Cyrl', 'sr-Latn', 'sw',
|
||||||
unset($languages[$key]);
|
'ta', 'th', 'tl', 'tr', 'ug', 'uk', 'uz', 'vi', 'zh', 'zh-Hant', 'zh-Hans',
|
||||||
}
|
];
|
||||||
|
|
||||||
|
if (in_array('cld2', get_loaded_extensions())) {
|
||||||
|
$additional_langs = array_merge($additional_langs,
|
||||||
|
['sd', 'si', 'yi', 'km', 'iu', 'lo', 'dv', 'gu', 'kn', 'te', 'ml', 'or', 'pa', 'iu']);
|
||||||
}
|
}
|
||||||
|
|
||||||
ksort($languages);
|
$langs = array_merge($additional_langs, array_keys($this->getAvailableLanguages()));
|
||||||
|
sort($langs);
|
||||||
|
return $langs;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return a list of supported languages with their two-letter language codes.
|
||||||
|
*
|
||||||
|
* @param bool $international If set to true, additionally the international language name is returned as well.
|
||||||
|
* @return array
|
||||||
|
*/
|
||||||
|
public function getLanguageCodes(bool $international = false): array
|
||||||
|
{
|
||||||
|
$iso639 = new \Matriphe\ISO639\ISO639;
|
||||||
|
|
||||||
|
$languages = [];
|
||||||
|
|
||||||
|
foreach ($this->getDetectableLanguages() as $code) {
|
||||||
|
$code = $this->toISO6391($code);
|
||||||
|
$native = $iso639->nativeByCode1($code);
|
||||||
|
$language = $iso639->languageByCode1($code);
|
||||||
|
if ($native != $iso639->languageByCode1($code) && $international) {
|
||||||
|
$languages[$code] = $this->t('%s (%s)', $native, $language);
|
||||||
|
} else {
|
||||||
|
$languages[$code] = $native;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return $languages;
|
return $languages;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The language detection routine uses some slightly different language codes.
|
* Convert the language code to ISO639-1
|
||||||
* This function changes the language codes accordingly.
|
* It also converts old codes to their new counterparts.
|
||||||
*
|
*
|
||||||
* @param string $language
|
* @param string $code
|
||||||
* @return string
|
* @return string
|
||||||
*/
|
*/
|
||||||
public function convertCodeForLanguageDetection(string $language): string
|
public function toISO6391(string $code): string
|
||||||
{
|
{
|
||||||
switch ($language) {
|
if ((strlen($code) > 2) && (substr($code, 2, 1) == '-')) {
|
||||||
case 'da-dk':
|
$code = substr($code, 0, 2);
|
||||||
return 'da';
|
|
||||||
case 'en-us':
|
|
||||||
case 'en-gb':
|
|
||||||
return 'en';
|
|
||||||
case 'fi-fi':
|
|
||||||
return 'fi';
|
|
||||||
case 'nb-no':
|
|
||||||
return 'nb';
|
|
||||||
case 'pt-br':
|
|
||||||
return 'pt-BR';
|
|
||||||
case 'zh-cn':
|
|
||||||
return 'zh-Hans';
|
|
||||||
default:
|
|
||||||
return $language;
|
|
||||||
}
|
}
|
||||||
|
if (in_array($code, ['nb', 'nn'])) {
|
||||||
|
$code = 'no';
|
||||||
|
}
|
||||||
|
if ($code == 'in') {
|
||||||
|
$code = 'id';
|
||||||
|
}
|
||||||
|
if ($code == 'iw') {
|
||||||
|
$code = 'he';
|
||||||
|
}
|
||||||
|
if ($code == 'jw') {
|
||||||
|
$code = 'jv';
|
||||||
|
}
|
||||||
|
if ($code == 'mo') {
|
||||||
|
$code = 'ro';
|
||||||
|
}
|
||||||
|
return $code;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -2034,15 +2034,12 @@ class Item
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
$availableLanguages = DI::l10n()->getAvailableLanguages(true);
|
$ld = new Language(DI::l10n()->getDetectableLanguages());
|
||||||
$availableLanguages = DI::l10n()->convertForLanguageDetection($availableLanguages);
|
|
||||||
|
|
||||||
$ld = new Language(array_keys($availableLanguages));
|
|
||||||
|
|
||||||
$result = [];
|
$result = [];
|
||||||
|
|
||||||
foreach (self::splitByBlocks($searchtext) as $block) {
|
foreach (self::splitByBlocks($searchtext) as $block) {
|
||||||
$languages = $ld->detect($block)->limit(0, $count)->close() ?: [];
|
$languages = $ld->detect($block)->close() ?: [];
|
||||||
|
|
||||||
$data = [
|
$data = [
|
||||||
'text' => $block,
|
'text' => $block,
|
||||||
|
@ -2057,10 +2054,32 @@ class Item
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
arsort($result);
|
$result = self::compactLanguages($result);
|
||||||
$result = array_slice($result, 0, $count);
|
|
||||||
|
|
||||||
return $result;
|
arsort($result);
|
||||||
|
return array_slice($result, 0, $count);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert the language codes in the detection result to ISO 639-1.
|
||||||
|
* On duplicates the system uses the higher quality value.
|
||||||
|
*
|
||||||
|
* @param array $result
|
||||||
|
* @return array
|
||||||
|
*/
|
||||||
|
private static function compactLanguages(array $result): array
|
||||||
|
{
|
||||||
|
$languages = [];
|
||||||
|
foreach ($result as $language => $quality) {
|
||||||
|
if ($quality == 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
$code = DI::l10n()->toISO6391($language);
|
||||||
|
if (empty($languages[$code]) || ($languages[$code] < $quality)) {
|
||||||
|
$languages[$code] = $quality;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return $languages;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -35,9 +35,6 @@ use Friendica\Model\Verb;
|
||||||
use Friendica\Protocol\Activity;
|
use Friendica\Protocol\Activity;
|
||||||
use Friendica\Protocol\Relay;
|
use Friendica\Protocol\Relay;
|
||||||
use Friendica\Util\DateTimeFormat;
|
use Friendica\Util\DateTimeFormat;
|
||||||
use Friendica\Util\Strings;
|
|
||||||
|
|
||||||
// Channel
|
|
||||||
|
|
||||||
class Engagement
|
class Engagement
|
||||||
{
|
{
|
||||||
|
|
|
@ -127,7 +127,6 @@ class User
|
||||||
|
|
||||||
case 'community':
|
case 'community':
|
||||||
return User::ACCOUNT_TYPE_COMMUNITY;
|
return User::ACCOUNT_TYPE_COMMUNITY;
|
||||||
|
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
@ -425,7 +424,7 @@ class User
|
||||||
* @return array user
|
* @return array user
|
||||||
* @throws Exception
|
* @throws Exception
|
||||||
*/
|
*/
|
||||||
public static function getFirstAdmin(array $fields = []) : array
|
public static function getFirstAdmin(array $fields = []): array
|
||||||
{
|
{
|
||||||
if (!empty(DI::config()->get('config', 'admin_nickname'))) {
|
if (!empty(DI::config()->get('config', 'admin_nickname'))) {
|
||||||
return self::getByNickname(DI::config()->get('config', 'admin_nickname'), $fields);
|
return self::getByNickname(DI::config()->get('config', 'admin_nickname'), $fields);
|
||||||
|
@ -560,22 +559,20 @@ class User
|
||||||
return $default_circle;
|
return $default_circle;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Fetch the language code from the given user. If the code is invalid, return the system language
|
* Fetch the language code from the given user. If the code is invalid, return the system language
|
||||||
*
|
*
|
||||||
* @param integer $uid User-Id
|
* @param integer $uid User-Id
|
||||||
* @return string
|
* @return string
|
||||||
*/
|
*/
|
||||||
public static function getLanguageCode(int $uid): string
|
public static function getLanguageCode(int $uid): string
|
||||||
{
|
{
|
||||||
$owner = self::getOwnerDataById($uid);
|
$owner = self::getOwnerDataById($uid);
|
||||||
$languages = DI::l10n()->getAvailableLanguages(true);
|
$language = DI::l10n()->toISO6391($owner['language']);
|
||||||
if (in_array($owner['language'], array_keys($languages))) {
|
if (in_array($language, array_keys(DI::l10n()->getLanguageCodes()))) {
|
||||||
$language = $owner['language'];
|
return $language;
|
||||||
} else {
|
|
||||||
$language = DI::config()->get('system', 'language');
|
|
||||||
}
|
}
|
||||||
return $language;
|
return DI::l10n()->toISO6391(DI::config()->get('system', 'language'));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1480,7 +1477,7 @@ class User
|
||||||
Photo::delete(['uid' => $register['uid']]);
|
Photo::delete(['uid' => $register['uid']]);
|
||||||
|
|
||||||
return DBA::delete('user', ['uid' => $register['uid']]) &&
|
return DBA::delete('user', ['uid' => $register['uid']]) &&
|
||||||
Register::deleteByHash($register['hash']);
|
Register::deleteByHash($register['hash']);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -304,7 +304,7 @@ class Timeline extends BaseModule
|
||||||
} elseif ($this->selectedTab == ChannelEntity::AUDIO) {
|
} elseif ($this->selectedTab == ChannelEntity::AUDIO) {
|
||||||
$condition = ["`media-type` & ?", 4];
|
$condition = ["`media-type` & ?", 4];
|
||||||
} elseif ($this->selectedTab == ChannelEntity::LANGUAGE) {
|
} elseif ($this->selectedTab == ChannelEntity::LANGUAGE) {
|
||||||
$condition = ["JSON_EXTRACT(JSON_KEYS(language), '$[0]') = ?", $this->l10n->convertCodeForLanguageDetection(User::getLanguageCode($uid))];
|
$condition = ["JSON_EXTRACT(JSON_KEYS(language), '$[0]') = ?", User::getLanguageCode($uid)];
|
||||||
} elseif (is_numeric($this->selectedTab)) {
|
} elseif (is_numeric($this->selectedTab)) {
|
||||||
$condition = $this->getUserChannelConditions($this->selectedTab, $this->session->getLocalUserId());
|
$condition = $this->getUserChannelConditions($this->selectedTab, $this->session->getLocalUserId());
|
||||||
}
|
}
|
||||||
|
@ -421,7 +421,6 @@ class Timeline extends BaseModule
|
||||||
{
|
{
|
||||||
$conditions = [];
|
$conditions = [];
|
||||||
$languages = $this->pConfig->get($uid, 'channel', 'languages', [User::getLanguageCode($uid)]);
|
$languages = $this->pConfig->get($uid, 'channel', 'languages', [User::getLanguageCode($uid)]);
|
||||||
$languages = $this->l10n->convertForLanguageDetection($languages);
|
|
||||||
foreach ($languages as $language) {
|
foreach ($languages as $language) {
|
||||||
$conditions[] = "JSON_EXTRACT(JSON_KEYS(language), '$[0]') = ?";
|
$conditions[] = "JSON_EXTRACT(JSON_KEYS(language), '$[0]') = ?";
|
||||||
$condition[] = $language;
|
$condition[] = $language;
|
||||||
|
|
|
@ -260,7 +260,7 @@ class Display extends BaseSettings
|
||||||
$bookmarked_timelines = $this->pConfig->get($uid, 'system', 'network_timelines', $this->getAvailableTimelines($uid, true)->column('code'));
|
$bookmarked_timelines = $this->pConfig->get($uid, 'system', 'network_timelines', $this->getAvailableTimelines($uid, true)->column('code'));
|
||||||
$enabled_timelines = $this->pConfig->get($uid, 'system', 'enabled_timelines', $this->getAvailableTimelines($uid, false)->column('code'));
|
$enabled_timelines = $this->pConfig->get($uid, 'system', 'enabled_timelines', $this->getAvailableTimelines($uid, false)->column('code'));
|
||||||
$channel_languages = $this->pConfig->get($uid, 'channel', 'languages', [User::getLanguageCode($uid)]);
|
$channel_languages = $this->pConfig->get($uid, 'channel', 'languages', [User::getLanguageCode($uid)]);
|
||||||
$languages = $this->l10n->getAvailableLanguages(true);
|
$languages = $this->l10n->getLanguageCodes(true);
|
||||||
|
|
||||||
$timelines = [];
|
$timelines = [];
|
||||||
foreach ($this->getAvailableTimelines($uid) as $timeline) {
|
foreach ($this->getAvailableTimelines($uid) as $timeline) {
|
||||||
|
|
Loading…
Reference in a new issue