<?php
/**
 * Name: Compact Language Detector
 * Description: Improved language detection
 * Version: 0.1
 * Author: Michael Vogel <heluecht@pirati.ca>
 */
use Friendica\Core\Hook ;
use Friendica\Core\Logger ;
use Friendica\DI ;
/**
 * Addon install entry point.
 *
 * Registers cld_detect_languages() (in addon/cld/cld.php) as a handler for
 * the 'detect_languages' hook.
 *
 * @return void
 */
function cld_install(): void
{
	Hook::register('detect_languages', 'addon/cld/cld.php', 'cld_detect_languages');
}
2023-10-01 06:14:10 +02:00
/**
 * 'detect_languages' hook handler: re-detects the language of a text with CLD2.
 *
 * Reads $data['text'] and the first key of $data['detected'] (the language
 * detected so far). $data['detected'] is replaced by a single-entry array
 * [language code => probability / 100] only when the CLD2 result is flagged
 * reliable, differs from the previous language and is among the languages
 * returned by L10n::convertForLanguageDetection(). In every other case $data
 * is left untouched.
 *
 * @param array $data Hook payload, modified in place. Keys used here:
 *                    'text' (string to analyse) and 'detected'
 *                    (array keyed by language code).
 *
 * @return void
 */
function cld_detect_languages(array &$data): void
{
	// Nothing to analyse: bail out before touching the detector at all.
	$text = $data['text'] ?? '';
	if (!is_string($text) || $text === '') {
		return;
	}

	if (!extension_loaded('cld2')) {
		Logger::warning('CLD2 is not installed.');
		return;
	}

	$cld2 = new \CLD2Detector();
	$cld2->setEncodingHint(\CLD2Encoding::UTF8); // optional, hints about text encoding
	$result = $cld2->detect($text);

	// The first key of the previous detection is treated as the current language.
	$previous = $data['detected'] ?? [];
	$original = (is_array($previous) && $previous) ? (string)array_key_first($previous) : '';

	// Codes reported by CLD2 that this addon rewrites before comparing them
	// with the available languages.
	$aliases = [
		'pt'      => 'pt-PT',
		'el'      => 'el-monoton',
		'no'      => 'nb',
		'zh'      => 'zh-Hans',
		'zh-Hant' => 'zh-hant',
	];
	$code     = (string)$result['language_code'];
	$detected = $aliases[$code] ?? $code;

	// Shared context for all debug messages below.
	$context = [
		'original'    => $original,
		'detected'    => $detected,
		'name'        => $result['language_name'],
		'probability' => $result['language_probability'],
		'text'        => $text,
	];

	if (!$result['is_reliable']) {
		Logger::debug('Unreliable detection', $context);
		return;
	}

	if ($original === $detected) {
		return;
	}

	$available = array_keys(DI::l10n()->convertForLanguageDetection(DI::l10n()->getAvailableLanguages(true)));
	if (!in_array($detected, $available, true)) {
		Logger::debug('Unsupported language', $context);
		return;
	}

	Logger::debug('Detected different language', $context);

	// The probability is divided by 100 before being stored
	// (presumably CLD2 reports a percentage - confirm against the extension).
	$data['detected'] = [$detected => $result['language_probability'] / 100];
}