158 lines
2.8 KiB
ReStructuredText
158 lines
2.8 KiB
ReStructuredText
*******************
|
|
Text_LanguageDetect
|
|
*******************
|
|
PHP library to identify human languages from text samples.
|
|
Returns a confidence score for each candidate language.
|
|
|
|
|
|
Installation
|
|
============
|
|
|
|
PEAR
|
|
----
|
|
::
|
|
|
|
$ pear install Text_LanguageDetect
|
|
|
|
Composer
|
|
--------
|
|
::
|
|
|
|
$ composer require pear/text_languagedetect
|
|
|
|
|
|
Usage
|
|
=====
|
|
Also see the examples in the ``docs/`` directory and
|
|
the `official documentation`__.
|
|
|
|
__ http://pear.php.net/package/Text_LanguageDetect/docs
|
|
|
|
Language detection
|
|
------------------
|
|
Simple language detection::
|
|
|
|
<?php
|
|
require_once 'Text/LanguageDetect.php';
|
|
|
|
$text = 'Was wäre, wenn ich Ihnen das jetzt sagen würde?';
|
|
|
|
$ld = new Text_LanguageDetect();
|
|
$language = $ld->detectSimple($text);
|
|
|
|
echo $language;
|
|
//output: german
|
|
|
|
Show the three most probable languages with their confidence scores::
|
|
|
|
<?php
|
|
require_once 'Text/LanguageDetect.php';
|
|
|
|
$text = 'Was wäre, wenn ich Ihnen das jetzt sagen würde?';
|
|
|
|
$ld = new Text_LanguageDetect();
|
|
//3 most probable languages
|
|
$results = $ld->detect($text, 3);
|
|
|
|
foreach ($results as $language => $confidence) {
|
|
echo $language . ': ' . number_format($confidence, 2) . "\n";
|
|
}
|
|
|
|
//output:
|
|
//german: 0.35
|
|
//dutch: 0.25
|
|
//swedish: 0.20
|
|
?>
|
|
|
|
|
|
Language code
|
|
-------------
|
|
Instead of returning the full language name, ISO 639-1 two-letter or
|
|
ISO 639-2 three-letter codes can be returned::
|
|
|
|
<?php
|
|
require_once 'Text/LanguageDetect.php';
|
|
$ld = new Text_LanguageDetect();
|
|
|
|
//will output the ISO 639-1 two-letter language code
|
|
// "de"
|
|
$ld->setNameMode(2);
|
|
echo $ld->detectSimple('Das ist ein kleiner Text') . "\n";
|
|
|
|
//will output the ISO 639-2 three-letter language code
|
|
// "deu"
|
|
$ld->setNameMode(3);
|
|
echo $ld->detectSimple('Das ist ein kleiner Text') . "\n";
|
|
?>
|
|
|
|
|
|
Supported languages
|
|
===================
|
|
- albanian
|
|
- arabic
|
|
- azeri
|
|
- bengali
|
|
- bulgarian
|
|
- cebuano
|
|
- croatian
|
|
- czech
|
|
- danish
|
|
- dutch
|
|
- english
|
|
- estonian
|
|
- farsi
|
|
- finnish
|
|
- french
|
|
- german
|
|
- hausa
|
|
- hawaiian
|
|
- hindi
|
|
- hungarian
|
|
- icelandic
|
|
- indonesian
|
|
- italian
|
|
- kazakh
|
|
- kyrgyz
|
|
- latin
|
|
- latvian
|
|
- lithuanian
|
|
- macedonian
|
|
- mongolian
|
|
- nepali
|
|
- norwegian
|
|
- pashto
|
|
- pidgin
|
|
- polish
|
|
- portuguese
|
|
- romanian
|
|
- russian
|
|
- serbian
|
|
- slovak
|
|
- slovene
|
|
- somali
|
|
- spanish
|
|
- swahili
|
|
- swedish
|
|
- tagalog
|
|
- turkish
|
|
- ukrainian
|
|
- urdu
|
|
- uzbek
|
|
- vietnamese
|
|
- welsh
|
|
|
|
|
|
Links
|
|
=====
|
|
Homepage
|
|
http://pear.php.net/package/Text_LanguageDetect
|
|
Bug tracker
|
|
http://pear.php.net/bugs/search.php?cmd=display&package_name[]=Text_LanguageDetect
|
|
Documentation
|
|
http://pear.php.net/package/Text_LanguageDetect/docs
|
|
Unit test status
|
|
https://travis-ci.org/pear/Text_LanguageDetect
|
|
|
|
.. image:: https://travis-ci.org/pear/Text_LanguageDetect.svg?branch=master
|
|
:target: https://travis-ci.org/pear/Text_LanguageDetect
|