Revert "Update languagedetect library"
This commit is contained in:
parent
c22920edba
commit
071946fa78
101 changed files with 3632 additions and 311 deletions
18
library/langdet/docs/confidence.php
Normal file
18
library/langdet/docs/confidence.php
Normal file
|
@ -0,0 +1,18 @@
|
|||
<?php
/**
 * Prints the three most probable languages for a sample sentence,
 * one "language: confidence" pair per line.
 */
require_once 'Text/LanguageDetect.php';

$detector = new Text_LanguageDetect();
$sample   = 'Was wäre, wenn ich Ihnen das jetzt sagen würde?';

// Ask for the 3 best candidates; the result is walked as language => confidence.
$candidates = $detector->detect($sample, 3);

foreach ($candidates as $name => $score) {
    // Confidence values are printed with two decimal places.
    echo $name, ': ', number_format($score, 2), "\n";
}

//output:
//german: 0.35
//dutch: 0.25
//swedish: 0.20
?>
|
15
library/langdet/docs/errorhandling.php
Normal file
15
library/langdet/docs/errorhandling.php
Normal file
|
@ -0,0 +1,15 @@
|
|||
<?php
/**
 * How to handle errors
 *
 * Construction and detection are wrapped in a try block; any
 * Text_LanguageDetect_Exception raised inside it is reported on output.
 */
require_once 'Text/LanguageDetect.php';
require_once 'Text/LanguageDetect/Exception.php';

try {
    $ld = new Text_LanguageDetect();
    $lang = $ld->detectSimple('Das ist ein kleiner Text');
    echo "Language is: $lang\n";
} catch (Text_LanguageDetect_Exception $e) {
    // Print just the message: concatenating the exception object itself
    // would dump its full string form (including the stack trace).
    echo 'An error occurred! Message: ' . $e->getMessage() . "\n";
}
?>
|
35
library/langdet/docs/example_clui.php
Normal file
35
library/langdet/docs/example_clui.php
Normal file
|
@ -0,0 +1,35 @@
|
|||
<?php

/**
 * example usage (CLI)
 *
 * Prints the sorted list of supported languages, then reads standard input
 * line by line and, for each line, prints the result of detect() (limited
 * to 4 entries) followed by the result of detectUnicodeBlocks().
 *
 * @package Text_LanguageDetect
 * @version CVS: $Id$
 */

require_once 'Text/LanguageDetect.php';

$l = new Text_LanguageDetect;

$stdin = fopen('php://stdin', 'r');
if ($stdin === false) {
    // Without a readable input stream there is nothing to detect.
    fwrite(STDERR, "Unable to open php://stdin\n");
    exit(1);
}

echo "Supported languages:\n";
$langs = $l->getLanguages();
sort($langs);
echo implode(', ', $langs);

echo "\ntotal ", count($langs), "\n\n";

// Compare against false explicitly: a line that evaluates to false as a
// string (e.g. a final "0" without trailing newline) must not end the loop.
while (($line = fgets($stdin)) !== false) {
    $result = $l->detect($line, 4);
    print_r($result);
    $blocks = $l->detectUnicodeBlocks($line, true);
    print_r($blocks);
}

fclose($stdin);
unset($l);

/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */

?>
|
72
library/langdet/docs/example_web.php
Normal file
72
library/langdet/docs/example_web.php
Normal file
|
@ -0,0 +1,72 @@
|
|||
<?php

/**
 * example usage (web)
 *
 * Renders a form with a textarea named "q". When text has been submitted,
 * the page reports the detectConfidence() result for it and the Unicode
 * blocks returned by detectUnicodeBlocks().
 *
 * @package Text_LanguageDetect
 * @version CVS: $Id$
 */

// browsers will encode multi-byte characters wrong unless they think the page is utf8-encoded
header('Content-type: text/html; charset=utf-8', true);

require_once 'Text/LanguageDetect.php';

$l = new Text_LanguageDetect;

// Default to an empty string so the form can be rendered before anything
// has been submitted; ignore non-string input such as q[]=... .
$q = '';
if (isset($_REQUEST['q']) && is_string($_REQUEST['q'])) {
    // NOTE(review): stripslashes() presumably undoes magic_quotes_gpc; on PHP
    // versions without magic quotes it strips legitimate backslashes — confirm
    // the targeted PHP version before removing it.
    $q = stripslashes($_REQUEST['q']);
}

?>
<html>
<head>
<title>Text_LanguageDetect demonstration</title>
</head>
<body>
<h2>Text_LanguageDetect</h2>
<?php
// Full "<?php" tags are used throughout so the page does not depend on the
// short_open_tag ini setting.
echo "<small>Supported languages:\n";
$langs = $l->getLanguages();
sort($langs);
echo implode(', ', array_map('ucfirst', $langs));

echo "<br />total ", count($langs), "</small><br /><br />";

?>
<form method="post">
Enter text to identify language (at least a couple of sentences):<br />
<textarea name="q" wrap="virtual" cols="80" rows="8"><?php echo htmlspecialchars($q, ENT_QUOTES, 'UTF-8'); ?></textarea>
<br />
<input type="submit" value="Submit" />
</form>
<?php
if ($q !== '') {
    $len = $l->utf8strlen($q);
    if ($len < 20) { // this value picked somewhat arbitrarily
        echo "Warning: string not very long ($len chars)<br />\n";
    }

    $result = $l->detectConfidence($q);

    if ($result == null) {
        echo "Text_LanguageDetect cannot identify this piece of text. <br /><br />\n";
    } else {
        echo "Text_LanguageDetect thinks this text is written in <b>{$result['language']}</b> ({$result['similarity']}, {$result['confidence']})<br /><br />\n";
    }

    $result = $l->detectUnicodeBlocks($q, false);
    if (!empty($result)) {
        // Sort descending by value before listing the block names.
        arsort($result);
        echo "Unicode blocks present: ", implode(', ', array_keys($result)), "\n<br /><br />";
    }
}

unset($l);

/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */

?>
</body></html>
|
19
library/langdet/docs/iso.php
Normal file
19
library/langdet/docs/iso.php
Normal file
|
@ -0,0 +1,19 @@
|
|||
<?php
/**
 * Demonstrates how to use ISO language codes.
 *
 * The "name mode" changes the way languages are accepted and returned.
 */
require_once 'Text/LanguageDetect.php';

$detector = new Text_LanguageDetect();
$sample   = 'Das ist ein kleiner Text';

// Name mode 2: will output the ISO 639-1 two-letter language code ("de")
$detector->setNameMode(2);
echo $detector->detectSimple($sample), "\n";

// Name mode 3: will output the ISO 639-2 three-letter language code ("deu")
$detector->setNameMode(3);
echo $detector->detectSimple($sample), "\n";
?>
|
11
library/langdet/docs/languages.php
Normal file
11
library/langdet/docs/languages.php
Normal file
|
@ -0,0 +1,11 @@
|
|||
<?php
/**
 * List all supported languages
 *
 * Writes every entry returned by getLanguages() on its own line.
 */
require_once 'Text/LanguageDetect.php';

$detector  = new Text_LanguageDetect();
$supported = $detector->getLanguages();

foreach ($supported as $name) {
    echo $name, "\n";
}
?>
|
10
library/langdet/docs/simple.php
Normal file
10
library/langdet/docs/simple.php
Normal file
|
@ -0,0 +1,10 @@
|
|||
<?php
/**
 * Runs detectSimple() on a sample sentence and dumps the result.
 */
require_once 'Text/LanguageDetect.php';

$detector = new Text_LanguageDetect();
$sample   = 'Was wäre, wenn ich Ihnen das jetzt sagen würde?';

$detected = $detector->detectSimple($sample);
var_dump($detected);
//output: german
?>
|
Loading…
Add table
Add a link
Reference in a new issue