Friendica Communications Platform (please note that this is a clone of the repository at github, issues are handled there) https://friendi.ca
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

2057 lines
90 KiB

<?php
/**
* @package Text_LanguageDetect
* @version CVS: $Id: Text_LanguageDetectTest.php 322353 2012-01-16 08:41:43Z cweiske $
*/
// Prepend the parent directory of this test file to the include path so the
// relative require_once calls below are looked up there first.
set_include_path(
__DIR__ . '/../' . PATH_SEPARATOR . get_include_path()
);
// Report every error level, including E_STRICT notices.
error_reporting(E_ALL|E_STRICT);
// Class under test, followed by the PHPUnit base class the test case extends.
require_once 'Text/LanguageDetect.php';
require_once 'PHPUnit/Framework/TestCase.php';
class Text_LanguageDetectTest extends PHPUnit_Framework_TestCase {
/**
 * Fixture set-up: builds a fresh Text_LanguageDetect instance in $this->x.
 *
 * Spelled setUp() to match PHPUnit's documented hook name and the sibling
 * tearDown(). PHP resolves method names case-insensitively, so existing
 * callers of setup() keep working.
 */
function setUp()
{
    // test_checkTrigramNoArrayMagicQuotes() switches this setting on, so
    // force it off again before every test.
    ini_set('magic_quotes_runtime', 0);
    $this->x = new Text_LanguageDetect();
}
/**
 * Fixture tear-down: removes the detector stored in $this->x.
 */
function tearDown()
{
    unset($this->x);
}
/**
 * _get_data_loc() must return the absolute path '/path/to/file' as given.
 */
function test_get_data_locAbsolute()
{
    $expectedLocation = '/path/to/file';
    $actualLocation = $this->x->_get_data_loc('/path/to/file');

    $this->assertEquals($expectedLocation, $actualLocation);
}
/**
 * With _data_dir set, _get_data_loc('file') must return the path
 * '<data dir>/Text_LanguageDetect/file'.
 */
function test_get_data_locPearPath()
{
    $this->x->_data_dir = '/path/to/pear/data';

    $expectedLocation = '/path/to/pear/data/Text_LanguageDetect/file';
    $actualLocation = $this->x->_get_data_loc('file');

    $this->assertEquals($expectedLocation, $actualLocation);
}
/**
 * _readdb() on a file name that does not exist must throw.
 *
 * @expectedException Text_LanguageDetect_Exception
 * @expectedExceptionMessage Language database does not exist:
 */
function test_readdbNonexistingFile()
{
    $missingFile = 'thisfiledoesnotexist';
    $this->x->_readdb($missingFile);
}
/**
 * _readdb() on an existing file without read permission must throw.
 *
 * The temporary file is always removed again, and the test is skipped where
 * stripping the permission bits does not actually make the file unreadable
 * (for example when running as root).
 *
 * @expectedException Text_LanguageDetect_Exception
 * @expectedExceptionMessage Language database is not readable:
 */
function test_readdbUnreadableFile()
{
    $name = tempnam(sys_get_temp_dir(), 'unittest-Text_LanguageDetect-');
    if ($name === false) {
        $this->markTestSkipped('Could not create a temporary file');
    }

    chmod($name, 0000);
    clearstatcache();
    if (is_readable($name)) {
        // Permissions are not enforced here, so the precondition of this
        // test cannot be established. Clean up before skipping.
        chmod($name, 0600);
        unlink($name);
        $this->markTestSkipped('Cannot make a file unreadable in this environment');
    }

    // Capture the exception instead of letting it escape immediately so the
    // temporary file can be deleted first (no "finally" on older PHP).
    $failure = null;
    try {
        $this->x->_readdb($name);
    } catch (Exception $e) {
        $failure = $e;
    }

    // Restore write permission first; some platforms refuse to delete a
    // file that is marked read-only.
    chmod($name, 0600);
    unlink($name);

    if ($failure !== null) {
        throw $failure;
    }
}
/**
 * _checkTrigram() must reject an empty array.
 *
 * @expectedException Text_LanguageDetect_Exception
 * @expectedExceptionMessage Language database has no elements.
 */
function test_checkTrigramEmpty()
{
    $emptyDb = array();
    $this->x->_checkTrigram($emptyDb);
}
/**
 * _checkTrigram() must reject a value that is not an array.
 *
 * @expectedException Text_LanguageDetect_Exception
 * @expectedExceptionMessage Language database is not an array
 */
function test_checkTrigramNoArray()
{
    $notAnArray = 'foo';
    $this->x->_checkTrigram($notAnArray);
}
/**
 * With magic_quotes_runtime switched on, _checkTrigram() must report the
 * magic-quotes specific error for a non-array value.
 *
 * @expectedException Text_LanguageDetect_Exception
 * @expectedExceptionMessage Error loading database. Try turning magic_quotes_runtime off
 */
function test_checkTrigramNoArrayMagicQuotes()
{
    $magicQuotesGone = version_compare(PHP_VERSION, '5.4.0-dev', '>=');
    if ($magicQuotesGone) {
        $this->markTestSkipped('5.4.0 has no magic quotes anymore');
    }

    ini_set('magic_quotes_runtime', 1);
    $this->x->_checkTrigram('foo');
}
/**
 * Checks the trigram counts _trigram() produces for short ASCII inputs.
 */
function test_splitter ()
{
    // A single word yields each of its padded trigrams exactly once.
    $word = 'hello';
    $counts = $this->x->_trigram($word);
    $expected = array(' he' => 1, 'hel' => 1, 'ell' => 1, 'llo' => 1, 'lo ' => 1);
    $this->assertEquals($expected, $counts);

    // Repeated words accumulate counts.
    $phrase = 'aa aa whatever';
    $counts = $this->x->_trigram($phrase);
    foreach (array(' aa' => 2, 'aa ' => 2, 'a a' => 1) as $trigram => $times) {
        $this->assertEquals($times, $counts[$trigram]);
    }

    // These keys must not appear in the result.
    $pair = 'aa aa';
    $counts = $this->x->_trigram($pair);
    $this->assertArrayNotHasKey(' a', $counts, ' a');
    $this->assertArrayNotHasKey('a ', $counts, 'a ');
}
/**
 * Checks _trigram() on a word ending in an accented character, first in
 * lower case and then with the accented character in upper case (which
 * exercises lower-casing of accented characters).
 */
function test_splitter2 ()
{
    foreach (array('resumé', 'resumÉ') as $word) {
        $counts = $this->x->_trigram($word);

        $this->assertTrue(isset($counts['mé ']), 'mé ');
        $this->assertTrue(isset($counts['umé']), 'umé');
        $this->assertFalse(isset($counts['é ']), 'é');
    }
}
/**
 * _bub_sort() must reorder the array in place into the expected order
 * (b, c, a for the fixture below) without changing any values.
 */
function test_sort ()
{
    $arr = array('a' => 1, 'b' => 2, 'c' => 2);
    $this->x->_bub_sort($arr);
    $final_arr = array('b' => 2, 'c' => 2, 'a' => 1);

    // Key/value content must be preserved ...
    $this->assertEquals($final_arr, $arr);
    // ... and the elements must come out in the expected order. assertEquals()
    // matches associative arrays by key regardless of position, so the order
    // is verified separately through the key lists.
    $this->assertEquals(array_keys($final_arr), array_keys($arr));
}
/**
 * Feeds detectSimple() a series of bad strings and checks that each one
 * produces a falsy / null result.
 */
function test_error ()
{
    // Empty, whitespace-only and extremely short inputs must not be detected.
    $badInputs = array("", "\n", "a", "aa");
    foreach ($badInputs as $input) {
        $detected = $this->x->detectSimple($input);
        $this->assertTrue(!$detected);
    }

    $detected = $this->x->detectSimple('xxxxxxxxxxxxxxxxxxx');
    $this->assertEquals(null, $detected);
}
/**
 * Exercises omitLanguages() in both modes: omitting a single language, and
 * (second argument true) restricting the detector to a given list.
 */
function testOmitLanguages()
{
    $sample = 'This function may return Boolean FALSE, but may also return a non-Boolean value which evaluates to FALSE, such as 0 or "". Please read the section on Booleans for more information. Use the === operator for testing the return value of this function.';

    // Omit one language: the count drops by one and it is no longer detected.
    $detector = new Text_LanguageDetect;
    $detector->_use_unicode_narrowing = false;
    $before = $detector->getLanguageCount();
    $omitted = $detector->omitLanguages('english');
    $after = $detector->getLanguageCount();
    $this->assertEquals(1, $omitted);
    $this->assertEquals(1, $before - $after);
    $guess = strtolower($detector->detectSimple($sample));
    $this->assertTrue($guess != 'english', $guess);

    // Restrict to two languages: only those two remain as candidates.
    $detector = new Text_LanguageDetect;
    $before = $detector->getLanguageCount();
    $omitted = $detector->omitLanguages(array('danish', 'italian'), true);
    $after = $detector->getLanguageCount();
    $this->assertEquals($before - $after, $omitted);
    $this->assertEquals($before - $omitted, $after);
    $guess = strtolower($detector->detectSimple($sample));
    $this->assertTrue($guess == 'danish' || $guess == 'italian', $guess);

    $scores = $detector->detect($sample);
    $this->assertEquals(2, count($scores));
    $this->assertTrue(isset($scores['danish']));
    $this->assertTrue(isset($scores['italian']));

    unset($detector);
}
/**
 * After setNameMode(2), omitLanguages('en') must report one omitted language.
 */
function testOmitLanguagesNameMode2()
{
    $this->x->setNameMode(2);

    $omitted = $this->x->omitLanguages('en');
    $this->assertEquals(1, $omitted);
}
/**
 * omitLanguages('english', true) must leave 'english' as the only language.
 */
function testOmitLanguagesIncludeString()
{
    $omitted = $this->x->omitLanguages('english', true);
    $this->assertGreaterThan(1, $omitted);

    $remaining = $this->x->getLanguages();
    $this->assertEquals(1, count($remaining));
    $this->assertContains('english', $remaining);
}
/**
 * omitLanguages() must reset the _clusters property that
 * clusterLanguages() populates.
 */
function testOmitLanguagesClearsClusterCache()
{
    $detector = $this->x;

    // Nothing has been clustered yet.
    $detector->omitLanguages(array('english', 'german'), true);
    $this->assertNull($detector->_clusters);

    // Clustering fills the property ...
    $detector->clusterLanguages();
    $this->assertNotNull($detector->_clusters);

    // ... and changing the language set must empty it again.
    $detector->omitLanguages('german');
    $this->assertNull($detector->_clusters, 'cluster cache be empty now');
}
/**
 * In Perl-compatible mode _trigram('hello') must not contain the key ' he'.
 *
 * If this test fails, then many of the others will.
 */
function test_perl_compatibility()
{
    $detector = new Text_LanguageDetect;
    $detector->setPerlCompatible(true);

    $word = "hello";
    $counts = $detector->_trigram($word);

    $this->assertFalse(isset($counts[' he']));
}
/**
 * Compares the 'french' entry of the detector's _lang_db against a
 * hard-coded reference table of 300 trigrams.
 *
 * Every key of the reference table must exist in the loaded entry and map
 * to the same value. The check is one-directional: extra keys in the loaded
 * entry are not detected here.
 */
function test_french_db ()
{
// Reference table: trigram => expected value (presumably the trigram's rank
// in the language model -- confirm against the database format).
$safe_model = array(
"es " => 0, " de" => 1, "de " => 2, " le" => 3, "ent" => 4,
"le " => 5, "nt " => 6, "la " => 7, "s d" => 8, " la" => 9,
"ion" => 10, "on " => 11, "re " => 12, " pa" => 13, "e l" => 14,
"e d" => 15, " l'" => 16, "e p" => 17, " co" => 18, " pr" => 19,
"tio" => 20, "ns " => 21, " en" => 22, "ne " => 23, "que" => 24,
"r l" => 25, "les" => 26, "ur " => 27, "en " => 28, "ati" => 29,
"ue " => 30, " po" => 31, " d'" => 32, "par" => 33, " a " => 34,
"et " => 35, "it " => 36, " qu" => 37, "men" => 38, "ons" => 39,
"te " => 40, " et" => 41, "t d" => 42, " re" => 43, "des" => 44,
" un" => 45, "ie " => 46, "s l" => 47, " su" => 48, "pou" => 49,
" au" => 50, " à " => 51, "con" => 52, "er " => 53, " no" => 54,
"ait" => 55, "e c" => 56, "se " => 57, "té " => 58, "du " => 59,
" du" => 60, " dé" => 61, "ce " => 62, "e e" => 63, "is " => 64,
"n d" => 65, "s a" => 66, " so" => 67, "e r" => 68, "e s" => 69,
"our" => 70, "res" => 71, "ssi" => 72, "eur" => 73, " se" => 74,
"eme" => 75, "est" => 76, "us " => 77, "sur" => 78, "ant" => 79,
"iqu" => 80, "s p" => 81, "une" => 82, "uss" => 83, "l'a" => 84,
"pro" => 85, "ter" => 86, "tre" => 87, "end" => 88, "rs " => 89,
" ce" => 90, "e a" => 91, "t p" => 92, "un " => 93, " ma" => 94,
" ru" => 95, " ré" => 96, "ous" => 97, "ris" => 98, "rus" => 99,
"sse" => 100, "ans" => 101, "ar " => 102, "com" => 103, "e m" => 104,
"ire" => 105, "nce" => 106, "nte" => 107, "t l" => 108, " av" => 109,
" mo" => 110, " te" => 111, "il " => 112, "me " => 113, "ont" => 114,
"ten" => 115, "a p" => 116, "dan" => 117, "pas" => 118, "qui" => 119,
"s e" => 120, "s s" => 121, " in" => 122, "ist" => 123, "lle" => 124,
"nou" => 125, "pré" => 126, "'un" => 127, "air" => 128, "d'a" => 129,
"ir " => 130, "n e" => 131, "rop" => 132, "ts " => 133, " da" => 134,
"a s" => 135, "as " => 136, "au " => 137, "den" => 138, "mai" => 139,
"mis" => 140, "ori" => 141, "out" => 142, "rme" => 143, "sio" => 144,
"tte" => 145, "ux " => 146, "a d" => 147, "ien" => 148, "n a" => 149,
"ntr" => 150, "omm" => 151, "ort" => 152, "ouv" => 153, "s c" => 154,
"son" => 155, "tes" => 156, "ver" => 157, "ère" => 158, " il" => 159,
" m " => 160, " sa" => 161, " ve" => 162, "a r" => 163, "ais" => 164,
"ava" => 165, "di " => 166, "n p" => 167, "sti" => 168, "ven" => 169,
" mi" => 170, "ain" => 171, "enc" => 172, "for" => 173, "ité" => 174,
"lar" => 175, "oir" => 176, "rem" => 177, "ren" => 178, "rro" => 179,
"rés" => 180, "sie" => 181, "t a" => 182, "tur" => 183, " pe" => 184,
" to" => 185, "d'u" => 186, "ell" => 187, "err" => 188, "ers" => 189,
"ide" => 190, "ine" => 191, "iss" => 192, "mes" => 193, "por" => 194,
"ran" => 195, "sit" => 196, "st " => 197, "t r" => 198, "uti" => 199,
"vai" => 200, "é l" => 201, "ési" => 202, " di" => 203, " n'" => 204,
" ét" => 205, "a c" => 206, "ass" => 207, "e t" => 208, "in " => 209,
"nde" => 210, "pre" => 211, "rat" => 212, "s m" => 213, "ste" => 214,
"tai" => 215, "tch" => 216, "ui " => 217, "uro" => 218, "ès " => 219,
" es" => 220, " fo" => 221, " tr" => 222, "'ad" => 223, "app" => 224,
"aux" => 225, "e à" => 226, "ett" => 227, "iti" => 228, "lit" => 229,
"nal" => 230, "opé" => 231, "r d" => 232, "ra " => 233, "rai" => 234,
"ror" => 235, "s r" => 236, "tat" => 237, "uté" => 238, "à l" => 239,
" af" => 240, "anc" => 241, "ara" => 242, "art" => 243, "bre" => 244,
"ché" => 245, "dre" => 246, "e f" => 247, "ens" => 248, "lem" => 249,
"n r" => 250, "n t" => 251, "ndr" => 252, "nne" => 253, "onn" => 254,
"pos" => 255, "s t" => 256, "tiq" => 257, "ure" => 258, " tu" => 259,
"ale" => 260, "and" => 261, "ave" => 262, "cla" => 263, "cou" => 264,
"e n" => 265, "emb" => 266, "ins" => 267, "jou" => 268, "mme" => 269,
"rie" => 270, "rès" => 271, "sem" => 272, "str" => 273, "t i" => 274,
"ues" => 275, "uni" => 276, "uve" => 277, "é d" => 278, "ée " => 279,
" ch" => 280, " do" => 281, " eu" => 282, " fa" => 283, " lo" => 284,
" ne" => 285, " ra" => 286, "arl" => 287, "att" => 288, "ec " => 289,
"ica" => 290, "l a" => 291, "l'o" => 292, "l'é" => 293, "mmi" => 294,
"nta" => 295, "orm" => 296, "ou " => 297, "r u" => 298, "rle" => 299
);
// The language entry as held by the detector instance.
$my_arr = $this->x->_lang_db['french'];
foreach ($safe_model as $key => $value) {
// The trigram itself is passed as the failure message so a mismatch
// identifies the offending key.
$this->assertTrue(isset($my_arr[$key]),$key);
if (isset($my_arr[$key])) {
$this->assertEquals($value, $my_arr[$key], $key);
}
}
}
/**
 * Compares the 'english' entry of the detector's _lang_db against a
 * hard-coded reference table of 300 trigrams, in both directions.
 *
 * Every reference key must exist in the loaded entry with the same value,
 * and every key of the loaded entry must exist in the reference table with
 * the same value, so missing as well as unexpected trigrams are caught.
 */
function test_english_db ()
{
// Reference table: trigram => expected value (presumably the trigram's rank
// in the language model -- confirm against the database format).
$realdb = array(
" th" => 0, "the" => 1, "he " => 2, "ed " => 3, " to" => 4,
" in" => 5, "er " => 6, "ing" => 7, "ng " => 8, " an" => 9,
"nd " => 10, " of" => 11, "and" => 12, "to " => 13, "of " => 14,
" co" => 15, "at " => 16, "on " => 17, "in " => 18, " a " => 19,
"d t" => 20, " he" => 21, "e t" => 22, "ion" => 23, "es " => 24,
" re" => 25, "re " => 26, "hat" => 27, " sa" => 28, " st" => 29,
" ha" => 30, "her" => 31, "tha" => 32, "tio" => 33, "or " => 34,
" ''" => 35, "en " => 36, " wh" => 37, "e s" => 38, "ent" => 39,
"n t" => 40, "s a" => 41, "as " => 42, "for" => 43, "is " => 44,
"t t" => 45, " be" => 46, "ld " => 47, "e a" => 48, "rs " => 49,
" wa" => 50, "ut " => 51, "ve " => 52, "ll " => 53, "al " => 54,
" ma" => 55, "e i" => 56, " fo" => 57, "'s " => 58, "an " => 59,
"est" => 60, " hi" => 61, " mo" => 62, " se" => 63, " pr" => 64,
"s t" => 65, "ate" => 66, "st " => 67, "ter" => 68, "ere" => 69,
"ted" => 70, "nt " => 71, "ver" => 72, "d a" => 73, " wi" => 74,
"se " => 75, "e c" => 76, "ect" => 77, "ns " => 78, " on" => 79,
"ly " => 80, "tol" => 81, "ey " => 82, "r t" => 83, " ca" => 84,
"ati" => 85, "ts " => 86, "all" => 87, " no" => 88, "his" => 89,
"s o" => 90, "ers" => 91, "con" => 92, "e o" => 93, "ear" => 94,
"f t" => 95, "e w" => 96, "was" => 97, "ons" => 98, "sta" => 99,
"'' " => 100, "sti" => 101, "n a" => 102, "sto" => 103, "t h" => 104,
" we" => 105, "id " => 106, "th " => 107, " it" => 108, "ce " => 109,
" di" => 110, "ave" => 111, "d h" => 112, "cou" => 113, "pro" => 114,
"ad " => 115, "oll" => 116, "ry " => 117, "d s" => 118, "e m" => 119,
" so" => 120, "ill" => 121, "cti" => 122, "te " => 123, "tor" => 124,
"eve" => 125, "g t" => 126, "it " => 127, " ch" => 128, " de" => 129,
"hav" => 130, "oul" => 131, "ty " => 132, "uld" => 133, "use" => 134,
" al" => 135, "are" => 136, "ch " => 137, "me " => 138, "out" => 139,
"ove" => 140, "wit" => 141, "ys " => 142, "chi" => 143, "t a" => 144,
"ith" => 145, "oth" => 146, " ab" => 147, " te" => 148, " wo" => 149,
"s s" => 150, "res" => 151, "t w" => 152, "tin" => 153, "e b" => 154,
"e h" => 155, "nce" => 156, "t s" => 157, "y t" => 158, "e p" => 159,
"ele" => 160, "hin" => 161, "s i" => 162, "nte" => 163, " li" => 164,
"le " => 165, " do" => 166, "aid" => 167, "hey" => 168, "ne " => 169,
"s w" => 170, " as" => 171, " fr" => 172, " tr" => 173, "end" => 174,
"sai" => 175, " el" => 176, " ne" => 177, " su" => 178, "'t " => 179,
"ay " => 180, "hou" => 181, "ive" => 182, "lec" => 183, "n't" => 184,
" ye" => 185, "but" => 186, "d o" => 187, "o t" => 188, "y o" => 189,
" ho" => 190, " me" => 191, "be " => 192, "cal" => 193, "e e" => 194,
"had" => 195, "ple" => 196, " at" => 197, " bu" => 198, " la" => 199,
"d b" => 200, "s h" => 201, "say" => 202, "t i" => 203, " ar" => 204,
"e f" => 205, "ght" => 206, "hil" => 207, "igh" => 208, "int" => 209,
"not" => 210, "ren" => 211, " is" => 212, " pa" => 213, " sh" => 214,
"ays" => 215, "com" => 216, "n s" => 217, "r a" => 218, "rin" => 219,
"y a" => 220, " un" => 221, "n c" => 222, "om " => 223, "thi" => 224,
" mi" => 225, "by " => 226, "d i" => 227, "e d" => 228, "e n" => 229,
"t o" => 230, " by" => 231, "e r" => 232, "eri" => 233, "old" => 234,
"ome" => 235, "whe" => 236, "yea" => 237, " gr" => 238, "ar " => 239,
"ity" => 240, "mpl" => 241, "oun" => 242, "one" => 243, "ow " => 244,
"r s" => 245, "s f" => 246, "tat" => 247, " ba" => 248, " vo" => 249,
"bou" => 250, "sam" => 251, "tim" => 252, "vot" => 253, "abo" => 254,
"ant" => 255, "ds " => 256, "ial" => 257, "ine" => 258, "man" => 259,
"men" => 260, " or" => 261, " po" => 262, "amp" => 263, "can" => 264,
"der" => 265, "e l" => 266, "les" => 267, "ny " => 268, "ot " => 269,
"rec" => 270, "tes" => 271, "tho" => 272, "ica" => 273, "ild" => 274,
"ir " => 275, "nde" => 276, "ose" => 277, "ous" => 278, "pre" => 279,
"ste" => 280, "era" => 281, "per" => 282, "r o" => 283, "red" => 284,
"rie" => 285, " bo" => 286, " le" => 287, "ali" => 288, "ars" => 289,
"ore" => 290, "ric" => 291, "s m" => 292, "str" => 293, " fa" => 294,
"ess" => 295, "ie " => 296, "ist" => 297, "lat" => 298, "uri" => 299,
);
// The language entry as held by the detector instance.
$mod = $this->x->_lang_db['english'];
// Direction 1: everything in the reference table is present in the entry.
foreach ($realdb as $key => $value) {
$this->assertTrue(isset($mod[$key]), $key);
if (isset($mod[$key])) {
$this->assertEquals($value, $mod[$key], $key);
}
}
// Direction 2: the entry contains nothing beyond the reference table.
foreach ($mod as $key => $value) {
$this->assertTrue(isset($realdb[$key]));
if (isset($realdb[$key])) {
$this->assertEquals($value, $realdb[$key], $key);
}
}
}
/**
 * detectConfidence() must return exactly the keys 'language', 'similarity'
 * and 'confidence', identify the sample as english, and keep both scores
 * inside their expected ranges.
 */
function test_confidence ()
{
    $sample = 'The next thing to notice is the Content-length header. The Content-length header notifies the server of the size of the data that you intend to send. This prevents unexpected end-of-data errors from the server when dealing with binary data, because the server will read the specified number of bytes from the data stream regardless of any spurious end-of-data characters.';
    $report = $this->x->detectConfidence($sample);

    // Shape of the result.
    $this->assertEquals(3, count($report));
    foreach (array('language', 'similarity', 'confidence') as $field) {
        $this->assertTrue(isset($report[$field]), $field);
    }

    $this->assertEquals('english', $report['language']);

    // Range checks; the offending value doubles as the failure message.
    $similarity = $report['similarity'];
    $this->assertTrue($similarity <= 300 && $similarity >= 0, $similarity);
    $confidence = $report['confidence'];
    $this->assertTrue($confidence <= 1 && $confidence >= 0, $confidence);
    // todo: tests for Danish and Norwegian should have lower confidence
}
function test_long_example ()
{
// an example that is more than 300 trigrams long
$str = 'The Italian Renaissance began the opening phase of the Renaissance, a period of great cultural change and achievement from the 14th to the 16th century. The word renaissance means "rebirth," and the era is best known for the renewed interest in the culture of classical antiquity. The Italian Renaissance began in northern Italy, centering in Florence. It then spread south, having an especially significant impact on Rome, which was largely rebuilt by the Renaissance popes. The Italian Renaissance is best known for its cultural achievements. This includes works of literature by such figures as Petrarch, Castiglione, and Machiavelli; artists such as Michaelangelo and Leonardo da Vinci, and great works of architecture such as The Duomo in Florence and St. Peter\'s Basilica in Rome. At the same time, present-day historians also see the era as one of economic regression and of little progress in science. Furthermore, some historians argue that the lot of the peasants and urban poor, the majority of the population, worsened during this period.';
$this->x->setPerlCompatible();
$tri = $this->x->_trigram($str);
$exp_tri = array(
' th',
'the',
'he ',
' an',
' re',
' of',
'ce ',
'nce',
'of ',
'ren',
' in',
'and',
'nd ',
'an ',
'san',
' it',
'ais',
'anc',
'ena',
'in ',
'iss',
'nai',
'ssa',
'tur',
' pe',
'as ',
'ch ',
'ent',
'ian',
'me ',
'n r',
'res',
' as',
' be',
' wo',
'at ',
'chi',
'e i',
'e o',
'e p',
'gre',
'his',
'ing',
'is ',
'ita',
'n f',
'ng ',
're ',
's a',
'st ',
'tal',
'ter',
'th ',
'ts ',
'ure',
'wor',
' ar',
' cu',
' po',
' su',
'ach',
'al ',
'ali',
'ans',
'ant',
'cul',
'e b',
'e r',
'e t',
'enc',
'era',
'eri',
'es ',
'est',
'f t',
'ica',
'ion',
'ist',
'lia',
'ltu',
'ly ',
'ns ',
'nt ',
'ome',
'on ',
'or ',
'ore',
'ori',
'rea',
'rom',
'rth',
's b',
's o',
'suc',
't t',
'uch',
'ult',
' ac',
' by',
' ce',
' da',
' du',
' er',
' fl',
' fo',
' gr',
' hi',
' is',
' kn',
' li',
' ma',
' on',
' pr',
' ro',
' so',
'a i',
'ang',
'arc',
'arg',
'beg',
'bes',
'by ',
'cen',
'cha',
'd o',
'd s',
'e a',
'e e',
'e m',
'e s',
'eat',
'ed ',
'ega',
'eme',
'ene',
'ess',
'eve',
'f l',
'flo',
'for',
'gan',
'gel',
'h a',
'her',
'hie',
'ich',
'iev',
'inc',
'iod',
'ite',
'ity',
'kno',
'ks ',
'l a',
'lit',
'lor',
'men',
'mic',
'n i',
'n s',
'n t',
'ne ',
'nge',
'now',
'nte',
'nts',
'od ',
'one',
'ope',
'ork',
'own',
'per',
'pet',
'pop',
'pre',
'ra ',
'ral',
'rch',
'reb',
'ria',
'rin',
'rio',
'rks',
's i',
's p',
'sen',
'ssi',
'sto',
't i',
't k',
't o',
'thi',
'tor',
'ty ',
'ura',
'vem',
'vin',
'wn ',
'y s',
' a ',
' al',
' at',
' ba',
' ca',
' ch',
' cl',
' ec',
' es',
' fi',
' fr',
' fu',
' ha',
' im',
' la',
' le',
' lo',
' me',
' mi',
' no',
' op',
' ph',
' sa',
' sc',
' se',
' si',
' sp',
' st',
' ti',
' to',
' ur',
' vi',
' wa',
' wh',
'\'s ',
'a a',
'a p',
'a v',
'act',
'ad ',
'ael',
'ajo',
'all',
'als',
'aly',
'ame',
'ard',
'art',
'asa',
'ase',
'asi',
'ass',
'ast',
'ati',
'atu',
'ave',
'avi',
'ay ',
'ban',
'bas',
'bir',
'bui',
'c r',
'ca ',
'cal',
'can',
'cas',
'ci ',
'cia',
'cie',
'cla',
'clu',
'con',
'ct ',
'ctu',
'd a',
'd d',
'd g',
'd i',
'd l',
'd m',
'd r',
'd t',
'd u',
'da ',
'day',
'des',
'do ',
'duo',
'dur',
'e c',
'e d',
'e h',
'e l',
'e w',
'ead',
'ean',
'eas',
'ebi',
'ebu',
'eci',
'eco',
'ect',
'ee ',
'egr',
'ela',
'ell',
'elo',
'ely',
'en ',
'eni',
'eon',
'er\'',
'ere',
'erm',
'ern',
'ese',
'esp',
'ete',
'etr',
'ewe',
'f a',
'f c',
'f e',
'f g',
'fic',
'fig',
'fro',
'fur',
'g a',
'g i',
'g p',
'g t',
'ge ',
'gli',
'gni',
'gue',
'gur',
'h c',
'h f',
'h t',
'h w',
'hae',
'han',
'has',
'hat',
'hav',
'hen',
'hia',
'hic',
'hit',
'ial',
'iav',
'ic ',
'ien',
'ifi',
'igl',
'ign',
'igu',
'ili',
'ilt',
'ime',
'imp',
'int',
'iqu',
'irt',
'it ',
'its',
'itt',
'jor',
'l c',
'lan',
'lar',
'las',
'lat',
'le ',
'leo',
'li ',
'lic',
'lio',
'lli',
'lly',
'lo ',
'lot',
'lso',
'lt ',
'lud',
'm t',
'mac',
'maj',
'mea',
'mo ',
'mor',
'mpa',
'n a',
'n e',
'n n',
'n p',
'nar',
'nci',
'ncl',
'ned',
'new',
'nif',
'nin',
'nom',
'nor',
'nti',
'ntu',
'o a',
'o d',
'o i',
'o s',
'o t',
'ogr',
'om ',
'omi',
'omo',
'ona',
'ono',
'oor',
'opu',
'ord',
'ors',
'ort',
'ot ',
'out',
'pac',
'pea',
'pec',
'pen',
'pes',
'pha',
'poo',
'pro',
'pul',
'qui',
'r i',
'r t',
'r\'s',
'rar',
'rat',
'rba',
'rd ',
'rdo',
'reg',
'rge',
'rgu',
'rit',
'rmo',
'rn ',
'rog',
'rse',
'rti',
'ry ',
's c',
's l',
's m',
's s',
's t',
's w',
'sam',
'sci',
'se ',
'see',
'sic',
'sig',
'sil',
'sio',
'so ',
'som',
'sou',
'spe',
'spr',
'ss ',
'sti',
'sts',
't b',
't c',
't d',
't f',
't w',
'tec',
'tha',
'tig',
'tim',
'tio',
'tiq',
'tis',
'tle',
'to ',
'tra',
'ttl',
'ude',
'ue ',
'uil',
'uit',
'ula',
'uom',
'urb',
'uri',
'urt',
'ury',
'uth',
'vel',
'was',
'wed',
'whi',
'y h',
'y o',
'y r',
'y t'
);
$differences = array_diff(array_keys($tri), $exp_tri);
$this->assertEquals(0, count($differences));
$this->assertEquals(0, count(array_diff($exp_tri, array_keys($tri))));
$this->assertEquals(count($exp_tri), count($tri));
//print_r(array_diff($exp_tri, array_keys($tri)));
//print_r(array_diff(array_keys($tri), $exp_tri));
// tests the bubble sort mechanism
$this->x->_bub_sort($tri);
$this->assertEquals($exp_tri, array_keys($tri));
$true_differences = array(
"cas" => array('change' => 300, 'baserank' => 265, 'refrank' => null), "s i" => array('change' => 21, 'baserank' => 183, 'refrank' => 162),
"e b" => array('change' => 88, 'baserank' => 66, 'refrank' => 154), "ent" => array('change' => 12, 'baserank' => 27, 'refrank' => 39),
"ome" => array('change' => 152, 'baserank' => 83, 'refrank' => 235), "ral" => array('change' => 300, 'baserank' => 176, 'refrank' => null),
"ita" => array('change' => 300, 'baserank' => 44, 'refrank' => null), "bas" => array('change' => 300, 'baserank' => 258, 'refrank' => null),
" ar" => array('change' => 148, 'baserank' => 56, 'refrank' => 204), " in" => array('change' => 5, 'baserank' => 10, 'refrank' => 5),
" ti" => array('change' => 300, 'baserank' => 227, 'refrank' => null), "ty " => array('change' => 61, 'baserank' => 193, 'refrank' => 132),
"tur" => array('change' => 300, 'baserank' => 23, 'refrank' => null), "iss" => array('change' => 300, 'baserank' => 20, 'refrank' => null),
"ria" => array('change' => 300, 'baserank' => 179, 'refrank' => null), " me" => array('change' => 25, 'baserank' => 216, 'refrank' => 191),
"t k" => array('change' => 300, 'baserank' => 189, 'refrank' => null), " es" => array('change' => 300, 'baserank' => 207, 'refrank' => null),
"ren" => array('change' => 202, 'baserank' => 9, 'refrank' => 211), "in " => array('change' => 1, 'baserank' => 19, 'refrank' => 18),
"ly " => array('change' => 0, 'baserank' => 80, 'refrank' => 80), "st " => array('change' => 18, 'baserank' => 49, 'refrank' => 67),
"ne " => array('change' => 8, 'baserank' => 161, 'refrank' => 169), "all" => array('change' => 154, 'baserank' => 241, 'refrank' => 87),
"vin" => array('change' => 300, 'baserank' => 196, 'refrank' => null), " op" => array('change' => 300, 'baserank' => 219, 'refrank' => null),
"chi" => array('change' => 107, 'baserank' => 36, 'refrank' => 143), "e w" => array('change' => 197, 'baserank' => 293, 'refrank' => 96),
" ro" => array('change' => 300, 'baserank' => 113, 'refrank' => null), "act" => array('change' => 300, 'baserank' => 237, 'refrank' => null),
"d r" => array('change' => 300, 'baserank' => 280, 'refrank' => null), "nt " => array('change' => 11, 'baserank' => 82, 'refrank' => 71),
"can" => array('change' => 0, 'baserank' => 264, 'refrank' => 264), "rea" => array('change' => 300, 'baserank' => 88, 'refrank' => null),
"ssa" => array('change' => 300, 'baserank' => 22, 'refrank' => null), " fo" => array('change' => 47, 'baserank' => 104, 'refrank' => 57),
"eas" => array('change' => 300, 'baserank' => 296, 'refrank' => null), "mic" => array('change' => 300, 'baserank' => 157, 'refrank' => null),
"cul" => array('change' => 300, 'baserank' => 65, 'refrank' => null), " an" => array('change' => 6, 'baserank' => 3, 'refrank' => 9),
"n t" => array('change' => 120, 'baserank' => 160, 'refrank' => 40), "arg" => array('change' => 300, 'baserank' => 118, 'refrank' => null),
" it" => array('change' => 93, 'baserank' => 15, 'refrank' => 108), "ebi" => array('change' => 300, 'baserank' => 297, 'refrank' => null),
" re" => array('change' => 21, 'baserank' => 4, 'refrank' => 25), "res" => array('change' => 120, 'baserank' => 31, 'refrank' => 151),
" be" => array('change' => 13, 'baserank' => 33, 'refrank' => 46), "rom" => array('change' => 300, 'baserank' => 89, 'refrank' => null),
"'s " => array('change' => 175, 'baserank' => 233, 'refrank' => 58), "arc" => array('change' => 300, 'baserank' => 117, 'refrank' => null),
" su" => array('change' => 119, 'baserank' => 59, 'refrank' => 178), "s p" => array('change' => 300, 'baserank' => 184, 'refrank' => null),
"ich" => array('change' => 300, 'baserank' => 145, 'refrank' => null), "d d" => array('change' => 300, 'baserank' => 275, 'refrank' => null),
"cal" => array('change' => 70, 'baserank' => 263, 'refrank' => 193), "ci " => array('change' => 300, 'baserank' => 266, 'refrank' => null),
"ssi" => array('change' => 300, 'baserank' => 186, 'refrank' => null), "bes" => array('change' => 300, 'baserank' => 120, 'refrank' => null),
"des" => array('change' => 300, 'baserank' => 285, 'refrank' => null), "e s" => array('change' => 91, 'baserank' => 129, 'refrank' => 38),
"ch " => array('change' => 111, 'baserank' => 26, 'refrank' => 137), "san" => array('change' => 300, 'baserank' => 14, 'refrank' => null),
"asi" => array('change' => 300, 'baserank' => 249, 'refrank' => null), "ajo" => array('change' => 300, 'baserank' => 240, 'refrank' => null),
"ase" => array('change' => 300, 'baserank' => 248, 'refrank' => null), " wa" => array('change' => 181, 'baserank' => 231, 'refrank' => 50),
"vem" => array('change' => 300, 'baserank' => 195, 'refrank' => null), "ed " => array('change' => 128, 'baserank' => 131, 'refrank' => 3),
"ant" => array('change' => 191, 'baserank' => 64, 'refrank' => 255), "a p" => array('change' => 300, 'baserank' => 235, 'refrank' => null),
"lor" => array('change' => 300, 'baserank' => 155, 'refrank' => null), "kno" => array('change' => 300, 'baserank' => 151, 'refrank' => null),
"ais" => array('change' => 300, 'baserank' => 16, 'refrank' => null), " pe" => array('change' => 300, 'baserank' => 24, 'refrank' => null),
"or " => array('change' => 51, 'baserank' => 85, 'refrank' => 34), "e i" => array('change' => 19, 'baserank' => 37, 'refrank' => 56),
" sp" => array('change' => 300, 'baserank' => 225, 'refrank' => null), "ad " => array('change' => 123, 'baserank' => 238, 'refrank' => 115),
" kn" => array('change' => 300, 'baserank' => 108, 'refrank' => null), "ega" => array('change' => 300, 'baserank' => 132, 'refrank' => null),
" ba" => array('change' => 46, 'baserank' => 202, 'refrank' => 248), "d t" => array('change' => 261, 'baserank' => 281, 'refrank' => 20),
"ork" => array('change' => 300, 'baserank' => 169, 'refrank' => null), "lia" => array('change' => 300, 'baserank' => 78, 'refrank' => null),
"ard" => array('change' => 300, 'baserank' => 245, 'refrank' => null), "iev" => array('change' => 300, 'baserank' => 146, 'refrank' => null),
"of " => array('change' => 6, 'baserank' => 8, 'refrank' => 14), " cu" => array('change' => 300, 'baserank' => 57, 'refrank' => null),
"day" => array('change' => 300, 'baserank' => 284, 'refrank' => null), "cen" => array('change' => 300, 'baserank' => 122, 'refrank' => null),
"re " => array('change' => 21, 'baserank' => 47, 'refrank' => 26), "ist" => array('change' => 220, 'baserank' => 77, 'refrank' => 297),
" fl" => array('change' => 300, 'baserank' => 103, 'refrank' => null), "anc" => array('change' => 300, 'baserank' => 17, 'refrank' => null),
"at " => array('change' => 19, 'baserank' => 35, 'refrank' => 16), "rch" => array('change' => 300, 'baserank' => 177, 'refrank' => null),
"ang" => array('change' => 300, 'baserank' => 116, 'refrank' => null), " mi" => array('change' => 8, 'baserank' => 217, 'refrank' => 225),
"y s" => array('change' => 300, 'baserank' => 198, 'refrank' => null), "ca " => array('change' => 300, 'baserank' => 262, 'refrank' => null),
" ma" => array('change' => 55, 'baserank' => 110, 'refrank' => 55), " lo" => array('change' => 300, 'baserank' => 215, 'refrank' => null),
"rin" => array('change' => 39, 'baserank' => 180, 'refrank' => 219), " im" => array('change' => 300, 'baserank' => 212, 'refrank' => null),
" er" => array('change' => 300, 'baserank' => 102, 'refrank' => null), "ce " => array('change' => 103, 'baserank' => 6, 'refrank' => 109),
"bui" => array('change' => 300, 'baserank' => 260, 'refrank' => null), "lit" => array('change' => 300, 'baserank' => 154, 'refrank' => null),
"iod" => array('change' => 300, 'baserank' => 148, 'refrank' => null), "ame" => array('change' => 300, 'baserank' => 244, 'refrank' => null),
"ter" => array('change' => 17, 'baserank' => 51, 'refrank' => 68), "e a" => array('change' => 78, 'baserank' => 126, 'refrank' => 48),
"f l" => array('change' => 300, 'baserank' => 137, 'refrank' => null), "eri" => array('change' => 162, 'baserank' => 71, 'refrank' => 233),
"ra " => array('change' => 300, 'baserank' => 175, 'refrank' => null), "ng " => array('change' => 38, 'baserank' => 46, 'refrank' => 8),
"d i" => array('change' => 50, 'baserank' => 277, 'refrank' => 227), "asa" => array('change' => 300, 'baserank' => 247, 'refrank' => null),
"wn " => array('change' => 300, 'baserank' => 197, 'refrank' => null), " at" => array('change' => 4, 'baserank' => 201, 'refrank' => 197),
"now" => array('change' => 300, 'baserank' => 163, 'refrank' => null), " by" => array('change' => 133, 'baserank' => 98, 'refrank' => 231),
"n s" => array('change' => 58, 'baserank' => 159, 'refrank' => 217), " li" => array('change' => 55, 'baserank' => 109, 'refrank' => 164),
"l a" => array('change' => 300, 'baserank' => 153, 'refrank' => null), "da " => array('change' => 300, 'baserank' => 283, 'refrank' => null),
"ean" => array('change' => 300, 'baserank' => 295, 'refrank' => null), "tal" => array('change' => 300, 'baserank' => 50, 'refrank' => null),
"d a" => array('change' => 201, 'baserank' => 274, 'refrank' => 73), "ct " => array('change' => 300, 'baserank' => 272, 'refrank' => null),
"ali" => array('change' => 226, 'baserank' => 62, 'refrank' => 288), "ian" => array('change' => 300, 'baserank' => 28, 'refrank' => null),
" sa" => array('change' => 193, 'baserank' => 221, 'refrank' => 28), "do " => array('change' => 300, 'baserank' => 286, 'refrank' => null),
"t o" => array('change' => 40, 'baserank' => 190, 'refrank' => 230), "ure" => array('change' => 300, 'baserank' => 54, 'refrank' => null),
"e c" => array('change' => 213, 'baserank' => 289, 'refrank' => 76), "ing" => array('change' => 35, 'baserank' => 42, 'refrank' => 7),
"d o" => array('change' => 63, 'baserank' => 124, 'refrank' => 187), " ha" => array('change' => 181, 'baserank' => 211, 'refrank' => 30),
"ts " => array('change' => 33, 'baserank' => 53, 'refrank' => 86), "rth" => array('change' => 300, 'baserank' => 90, 'refrank' => null),
"cla" => array('change' => 300, 'baserank' => 269, 'refrank' => null), " ac" => array('change' => 300, 'baserank' => 97, 'refrank' => null),
"th " => array('change' => 55, 'baserank' => 52, 'refrank' => 107), "rio" => array('change' => 300, 'baserank' => 181, 'refrank' => null),
"al " => array('change' => 7, 'baserank' => 61, 'refrank' => 54), "sto" => array('change' => 84, 'baserank' => 187, 'refrank' => 103),
"e o" => array('change' => 55, 'baserank' => 38, 'refrank' => 93), "bir" => array('change' => 300, 'baserank' => 259, 'refrank' => null),
" pr" => array('change' => 48, 'baserank' => 112, 'refrank' => 64), " le" => array('change' => 73, 'baserank' => 214, 'refrank' => 287),
"nai" => array('change' => 300, 'baserank' => 21, 'refrank' => null), "t i" => array('change' => 15, 'baserank' => 188, 'refrank' => 203),
" po" => array('change' => 204, 'baserank' => 58, 'refrank' => 262), "f t" => array('change' => 21, 'baserank' => 74, 'refrank' => 95),
"ban" => array('change' => 300, 'baserank' => 257, 'refrank' => null), "an " => array('change' => 46, 'baserank' => 13, 'refrank' => 59),
"wor" => array('change' => 300, 'baserank' => 55, 'refrank' => null), "pet" => array('change' => 300, 'baserank' => 172, 'refrank' => null),
"ael" => array('change' => 300, 'baserank' => 239, 'refrank' => null), "ura" => array('change' => 300, 'baserank' => 194, 'refrank' => null),
"eve" => array('change' => 11, 'baserank' => 136, 'refrank' => 125), "ion" => array('change' => 53, 'baserank' => 76, 'refrank' => 23),
"nge" => array('change' => 300, 'baserank' => 162, 'refrank' => null), "cha" => array('change' => 300, 'baserank' => 123, 'refrank' => null),
"ity" => array('change' => 90, 'baserank' => 150, 'refrank' => 240), " se" => array('change' => 160, 'baserank' => 223, 'refrank' => 63),
" on" => array('change' => 32, 'baserank' => 111, 'refrank' => 79), "s b" => array('change' => 300, 'baserank' => 91, 'refrank' => null),
"ans" => array('change' => 300, 'baserank' => 63, 'refrank' => null), "own" => array('change' => 300, 'baserank' => 170, 'refrank' => null),
" si" => array('change' => 300, 'baserank' => 224, 'refrank' => null), "e r" => array('change' => 165, 'baserank' => 67, 'refrank' => 232),
"est" => array('change' => 13, 'baserank' => 73, 'refrank' => 60), "hie" => array('change' => 300, 'baserank' => 144, 'refrank' => null),
"aly" => array('change' => 300, 'baserank' => 243, 'refrank' => null), "and" => array('change' => 1, 'baserank' => 11, 'refrank' => 12),
"beg" => array('change' => 300, 'baserank' => 119, 'refrank' => null), "dur" => array('change' => 300, 'baserank' => 288, 'refrank' => null),
"reb" => array('change' => 300, 'baserank' => 178, 'refrank' => null), "e e" => array('change' => 67, 'baserank' => 127, 'refrank' => 194),
"men" => array('change' => 104, 'baserank' => 156, 'refrank' => 260), " la" => array('change' => 14, 'baserank' => 213, 'refrank' => 199),
"con" => array('change' => 179, 'baserank' => 271, 'refrank' => 92), " fu" => array('change' => 300, 'baserank' => 210, 'refrank' => null),
"e l" => array('change' => 26, 'baserank' => 292, 'refrank' => 266), "s a" => array('change' => 7, 'baserank' => 48, 'refrank' => 41),
"art" => array('change' => 300, 'baserank' => 246, 'refrank' => null), "ltu" => array('change' => 300, 'baserank' => 79, 'refrank' => null),
"a i" => array('change' => 300, 'baserank' => 115, 'refrank' => null), "ctu" => array('change' => 300, 'baserank' => 273, 'refrank' => null),
"tor" => array('change' => 68, 'baserank' => 192, 'refrank' => 124), "ach" => array('change' => 300, 'baserank' => 60, 'refrank' => null),
"d g" => array('change' => 300, 'baserank' => 276, 'refrank' => null), "od " => array('change' => 300, 'baserank' => 166, 'refrank' => null),
"nte" => array('change' => 1, 'baserank' => 164, 'refrank' => 163), "ena" => array('change' => 300, 'baserank' => 18, 'refrank' => null),
"d l" => array('change' => 300, 'baserank' => 278, 'refrank' => null), "ene" => array('change' => 300, 'baserank' => 134, 'refrank' => null),
"e h" => array('change' => 136, 'baserank' => 291, 'refrank' => 155), "era" => array('change' => 211, 'baserank' => 70, 'refrank' => 281),
"on " => array('change' => 67, 'baserank' => 84, 'refrank' => 17), " ce" => array('change' => 300, 'baserank' => 99, 'refrank' => null),
"ay " => array('change' => 76, 'baserank' => 256, 'refrank' => 180), " da" => array('change' => 300, 'baserank' => 100, 'refrank' => null),
"ori" => array('change' => 300, 'baserank' => 87, 'refrank' => null), "atu" => array('change' => 300, 'baserank' => 253, 'refrank' => null),
"ave" => array('change' => 143, 'baserank' => 254, 'refrank' => 111), "rks" => array('change' => 300, 'baserank' => 182, 'refrank' => null),
"e d" => array('change' => 62, 'baserank' => 290, 'refrank' => 228), "ns " => array('change' => 3, 'baserank' => 81, 'refrank' => 78),
" ca" => array('change' => 119, 'baserank' => 203, 'refrank' => 84), "d s" => array('change' => 7, 'baserank' => 125, 'refrank' => 118),
"uch" => array('change' => 300, 'baserank' => 95, 'refrank' => null), "a v" => array('change' => 300, 'baserank' => 236, 'refrank' => null),
"nce" => array('change' => 149, 'baserank' => 7, 'refrank' => 156), "his" => array('change' => 48, 'baserank' => 41, 'refrank' => 89),
"flo" => array('change' => 300, 'baserank' => 138, 'refrank' => null), "ead" => array('change' => 300, 'baserank' => 294, 'refrank' => null),
" vi" => array('change' => 300, 'baserank' => 230, 'refrank' => null), "me " => array('change' => 109, 'baserank' => 29, 'refrank' => 138),
"suc" => array('change' => 300, 'baserank' => 93, 'refrank' => null), "e p" => array('change' => 120, 'baserank' => 39, 'refrank' => 159),
"eci" => array('change' => 300, 'baserank' => 299, 'refrank' => null), "eme" => array('change' => 300, 'baserank' => 133, 'refrank' => null),
"sen" => array('change' => 300, 'baserank' => 185, 'refrank' => null), "ks " => array('change' => 300, 'baserank' => 152, 'refrank' => null),
" to" => array('change' => 224, 'baserank' => 228, 'refrank' => 4), " gr" => array('change' => 133, 'baserank' => 105, 'refrank' => 238),
" ch" => array('change' => 76, 'baserank' => 204, 'refrank' => 128), "ati" => array('change' => 167, 'baserank' => 252, 'refrank' => 85),
" th" => array('change' => 0, 'baserank' => 0, 'refrank' => 0), " ec" => array('change' => 300, 'baserank' => 206, 'refrank' => null),
" wo" => array('change' => 115, 'baserank' => 34, 'refrank' => 149), "ope" => array('change' => 300, 'baserank' => 168, 'refrank' => null),
" a " => array('change' => 180, 'baserank' => 199, 'refrank' => 19), "one" => array('change' => 76, 'baserank' => 167, 'refrank' => 243),
"n f" => array('change' => 300, 'baserank' => 45, 'refrank' => null), "eat" => array('change' => 300, 'baserank' => 130, 'refrank' => null),
"ica" => array('change' => 198, 'baserank' => 75, 'refrank' => 273), "inc" => array('change' => 300, 'baserank' => 147, 'refrank' => null),
"enc" => array('change' => 300, 'baserank' => 69, 'refrank' => null), "ore" => array('change' => 204, 'baserank' => 86, 'refrank' => 290),
"is " => array('change' => 1, 'baserank' => 43, 'refrank' => 44), " as" => array('change' => 139, 'baserank' => 32, 'refrank' => 171),
"nts" => array('change' => 300, 'baserank' => 165, 'refrank' => null), "d m" => array('change' => 300, 'baserank' => 279, 'refrank' => null),
"her" => array('change' => 112, 'baserank' => 143, 'refrank' => 31), " al" => array('change' => 65, 'baserank' => 200, 'refrank' => 135),
" is" => array('change' => 105, 'baserank' => 107, 'refrank' => 212), "e t" => array('change' => 46, 'baserank' => 68, 'refrank' => 22),
"c r" => array('change' => 300, 'baserank' => 261, 'refrank' => null), " hi" => array('change' => 45, 'baserank' => 106, 'refrank' => 61),
"cia" => array('change' => 300, 'baserank' => 267, 'refrank' => null), " fr" => array('change' => 37, 'baserank' => 209, 'refrank' => 172),
"ult" => array('change' => 300, 'baserank' => 96, 'refrank' => null), "e m" => array('change' => 9, 'baserank' => 128, 'refrank' => 119),
"ass" => array('change' => 300, 'baserank' => 250, 'refrank' => null), "s o" => array('change' => 2, 'baserank' => 92, 'refrank' => 90),
"pop" => array('change' => 300, 'baserank' => 173, 'refrank' => null), "nd " => array('change' => 2, 'baserank' => 12, 'refrank' => 10),
"the" => array('change' => 0, 'baserank' => 1, 'refrank' => 1), " st" => array('change' => 197, 'baserank' => 226, 'refrank' => 29),
" no" => array('change' => 130, 'baserank' => 218, 'refrank' => 88), "ast" => array('change' => 300, 'baserank' => 251, 'refrank' => null),
" fi" => array('change' => 300, 'baserank' => 208, 'refrank' => null), "ess" => array('change' => 160, 'baserank' => 135, 'refrank' => 295),
"gre" => array('change' => 300, 'baserank' => 40, 'refrank' => null), "h a" => array('change' => 300, 'baserank' => 142, 'refrank' => null),
"duo" => array('change' => 300, 'baserank' => 287, 'refrank' => null), " so" => array('change' => 6, 'baserank' => 114, 'refrank' => 120),
"es " => array('change' => 48, 'baserank' => 72, 'refrank' => 24), "for" => array('change' => 96, 'baserank' => 139, 'refrank' => 43),
"gan" => array('change' => 300, 'baserank' => 140, 'refrank' => null), "per" => array('change' => 111, 'baserank' => 171, 'refrank' => 282),
"thi" => array('change' => 33, 'baserank' => 191, 'refrank' => 224), " of" => array('change' => 6, 'baserank' => 5, 'refrank' => 11),
" cl" => array('change' => 300, 'baserank' => 205, 'refrank' => null), " sc" => array('change' => 300, 'baserank' => 222, 'refrank' => null),
"t t" => array('change' => 49, 'baserank' => 94, 'refrank' => 45), "als" => array('change' => 300, 'baserank' => 242, 'refrank' => null),
"avi" => array('change' => 300, 'baserank' => 255, 'refrank' => null), "cie" => array('change' => 300, 'baserank' => 268, 'refrank' => null),
" du" => array('change' => 300, 'baserank' => 101, 'refrank' => null), "pre" => array('change' => 105, 'baserank' => 174, 'refrank' => 279),
"as " => array('change' => 17, 'baserank' => 25, 'refrank' => 42), "a a" => array('change' => 300, 'baserank' => 234, 'refrank' => null),
"gel" => array('change' => 300, 'baserank' => 141, 'refrank' => null), "ite" => array('change' => 300, 'baserank' => 149, 'refrank' => null),
"n r" => array('change' => 300, 'baserank' => 30, 'refrank' => null), "by " => array('change' => 105, 'baserank' => 121, 'refrank' => 226),
"d u" => array('change' => 300, 'baserank' => 282, 'refrank' => null), "clu" => array('change' => 300, 'baserank' => 270, 'refrank' => null),
" ur" => array('change' => 300, 'baserank' => 229, 'refrank' => null), "ebu" => array('change' => 300, 'baserank' => 298, 'refrank' => null),
"n i" => array('change' => 300, 'baserank' => 158, 'refrank' => null), "he " => array('change' => 0, 'baserank' => 2, 'refrank' => 2),
" wh" => array('change' => 195, 'baserank' => 232, 'refrank' => 37), " ph" => array('change' => 300, 'baserank' => 220, 'refrank' => null),
);
$ranked = $this->x->_arr_rank($this->x->_trigram($str));
$results = $this->x->detect($str);
$count = count($ranked);
$sum = 0;
//foreach ($this->x->_lang_db['english'] as $key => $value) {
foreach ($ranked as $key => $value) {
if (isset($ranked[$key]) && isset($this->x->_lang_db['english'][$key])) {
$difference = abs($this->x->_lang_db['english'][$key] - $ranked[$key]);
} else {
$difference = 300;
}
$this->assertTrue(isset($true_differences[$key]), "'$key'");
if (isset($true_differences[$key])) {
$this->assertEquals($true_differences[$key]['change'], $difference, "'$key'");
}
$sum += $difference;
}
$this->assertEquals(300, $count);
$this->assertEquals(59490, $sum);
$this->assertEquals('english', key($results));
$this->assertEquals(198, floor(current($results)));
next($results);
$this->assertEquals('italian', key($results));
$this->assertEquals(228, floor(current($results)));
}
/**
 * Checks trigram extraction, ranking and distance scoring for a short
 * French sentence.
 *
 * The detector is switched with setPerlCompatible() before anything is
 * computed; per the original author's note the expected ranks were
 * verified in Language::Guess.
 *
 * The test proceeds in three steps:
 *  - the ranked trigrams of the sentence must match $correct_ranks exactly;
 *  - for every ranked trigram, the rank in the sample, the rank in the
 *    French database and the absolute difference between the two must
 *    match $distances (a trigram missing from the French database counts
 *    as a difference of 300);
 *  - the summed differences must equal what _distance() returns, and the
 *    per-language scores returned by detect() must match the known values.
 */
function test_french ()
{
    $this->x->setPerlCompatible();
    $str = "Verifions que le détecteur de langues marche";

    $trigrams = $this->x->_trigram($str);
    $this->assertEquals(42, count($trigrams));

    // verified in Language::Guess
    $ranked = $this->x->_arr_rank($trigrams);
    $this->assertEquals(0, $ranked['e l']);

    $correct_ranks = array(
        ' de' => 1,
        "éte" => 41,
        "dét" => 12,
        'fio' => 18,
        'de ' => 11,
        'ons' => 28,
        'ect' => 14,
        'le ' => 24,
        'arc' => 8,
        'lan' => 23,
        'es ' => 16,
        'mar' => 25,
        " dé" => 2,
        'ifi' => 21,
        'gue' => 19,
        'ur ' => 39,
        'rch' => 31,
        'ang' => 7,
        'que' => 29,
        'ngu' => 26,
        'e d' => 13,
        'rif' => 32,
        ' ma' => 5,
        'tec' => 35,
        'ns ' => 27,
        ' la' => 3,
        ' le' => 4,
        'r d' => 30,
        'e l' => 0,
        'che' => 9,
        's m' => 33,
        'ue ' => 37,
        'ver' => 40,
        'teu' => 36,
        'eri' => 15,
        'cte' => 10,
        'ues' => 38,
        's q' => 34,
        'eur' => 17,
        ' qu' => 6,
        'he ' => 20,
        'ion' => 22
    );

    $this->assertEquals(count($correct_ranks), count($ranked), "different number of trigrams found");

    // Compare every expected rank, not just the number of entries, so the
    // reference table above is actually enforced.
    foreach ($correct_ranks as $key => $expected_rank) {
        $this->assertArrayHasKey($key, $ranked, "missing trigram '$key'");
        $this->assertEquals($expected_rank, $ranked[$key], "rank for '$key'");
    }

    // change   = |refrank - baserank|, or 300 when the trigram is not in
    //            the French database (refrank === null)
    // baserank = rank of the trigram inside the sample sentence
    // refrank  = rank of the trigram inside the French database
    $distances = array(
        ' de' => array('change' => 0, 'baserank' => 1, 'refrank' => 1),
        'éte' => array('change' => 300, 'baserank' => 41, 'refrank' => null),
        'dét' => array('change' => 300, 'baserank' => 12, 'refrank' => null),
        'fio' => array('change' => 300, 'baserank' => 18, 'refrank' => null),
        'de ' => array('change' => 9, 'baserank' => 11, 'refrank' => 2),
        'ons' => array('change' => 11, 'baserank' => 28, 'refrank' => 39),
        'ect' => array('change' => 300, 'baserank' => 14, 'refrank' => null),
        'le ' => array('change' => 19, 'baserank' => 24, 'refrank' => 5),
        'arc' => array('change' => 300, 'baserank' => 8, 'refrank' => null),
        'lan' => array('change' => 300, 'baserank' => 23, 'refrank' => null),
        'es ' => array('change' => 16, 'baserank' => 16, 'refrank' => 0),
        'mar' => array('change' => 300, 'baserank' => 25, 'refrank' => null),
        ' dé' => array('change' => 59, 'baserank' => 2, 'refrank' => 61),
        'ifi' => array('change' => 300, 'baserank' => 21, 'refrank' => null),
        'gue' => array('change' => 300, 'baserank' => 19, 'refrank' => null),
        'ur ' => array('change' => 12, 'baserank' => 39, 'refrank' => 27),
        'rch' => array('change' => 300, 'baserank' => 31, 'refrank' => null),
        'ang' => array('change' => 300, 'baserank' => 7, 'refrank' => null),
        'que' => array('change' => 5, 'baserank' => 29, 'refrank' => 24),
        'ngu' => array('change' => 300, 'baserank' => 26, 'refrank' => null),
        'e d' => array('change' => 2, 'baserank' => 13, 'refrank' => 15),
        'rif' => array('change' => 300, 'baserank' => 32, 'refrank' => null),
        ' ma' => array('change' => 89, 'baserank' => 5, 'refrank' => 94),
        'tec' => array('change' => 300, 'baserank' => 35, 'refrank' => null),
        'ns ' => array('change' => 6, 'baserank' => 27, 'refrank' => 21),
        ' la' => array('change' => 6, 'baserank' => 3, 'refrank' => 9),
        ' le' => array('change' => 1, 'baserank' => 4, 'refrank' => 3),
        'r d' => array('change' => 202, 'baserank' => 30, 'refrank' => 232),
        'e l' => array('change' => 14, 'baserank' => 0, 'refrank' => 14),
        'che' => array('change' => 300, 'baserank' => 9, 'refrank' => null),
        's m' => array('change' => 180, 'baserank' => 33, 'refrank' => 213),
        'ue ' => array('change' => 7, 'baserank' => 37, 'refrank' => 30),
        'ver' => array('change' => 117, 'baserank' => 40, 'refrank' => 157),
        'teu' => array('change' => 300, 'baserank' => 36, 'refrank' => null),
        'eri' => array('change' => 300, 'baserank' => 15, 'refrank' => null),
        'cte' => array('change' => 300, 'baserank' => 10, 'refrank' => null),
        'ues' => array('change' => 237, 'baserank' => 38, 'refrank' => 275),
        's q' => array('change' => 300, 'baserank' => 34, 'refrank' => null),
        'eur' => array('change' => 56, 'baserank' => 17, 'refrank' => 73),
        ' qu' => array('change' => 31, 'baserank' => 6, 'refrank' => 37),
        'he ' => array('change' => 300, 'baserank' => 20, 'refrank' => null),
        'ion' => array('change' => 12, 'baserank' => 22, 'refrank' => 10),
    );

    $french_ranks = $this->x->_lang_db['french'];
    $sumchange = 0;

    foreach ($ranked as $key => $rank) {
        // Recompute the per-trigram distance independently of _distance().
        if (isset($french_ranks[$key])) {
            $difference = abs($french_ranks[$key] - $rank);
        } else {
            $difference = 300;
        }

        // A failed assertion aborts the test, so no further guard is needed
        // before reading $distances[$key].
        $this->assertArrayHasKey($key, $distances, $key);
        $expected = $distances[$key];

        $this->assertEquals($expected['baserank'], $rank, "baserank for $key");
        if ($expected['refrank'] === null) {
            $this->assertArrayNotHasKey($key, $french_ranks);
        } else {
            $this->assertEquals($expected['refrank'], $french_ranks[$key], "refrank for $key");
        }
        $this->assertEquals($expected['change'], $difference, "difference for $key");

        $sumchange += $difference;
    }

    // The hand-computed sum must agree with the library's own distance.
    $actual_result = $this->x->_distance($french_ranks, $ranked);
    $this->assertEquals($sumchange, $actual_result);
    $this->assertEquals(7091, $actual_result);
    $this->assertEquals(168, floor($sumchange/count($trigrams)));

    $final_result = $this->x->detect($str);
    $this->assertEquals(168, floor($final_result['french']));
    $this->assertEquals(211, $final_result['spanish']);
}
function test_russian ()
{
$str = 'авай проверить узнает ли наш угадатель русски язык';
$this->x->setPerlCompatible();
$trigrams = $this->x->_trigram($str);
$ranked = $this->x->_arr_rank($trigrams);