Merge pull request #5302 from annando/language

The detected language is now moved to "item-content" as well
This commit is contained in:
Hypolite Petovan 2018-06-30 08:42:35 -04:00 committed by GitHub
commit 3227cd4207
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 23 additions and 38 deletions

View file

@ -41,7 +41,7 @@ define('FRIENDICA_PLATFORM', 'Friendica');
define('FRIENDICA_CODENAME', 'The Tazmans Flax-lily'); define('FRIENDICA_CODENAME', 'The Tazmans Flax-lily');
define('FRIENDICA_VERSION', '2018.08-dev'); define('FRIENDICA_VERSION', '2018.08-dev');
define('DFRN_PROTOCOL_VERSION', '2.23'); define('DFRN_PROTOCOL_VERSION', '2.23');
define('DB_UPDATE_VERSION', 1272); define('DB_UPDATE_VERSION', 1273);
define('NEW_UPDATE_ROUTINE_VERSION', 1170); define('NEW_UPDATE_ROUTINE_VERSION', 1170);
/** /**

View file

@ -1,6 +1,6 @@
-- ------------------------------------------ -- ------------------------------------------
-- Friendica 2018.08-dev (The Tazmans Flax-lily) -- Friendica 2018.08-dev (The Tazmans Flax-lily)
-- DB_UPDATE_VERSION 1272 -- DB_UPDATE_VERSION 1273
-- ------------------------------------------ -- ------------------------------------------
@ -557,6 +557,7 @@ CREATE TABLE IF NOT EXISTS `item-content` (
`body` mediumtext COMMENT 'item body content', `body` mediumtext COMMENT 'item body content',
`location` varchar(255) NOT NULL DEFAULT '' COMMENT 'text location where this item originated', `location` varchar(255) NOT NULL DEFAULT '' COMMENT 'text location where this item originated',
`coord` varchar(255) NOT NULL DEFAULT '' COMMENT 'longitude/latitude pair representing location where this item originated', `coord` varchar(255) NOT NULL DEFAULT '' COMMENT 'longitude/latitude pair representing location where this item originated',
`language` text COMMENT 'Language information about this post',
`app` varchar(255) NOT NULL DEFAULT '' COMMENT 'application which generated this item', `app` varchar(255) NOT NULL DEFAULT '' COMMENT 'application which generated this item',
`rendered-hash` varchar(32) NOT NULL DEFAULT '' COMMENT '', `rendered-hash` varchar(32) NOT NULL DEFAULT '' COMMENT '',
`rendered-html` mediumtext COMMENT 'item.body converted to html', `rendered-html` mediumtext COMMENT 'item.body converted to html',

View file

@ -1262,6 +1262,7 @@ class DBStructure
"body" => ["type" => "mediumtext", "comment" => "item body content"], "body" => ["type" => "mediumtext", "comment" => "item body content"],
"location" => ["type" => "varchar(255)", "not null" => "1", "default" => "", "comment" => "text location where this item originated"], "location" => ["type" => "varchar(255)", "not null" => "1", "default" => "", "comment" => "text location where this item originated"],
"coord" => ["type" => "varchar(255)", "not null" => "1", "default" => "", "comment" => "longitude/latitude pair representing location where this item originated"], "coord" => ["type" => "varchar(255)", "not null" => "1", "default" => "", "comment" => "longitude/latitude pair representing location where this item originated"],
"language" => ["type" => "text", "comment" => "Language information about this post"],
"app" => ["type" => "varchar(255)", "not null" => "1", "default" => "", "comment" => "application which generated this item"], "app" => ["type" => "varchar(255)", "not null" => "1", "default" => "", "comment" => "application which generated this item"],
"rendered-hash" => ["type" => "varchar(32)", "not null" => "1", "default" => "", "comment" => ""], "rendered-hash" => ["type" => "varchar(32)", "not null" => "1", "default" => "", "comment" => ""],
"rendered-html" => ["type" => "mediumtext", "comment" => "item.body converted to html"], "rendered-html" => ["type" => "mediumtext", "comment" => "item.body converted to html"],

View file

@ -37,7 +37,7 @@ class Item extends BaseObject
// Field list that is used to display the items // Field list that is used to display the items
const DISPLAY_FIELDLIST = ['uid', 'id', 'parent', 'uri', 'thr-parent', 'parent-uri', 'guid', 'network', const DISPLAY_FIELDLIST = ['uid', 'id', 'parent', 'uri', 'thr-parent', 'parent-uri', 'guid', 'network',
'commented', 'created', 'edited', 'received', 'verb', 'object-type', 'postopts', 'plink', 'commented', 'created', 'edited', 'received', 'verb', 'object-type', 'postopts', 'plink',
'wall', 'private', 'starred', 'origin', 'title', 'body', 'file', 'attach', 'wall', 'private', 'starred', 'origin', 'title', 'body', 'file', 'attach', 'language',
'content-warning', 'location', 'coord', 'app', 'rendered-hash', 'rendered-html', 'object', 'content-warning', 'location', 'coord', 'app', 'rendered-hash', 'rendered-html', 'object',
'allow_cid', 'allow_gid', 'deny_cid', 'deny_gid', 'item_id', 'allow_cid', 'allow_gid', 'deny_cid', 'deny_gid', 'item_id',
'author-id', 'author-link', 'author-name', 'author-avatar', 'author-id', 'author-link', 'author-name', 'author-avatar',
@ -58,10 +58,13 @@ class Item extends BaseObject
'signed_text', 'signature', 'signer']; 'signed_text', 'signature', 'signer'];
// Field list for "item-content" table that is mixed with the item table // Field list for "item-content" table that is mixed with the item table
const CONTENT_FIELDLIST = ['title', 'content-warning', 'body', 'location', const MIXED_CONTENT_FIELDLIST = ['title', 'content-warning', 'body', 'location',
'coord', 'app', 'rendered-hash', 'rendered-html', 'verb', 'coord', 'app', 'rendered-hash', 'rendered-html', 'verb',
'object-type', 'object', 'target-type', 'target', 'plink']; 'object-type', 'object', 'target-type', 'target', 'plink'];
// Field list for "item-content" table that is not present in the "item" table
const CONTENT_FIELDLIST = ['language'];
// All fields in the item table // All fields in the item table
const ITEM_FIELDLIST = ['id', 'uid', 'parent', 'uri', 'parent-uri', 'thr-parent', 'guid', const ITEM_FIELDLIST = ['id', 'uid', 'parent', 'uri', 'parent-uri', 'thr-parent', 'guid',
'contact-id', 'type', 'wall', 'gravity', 'extid', 'icid', 'contact-id', 'type', 'wall', 'gravity', 'extid', 'icid',
@ -86,7 +89,7 @@ class Item extends BaseObject
$row = dba::fetch($stmt); $row = dba::fetch($stmt);
// Fetch data from the item-content table whenever there is content there // Fetch data from the item-content table whenever there is content there
foreach (self::CONTENT_FIELDLIST as $field) { foreach (self::MIXED_CONTENT_FIELDLIST as $field) {
if (empty($row[$field]) && !empty($row['item-' . $field])) { if (empty($row[$field]) && !empty($row['item-' . $field])) {
$row[$field] = $row['item-' . $field]; $row[$field] = $row['item-' . $field];
} }
@ -406,7 +409,7 @@ class Item extends BaseObject
'unseen', 'deleted', 'origin', 'forum_mode', 'mention', 'global', 'unseen', 'deleted', 'origin', 'forum_mode', 'mention', 'global',
'id' => 'item_id', 'network', 'icid']; 'id' => 'item_id', 'network', 'icid'];
$fields['item-content'] = self::CONTENT_FIELDLIST; $fields['item-content'] = array_merge(self::CONTENT_FIELDLIST, self::MIXED_CONTENT_FIELDLIST);
$fields['author'] = ['url' => 'author-link', 'name' => 'author-name', $fields['author'] = ['url' => 'author-link', 'name' => 'author-name',
'thumb' => 'author-avatar', 'nick' => 'author-nick']; 'thumb' => 'author-avatar', 'nick' => 'author-nick'];
@ -526,7 +529,7 @@ class Item extends BaseObject
foreach ($fields as $table => $table_fields) { foreach ($fields as $table => $table_fields) {
foreach ($table_fields as $field => $select) { foreach ($table_fields as $field => $select) {
if (empty($selected) || in_array($select, $selected)) { if (empty($selected) || in_array($select, $selected)) {
if (in_array($select, self::CONTENT_FIELDLIST)) { if (in_array($select, self::MIXED_CONTENT_FIELDLIST)) {
$selection[] = "`item`.`".$select."` AS `item-" . $select . "`"; $selection[] = "`item`.`".$select."` AS `item-" . $select . "`";
} }
if (is_int($field)) { if (is_int($field)) {
@ -594,7 +597,7 @@ class Item extends BaseObject
$items = dba::select('item', ['id', 'origin', 'uri', 'plink'], $condition); $items = dba::select('item', ['id', 'origin', 'uri', 'plink'], $condition);
$content_fields = []; $content_fields = [];
foreach (self::CONTENT_FIELDLIST as $field) { foreach (array_merge(self::CONTENT_FIELDLIST, self::MIXED_CONTENT_FIELDLIST) as $field) {
if (isset($fields[$field])) { if (isset($fields[$field])) {
$content_fields[$field] = $fields[$field]; $content_fields[$field] = $fields[$field];
unset($fields[$field]); unset($fields[$field]);
@ -1032,7 +1035,7 @@ class Item extends BaseObject
} }
} }
self::addLanguageInPostopts($item); self::addLanguageToItemArray($item);
$item['wall'] = intval(defaults($item, 'wall', 0)); $item['wall'] = intval(defaults($item, 'wall', 0));
$item['extid'] = trim(defaults($item, 'extid', '')); $item['extid'] = trim(defaults($item, 'extid', ''));
@ -1498,7 +1501,7 @@ class Item extends BaseObject
$fields = ['uri' => $item['uri'], 'plink' => $item['plink'], $fields = ['uri' => $item['uri'], 'plink' => $item['plink'],
'uri-plink-hash' => hash('sha1', $item['plink']).hash('sha1', $item['uri'])]; 'uri-plink-hash' => hash('sha1', $item['plink']).hash('sha1', $item['uri'])];
foreach (self::CONTENT_FIELDLIST as $field) { foreach (array_merge(self::CONTENT_FIELDLIST, self::MIXED_CONTENT_FIELDLIST) as $field) {
if (isset($item[$field])) { if (isset($item[$field])) {
$fields[$field] = $item[$field]; $fields[$field] = $item[$field];
unset($item[$field]); unset($item[$field]);
@ -1556,7 +1559,7 @@ class Item extends BaseObject
{ {
// We have to select only the fields from the "item-content" table // We have to select only the fields from the "item-content" table
$fields = []; $fields = [];
foreach (self::CONTENT_FIELDLIST as $field) { foreach (array_merge(self::CONTENT_FIELDLIST, self::MIXED_CONTENT_FIELDLIST) as $field) {
if (isset($item[$field])) { if (isset($item[$field])) {
$fields[$field] = $item[$field]; $fields[$field] = $item[$field];
} }
@ -1822,39 +1825,19 @@ class Item extends BaseObject
} }
/** /**
* Adds a "lang" specification in a "postopts" element of given $arr, * Adds a language specification in a "language" element of given $arr.
* if possible and not already present.
* Expects "body" element to exist in $arr. * Expects "body" element to exist in $arr.
*/ */
private static function addLanguageInPostopts(&$item) private static function addLanguageToItemArray(&$item)
{ {
$postopts = "";
if (!empty($item['postopts'])) {
if (strstr($item['postopts'], 'lang=')) {
// do not override
return;
}
$postopts = $item['postopts'];
}
$naked_body = Text\BBCode::toPlaintext($item['body'], false); $naked_body = Text\BBCode::toPlaintext($item['body'], false);
$languages = (new Text_LanguageDetect())->detect($naked_body, 3); $ld = new Text_LanguageDetect();
$ld->setNameMode(2);
$languages = $ld->detect($naked_body, 3);
if (sizeof($languages) > 0) { if (is_array($languages)) {
if ($postopts != '') { $item['language'] = json_encode($languages);
$postopts .= '&'; // arbitrary separator, to be reviewed
}
$postopts .= 'lang=';
$sep = "";
foreach ($languages as $language => $score) {
$postopts .= $sep . $language . ";" . $score;
$sep = ':';
}
$item['postopts'] = $postopts;
} }
} }