Merge pull request #5302 from annando/language

The detected language is now stored in "item-content" as well
This commit is contained in:
Hypolite Petovan 2018-06-30 08:42:35 -04:00 committed by GitHub
commit 3227cd4207
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 23 additions and 38 deletions

View file

@ -41,7 +41,7 @@ define('FRIENDICA_PLATFORM', 'Friendica');
define('FRIENDICA_CODENAME', 'The Tazmans Flax-lily');
define('FRIENDICA_VERSION', '2018.08-dev');
define('DFRN_PROTOCOL_VERSION', '2.23');
define('DB_UPDATE_VERSION', 1272);
define('DB_UPDATE_VERSION', 1273);
define('NEW_UPDATE_ROUTINE_VERSION', 1170);
/**

View file

@ -1,6 +1,6 @@
-- ------------------------------------------
-- Friendica 2018.08-dev (The Tazmans Flax-lily)
-- DB_UPDATE_VERSION 1272
-- DB_UPDATE_VERSION 1273
-- ------------------------------------------
@ -557,6 +557,7 @@ CREATE TABLE IF NOT EXISTS `item-content` (
`body` mediumtext COMMENT 'item body content',
`location` varchar(255) NOT NULL DEFAULT '' COMMENT 'text location where this item originated',
`coord` varchar(255) NOT NULL DEFAULT '' COMMENT 'longitude/latitude pair representing location where this item originated',
`language` text COMMENT 'Language information about this post',
`app` varchar(255) NOT NULL DEFAULT '' COMMENT 'application which generated this item',
`rendered-hash` varchar(32) NOT NULL DEFAULT '' COMMENT '',
`rendered-html` mediumtext COMMENT 'item.body converted to html',

View file

@ -1262,6 +1262,7 @@ class DBStructure
"body" => ["type" => "mediumtext", "comment" => "item body content"],
"location" => ["type" => "varchar(255)", "not null" => "1", "default" => "", "comment" => "text location where this item originated"],
"coord" => ["type" => "varchar(255)", "not null" => "1", "default" => "", "comment" => "longitude/latitude pair representing location where this item originated"],
"language" => ["type" => "text", "comment" => "Language information about this post"],
"app" => ["type" => "varchar(255)", "not null" => "1", "default" => "", "comment" => "application which generated this item"],
"rendered-hash" => ["type" => "varchar(32)", "not null" => "1", "default" => "", "comment" => ""],
"rendered-html" => ["type" => "mediumtext", "comment" => "item.body converted to html"],

View file

@ -37,7 +37,7 @@ class Item extends BaseObject
// Field list that is used to display the items
const DISPLAY_FIELDLIST = ['uid', 'id', 'parent', 'uri', 'thr-parent', 'parent-uri', 'guid', 'network',
'commented', 'created', 'edited', 'received', 'verb', 'object-type', 'postopts', 'plink',
'wall', 'private', 'starred', 'origin', 'title', 'body', 'file', 'attach',
'wall', 'private', 'starred', 'origin', 'title', 'body', 'file', 'attach', 'language',
'content-warning', 'location', 'coord', 'app', 'rendered-hash', 'rendered-html', 'object',
'allow_cid', 'allow_gid', 'deny_cid', 'deny_gid', 'item_id',
'author-id', 'author-link', 'author-name', 'author-avatar',
@ -58,10 +58,13 @@ class Item extends BaseObject
'signed_text', 'signature', 'signer'];
// Field list for "item-content" table that is mixed with the item table
const CONTENT_FIELDLIST = ['title', 'content-warning', 'body', 'location',
const MIXED_CONTENT_FIELDLIST = ['title', 'content-warning', 'body', 'location',
'coord', 'app', 'rendered-hash', 'rendered-html', 'verb',
'object-type', 'object', 'target-type', 'target', 'plink'];
// Field list for "item-content" table that is not present in the "item" table
const CONTENT_FIELDLIST = ['language'];
// All fields in the item table
const ITEM_FIELDLIST = ['id', 'uid', 'parent', 'uri', 'parent-uri', 'thr-parent', 'guid',
'contact-id', 'type', 'wall', 'gravity', 'extid', 'icid',
@ -86,7 +89,7 @@ class Item extends BaseObject
$row = dba::fetch($stmt);
// Fetch data from the item-content table whenever there is content there
foreach (self::CONTENT_FIELDLIST as $field) {
foreach (self::MIXED_CONTENT_FIELDLIST as $field) {
if (empty($row[$field]) && !empty($row['item-' . $field])) {
$row[$field] = $row['item-' . $field];
}
@ -406,7 +409,7 @@ class Item extends BaseObject
'unseen', 'deleted', 'origin', 'forum_mode', 'mention', 'global',
'id' => 'item_id', 'network', 'icid'];
$fields['item-content'] = self::CONTENT_FIELDLIST;
$fields['item-content'] = array_merge(self::CONTENT_FIELDLIST, self::MIXED_CONTENT_FIELDLIST);
$fields['author'] = ['url' => 'author-link', 'name' => 'author-name',
'thumb' => 'author-avatar', 'nick' => 'author-nick'];
@ -526,7 +529,7 @@ class Item extends BaseObject
foreach ($fields as $table => $table_fields) {
foreach ($table_fields as $field => $select) {
if (empty($selected) || in_array($select, $selected)) {
if (in_array($select, self::CONTENT_FIELDLIST)) {
if (in_array($select, self::MIXED_CONTENT_FIELDLIST)) {
$selection[] = "`item`.`".$select."` AS `item-" . $select . "`";
}
if (is_int($field)) {
@ -594,7 +597,7 @@ class Item extends BaseObject
$items = dba::select('item', ['id', 'origin', 'uri', 'plink'], $condition);
$content_fields = [];
foreach (self::CONTENT_FIELDLIST as $field) {
foreach (array_merge(self::CONTENT_FIELDLIST, self::MIXED_CONTENT_FIELDLIST) as $field) {
if (isset($fields[$field])) {
$content_fields[$field] = $fields[$field];
unset($fields[$field]);
@ -1032,7 +1035,7 @@ class Item extends BaseObject
}
}
self::addLanguageInPostopts($item);
self::addLanguageToItemArray($item);
$item['wall'] = intval(defaults($item, 'wall', 0));
$item['extid'] = trim(defaults($item, 'extid', ''));
@ -1498,7 +1501,7 @@ class Item extends BaseObject
$fields = ['uri' => $item['uri'], 'plink' => $item['plink'],
'uri-plink-hash' => hash('sha1', $item['plink']).hash('sha1', $item['uri'])];
foreach (self::CONTENT_FIELDLIST as $field) {
foreach (array_merge(self::CONTENT_FIELDLIST, self::MIXED_CONTENT_FIELDLIST) as $field) {
if (isset($item[$field])) {
$fields[$field] = $item[$field];
unset($item[$field]);
@ -1556,7 +1559,7 @@ class Item extends BaseObject
{
// We have to select only the fields from the "item-content" table
$fields = [];
foreach (self::CONTENT_FIELDLIST as $field) {
foreach (array_merge(self::CONTENT_FIELDLIST, self::MIXED_CONTENT_FIELDLIST) as $field) {
if (isset($item[$field])) {
$fields[$field] = $item[$field];
}
@ -1822,39 +1825,19 @@ class Item extends BaseObject
}
/**
* Adds a "lang" specification in a "postopts" element of given $arr,
* if possible and not already present.
* Adds a language specification in a "language" element of the given $item.
* Expects a "body" element to exist in $item.
*/
private static function addLanguageInPostopts(&$item)
private static function addLanguageToItemArray(&$item)
{
$postopts = "";
if (!empty($item['postopts'])) {
if (strstr($item['postopts'], 'lang=')) {
// do not override
return;
}
$postopts = $item['postopts'];
}
$naked_body = Text\BBCode::toPlaintext($item['body'], false);
$languages = (new Text_LanguageDetect())->detect($naked_body, 3);
$ld = new Text_LanguageDetect();
$ld->setNameMode(2);
$languages = $ld->detect($naked_body, 3);
if (sizeof($languages) > 0) {
if ($postopts != '') {
$postopts .= '&'; // arbitrary separator, to be reviewed
}
$postopts .= 'lang=';
$sep = "";
foreach ($languages as $language => $score) {
$postopts .= $sep . $language . ";" . $score;
$sep = ':';
}
$item['postopts'] = $postopts;
if (is_array($languages)) {
$item['language'] = json_encode($languages);
}
}