We now store the tags in two separate tables

This commit is contained in:
Michael 2020-04-17 06:35:20 +00:00
parent d3f4e4d629
commit db657b0149
8 changed files with 121 additions and 98 deletions

View file

@ -141,13 +141,14 @@ function query_page_info($url, $photo = "", $keywords = false, $keyword_blacklis
return $data; return $data;
} }
function add_page_keywords($url, $photo = "", $keywords = false, $keyword_blacklist = "", $return_array = false) function add_page_keywords($url, $photo = "", $keywords = false, $keyword_blacklist = "")
{ {
$data = query_page_info($url, $photo, $keywords, $keyword_blacklist); $data = query_page_info($url, $photo, $keywords, $keyword_blacklist);
if (empty($data['keywords']) || !is_array($data['keywords'])) {
return '';
}
$tags = ""; $tags = "";
$taglist = [];
if (isset($data["keywords"]) && count($data["keywords"])) {
foreach ($data["keywords"] as $keyword) { foreach ($data["keywords"] as $keyword) {
$hashtag = str_replace([" ", "+", "/", ".", "#", "'"], $hashtag = str_replace([" ", "+", "/", ".", "#", "'"],
["", "", "", "", "", ""], $keyword); ["", "", "", "", "", ""], $keyword);
@ -157,15 +158,27 @@ function add_page_keywords($url, $photo = "", $keywords = false, $keyword_blackl
} }
$tags .= "#[url=" . DI::baseUrl() . "/search?tag=" . $hashtag . "]" . $hashtag . "[/url]"; $tags .= "#[url=" . DI::baseUrl() . "/search?tag=" . $hashtag . "]" . $hashtag . "[/url]";
$taglist[] = $hashtag;
}
} }
if ($return_array) {
return $taglist;
} else {
return $tags; return $tags;
}
/**
 * Return the keywords of a page as a plain list of hashtag names.
 *
 * The page data is fetched via query_page_info() with the same arguments.
 * Each keyword has spaces and the characters + / . # ' removed. No BBCode
 * links are built here; the result is just the list of cleaned names.
 *
 * @param string $url               Passed through to query_page_info()
 * @param string $photo             Passed through to query_page_info()
 * @param bool   $keywords          Passed through to query_page_info()
 * @param string $keyword_blacklist Passed through to query_page_info()
 *
 * @return array Cleaned keywords, or an empty array when the page data has
 *               no non-empty "keywords" array
 */
function get_page_keywords($url, $photo = "", $keywords = false, $keyword_blacklist = "")
{
$data = query_page_info($url, $photo, $keywords, $keyword_blacklist);
// Nothing to do when no usable keyword list was returned
if (empty($data['keywords']) || !is_array($data['keywords'])) {
return [];
} }
$taglist = [];
foreach ($data['keywords'] as $keyword) {
// Strip the characters that are removed from hashtag names
$hashtag = str_replace([" ", "+", "/", ".", "#", "'"],
["", "", "", "", "", ""], $keyword);
$taglist[] = $hashtag;
}
return $taglist;
} }
function add_page_info($url, $no_photos = false, $photo = "", $keywords = false, $keyword_blacklist = "") function add_page_info($url, $no_photos = false, $photo = "", $keywords = false, $keyword_blacklist = "")

View file

@ -54,6 +54,54 @@ class Tag
self::EXCLUSIVE_MENTION => '!', self::EXCLUSIVE_MENTION => '!',
]; ];
/**
 * Store a tag or mention and link it to the given post.
 *
 * The name is stripped of surrounding control characters, whitespace, 0xFF
 * and the characters # ! @, then cut to 64 bytes. A matching row in the
 * "tag" table is looked up by name, type and url, and created if missing.
 * Finally a "post-tag" row links the tag id to the uri-id.
 *
 * @param int    $uriid uri-id of the post the tag belongs to
 * @param int    $type  Tag type (one of the type constants of this class)
 * @param string $name  Tag or mention name, with or without leading tag character
 * @param string $url   Optional URL; ignored when empty or equal to the name
 *
 * @return void
 */
public static function store(int $uriid, int $type, string $name, string $url = '')
{
	$name = trim($name, "\x00..\x20\xFF#!@");
	if (empty($name)) {
		return;
	}

	// The URL is always part of the lookup. Without it a tag given without
	// a URL would match any row with the same name and type, whatever URL
	// that row carries, and the post would be linked to the wrong tag.
	$fields = ['name' => substr($name, 0, 64), 'type' => $type, 'url' => ''];

	if (!empty($url) && ($url != $name)) {
		$fields['url'] = strtolower($url);
	}

	$tag = DBA::selectFirst('tag', ['id'], $fields);
	if (!DBA::isResult($tag)) {
		DBA::insert('tag', $fields, true);
		$tagid = DBA::lastInsertId();

		// NOTE(review): presumably no insert id is reported when the row was
		// created concurrently by another process - look it up again then.
		if (empty($tagid)) {
			$tag = DBA::selectFirst('tag', ['id'], $fields);
			if (DBA::isResult($tag)) {
				$tagid = $tag['id'];
			}
		}
	} else {
		$tagid = $tag['id'];
	}

	if (empty($tagid)) {
		Logger::error('No tag id created', $fields);
		return;
	}

	DBA::insert('post-tag', ['uri-id' => $uriid, 'tid' => $tagid], true);

	Logger::info('Stored tag/mention', ['uri-id' => $uriid, 'tag-id' => $tagid, 'tag' => $fields]);
}
/**
 * Store a tag or mention, deriving its type from the leading tag character.
 *
 * The character is compared against the TAG_CHARACTER entries for mention,
 * exclusive mention, implicit mention and hashtag (in that order). When none
 * of them matches, nothing is stored.
 *
 * @param int    $uriid uri-id of the post the tag belongs to
 * @param string $hash  Tag character that identifies the tag type
 * @param string $name  Tag or mention name
 * @param string $url   Optional URL, handed on to store()
 *
 * @return void
 */
public static function storeByHash(int $uriid, string $hash, string $name, string $url = '')
{
	$candidates = [self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION, self::HASHTAG];

	foreach ($candidates as $candidate) {
		if ($hash == self::TAG_CHARACTER[$candidate]) {
			self::store($uriid, $candidate, $name, $url);
			return;
		}
	}
}
/** /**
* Store tags from the body * Store tags from the body
* *
@ -73,9 +121,7 @@ class Tag
continue; continue;
} }
$fields = ['uri-id' => $uriid, 'name' => substr($tag, 1, 64), 'type' => self::HASHTAG]; self::storeByHash($uriid, '#', $tag);
DBA::insert('tag', $fields, true);
Logger::info('Stored tag', ['uriid' => $uriid, 'tag' => $tag, 'fields' => $fields]);
} }
} }
} }

View file

@ -34,6 +34,7 @@ use Friendica\Model\Event;
use Friendica\Model\Item; use Friendica\Model\Item;
use Friendica\Model\ItemURI; use Friendica\Model\ItemURI;
use Friendica\Model\Mail; use Friendica\Model\Mail;
use Friendica\Model\Tag;
use Friendica\Model\Term; use Friendica\Model\Term;
use Friendica\Model\User; use Friendica\Model\User;
use Friendica\Protocol\Activity; use Friendica\Protocol\Activity;
@ -585,53 +586,43 @@ class Processor
private static function storeTags(int $uriid, array $tags = null) private static function storeTags(int $uriid, array $tags = null)
{ {
// Make sure to delete all existing tags (can happen when called via the update functionality) // Make sure to delete all existing tags (can happen when called via the update functionality)
DBA::delete('tag', ['uri-id' => $uriid]); DBA::delete('post-tag', ['uri-id' => $uriid]);
foreach ($tags as $tag) { foreach ($tags as $tag) {
if (empty($tag['name']) || empty($tag['type']) || !in_array($tag['type'], ['Mention', 'Hashtag'])) { if (empty($tag['name']) || empty($tag['type']) || !in_array($tag['type'], ['Mention', 'Hashtag'])) {
continue; continue;
} }
$fields = ['uri-id' => $uriid, 'name' => $tag['name']]; $hash = substr($tag['name'], 0, 1);
if ($tag['type'] == 'Mention') { if ($tag['type'] == 'Mention') {
$fields['type'] = Term::MENTION; if (in_array($hash, [Tag::TAG_CHARACTER[Tag::MENTION],
Tag::TAG_CHARACTER[Tag::EXCLUSIVE_MENTION],
if (substr($fields['name'], 0, 1) == Term::TAG_CHARACTER[Term::MENTION]) { Tag::TAG_CHARACTER[Tag::IMPLICIT_MENTION]])) {
$fields['name'] = substr($fields['name'], 1); $tag['name'] = substr($tag['name'], 1);
} elseif (substr($fields['name'], 0, 1) == Term::TAG_CHARACTER[Term::EXCLUSIVE_MENTION]) { } else {
$fields['type'] = Term::EXCLUSIVE_MENTION; $hash = '#';
$fields['name'] = substr($fields['name'], 1);
} elseif (substr($fields['name'], 0, 1) == Term::TAG_CHARACTER[Term::IMPLICIT_MENTION]) {
$fields['type'] = Term::IMPLICIT_MENTION;
$fields['name'] = substr($fields['name'], 1);
} }
if (!empty($tag['href'])) { if (!empty($tag['href'])) {
$apcontact = APContact::getByURL($tag['href']); $apcontact = APContact::getByURL($tag['href']);
if (!empty($apcontact['name']) || !empty($apcontact['nick'])) { if (!empty($apcontact['name']) || !empty($apcontact['nick'])) {
$fields['name'] = $apcontact['name'] ?: $apcontact['nick']; $tag['name'] = $apcontact['name'] ?: $apcontact['nick'];
} }
} }
} elseif ($tag['type'] == 'Hashtag') { } elseif ($tag['type'] == 'Hashtag') {
$fields['type'] = Term::HASHTAG; if (substr($tag['name'], 0, 1) == Term::TAG_CHARACTER[Term::HASHTAG]) {
if (substr($fields['name'], 0, 1) == Term::TAG_CHARACTER[Term::HASHTAG]) { $tag['name'] = substr($tag['name'], 1);
$fields['name'] = substr($fields['name'], 1);
}
}
if (empty($fields['name'])) {
continue;
} else { } else {
$fields['name'] = substr($fields['name'], 0, 64); $hash = '@';
}
} }
if (!empty($tag['href'] && ($tag['href'] != $tag['name']))) { if (empty($tag['name'])) {
$fields['url'] = $tag['href']; continue;
} }
DBA::insert('tag', $fields, true); Tag::storeByHash($uriid, $hash, $tag['name'], $tag['href']);
Logger::info('Stored tag/mention', ['uriid' => $uriid, 'tag' => $tag, 'fields' => $fields]);
} }
} }

View file

@ -2464,26 +2464,7 @@ class DFRN
$item["tag"] .= $termhash . "[url=" . $termurl . "]" . $term . "[/url]"; $item["tag"] .= $termhash . "[url=" . $termurl . "]" . $term . "[/url]";
// Store the hashtag/mention Tag::storeByHash($item['uri-id'], $termhash, $term, $termurl);
$fields = ['uri-id' => $item['uri-id'], 'name' => substr($term, 0, 64)];
if ($termhash == Term::TAG_CHARACTER[Term::MENTION]) {
$fields['type'] = Term::MENTION;
} elseif ($termhash == Term::TAG_CHARACTER[Term::EXCLUSIVE_MENTION]) {
$fields['type'] = Term::EXCLUSIVE_MENTION;
} elseif ($termhash == Term::TAG_CHARACTER[Term::IMPLICIT_MENTION]) {
$fields['type'] = Term::IMPLICIT_MENTION;
} elseif ($termhash == Term::TAG_CHARACTER[Term::HASHTAG]) {
$fields['type'] = Term::HASHTAG;
}
if (!empty($termurl)) {
$fields['url'] = $termurl;
}
DBA::insert('tag', $fields, true);
Logger::info('Stored tag/mention', ['uri-id' => $item['uri-id'], 'tag' => $term, 'url' => $termurl, 'hash' => $termhash, 'fields' => $fields]);
} }
} }
} }

View file

@ -1840,20 +1840,7 @@ class Diaspora
continue; continue;
} }
$fields = ['uri-id' => $uriid, 'name' => substr($person['name'] ?: $person['nick'], 0, 64), 'url' => $person['url']]; Tag::storeByHash($uriid, $match[1], $person['name'] ?: $person['nick'], $person['url']);
if ($match[1] == Term::TAG_CHARACTER[Term::MENTION]) {
$fields['type'] = Term::MENTION;
} elseif ($match[1] == Term::TAG_CHARACTER[Term::EXCLUSIVE_MENTION]) {
$fields['type'] = Term::EXCLUSIVE_MENTION;
} elseif ($match[1] == Term::TAG_CHARACTER[Term::IMPLICIT_MENTION]) {
$fields['type'] = Term::IMPLICIT_MENTION;
} else {
continue;
}
DBA::insert('tag', $fields, true);
Logger::info('Stored mention', ['uriid' => $uriid, 'match' => $match, 'fields' => $fields]);
} }
} }

View file

@ -29,7 +29,7 @@ use Friendica\Core\Protocol;
use Friendica\Database\DBA; use Friendica\Database\DBA;
use Friendica\DI; use Friendica\DI;
use Friendica\Model\Item; use Friendica\Model\Item;
use Friendica\Model\Term; use Friendica\Model\Tag;
use Friendica\Util\Network; use Friendica\Util\Network;
use Friendica\Util\ParseUrl; use Friendica\Util\ParseUrl;
use Friendica\Util\XML; use Friendica\Util\XML;
@ -478,7 +478,7 @@ class Feed {
$item["title"] = ""; $item["title"] = "";
$item["body"] = $item["body"] . add_page_info($item["plink"], false, $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"]); $item["body"] = $item["body"] . add_page_info($item["plink"], false, $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"]);
$item["tag"] = add_page_keywords($item["plink"], $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"]); $item["tag"] = add_page_keywords($item["plink"], $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"]);
$taglist = add_page_keywords($item["plink"], $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"], true); $taglist = get_page_keywords($item["plink"], $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"]);
$item["object-type"] = Activity\ObjectType::BOOKMARK; $item["object-type"] = Activity\ObjectType::BOOKMARK;
unset($item["attach"]); unset($item["attach"]);
} else { } else {
@ -492,7 +492,7 @@ class Feed {
} else { } else {
// @todo $preview is never set in this case, is it intended? - @MrPetovan 2018-02-13 // @todo $preview is never set in this case, is it intended? - @MrPetovan 2018-02-13
$item["tag"] = add_page_keywords($item["plink"], $preview, true, $contact["ffi_keyword_blacklist"]); $item["tag"] = add_page_keywords($item["plink"], $preview, true, $contact["ffi_keyword_blacklist"]);
$taglist = add_page_keywords($item["plink"], $preview, true, $contact["ffi_keyword_blacklist"], true); $taglist = get_page_keywords($item["plink"], $preview, true, $contact["ffi_keyword_blacklist"]);
} }
$item["body"] .= "\n" . $item['tag']; $item["body"] .= "\n" . $item['tag'];
} else { } else {
@ -531,10 +531,7 @@ class Feed {
if (!empty($id) && !empty($taglist)) { if (!empty($id) && !empty($taglist)) {
$feeditem = Item::selectFirst(['uri-id'], ['id' => $id]); $feeditem = Item::selectFirst(['uri-id'], ['id' => $id]);
foreach ($taglist as $tag) { foreach ($taglist as $tag) {
$fields = ['uri-id' => $feeditem['uri-id'], 'name' => substr($tag, 0, 64), 'type' => Term::HASHTAG]; Tag::storeByHash($feeditem['uri-id'], '#', $tag);
DBA::insert('tag', $fields, true);
Logger::info('Stored tag', ['uri-id' => $feeditem['uri-id'], 'tag' => $tag, 'fields' => $fields]);
} }
} }
} }

View file

@ -36,7 +36,7 @@ use Friendica\Model\Conversation;
use Friendica\Model\GContact; use Friendica\Model\GContact;
use Friendica\Model\Item; use Friendica\Model\Item;
use Friendica\Model\ItemURI; use Friendica\Model\ItemURI;
use Friendica\Model\Term; use Friendica\Model\Tag;
use Friendica\Model\User; use Friendica\Model\User;
use Friendica\Network\Probe; use Friendica\Network\Probe;
use Friendica\Util\DateTimeFormat; use Friendica\Util\DateTimeFormat;
@ -665,10 +665,7 @@ class OStatus
$item['tag'] .= '#[url=' . DI::baseUrl() . '/search?tag=' . $term . ']' . $term . '[/url]'; $item['tag'] .= '#[url=' . DI::baseUrl() . '/search?tag=' . $term . ']' . $term . '[/url]';
// Store the hashtag // Store the hashtag
$fields = ['uri-id' => $item['uri-id'], 'name' => substr($term, 0, 64), 'type' => Term::HASHTAG]; Tag::storeByHash($item['uri-id'], '#', $term);
DBA::insert('tag', $fields, true);
Logger::info('Stored tag', ['uri-id' => $item['uri-id'], 'tag' => $term, 'fields' => $fields]);
} }
} }
} }

View file

@ -1293,16 +1293,27 @@ return [
] ]
], ],
"tag" => [ "tag" => [
"comment" => "item tags and mentions", "comment" => "tags and mentions",
"fields" => [ "fields" => [
"uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "relation" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"], "id" => ["type" => "int unsigned", "not null" => "1", "extra" => "auto_increment", "primary" => "1", "comment" => ""],
"type" => ["type" => "tinyint unsigned", "not null" => "1", "default" => "0", "primary" => "1", "comment" => ""], "type" => ["type" => "tinyint unsigned", "not null" => "1", "default" => "0", "comment" => ""],
"name" => ["type" => "varchar(64)", "not null" => "1", "default" => "", "primary" => "1", "comment" => ""], "name" => ["type" => "varchar(64)", "not null" => "1", "default" => "", "comment" => ""],
"url" => ["type" => "varchar(255)", "not null" => "1", "default" => "", "comment" => ""] "url" => ["type" => "varbinary(255)", "not null" => "1", "default" => "", "comment" => ""]
], ],
"indexes" => [ "indexes" => [
"PRIMARY" => ["uri-id", "type", "name"], "PRIMARY" => ["id"],
"type_name" => ["type", "name"] "type_name_url" => ["UNIQUE", "type", "name", "url"]
]
],
"post-tag" => [
"comment" => "post relation to tags",
"fields" => [
"tid" => ["type" => "int unsigned", "not null" => "1", "relation" => ["tag" => "id"], "primary" => "1", "comment" => ""],
"uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "relation" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"],
],
"indexes" => [
"PRIMARY" => ["tid", "uri-id"],
"uri-id" => ["uri-id"]
] ]
], ],
"thread" => [ "thread" => [