We now store the tags in two separate tables

This commit is contained in:
Michael 2020-04-17 06:35:20 +00:00
parent d3f4e4d629
commit db657b0149
8 changed files with 121 additions and 98 deletions

View file

@ -141,31 +141,44 @@ function query_page_info($url, $photo = "", $keywords = false, $keyword_blacklis
return $data;
}
/**
 * Build a BBCode string of hashtag search links from the keywords of a page.
 *
 * The keywords are taken from the 'keywords' entry of the query_page_info()
 * result. Each keyword is reduced to a tag name by removing spaces and the
 * characters + / . # ' and is rendered as
 * "#[url=<base url>/search?tag=<name>]<name>[/url]"; entries are joined with ", ".
 *
 * NOTE(review): this span appears to interleave two revisions of the function
 * (two signatures, duplicated loop bodies, two return paths). Only one of the
 * signatures accepts $return_array - confirm against the repository which
 * variant is current before relying on it.
 *
 * @param string $url               Passed unchanged to query_page_info()
 * @param string $photo             Passed unchanged to query_page_info()
 * @param bool   $keywords          Passed unchanged to query_page_info()
 * @param string $keyword_blacklist Passed unchanged to query_page_info()
 * @param bool   $return_array      Only in the first signature: return the list of tag names instead of the string
 * @return string|array '' when there are no keywords, otherwise the link string
 *                      (or the tag name list in the $return_array variant)
 */
function add_page_keywords($url, $photo = "", $keywords = false, $keyword_blacklist = "", $return_array = false)
function add_page_keywords($url, $photo = "", $keywords = false, $keyword_blacklist = "")
{
$data = query_page_info($url, $photo, $keywords, $keyword_blacklist);
// Guard clause: nothing to render when the keyword list is missing, empty or not an array
if (empty($data['keywords']) || !is_array($data['keywords'])) {
return '';
}
$tags = "";
$taglist = [];
if (isset($data["keywords"]) && count($data["keywords"])) {
foreach ($data["keywords"] as $keyword) {
// Remove spaces and + / . # ' so the keyword becomes a single tag name
$hashtag = str_replace([" ", "+", "/", ".", "#", "'"],
["", "", "", "", "", ""], $keyword);
foreach ($data["keywords"] as $keyword) {
// Remove spaces and + / . # ' so the keyword becomes a single tag name
$hashtag = str_replace([" ", "+", "/", ".", "#", "'"],
["", "", "", "", "", ""], $keyword);
// Separate consecutive entries with a comma
if ($tags != "") {
$tags .= ", ";
}
$tags .= "#[url=" . DI::baseUrl() . "/search?tag=" . $hashtag . "]" . $hashtag . "[/url]";
$taglist[] = $hashtag;
// Separate consecutive entries with a comma
if ($tags != "") {
$tags .= ", ";
}
$tags .= "#[url=" . DI::baseUrl() . "/search?tag=" . $hashtag . "]" . $hashtag . "[/url]";
}
// Variant with $return_array: hand back the plain tag names instead of the BBCode string
if ($return_array) {
return $taglist;
} else {
return $tags;
return $tags;
}
/**
 * Return the keywords of a page as a list of tag names.
 *
 * The keywords come from the 'keywords' entry of the query_page_info() result.
 * Spaces and the characters + / . # ' are removed from every keyword.
 *
 * @param string $url               Passed unchanged to query_page_info()
 * @param string $photo             Passed unchanged to query_page_info()
 * @param bool   $keywords          Passed unchanged to query_page_info()
 * @param string $keyword_blacklist Passed unchanged to query_page_info()
 * @return array Sequentially indexed list of tag names, empty when the page has no keywords
 */
function get_page_keywords($url, $photo = "", $keywords = false, $keyword_blacklist = "")
{
	$info = query_page_info($url, $photo, $keywords, $keyword_blacklist);

	if (!empty($info['keywords']) && is_array($info['keywords'])) {
		$stripped = [" ", "+", "/", ".", "#", "'"];

		$names = array_map(
			function ($entry) use ($stripped) {
				return str_replace($stripped, "", $entry);
			},
			$info['keywords']
		);

		// array_map() keeps the input keys; reindex so the result is a plain list
		return array_values($names);
	}

	return [];
}
function add_page_info($url, $no_photos = false, $photo = "", $keywords = false, $keyword_blacklist = "")

View file

@ -54,6 +54,54 @@ class Tag
self::EXCLUSIVE_MENTION => '!',
];
/**
 * Store a tag or mention and link it to a post.
 *
 * The matching row of the `tag` table (same type, name and url) is looked up
 * and created when missing; afterwards the pair (tag id, uri-id) is inserted
 * into `post-tag`.
 *
 * @param int    $uriid URI id of the post the tag belongs to
 * @param int    $type  Tag type, e.g. self::HASHTAG or self::MENTION
 * @param string $name  Tag name; bytes 0x00-0x20, 0xFF and the characters # ! @
 *                      are stripped from both ends, the rest is cut to 64 bytes
 * @param string $url   Optional url of the tag target; ignored when equal to the name
 * @return void
 */
public static function store(int $uriid, int $type, string $name, string $url = '')
{
	$name = trim($name, "\x00..\x20\xFF#!@");
	if (empty($name)) {
		return;
	}

	// The url is always part of the lookup. Without it a url-less tag would match
	// any row with the same type and name - including one that carries a
	// different url - although the table is unique on (type, name, url).
	$fields = ['name' => substr($name, 0, 64), 'type' => $type, 'url' => ''];

	if (!empty($url) && ($url != $name)) {
		$fields['url'] = strtolower($url);
	}

	$tag = DBA::selectFirst('tag', ['id'], $fields);
	if (DBA::isResult($tag)) {
		$tagid = $tag['id'];
	} else {
		DBA::insert('tag', $fields, true);
		$tagid = DBA::lastInsertId();

		if (empty($tagid)) {
			// The insert reported no new id. Presumably the row was created in
			// the meantime by another process (TODO confirm the duplicate
			// handling of DBA::insert); read it again before giving up.
			$tag = DBA::selectFirst('tag', ['id'], $fields);
			if (DBA::isResult($tag)) {
				$tagid = $tag['id'];
			}
		}
	}

	if (empty($tagid)) {
		Logger::error('No tag id created', $fields);
		return;
	}

	DBA::insert('post-tag', ['uri-id' => $uriid, 'tid' => $tagid], true);

	Logger::info('Stored tag/mention', ['uri-id' => $uriid, 'tag-id' => $tagid, 'tag' => $fields]);
}
/**
 * Store a tag or mention that is identified by its marker character.
 *
 * The marker is compared with the characters registered in self::TAG_CHARACTER
 * for mentions, exclusive mentions, implicit mentions and hashtags, in that
 * order. The first match decides the type handed to self::store(); an unknown
 * marker is ignored without any further action.
 *
 * @param int    $uriid URI id of the post the tag belongs to
 * @param string $hash  Marker character of the tag
 * @param string $name  Tag name, handed on to self::store()
 * @param string $url   Optional url, handed on to self::store()
 * @return void
 */
public static function storeByHash(int $uriid, string $hash, string $name, string $url = '')
{
	$candidates = [self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION, self::HASHTAG];

	foreach ($candidates as $candidate) {
		if ($hash == self::TAG_CHARACTER[$candidate]) {
			self::store($uriid, $candidate, $name, $url);
			return;
		}
	}
}
/**
* Store tags from the body
*
@ -73,9 +121,7 @@ class Tag
continue;
}
$fields = ['uri-id' => $uriid, 'name' => substr($tag, 1, 64), 'type' => self::HASHTAG];
DBA::insert('tag', $fields, true);
Logger::info('Stored tag', ['uriid' => $uriid, 'tag' => $tag, 'fields' => $fields]);
self::storeByHash($uriid, '#', $tag);
}
}
}

View file

@ -34,6 +34,7 @@ use Friendica\Model\Event;
use Friendica\Model\Item;
use Friendica\Model\ItemURI;
use Friendica\Model\Mail;
use Friendica\Model\Tag;
use Friendica\Model\Term;
use Friendica\Model\User;
use Friendica\Protocol\Activity;
@ -585,53 +586,43 @@ class Processor
/**
 * Replace the tags and mentions linked to a post with the given list.
 *
 * All `post-tag` rows of the post are removed first. Every list entry of type
 * 'Mention' or 'Hashtag' with a non-empty name is then stored through
 * Tag::storeByHash(). A leading marker character is cut off the name; when a
 * name has no marker, the marker of its own type is used (mention marker for
 * 'Mention', hashtag marker for 'Hashtag'). For mentions with an href the name
 * is replaced by the name or nick returned by APContact::getByURL().
 *
 * @param int        $uriid URI id of the post
 * @param array|null $tags  List of tag arrays with the keys 'type', 'name' and optionally 'href'
 * @return void
 */
private static function storeTags(int $uriid, array $tags = null)
{
	// Make sure to delete all existing tags (can happen when called via the update functionality)
	DBA::delete('post-tag', ['uri-id' => $uriid]);

	// The parameter defaults to null, so fall back to an empty list
	foreach ($tags ?? [] as $tag) {
		if (empty($tag['name']) || empty($tag['type']) || !in_array($tag['type'], ['Mention', 'Hashtag'])) {
			continue;
		}

		$hash = substr($tag['name'], 0, 1);

		if ($tag['type'] == 'Mention') {
			if (in_array($hash, [Tag::TAG_CHARACTER[Tag::MENTION],
				Tag::TAG_CHARACTER[Tag::EXCLUSIVE_MENTION],
				Tag::TAG_CHARACTER[Tag::IMPLICIT_MENTION]])) {
				$tag['name'] = substr($tag['name'], 1);
			} else {
				// A mention without marker is still a mention, not a hashtag
				$hash = Tag::TAG_CHARACTER[Tag::MENTION];
			}

			if (!empty($tag['href'])) {
				$apcontact = APContact::getByURL($tag['href']);
				if (!empty($apcontact['name']) || !empty($apcontact['nick'])) {
					$tag['name'] = $apcontact['name'] ?: $apcontact['nick'];
				}
			}
		} elseif ($tag['type'] == 'Hashtag') {
			if ($hash == Tag::TAG_CHARACTER[Tag::HASHTAG]) {
				$tag['name'] = substr($tag['name'], 1);
			} else {
				// A hashtag without marker is still a hashtag, not a mention
				$hash = Tag::TAG_CHARACTER[Tag::HASHTAG];
			}
		}

		// The name can be empty after the marker was removed
		if (empty($tag['name'])) {
			continue;
		}

		// 'href' is optional, pass an empty url when it is missing
		Tag::storeByHash($uriid, $hash, $tag['name'], $tag['href'] ?? '');
	}
}

View file

@ -2464,26 +2464,7 @@ class DFRN
$item["tag"] .= $termhash . "[url=" . $termurl . "]" . $term . "[/url]";
// Store the hashtag/mention
$fields = ['uri-id' => $item['uri-id'], 'name' => substr($term, 0, 64)];
if ($termhash == Term::TAG_CHARACTER[Term::MENTION]) {
$fields['type'] = Term::MENTION;
} elseif ($termhash == Term::TAG_CHARACTER[Term::EXCLUSIVE_MENTION]) {
$fields['type'] = Term::EXCLUSIVE_MENTION;
} elseif ($termhash == Term::TAG_CHARACTER[Term::IMPLICIT_MENTION]) {
$fields['type'] = Term::IMPLICIT_MENTION;
} elseif ($termhash == Term::TAG_CHARACTER[Term::HASHTAG]) {
$fields['type'] = Term::HASHTAG;
}
if (!empty($termurl)) {
$fields['url'] = $termurl;
}
DBA::insert('tag', $fields, true);
Logger::info('Stored tag/mention', ['uri-id' => $item['uri-id'], 'tag' => $term, 'url' => $termurl, 'hash' => $termhash, 'fields' => $fields]);
Tag::storeByHash($item['uri-id'], $termhash, $term, $termurl);
}
}
}

View file

@ -1840,20 +1840,7 @@ class Diaspora
continue;
}
$fields = ['uri-id' => $uriid, 'name' => substr($person['name'] ?: $person['nick'], 0, 64), 'url' => $person['url']];
if ($match[1] == Term::TAG_CHARACTER[Term::MENTION]) {
$fields['type'] = Term::MENTION;
} elseif ($match[1] == Term::TAG_CHARACTER[Term::EXCLUSIVE_MENTION]) {
$fields['type'] = Term::EXCLUSIVE_MENTION;
} elseif ($match[1] == Term::TAG_CHARACTER[Term::IMPLICIT_MENTION]) {
$fields['type'] = Term::IMPLICIT_MENTION;
} else {
continue;
}
DBA::insert('tag', $fields, true);
Logger::info('Stored mention', ['uriid' => $uriid, 'match' => $match, 'fields' => $fields]);
Tag::storeByHash($uriid, $match[1], $person['name'] ?: $person['nick'], $person['url']);
}
}

View file

@ -29,7 +29,7 @@ use Friendica\Core\Protocol;
use Friendica\Database\DBA;
use Friendica\DI;
use Friendica\Model\Item;
use Friendica\Model\Term;
use Friendica\Model\Tag;
use Friendica\Util\Network;
use Friendica\Util\ParseUrl;
use Friendica\Util\XML;
@ -478,7 +478,7 @@ class Feed {
$item["title"] = "";
$item["body"] = $item["body"] . add_page_info($item["plink"], false, $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"]);
$item["tag"] = add_page_keywords($item["plink"], $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"]);
$taglist = add_page_keywords($item["plink"], $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"], true);
$taglist = get_page_keywords($item["plink"], $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"]);
$item["object-type"] = Activity\ObjectType::BOOKMARK;
unset($item["attach"]);
} else {
@ -492,7 +492,7 @@ class Feed {
} else {
// @todo $preview is never set in this case, is it intended? - @MrPetovan 2018-02-13
$item["tag"] = add_page_keywords($item["plink"], $preview, true, $contact["ffi_keyword_blacklist"]);
$taglist = add_page_keywords($item["plink"], $preview, true, $contact["ffi_keyword_blacklist"], true);
$taglist = get_page_keywords($item["plink"], $preview, true, $contact["ffi_keyword_blacklist"]);
}
$item["body"] .= "\n" . $item['tag'];
} else {
@ -531,10 +531,7 @@ class Feed {
if (!empty($id) && !empty($taglist)) {
$feeditem = Item::selectFirst(['uri-id'], ['id' => $id]);
foreach ($taglist as $tag) {
$fields = ['uri-id' => $feeditem['uri-id'], 'name' => substr($tag, 0, 64), 'type' => Term::HASHTAG];
DBA::insert('tag', $fields, true);
Logger::info('Stored tag', ['uri-id' => $feeditem['uri-id'], 'tag' => $tag, 'fields' => $fields]);
Tag::storeByHash($feeditem['uri-id'], '#', $tag);
}
}
}

View file

@ -36,7 +36,7 @@ use Friendica\Model\Conversation;
use Friendica\Model\GContact;
use Friendica\Model\Item;
use Friendica\Model\ItemURI;
use Friendica\Model\Term;
use Friendica\Model\Tag;
use Friendica\Model\User;
use Friendica\Network\Probe;
use Friendica\Util\DateTimeFormat;
@ -665,10 +665,7 @@ class OStatus
$item['tag'] .= '#[url=' . DI::baseUrl() . '/search?tag=' . $term . ']' . $term . '[/url]';
// Store the hashtag
$fields = ['uri-id' => $item['uri-id'], 'name' => substr($term, 0, 64), 'type' => Term::HASHTAG];
DBA::insert('tag', $fields, true);
Logger::info('Stored tag', ['uri-id' => $item['uri-id'], 'tag' => $term, 'fields' => $fields]);
Tag::storeByHash($item['uri-id'], '#', $term);
}
}
}

View file

@ -1293,16 +1293,27 @@ return [
]
],
"tag" => [
"comment" => "item tags and mentions",
"comment" => "tags and mentions",
"fields" => [
"uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "relation" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"],
"type" => ["type" => "tinyint unsigned", "not null" => "1", "default" => "0", "primary" => "1", "comment" => ""],
"name" => ["type" => "varchar(64)", "not null" => "1", "default" => "", "primary" => "1", "comment" => ""],
"url" => ["type" => "varchar(255)", "not null" => "1", "default" => "", "comment" => ""]
"id" => ["type" => "int unsigned", "not null" => "1", "extra" => "auto_increment", "primary" => "1", "comment" => ""],
"type" => ["type" => "tinyint unsigned", "not null" => "1", "default" => "0", "comment" => ""],
"name" => ["type" => "varchar(64)", "not null" => "1", "default" => "", "comment" => ""],
"url" => ["type" => "varbinary(255)", "not null" => "1", "default" => "", "comment" => ""]
],
"indexes" => [
"PRIMARY" => ["uri-id", "type", "name"],
"type_name" => ["type", "name"]
"PRIMARY" => ["id"],
"type_name_url" => ["UNIQUE", "type", "name", "url"]
]
],
"post-tag" => [
"comment" => "post relation to tags",
"fields" => [
"tid" => ["type" => "int unsigned", "not null" => "1", "relation" => ["tag" => "id"], "primary" => "1", "comment" => ""],
"uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "relation" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"],
],
"indexes" => [
"PRIMARY" => ["tid", "uri-id"],
"uri-id" => ["uri-id"]
]
],
"thread" => [