From db657b0149cc6a29eeb62bc8ccc96a06f505a7bf Mon Sep 17 00:00:00 2001 From: Michael Date: Fri, 17 Apr 2020 06:35:20 +0000 Subject: [PATCH] We now store the tags in two separate tables --- include/items.php | 45 ++++++++++++++-------- src/Model/Tag.php | 52 ++++++++++++++++++++++++-- src/Protocol/ActivityPub/Processor.php | 43 +++++++++------------ src/Protocol/DFRN.php | 21 +---------- src/Protocol/Diaspora.php | 15 +------- src/Protocol/Feed.php | 11 ++---- src/Protocol/OStatus.php | 7 +--- static/dbstructure.config.php | 25 +++++++++---- 8 files changed, 121 insertions(+), 98 deletions(-) diff --git a/include/items.php b/include/items.php index 6068be4b94..2d73308582 100644 --- a/include/items.php +++ b/include/items.php @@ -141,31 +141,44 @@ function query_page_info($url, $photo = "", $keywords = false, $keyword_blacklis return $data; } -function add_page_keywords($url, $photo = "", $keywords = false, $keyword_blacklist = "", $return_array = false) +function add_page_keywords($url, $photo = "", $keywords = false, $keyword_blacklist = "") { $data = query_page_info($url, $photo, $keywords, $keyword_blacklist); + if (empty($data['keywords']) || !is_array($data['keywords'])) { + return ''; + } $tags = ""; - $taglist = []; - if (isset($data["keywords"]) && count($data["keywords"])) { - foreach ($data["keywords"] as $keyword) { - $hashtag = str_replace([" ", "+", "/", ".", "#", "'"], - ["", "", "", "", "", ""], $keyword); + foreach ($data["keywords"] as $keyword) { + $hashtag = str_replace([" ", "+", "/", ".", "#", "'"], + ["", "", "", "", "", ""], $keyword); - if ($tags != "") { - $tags .= ", "; - } - - $tags .= "#[url=" . DI::baseUrl() . "/search?tag=" . $hashtag . "]" . $hashtag . "[/url]"; - $taglist[] = $hashtag; + if ($tags != "") { + $tags .= ", "; } + + $tags .= "#[url=" . DI::baseUrl() . "/search?tag=" . $hashtag . "]" . $hashtag . "[/url]"; } - if ($return_array) { - return $taglist; - } else { - return $tags; + return $tags; +} + +function get_page_keywords($url, $photo = "", $keywords = false, $keyword_blacklist = "") +{ + $data = query_page_info($url, $photo, $keywords, $keyword_blacklist); + if (empty($data['keywords']) || !is_array($data['keywords'])) { + return []; } + + $taglist = []; + foreach ($data['keywords'] as $keyword) { + $hashtag = str_replace([" ", "+", "/", ".", "#", "'"], + ["", "", "", "", "", ""], $keyword); + + $taglist[] = $hashtag; + } + + return $taglist; } function add_page_info($url, $no_photos = false, $photo = "", $keywords = false, $keyword_blacklist = "") diff --git a/src/Model/Tag.php b/src/Model/Tag.php index 7578372a9b..43885e493a 100644 --- a/src/Model/Tag.php +++ b/src/Model/Tag.php @@ -54,6 +54,54 @@ class Tag self::EXCLUSIVE_MENTION => '!', ]; + public static function store(int $uriid, int $type, string $name, string $url = '') + { + $name = trim($name, "\x00..\x20\xFF#!@"); + if (empty($name)) { + return; + } + + $fields = ['name' => substr($name, 0, 64), 'type' => $type]; + + if (!empty($url) && ($url != $name)) { + $fields['url'] = strtolower($url); + } + + $tag = DBA::selectFirst('tag', ['id'], $fields); + if (!DBA::isResult($tag)) { + DBA::insert('tag', $fields, true); + $tagid = DBA::lastInsertId(); + } else { + $tagid = $tag['id']; + } + + if (empty($tagid)) { + Logger::error('No tag id created', $fields); + return; + } + + DBA::insert('post-tag', ['uri-id' => $uriid, 'tid' => $tagid], true); + + Logger::info('Stored tag/mention', ['uri-id' => $uriid, 'tag-id' => $tagid, 'tag' => $fields]); + } + + public static function storeByHash(int $uriid, string $hash, string $name, string $url = '') + { + if ($hash == self::TAG_CHARACTER[self::MENTION]) { + $type = self::MENTION; + } elseif ($hash == self::TAG_CHARACTER[self::EXCLUSIVE_MENTION]) { + $type = self::EXCLUSIVE_MENTION; + } elseif ($hash == self::TAG_CHARACTER[self::IMPLICIT_MENTION]) { + $type = self::IMPLICIT_MENTION; + } elseif ($hash == self::TAG_CHARACTER[self::HASHTAG]) { + $type = self::HASHTAG; + } else { + return; + } + + self::store($uriid, $type, $name, $url); + } + /** * Store tags from the body * @@ -73,9 +121,7 @@ class Tag continue; } - $fields = ['uri-id' => $uriid, 'name' => substr($tag, 1, 64), 'type' => self::HASHTAG]; - DBA::insert('tag', $fields, true); - Logger::info('Stored tag', ['uriid' => $uriid, 'tag' => $tag, 'fields' => $fields]); + self::storeByHash($uriid, '#', $tag); } } } diff --git a/src/Protocol/ActivityPub/Processor.php b/src/Protocol/ActivityPub/Processor.php index 023100dc72..1b605c30ab 100644 --- a/src/Protocol/ActivityPub/Processor.php +++ b/src/Protocol/ActivityPub/Processor.php @@ -34,6 +34,7 @@ use Friendica\Model\Event; use Friendica\Model\Item; use Friendica\Model\ItemURI; use Friendica\Model\Mail; +use Friendica\Model\Tag; use Friendica\Model\Term; use Friendica\Model\User; use Friendica\Protocol\Activity; @@ -585,53 +586,43 @@ class Processor private static function storeTags(int $uriid, array $tags = null) { // Make sure to delete all existing tags (can happen when called via the update functionality) - DBA::delete('tag', ['uri-id' => $uriid]); + DBA::delete('post-tag', ['uri-id' => $uriid]); foreach ($tags as $tag) { if (empty($tag['name']) || empty($tag['type']) || !in_array($tag['type'], ['Mention', 'Hashtag'])) { continue; } - $fields = ['uri-id' => $uriid, 'name' => $tag['name']]; + $hash = substr($tag['name'], 0, 1); if ($tag['type'] == 'Mention') { - $fields['type'] = Term::MENTION; - - if (substr($fields['name'], 0, 1) == Term::TAG_CHARACTER[Term::MENTION]) { - $fields['name'] = substr($fields['name'], 1); - } elseif (substr($fields['name'], 0, 1) == Term::TAG_CHARACTER[Term::EXCLUSIVE_MENTION]) { - $fields['type'] = Term::EXCLUSIVE_MENTION; - $fields['name'] = substr($fields['name'], 1); - } elseif (substr($fields['name'], 0, 1) == Term::TAG_CHARACTER[Term::IMPLICIT_MENTION]) { - $fields['type'] = Term::IMPLICIT_MENTION; - $fields['name'] = substr($fields['name'], 1); + if (in_array($hash, [Tag::TAG_CHARACTER[Tag::MENTION], + Tag::TAG_CHARACTER[Tag::EXCLUSIVE_MENTION], + Tag::TAG_CHARACTER[Tag::IMPLICIT_MENTION]])) { + $tag['name'] = substr($tag['name'], 1); + } else { + $hash = '#'; } + if (!empty($tag['href'])) { $apcontact = APContact::getByURL($tag['href']); if (!empty($apcontact['name']) || !empty($apcontact['nick'])) { - $fields['name'] = $apcontact['name'] ?: $apcontact['nick']; + $tag['name'] = $apcontact['name'] ?: $apcontact['nick']; } } } elseif ($tag['type'] == 'Hashtag') { - $fields['type'] = Term::HASHTAG; - if (substr($fields['name'], 0, 1) == Term::TAG_CHARACTER[Term::HASHTAG]) { - $fields['name'] = substr($fields['name'], 1); + if (substr($tag['name'], 0, 1) == Term::TAG_CHARACTER[Term::HASHTAG]) { + $tag['name'] = substr($tag['name'], 1); + } else { + $hash = '@'; } } - if (empty($fields['name'])) { + if (empty($tag['name'])) { continue; - } else { - $fields['name'] = substr($fields['name'], 0, 64); } - if (!empty($tag['href'] && ($tag['href'] != $tag['name']))) { - $fields['url'] = $tag['href']; - } - - DBA::insert('tag', $fields, true); - - Logger::info('Stored tag/mention', ['uriid' => $uriid, 'tag' => $tag, 'fields' => $fields]); + Tag::storeByHash($uriid, $hash, $tag['name'], $tag['href']); } } diff --git a/src/Protocol/DFRN.php b/src/Protocol/DFRN.php index 9ab8bc70c9..32067613f1 100644 --- a/src/Protocol/DFRN.php +++ b/src/Protocol/DFRN.php @@ -2464,26 +2464,7 @@ class DFRN $item["tag"] .= $termhash . "[url=" . $termurl . "]" . $term . "[/url]"; - // Store the hashtag/mention - $fields = ['uri-id' => $item['uri-id'], 'name' => substr($term, 0, 64)]; - - if ($termhash == Term::TAG_CHARACTER[Term::MENTION]) { - $fields['type'] = Term::MENTION; - } elseif ($termhash == Term::TAG_CHARACTER[Term::EXCLUSIVE_MENTION]) { - $fields['type'] = Term::EXCLUSIVE_MENTION; - } elseif ($termhash == Term::TAG_CHARACTER[Term::IMPLICIT_MENTION]) { - $fields['type'] = Term::IMPLICIT_MENTION; - } elseif ($termhash == Term::TAG_CHARACTER[Term::HASHTAG]) { - $fields['type'] = Term::HASHTAG; - } - - if (!empty($termurl)) { - $fields['url'] = $termurl; - } - - DBA::insert('tag', $fields, true); - - Logger::info('Stored tag/mention', ['uri-id' => $item['uri-id'], 'tag' => $term, 'url' => $termurl, 'hash' => $termhash, 'fields' => $fields]); + Tag::storeByHash($item['uri-id'], $termhash, $term, $termurl); } } } diff --git a/src/Protocol/Diaspora.php b/src/Protocol/Diaspora.php index 5580234c64..20a2be4cc1 100644 --- a/src/Protocol/Diaspora.php +++ b/src/Protocol/Diaspora.php @@ -1840,20 +1840,7 @@ class Diaspora continue; } - $fields = ['uri-id' => $uriid, 'name' => substr($person['name'] ?: $person['nick'], 0, 64), 'url' => $person['url']]; - - if ($match[1] == Term::TAG_CHARACTER[Term::MENTION]) { - $fields['type'] = Term::MENTION; - } elseif ($match[1] == Term::TAG_CHARACTER[Term::EXCLUSIVE_MENTION]) { - $fields['type'] = Term::EXCLUSIVE_MENTION; - } elseif ($match[1] == Term::TAG_CHARACTER[Term::IMPLICIT_MENTION]) { - $fields['type'] = Term::IMPLICIT_MENTION; - } else { - continue; - } - - DBA::insert('tag', $fields, true); - Logger::info('Stored mention', ['uriid' => $uriid, 'match' => $match, 'fields' => $fields]); + Tag::storeByHash($uriid, $match[1], $person['name'] ?: $person['nick'], $person['url']); } } diff --git a/src/Protocol/Feed.php b/src/Protocol/Feed.php index 8171348409..c03f959865 100644 --- a/src/Protocol/Feed.php +++ b/src/Protocol/Feed.php @@ -29,7 +29,7 @@ use Friendica\Core\Protocol; use Friendica\Database\DBA; use Friendica\DI; use Friendica\Model\Item; -use Friendica\Model\Term; +use Friendica\Model\Tag; use Friendica\Util\Network; use Friendica\Util\ParseUrl; use Friendica\Util\XML; @@ -478,7 +478,7 @@ class Feed { $item["title"] = ""; $item["body"] = $item["body"] . add_page_info($item["plink"], false, $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"]); $item["tag"] = add_page_keywords($item["plink"], $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"]); - $taglist = add_page_keywords($item["plink"], $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"], true); + $taglist = get_page_keywords($item["plink"], $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"]); $item["object-type"] = Activity\ObjectType::BOOKMARK; unset($item["attach"]); } else { @@ -492,7 +492,7 @@ class Feed { } else { // @todo $preview is never set in this case, is it intended? - @MrPetovan 2018-02-13 $item["tag"] = add_page_keywords($item["plink"], $preview, true, $contact["ffi_keyword_blacklist"]); - $taglist = add_page_keywords($item["plink"], $preview, true, $contact["ffi_keyword_blacklist"], true); + $taglist = get_page_keywords($item["plink"], $preview, true, $contact["ffi_keyword_blacklist"]); } $item["body"] .= "\n" . $item['tag']; } else { @@ -531,10 +531,7 @@ class Feed { if (!empty($id) && !empty($taglist)) { $feeditem = Item::selectFirst(['uri-id'], ['id' => $id]); foreach ($taglist as $tag) { - $fields = ['uri-id' => $feeditem['uri-id'], 'name' => substr($tag, 0, 64), 'type' => Term::HASHTAG]; - DBA::insert('tag', $fields, true); - - Logger::info('Stored tag', ['uri-id' => $feeditem['uri-id'], 'tag' => $tag, 'fields' => $fields]); + Tag::storeByHash($feeditem['uri-id'], '#', $tag); } } } diff --git a/src/Protocol/OStatus.php b/src/Protocol/OStatus.php index e155708aec..7dad68550b 100644 --- a/src/Protocol/OStatus.php +++ b/src/Protocol/OStatus.php @@ -36,7 +36,7 @@ use Friendica\Model\Conversation; use Friendica\Model\GContact; use Friendica\Model\Item; use Friendica\Model\ItemURI; -use Friendica\Model\Term; +use Friendica\Model\Tag; use Friendica\Model\User; use Friendica\Network\Probe; use Friendica\Util\DateTimeFormat; @@ -665,10 +665,7 @@ class OStatus $item['tag'] .= '#[url=' . DI::baseUrl() . '/search?tag=' . $term . ']' . $term . '[/url]'; // Store the hashtag - $fields = ['uri-id' => $item['uri-id'], 'name' => substr($term, 0, 64), 'type' => Term::HASHTAG]; - DBA::insert('tag', $fields, true); - - Logger::info('Stored tag', ['uri-id' => $item['uri-id'], 'tag' => $term, 'fields' => $fields]); + Tag::storeByHash($item['uri-id'], '#', $term); } } } diff --git a/static/dbstructure.config.php b/static/dbstructure.config.php index 110842cbc0..204fca9bfe 100755 --- a/static/dbstructure.config.php +++ b/static/dbstructure.config.php @@ -1293,16 +1293,27 @@ return [ ] ], "tag" => [ - "comment" => "item tags and mentions", + "comment" => "tags and mentions", "fields" => [ - "uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "relation" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"], - "type" => ["type" => "tinyint unsigned", "not null" => "1", "default" => "0", "primary" => "1", "comment" => ""], - "name" => ["type" => "varchar(64)", "not null" => "1", "default" => "", "primary" => "1", "comment" => ""], - "url" => ["type" => "varchar(255)", "not null" => "1", "default" => "", "comment" => ""] + "id" => ["type" => "int unsigned", "not null" => "1", "extra" => "auto_increment", "primary" => "1", "comment" => ""], + "type" => ["type" => "tinyint unsigned", "not null" => "1", "default" => "0", "comment" => ""], + "name" => ["type" => "varchar(64)", "not null" => "1", "default" => "", "comment" => ""], + "url" => ["type" => "varbinary(255)", "not null" => "1", "default" => "", "comment" => ""] ], "indexes" => [ - "PRIMARY" => ["uri-id", "type", "name"], - "type_name" => ["type", "name"] + "PRIMARY" => ["id"], + "type_name_url" => ["UNIQUE", "type", "name", "url"] + ] + ], + "post-tag" => [ + "comment" => "post relation to tags", + "fields" => [ + "tid" => ["type" => "int unsigned", "not null" => "1", "relation" => ["tag" => "id"], "primary" => "1", "comment" => ""], + "uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "relation" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"], + ], + "indexes" => [ + "PRIMARY" => ["tid", "uri-id"], + "uri-id" => ["uri-id"] ] ], "thread" => [