From fc22a3e83f7c9e2217b3416b9330970978becca2 Mon Sep 17 00:00:00 2001 From: Michael Date: Thu, 1 Feb 2024 23:08:53 +0000 Subject: [PATCH] Unify searchindex table with engagement table --- database.sql | 15 ++++++++++----- doc/database/db_post-engagement.md | 2 +- doc/database/db_post-searchindex.md | 19 ++++++++++++------- src/Database/PostUpdate.php | 28 +++++++++++++++++++--------- src/Model/Item.php | 2 +- src/Model/Post/Content.php | 8 ++++---- src/Model/Post/Engagement.php | 11 ++++++----- src/Model/Post/SearchIndex.php | 18 ++++++++++++------ src/Module/Api/Mastodon/Search.php | 2 +- src/Module/Conversation/Timeline.php | 4 ++-- static/dbstructure.config.php | 18 +++++++++++------- 11 files changed, 79 insertions(+), 48 deletions(-) diff --git a/database.sql b/database.sql index da167e2c41..871651bdb7 100644 --- a/database.sql +++ b/database.sql @@ -1,6 +1,6 @@ -- ------------------------------------------ -- Friendica 2024.03-dev (Yellow Archangel) --- DB_UPDATE_VERSION 1549 +-- DB_UPDATE_VERSION 1550 -- ------------------------------------------ @@ -1346,7 +1346,7 @@ CREATE TABLE IF NOT EXISTS `post-engagement` ( `owner-id` int unsigned NOT NULL DEFAULT 0 COMMENT 'Item owner', `contact-type` tinyint NOT NULL DEFAULT 0 COMMENT 'Person, organisation, news, community, relay', `media-type` tinyint NOT NULL DEFAULT 0 COMMENT 'Type of media in a bit array (1 = image, 2 = video, 4 = audio', - `language` varchar(128) COMMENT 'Language information about this post', + `iso-639-1` char(2) COMMENT 'Language information about this post in the ISO 639-1 format', `searchtext` mediumtext COMMENT 'Simplified text for the full text search', `size` int unsigned COMMENT 'Body size', `created` datetime COMMENT '', @@ -1467,14 +1467,19 @@ CREATE TABLE IF NOT EXISTS `post-question-option` ( -- CREATE TABLE IF NOT EXISTS `post-searchindex` ( `uri-id` int unsigned NOT NULL COMMENT 'Id of the item-uri table entry that contains the item uri', - `network` char(4) COMMENT '', - 
`private` tinyint unsigned COMMENT '0=public, 1=private, 2=unlisted', + `owner-id` int unsigned NOT NULL DEFAULT 0 COMMENT 'Item owner', + `media-type` tinyint NOT NULL DEFAULT 0 COMMENT 'Type of media in a bit array (1 = image, 2 = video, 4 = audio', + `iso-639-1` char(2) COMMENT 'Language information about this post in the ISO 639-1 format', `searchtext` mediumtext COMMENT 'Simplified text for the full text search', + `size` int unsigned COMMENT 'Body size', `created` datetime COMMENT '', + `restricted` boolean NOT NULL DEFAULT '0' COMMENT 'If true, this post is either unlisted or not from a federated network', PRIMARY KEY(`uri-id`), + INDEX `owner-id` (`owner-id`), INDEX `created` (`created`), FULLTEXT INDEX `searchtext` (`searchtext`), - FOREIGN KEY (`uri-id`) REFERENCES `item-uri` (`id`) ON UPDATE RESTRICT ON DELETE CASCADE + FOREIGN KEY (`uri-id`) REFERENCES `item-uri` (`id`) ON UPDATE RESTRICT ON DELETE CASCADE, + FOREIGN KEY (`owner-id`) REFERENCES `contact` (`id`) ON UPDATE RESTRICT ON DELETE CASCADE ) DEFAULT COLLATE utf8mb4_general_ci COMMENT='Content for all posts'; -- diff --git a/doc/database/db_post-engagement.md b/doc/database/db_post-engagement.md index 027ae56a79..2166702040 100644 --- a/doc/database/db_post-engagement.md +++ b/doc/database/db_post-engagement.md @@ -12,7 +12,7 @@ Fields | owner-id | Item owner | int unsigned | NO | | 0 | | | contact-type | Person, organisation, news, community, relay | tinyint | NO | | 0 | | | media-type | Type of media in a bit array (1 = image, 2 = video, 4 = audio | tinyint | NO | | 0 | | -| language | Language information about this post | varchar(128) | YES | | NULL | | +| iso-639-1 | Language information about this post in the ISO 639-1 format | char(2) | YES | | NULL | | | searchtext | Simplified text for the full text search | mediumtext | YES | | NULL | | | size | Body size | int unsigned | YES | | NULL | | | created | | datetime | YES | | NULL | | diff --git a/doc/database/db_post-searchindex.md 
b/doc/database/db_post-searchindex.md index a6f5879854..18efd607fc 100644 --- a/doc/database/db_post-searchindex.md +++ b/doc/database/db_post-searchindex.md @@ -6,13 +6,16 @@ Content for all posts Fields ------ -| Field | Description | Type | Null | Key | Default | Extra | -| ---------- | --------------------------------------------------------- | ---------------- | ---- | --- | ------- | ----- | -| uri-id | Id of the item-uri table entry that contains the item uri | int unsigned | NO | PRI | NULL | | -| network | | char(4) | YES | | NULL | | -| private | 0=public, 1=private, 2=unlisted | tinyint unsigned | YES | | NULL | | -| searchtext | Simplified text for the full text search | mediumtext | YES | | NULL | | -| created | | datetime | YES | | NULL | | +| Field | Description | Type | Null | Key | Default | Extra | +| ---------- | --------------------------------------------------------------------- | ------------ | ---- | --- | ------- | ----- | +| uri-id | Id of the item-uri table entry that contains the item uri | int unsigned | NO | PRI | NULL | | +| owner-id | Item owner | int unsigned | NO | | 0 | | +| media-type | Type of media in a bit array (1 = image, 2 = video, 4 = audio | tinyint | NO | | 0 | | +| iso-639-1 | Language information about this post in the ISO 639-1 format | char(2) | YES | | NULL | | +| searchtext | Simplified text for the full text search | mediumtext | YES | | NULL | | +| size | Body size | int unsigned | YES | | NULL | | +| created | | datetime | YES | | NULL | | +| restricted | If true, this post is either unlisted or not from a federated network | boolean | NO | | 0 | | Indexes ------------ @@ -20,6 +23,7 @@ Indexes | Name | Fields | | ---------- | -------------------- | | PRIMARY | uri-id | +| owner-id | owner-id | | created | created | | searchtext | FULLTEXT, searchtext | @@ -29,5 +33,6 @@ Foreign Keys | Field | Target Table | Target Field | |-------|--------------|--------------| | uri-id | [item-uri](help/database/db_item-uri) | 
id | +| owner-id | [contact](help/database/db_contact) | id | Return to [database documentation](help/database) diff --git a/src/Database/PostUpdate.php b/src/Database/PostUpdate.php index 09c97708d2..bad13ad4a9 100644 --- a/src/Database/PostUpdate.php +++ b/src/Database/PostUpdate.php @@ -52,7 +52,7 @@ class PostUpdate // Needed for the helper function to read from the legacy term table const OBJECT_TYPE_POST = 1; - const VERSION = 1547; + const VERSION = 1550; /** * Calls the post update functions @@ -128,7 +128,7 @@ class PostUpdate if (!self::update1544()) { return false; } - if (!self::update1547()) { + if (!self::update1550()) { return false; } return true; @@ -1369,14 +1369,24 @@ class PostUpdate * @throws \Friendica\Network\HTTPException\InternalServerErrorException * @throws \ImagickException */ - private static function update1547() + private static function update1550() { // Was the script completed? - if (DI::keyValue()->get('post_update_version') >= 1547) { + if (DI::keyValue()->get('post_update_version') >= 1550) { return true; } - $id = (int)(DI::keyValue()->get('post_update_version_1547_id') ?? 0); + $engagements = DBA::select('post-engagement', ['uri-id'], ["`iso-639-1` IS NULL"], ['order' => ['uri-id' => true], 'limit' => 1000]); + while ($engagement = DBA::fetch($engagements)) { + $item = Post::selectFirst([], ['uri-id' => $engagement['uri-id']]); + if (empty($item)) { + continue; + } + Post\Engagement::storeFromItem($item); + } + DBA::close($engagements); + + $id = (int)(DI::keyValue()->get('post_update_version_1550_id') ?? 0); if ($id == 0) { $post = Post::selectFirstPost(['uri-id'], [], ['order' => ['uri-id' => true]]); $id = (int)($post['uri-id'] ?? 
0); @@ -1393,7 +1403,7 @@ class PostUpdate DBA::mergeConditions($condition, ["`created` > ?", $limit]); } - $posts = Post::selectPosts(['uri-id', 'network', 'private', 'created'], $condition, ['order' => ['uri-id' => true], 'limit' => 1000]); + $posts = Post::selectPosts(['uri-id', 'created'], $condition, ['order' => ['uri-id' => true], 'limit' => 1000]); if (DBA::errorNo() != 0) { Logger::error('Database error', ['no' => DBA::errorNo(), 'message' => DBA::errorMessage()]); @@ -1402,17 +1412,17 @@ class PostUpdate while ($post = Post::fetch($posts)) { $id = $post['uri-id']; - Post\SearchIndex::insert($post['uri-id'], $post['network'], $post['private'], $post['created'], true); + Post\SearchIndex::insert($post['uri-id'], $post['created'], true); ++$rows; } DBA::close($posts); - DI::keyValue()->set('post_update_version_1547_id', $id); + DI::keyValue()->set('post_update_version_1550_id', $id); Logger::info('Processed', ['rows' => $rows, 'last' => $id]); if ($rows <= 100) { - DI::keyValue()->set('post_update_version', 1547); + DI::keyValue()->set('post_update_version', 1550); Logger::info('Done'); return true; } diff --git a/src/Model/Item.php b/src/Model/Item.php index 9134bc149f..3d35618396 100644 --- a/src/Model/Item.php +++ b/src/Model/Item.php @@ -1450,7 +1450,7 @@ class Item $engagement_uri_id = Post\Engagement::storeFromItem($posted_item); if (in_array($posted_item['gravity'], [self::GRAVITY_PARENT, self::GRAVITY_COMMENT])) { - Post\SearchIndex::insert($posted_item['uri-id'], $posted_item['network'], $posted_item['private'], $posted_item['created']); + Post\SearchIndex::insert($posted_item['uri-id'], $posted_item['created']); } elseif ($posted_item['verb'] == Activity::ANNOUNCE) { Post\SearchIndex::update($posted_item['thr-parent-id']); } diff --git a/src/Model/Post/Content.php b/src/Model/Post/Content.php index 74cfbfe6f5..2f735bbb87 100644 --- a/src/Model/Post/Content.php +++ b/src/Model/Post/Content.php @@ -110,9 +110,9 @@ class Content { $search = 
Post\Engagement::escapeKeywords($search); if ($uid != 0) { - $condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and (private = ? OR `uri-id` in (SELECT `uri-id` FROM `post-user` where `uid` = ?))", $search, Item::PUBLIC, $uid]; + $condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) AND (NOT `restricted` OR `uri-id` IN (SELECT `uri-id` FROM `post-user` WHERE `uid` = ?))", $search, $uid]; } else { - $condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and private = ?", $search, Item::PUBLIC]; + $condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) AND NOT `restricted`", $search]; } if (!empty($last_uriid)) { @@ -139,9 +139,9 @@ class Content { $search = Post\Engagement::escapeKeywords($search); if ($uid != 0) { - $condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and (private = ? OR `uri-id` in (SELECT `uri-id` FROM `post-user` where `uid` = ?))", $search, Item::PUBLIC, $uid]; + $condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) AND (NOT `restricted` OR `uri-id` IN (SELECT `uri-id` FROM `post-user` WHERE `uid` = ?))", $search, $uid]; } else { - $condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and private = ?", $search, Item::PUBLIC]; + $condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) AND NOT `restricted`", $search]; } return DBA::count('post-searchindex', $condition); } diff --git a/src/Model/Post/Engagement.php b/src/Model/Post/Engagement.php index a460ba85b3..3841a55bd9 100644 --- a/src/Model/Post/Engagement.php +++ b/src/Model/Post/Engagement.php @@ -22,6 +22,7 @@ namespace Friendica\Model\Post; use Friendica\Content\Text\BBCode; +use Friendica\Core\L10n; use Friendica\Core\Logger; use Friendica\Core\Protocol; use Friendica\Database\DBA; @@ -93,9 +94,9 @@ class Engagement } $searchtext = self::getSearchTextForItem($parent); + $language = !empty($parent['language']) ? (array_key_first(json_decode($parent['language'], true)) ?? 
L10n::UNDETERMINED_LANGUAGE) : L10n::UNDETERMINED_LANGUAGE; if (!$store) { - $language = !empty($parent['language']) ? (array_key_first(json_decode($parent['language'], true)) ?? '') : ''; - $store = DI::userDefinedChannel()->match($searchtext, $language); + $store = DI::userDefinedChannel()->match($searchtext, $language); } $engagement = [ @@ -103,7 +104,7 @@ class Engagement 'owner-id' => $parent['owner-id'], 'contact-type' => $parent['contact-contact-type'], 'media-type' => $mediatype, - 'language' => $parent['language'], + 'iso-639-1' => $language, 'searchtext' => $searchtext, 'size' => self::getContentSize($parent), 'created' => $parent['created'], @@ -130,7 +131,7 @@ class Engagement return ($ret && !$exists) ? $engagement['uri-id'] : 0; } - private static function getContentSize(array $item): int + public static function getContentSize(array $item): int { $body = ' ' . $item['title'] . ' ' . $item['content-warning'] . ' ' . $item['body']; $body = BBCode::removeAttachment($body); @@ -315,7 +316,7 @@ class Engagement return $text; } - private static function getMediaType(int $uri_id): int + public static function getMediaType(int $uri_id): int { $media = Post\Media::getByURIId($uri_id); $type = 0; diff --git a/src/Model/Post/SearchIndex.php b/src/Model/Post/SearchIndex.php index 67d6c91d12..042f800d46 100644 --- a/src/Model/Post/SearchIndex.php +++ b/src/Model/Post/SearchIndex.php @@ -21,10 +21,13 @@ namespace Friendica\Model\Post; +use Friendica\Core\L10n; use Friendica\Core\Logger; +use Friendica\Core\Protocol; use Friendica\Database\Database; use Friendica\Database\DBA; use Friendica\DI; +use Friendica\Model\Item; use Friendica\Model\Post; use Friendica\Util\DateTimeFormat; @@ -34,24 +37,27 @@ class SearchIndex * Insert a post-searchindex entry * * @param int $uri_id - * @param string $network - * @param int $private * @param string $created * @param bool $refresh */ - public static function insert(int $uri_id, string $network, int $private, string 
$created, bool $refresh = false) + public static function insert(int $uri_id, string $created, bool $refresh = false) { $limit = self::searchAgeDateLimit(); if (!empty($limit) && (strtotime($created) < strtotime($limit))) { return; } + $item = Post::selectFirstPost(['created', 'owner-id', 'private', 'language', 'network', 'title', 'content-warning', 'body'], ['uri-id' => $uri_id]); + $search = [ 'uri-id' => $uri_id, - 'network' => $network, - 'private' => $private, - 'created' => $created, + 'owner-id' => $item['owner-id'], + 'media-type' => Engagement::getMediaType($uri_id), + 'iso-639-1' => !empty($item['language']) ? (array_key_first(json_decode($item['language'], true)) ?? L10n::UNDETERMINED_LANGUAGE) : L10n::UNDETERMINED_LANGUAGE, 'searchtext' => Post\Engagement::getSearchTextForUriId($uri_id, $refresh), + 'size' => Engagement::getContentSize($item), + 'created' => $item['created'], + 'restricted' => !in_array($item['network'], Protocol::FEDERATED) || ($item['private'] != Item::PUBLIC), ]; return DBA::insert('post-searchindex', $search, Database::INSERT_UPDATE); } diff --git a/src/Module/Api/Mastodon/Search.php b/src/Module/Api/Mastodon/Search.php index 23f920dc3e..25db39950a 100644 --- a/src/Module/Api/Mastodon/Search.php +++ b/src/Module/Api/Mastodon/Search.php @@ -154,7 +154,7 @@ class Search extends BaseApi $table = 'tag-search-view'; } else { $q = Post\Engagement::escapeKeywords($q); - $condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and (private = ? OR `uri-id` in (SELECT `uri-id` FROM `post-user` where `uid` = ?))", $q, Item::PUBLIC, $uid]; + $condition = ["MATCH (`searchtext`) AGAINST (? 
IN BOOLEAN MODE) AND (NOT `restricted` OR `uri-id` IN (SELECT `uri-id` FROM `post-user` WHERE `uid` = ?))", $q, $uid]; $table = 'post-searchindex'; } diff --git a/src/Module/Conversation/Timeline.php b/src/Module/Conversation/Timeline.php index d0f8f09736..311a48a204 100644 --- a/src/Module/Conversation/Timeline.php +++ b/src/Module/Conversation/Timeline.php @@ -324,7 +324,7 @@ class Timeline extends BaseModule } elseif ($this->selectedTab == ChannelEntity::AUDIO) { $condition = ["`media-type` & ?", 4]; } elseif ($this->selectedTab == ChannelEntity::LANGUAGE) { - $condition = ["JSON_EXTRACT(JSON_KEYS(language), '$[0]') = ?", User::getLanguageCode($uid)]; + $condition = ["`iso-639-1` = ?", User::getLanguageCode($uid)]; } elseif (is_numeric($this->selectedTab)) { $condition = $this->getUserChannelConditions($this->selectedTab, $uid); } @@ -450,7 +450,7 @@ class Timeline extends BaseModule $conditions = []; $languages = $languages ?: User::getWantedLanguages($uid); foreach ($languages as $language) { - $conditions[] = "JSON_EXTRACT(JSON_KEYS(language), '$[0]') = ?"; + $conditions[] = "`iso-639-1` = ?"; $condition[] = $language; } if (!empty($conditions)) { diff --git a/static/dbstructure.config.php b/static/dbstructure.config.php index a75403fcd9..3dc1d4d38c 100644 --- a/static/dbstructure.config.php +++ b/static/dbstructure.config.php @@ -56,7 +56,7 @@ use Friendica\Database\DBA; // This file is required several times during the test in DbaDefinition which justifies this condition if (!defined('DB_UPDATE_VERSION')) { - define('DB_UPDATE_VERSION', 1549); + define('DB_UPDATE_VERSION', 1550); } return [ @@ -1245,7 +1245,7 @@ return [ "post-activity" => [ "comment" => "Original remote activity", "fields" => [ - "uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"], + "uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", 
"foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"], "activity" => ["type" => "mediumtext", "comment" => "Original activity"], "received" => ["type" => "datetime", "comment" => ""], ], @@ -1256,7 +1256,7 @@ return [ "post-category" => [ "comment" => "post relation to categories", "fields" => [ - "uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"], + "uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"], "uid" => ["type" => "mediumint unsigned", "not null" => "1", "default" => "0", "primary" => "1", "foreign" => ["user" => "uid"], "comment" => "User id"], "type" => ["type" => "tinyint unsigned", "not null" => "1", "default" => "0", "primary" => "1", "comment" => ""], "tid" => ["type" => "int unsigned", "not null" => "1", "default" => "0", "primary" => "1", "foreign" => ["tag" => "id", "on delete" => "restrict"], "comment" => ""], @@ -1363,11 +1363,11 @@ return [ "post-engagement" => [ "comment" => "Engagement data per post", "fields" => [ - "uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"], + "uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"], "owner-id" => ["type" => "int unsigned", "not null" => "1", "default" => "0", "foreign" => ["contact" => "id"], "comment" => "Item owner"], "contact-type" => ["type" => "tinyint", "not null" => "1", "default" => "0", "comment" => "Person, organisation, news, community, relay"], "media-type" => ["type" => "tinyint", "not null" => "1", "default" => 
"0", "comment" => "Type of media in a bit array (1 = image, 2 = video, 4 = audio"], - "language" => ["type" => "varchar(128)", "comment" => "Language information about this post"], + "iso-639-1" => ["type" => "char(2)", "comment" => "Language information about this post in the ISO 639-1 format"], "searchtext" => ["type" => "mediumtext", "comment" => "Simplified text for the full text search"], "size" => ["type" => "int unsigned", "comment" => "Body size"], "created" => ["type" => "datetime", "comment" => ""], @@ -1486,13 +1486,17 @@ return [ "comment" => "Content for all posts", "fields" => [ "uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"], - "network" => ["type" => "char(4)", "comment" => ""], - "private" => ["type" => "tinyint unsigned", "comment" => "0=public, 1=private, 2=unlisted"], + "owner-id" => ["type" => "int unsigned", "not null" => "1", "default" => "0", "foreign" => ["contact" => "id"], "comment" => "Item owner"], + "media-type" => ["type" => "tinyint", "not null" => "1", "default" => "0", "comment" => "Type of media in a bit array (1 = image, 2 = video, 4 = audio"], + "iso-639-1" => ["type" => "char(2)", "comment" => "Language information about this post in the ISO 639-1 format"], "searchtext" => ["type" => "mediumtext", "comment" => "Simplified text for the full text search"], + "size" => ["type" => "int unsigned", "comment" => "Body size"], "created" => ["type" => "datetime", "comment" => ""], + "restricted" => ["type" => "boolean", "not null" => "1", "default" => "0", "comment" => "If true, this post is either unlisted or not from a federated network"], ], "indexes" => [ "PRIMARY" => ["uri-id"], + "owner-id" => ["owner-id"], "created" => ["created"], "searchtext" => ["FULLTEXT", "searchtext"], ]