From ee9a68e40cf1bfca755032599baadb3f3890a2ad Mon Sep 17 00:00:00 2001 From: Michael Date: Wed, 17 Jan 2024 19:46:22 +0000 Subject: [PATCH] New table "post-searchindex" --- .../Repository/UserDefinedChannel.php | 12 +---- src/Content/Text/BBCode.php | 13 +++-- src/Model/Item.php | 16 ++++++ src/Model/Post/Content.php | 25 +++++---- src/Model/Post/Engagement.php | 53 +++++++++++++------ src/Module/Api/Mastodon/Search.php | 8 ++- src/Module/Conversation/Timeline.php | 6 +-- static/dbstructure.config.php | 15 +++++- 8 files changed, 97 insertions(+), 51 deletions(-) diff --git a/src/Content/Conversation/Repository/UserDefinedChannel.php b/src/Content/Conversation/Repository/UserDefinedChannel.php index 0452b6dd2b..0fb282099d 100644 --- a/src/Content/Conversation/Repository/UserDefinedChannel.php +++ b/src/Content/Conversation/Repository/UserDefinedChannel.php @@ -156,7 +156,7 @@ class UserDefinedChannel extends \Friendica\BaseRepository return true; } - return $this->db->select('check-full-text-search', [], ["`pid` = ? AND MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", getmypid(), $this->escapeKeywords($searchtext)]) !== false; + return $this->db->select('check-full-text-search', [], ["`pid` = ? AND MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", getmypid(), Engagement::escapeKeywords($searchtext)]) !== false; } /** @@ -310,15 +310,7 @@ class UserDefinedChannel extends \Friendica\BaseRepository private function inFulltext(string $fullTextSearch): bool { - return $this->db->exists('check-full-text-search', ["`pid` = ? AND MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", getmypid(), $this->escapeKeywords($fullTextSearch)]); - } - - private function escapeKeywords(string $fullTextSearch): string - { - foreach (Engagement::KEYWORDS as $keyword) { - $fullTextSearch = preg_replace('~(' . $keyword . ':.[\w@\.-]+)~', '"$1"', $fullTextSearch); - } - return $fullTextSearch; + return $this->db->exists('check-full-text-search', ["`pid` = ? AND MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", getmypid(), Engagement::escapeKeywords($fullTextSearch)]); } private function getUserCondition() diff --git a/src/Content/Text/BBCode.php b/src/Content/Text/BBCode.php index 8396a95cf8..a824340300 100644 --- a/src/Content/Text/BBCode.php +++ b/src/Content/Text/BBCode.php @@ -255,12 +255,15 @@ class BBCode // Removes attachments $text = self::removeAttachment($text); - // Add images because of possible alt texts + // Add text from attached media if (!empty($uri_id)) { - $text = Post\Media::addAttachmentsToBody($uri_id, $text, [Post\Media::IMAGE]); - - foreach (Post\Media::getByURIId($uri_id, [Post\Media::HTML]) as $media) { - $text .= ' ' . $media['name'] . ' ' . $media['description']; + foreach (Post\Media::getByURIId($uri_id) as $media) { + if (!empty($media['description']) && (stripos($text, $media['description']) === false)) { + $text .= ' ' . $media['description']; + } + if (in_array($media['type'], [Post\Media::HTML, Post\Media::ACTIVITY]) && !empty($media['name']) && (stripos($text, $media['name']) === false)) { + $text .= ' ' . $media['name']; + } } } diff --git a/src/Model/Item.php b/src/Model/Item.php index 91e7e8613e..d28009a4e6 100644 --- a/src/Model/Item.php +++ b/src/Model/Item.php @@ -34,6 +34,7 @@ use Friendica\Core\Protocol; use Friendica\Core\Renderer; use Friendica\Core\System; use Friendica\Core\Worker; +use Friendica\Database\Database; use Friendica\Database\DBA; use Friendica\DI; use Friendica\Model\Post\Category; @@ -243,6 +244,11 @@ class Item $content_fields['raw-body'] = BBCode::removeAttachment($content_fields['raw-body']); Post\Content::update($item['uri-id'], $content_fields); + + $searchtext = Post\Engagement::getSearchTextForUriId($item['uri-id'], true); + DBA::update('post-engagement', ['searchtext' => $searchtext], ['uri-id' => $item['uri-id']]); + DBA::update('post-searchindex', ['searchtext' => $searchtext], ['uri-id' => $item['uri-id']]); + } if (!empty($fields['file'])) { @@ -1443,6 +1449,16 @@ class Item } $engagement_uri_id = Post\Engagement::storeFromItem($posted_item); + + if (in_array($item['gravity'], [self::GRAVITY_PARENT, self::GRAVITY_COMMENT])) { + $search = [ + 'uri-id' => $posted_item['uri-id'], + 'network' => $posted_item['network'], + 'private' => $posted_item['private'], + 'searchtext' => Post\Engagement::getSearchTextForUriId($posted_item['uri-id']), + ]; + DBA::insert('post-searchindex', $search, Database::INSERT_IGNORE); + } if (($posted_item['gravity'] == self::GRAVITY_ACTIVITY) && ($posted_item['verb'] == Activity::ANNOUNCE) && ($posted_item['parent-uri-id'] == $posted_item['thr-parent-id'])) { self::reshareChannelPost($posted_item['thr-parent-id'], $posted_item['author-id']); diff --git a/src/Model/Post/Content.php b/src/Model/Post/Content.php index 7253f9a342..74cfbfe6f5 100644 --- a/src/Model/Post/Content.php +++ b/src/Model/Post/Content.php @@ -22,11 +22,10 @@ namespace Friendica\Model\Post; use \BadMethodCallException; -use Friendica\Core\Protocol; use Friendica\Database\Database; use Friendica\Database\DBA; -use Friendica\Database\DBStructure; use Friendica\DI; +use Friendica\Model\Item; use Friendica\Model\Post; class Content @@ -109,9 +108,12 @@ class Content */ public static function getURIIdListBySearch(string $search, int $uid = 0, int $start = 0, int $limit = 100, int $last_uriid = 0) { - $condition = ["`uri-id` IN (SELECT `uri-id` FROM `post-content` WHERE MATCH (`title`, `content-warning`, `body`) AGAINST (? IN BOOLEAN MODE)) - AND (`uid` = ? OR (`uid` = ? AND NOT `global`)) AND (`network` IN (?, ?, ?, ?) OR (`uid` = ? AND `uid` != ?))", - str_replace('@', ' ', $search), 0, $uid, Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, $uid, 0]; + $search = Post\Engagement::escapeKeywords($search); + if ($uid != 0) { + $condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and (private = ? OR `uri-id` in (SELECT `uri-id` FROM `post-user` where `uid` = ?))", $search, Item::PUBLIC, $uid]; + } else { + $condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and private = ?", $search, Item::PUBLIC]; + } if (!empty($last_uriid)) { $condition = DBA::mergeConditions($condition, ["`uri-id` < ?", $last_uriid]); @@ -122,7 +124,7 @@ class Content 'limit' => [$start, $limit] ]; - $tags = Post::select(['uri-id'], $condition, $params); + $tags = DBA::select('post-searchindex', ['uri-id'], $condition, $params); $uriids = []; while ($tag = DBA::fetch($tags)) { @@ -135,9 +137,12 @@ class Content public static function countBySearch(string $search, int $uid = 0) { - $condition = ["`uri-id` IN (SELECT `uri-id` FROM `post-content` WHERE MATCH (`title`, `content-warning`, `body`) AGAINST (? IN BOOLEAN MODE)) - AND (`uid` = ? OR (`uid` = ? AND NOT `global`)) AND (`network` IN (?, ?, ?, ?) OR (`uid` = ? AND `uid` != ?))", - str_replace('@', ' ', $search), 0, $uid, Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, $uid, 0]; - return Post::count($condition); + $search = Post\Engagement::escapeKeywords($search); + if ($uid != 0) { + $condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and (private = ? OR `uri-id` in (SELECT `uri-id` FROM `post-user` where `uid` = ?))", $search, Item::PUBLIC, $uid]; + } else { + $condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and private = ?", $search, Item::PUBLIC]; + } + return DBA::count('post-searchindex', $condition); } } diff --git a/src/Model/Post/Engagement.php b/src/Model/Post/Engagement.php index 61f73948de..5047d71dd2 100644 --- a/src/Model/Post/Engagement.php +++ b/src/Model/Post/Engagement.php @@ -146,7 +146,7 @@ class Engagement 'owner-contact-type' => $author['contact-type'], 'owner-nick' => $author['nick'], 'owner-addr' => $author['addr'], - 'author-gsid' => $author['gsid'], + 'owner-gsid' => $author['gsid'], ]; foreach ($receivers as $receiver) { @@ -158,6 +158,21 @@ class Engagement return self::getSearchText($item, $receivers, $tags); } + public static function getSearchTextForUriId(int $uri_id, bool $refresh = false): string + { + if (!$refresh) { + $engagement = DBA::selectFirst('post-engagement', ['searchtext'], ['uri-id' => $uri_id]); + if (!empty($engagement['searchtext'])) { + return $engagement['searchtext']; + } + } + + $post = Post::selectFirstPost(['uri-id', 'network', 'title', 'content-warning', 'body', 'private', + 'author-id', 'author-contact-type', 'author-nick', 'author-addr', 'author-gsid', + 'owner-id', 'owner-contact-type', 'owner-nick', 'owner-addr', 'owner-gsid'], ['uri-id' => $uri_id]); + return self::getSearchTextForItem($post); + } + private static function getSearchTextForItem(array $item): string { $receivers = array_column(Tag::getByURIId($item['uri-id'], [Tag::MENTION, Tag::IMPLICIT_MENTION, Tag::EXCLUSIVE_MENTION, Tag::AUDIENCE]), 'url'); @@ -167,24 +182,24 @@ class Engagement private static function getSearchText(array $item, array $receivers, array $tags): string { - $body = '[nosmile]network:' . $item['network']; + $body = '[nosmile]network_' . $item['network']; if (!empty($item['author-gsid'])) { $gserver = DBA::selectFirst('gserver', ['platform', 'nurl'], ['id' => $item['author-gsid']]); $platform = preg_replace( '/[\W]/', '', $gserver['platform'] ?? ''); if (!empty($platform)) { - $body .= ' platform:' . $platform; + $body .= ' platform_' . $platform; } - $body .= ' server:' . parse_url($gserver['nurl'], PHP_URL_HOST); + $body .= ' server_' . parse_url($gserver['nurl'], PHP_URL_HOST); } if (($item['owner-contact-type'] == Contact::TYPE_COMMUNITY) && !empty($item['owner-gsid']) && ($item['owner-gsid'] != ($item['author-gsid'] ?? 0))) { $gserver = DBA::selectFirst('gserver', ['platform', 'nurl'], ['id' => $item['owner-gsid']]); $platform = preg_replace( '/[\W]/', '', $gserver['platform'] ?? ''); - if (!empty($platform) && !strpos($body, 'platform:' . $platform)) { - $body .= ' platform:' . $platform; + if (!empty($platform) && !strpos($body, 'platform_' . $platform)) { + $body .= ' platform_' . $platform; } - $body .= ' server:' . parse_url($gserver['nurl'], PHP_URL_HOST); + $body .= ' server_' . parse_url($gserver['nurl'], PHP_URL_HOST); } switch ($item['private']) { @@ -212,16 +227,16 @@ class Engagement } if ($item['author-contact-type'] == Contact::TYPE_COMMUNITY) { - $body .= ' group:' . $item['author-nick'] . ' group:' . $item['author-addr']; + $body .= ' group_' . $item['author-nick'] . ' group_' . $item['author-addr']; } elseif (in_array($item['author-contact-type'], [Contact::TYPE_PERSON, Contact::TYPE_NEWS, Contact::TYPE_ORGANISATION])) { - $body .= ' from:' . $item['author-nick'] . ' from:' . $item['author-addr']; + $body .= ' from_' . $item['author-nick'] . ' from_' . $item['author-addr']; } if ($item['author-id'] != $item['owner-id']) { if ($item['owner-contact-type'] == Contact::TYPE_COMMUNITY) { - $body .= ' group:' . $item['owner-nick'] . ' group:' . $item['owner-addr']; + $body .= ' group_' . $item['owner-nick'] . ' group_' . $item['owner-addr']; } elseif (in_array($item['owner-contact-type'], [Contact::TYPE_PERSON, Contact::TYPE_NEWS, Contact::TYPE_ORGANISATION])) { - $body .= ' from:' . $item['owner-nick'] . ' from:' . $item['owner-addr']; + $body .= ' from_' . $item['owner-nick'] . ' from_' . $item['owner-addr']; } } @@ -231,15 +246,15 @@ class Engagement continue; } - if (($contact['contact-type'] == Contact::TYPE_COMMUNITY) && !strpos($body, 'group:' . $contact['addr'])) { - $body .= ' group:' . $contact['nick'] . ' group:' . $contact['addr']; + if (($contact['contact-type'] == Contact::TYPE_COMMUNITY) && !strpos($body, 'group_' . $contact['addr'])) { + $body .= ' group_' . $contact['nick'] . ' group_' . $contact['addr']; } elseif (in_array($contact['contact-type'], [Contact::TYPE_PERSON, Contact::TYPE_NEWS, Contact::TYPE_ORGANISATION])) { - $body .= ' to:' . $contact['nick'] . ' to:' . $contact['addr']; + $body .= ' to_' . $contact['nick'] . ' to_' . $contact['addr']; } } foreach ($tags as $tag) { - $body .= ' tag:' . $tag; + $body .= ' tag_' . $tag; } $body .= ' ' . $item['title'] . ' ' . $item['content-warning'] . ' ' . $item['body']; @@ -293,4 +308,12 @@ class Engagement return DateTimeFormat::utc('now - ' . DI::config()->get('channel', 'engagement_hours') . ' hour'); } + + public static function escapeKeywords(string $fullTextSearch): string + { + foreach (Engagement::KEYWORDS as $keyword) { + $fullTextSearch = preg_replace('~(' . $keyword . '):(.[\w\*@\.-]+)~', '$1_$2', $fullTextSearch); + } + return $fullTextSearch; + } } diff --git a/src/Module/Api/Mastodon/Search.php b/src/Module/Api/Mastodon/Search.php index facaa06eb1..23f920dc3e 100644 --- a/src/Module/Api/Mastodon/Search.php +++ b/src/Module/Api/Mastodon/Search.php @@ -23,7 +23,6 @@ namespace Friendica\Module\Api\Mastodon; use Friendica\Core\Logger; use Friendica\Core\Protocol; -use Friendica\Core\System; use Friendica\Database\DBA; use Friendica\DI; use Friendica\Model\Contact; @@ -154,10 +153,9 @@ class Search extends BaseApi substr($q, 1), 0, $uid, Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, $uid, 0]; $table = 'tag-search-view'; } else { - $condition = ["`uri-id` IN (SELECT `uri-id` FROM `post-content` WHERE MATCH (`title`, `content-warning`, `body`) AGAINST (? IN BOOLEAN MODE)) - AND (`uid` = ? OR (`uid` = ? AND NOT `global`)) AND (`network` IN (?, ?, ?, ?) OR (`uid` = ? AND `uid` != ?))", - str_replace('@', ' ', $q), 0, $uid, Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, $uid, 0]; - $table = 'post-user-view'; + $q = Post\Engagement::escapeKeywords($q); + $condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and (private = ? OR `uri-id` in (SELECT `uri-id` FROM `post-user` where `uid` = ?))", $q, Item::PUBLIC, $uid]; + $table = 'post-searchindex'; } if (!empty($max_id)) { diff --git a/src/Module/Conversation/Timeline.php b/src/Module/Conversation/Timeline.php index dd8e885e7c..db801d707b 100644 --- a/src/Module/Conversation/Timeline.php +++ b/src/Module/Conversation/Timeline.php @@ -398,11 +398,7 @@ class Timeline extends BaseModule } if (!empty($channel->fullTextSearch)) { - $search = $channel->fullTextSearch; - foreach (Engagement::KEYWORDS as $keyword) { - $search = preg_replace('~(' . $keyword . ':.[\w@\.-]+)~', '"$1"', $search); - } - $condition = DBA::mergeConditions($condition, ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", $search]); + $condition = DBA::mergeConditions($condition, ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", Engagement::escapeKeywords($channel->fullTextSearch)]); } if (!empty($channel->includeTags)) { diff --git a/static/dbstructure.config.php b/static/dbstructure.config.php index b244621fbe..d89c2058bb 100644 --- a/static/dbstructure.config.php +++ b/static/dbstructure.config.php @@ -56,7 +56,7 @@ use Friendica\Database\DBA; // This file is required several times during the test in DbaDefinition which justifies this condition if (!defined('DB_UPDATE_VERSION')) { - define('DB_UPDATE_VERSION', 1546); + define('DB_UPDATE_VERSION', 1547); } return [ @@ -1480,6 +1480,19 @@ return [ "PRIMARY" => ["uri-id", "id"], ] ], + "post-searchindex" => [ + "comment" => "Content for all posts", + "fields" => [ + "uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"], + "network" => ["type" => "char(4)", "comment" => ""], + "private" => ["type" => "tinyint unsigned", "comment" => "0=public, 1=private, 2=unlisted"], + "searchtext" => ["type" => "mediumtext", "comment" => "Simplified text for the full text search"], + ], + "indexes" => [ + "PRIMARY" => ["uri-id"], + "searchtext" => ["FULLTEXT", "searchtext"], + ] + ], "post-tag" => [ "comment" => "post relation to tags", "fields" => [