Merge pull request #13846 from annando/search

Improved search results and performance by adding a separate search index table
This commit is contained in:
Hypolite Petovan 2024-01-22 22:31:20 -05:00 committed by GitHub
commit 2e5046f8c3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 672 additions and 370 deletions

View file

@ -1,6 +1,6 @@
-- ------------------------------------------ -- ------------------------------------------
-- Friendica 2024.03-dev (Yellow Archangel) -- Friendica 2024.03-dev (Yellow Archangel)
-- DB_UPDATE_VERSION 1546 -- DB_UPDATE_VERSION 1547
-- ------------------------------------------ -- ------------------------------------------
@ -1293,7 +1293,6 @@ CREATE TABLE IF NOT EXISTS `post-content` (
PRIMARY KEY(`uri-id`), PRIMARY KEY(`uri-id`),
INDEX `plink` (`plink`(191)), INDEX `plink` (`plink`(191)),
INDEX `resource-id` (`resource-id`), INDEX `resource-id` (`resource-id`),
FULLTEXT INDEX `title-content-warning-body` (`title`,`content-warning`,`body`),
INDEX `quote-uri-id` (`quote-uri-id`), INDEX `quote-uri-id` (`quote-uri-id`),
FOREIGN KEY (`uri-id`) REFERENCES `item-uri` (`id`) ON UPDATE RESTRICT ON DELETE CASCADE, FOREIGN KEY (`uri-id`) REFERENCES `item-uri` (`id`) ON UPDATE RESTRICT ON DELETE CASCADE,
FOREIGN KEY (`quote-uri-id`) REFERENCES `item-uri` (`id`) ON UPDATE RESTRICT ON DELETE CASCADE FOREIGN KEY (`quote-uri-id`) REFERENCES `item-uri` (`id`) ON UPDATE RESTRICT ON DELETE CASCADE
@ -1460,6 +1459,21 @@ CREATE TABLE IF NOT EXISTS `post-question-option` (
FOREIGN KEY (`uri-id`) REFERENCES `item-uri` (`id`) ON UPDATE RESTRICT ON DELETE CASCADE FOREIGN KEY (`uri-id`) REFERENCES `item-uri` (`id`) ON UPDATE RESTRICT ON DELETE CASCADE
) DEFAULT COLLATE utf8mb4_general_ci COMMENT='Question option'; ) DEFAULT COLLATE utf8mb4_general_ci COMMENT='Question option';
--
-- TABLE post-searchindex
--
-- Search index table: one row per post (keyed by `uri-id`) holding the
-- simplified search text that is queried via MATCH (`searchtext`) AGAINST (...).
CREATE TABLE IF NOT EXISTS `post-searchindex` (
`uri-id` int unsigned NOT NULL COMMENT 'Id of the item-uri table entry that contains the item uri',
`network` char(4) COMMENT '',
`private` tinyint unsigned COMMENT '0=public, 1=private, 2=unlisted',
`searchtext` mediumtext COMMENT 'Simplified text for the full text search',
`created` datetime COMMENT '',
PRIMARY KEY(`uri-id`),
-- `created` is indexed because rows are filtered and deleted by a `created` date limit
INDEX `created` (`created`),
-- full text index backing the MATCH ... AGAINST (... IN BOOLEAN MODE) searches
FULLTEXT INDEX `searchtext` (`searchtext`),
-- rows are removed automatically when the referenced item-uri entry is deleted
FOREIGN KEY (`uri-id`) REFERENCES `item-uri` (`id`) ON UPDATE RESTRICT ON DELETE CASCADE
) DEFAULT COLLATE utf8mb4_general_ci COMMENT='Content for all posts';
-- --
-- TABLE post-tag -- TABLE post-tag
-- --
@ -1711,7 +1725,6 @@ CREATE TABLE IF NOT EXISTS `profile` (
`net-publish` boolean NOT NULL DEFAULT '0' COMMENT 'publish profile in global directory', `net-publish` boolean NOT NULL DEFAULT '0' COMMENT 'publish profile in global directory',
PRIMARY KEY(`id`), PRIMARY KEY(`id`),
INDEX `uid_is-default` (`uid`,`is-default`), INDEX `uid_is-default` (`uid`,`is-default`),
FULLTEXT INDEX `pub_keywords` (`pub_keywords`),
FOREIGN KEY (`uid`) REFERENCES `user` (`uid`) ON UPDATE RESTRICT ON DELETE CASCADE FOREIGN KEY (`uid`) REFERENCES `user` (`uid`) ON UPDATE RESTRICT ON DELETE CASCADE
) DEFAULT COLLATE utf8mb4_general_ci COMMENT='user profiles data'; ) DEFAULT COLLATE utf8mb4_general_ci COMMENT='user profiles data';

View file

@ -80,6 +80,7 @@ Additionally to the search for content, there are additional keywords that can b
* visibility:public * visibility:public
* visibility:unlisted * visibility:unlisted
* visibility:private * visibility:private
* language - Use "language:code" to search for posts with the given language in the [ISO 639-1](https://en.wikipedia.org/wiki/ISO_639-1) format.
Remember that you can combine these keywords. Remember that you can combine these keywords.
So for example you can create a channel with all posts that talk about the Fediverse - that aren't posted in the Fediverse with the search terms: "fediverse -network:apub -network:dfrn" So for example you can create a channel with all posts that talk about the Fediverse - that aren't posted in the Fediverse with the search terms: "fediverse -network:apub -network:dfrn"

View file

@ -70,6 +70,7 @@ Database Tables
| [post-media](help/database/db_post-media) | Attached media | | [post-media](help/database/db_post-media) | Attached media |
| [post-question](help/database/db_post-question) | Question | | [post-question](help/database/db_post-question) | Question |
| [post-question-option](help/database/db_post-question-option) | Question option | | [post-question-option](help/database/db_post-question-option) | Question option |
| [post-searchindex](help/database/db_post-searchindex) | Content for all posts |
| [post-tag](help/database/db_post-tag) | post relation to tags | | [post-tag](help/database/db_post-tag) | post relation to tags |
| [post-thread](help/database/db_post-thread) | Thread related data | | [post-thread](help/database/db_post-thread) | Thread related data |
| [post-thread-user](help/database/db_post-thread-user) | Thread related data per user | | [post-thread-user](help/database/db_post-thread-user) | Thread related data per user |

View file

@ -31,11 +31,10 @@ Indexes
------------ ------------
| Name | Fields | | Name | Fields |
| -------------------------- | -------------------------------------- | | ------------ | ------------ |
| PRIMARY | uri-id | | PRIMARY | uri-id |
| plink | plink(191) | | plink | plink(191) |
| resource-id | resource-id | | resource-id | resource-id |
| title-content-warning-body | FULLTEXT, title, content-warning, body |
| quote-uri-id | quote-uri-id | | quote-uri-id | quote-uri-id |
Foreign Keys Foreign Keys

View file

@ -0,0 +1,33 @@
Table post-searchindex
===========
Content for all posts
Fields
------
| Field | Description | Type | Null | Key | Default | Extra |
| ---------- | --------------------------------------------------------- | ---------------- | ---- | --- | ------- | ----- |
| uri-id | Id of the item-uri table entry that contains the item uri | int unsigned | NO | PRI | NULL | |
| network | | char(4) | YES | | NULL | |
| private | 0=public, 1=private, 2=unlisted | tinyint unsigned | YES | | NULL | |
| searchtext | Simplified text for the full text search | mediumtext | YES | | NULL | |
| created | | datetime | YES | | NULL | |
Indexes
------------
| Name | Fields |
| ---------- | -------------------- |
| PRIMARY | uri-id |
| created | created |
| searchtext | FULLTEXT, searchtext |
Foreign Keys
------------
| Field | Target Table | Target Field |
|-------|--------------|--------------|
| uri-id | [item-uri](help/database/db_item-uri) | id |
Return to [database documentation](help/database)

View file

@ -57,10 +57,9 @@ Indexes
------------ ------------
| Name | Fields | | Name | Fields |
| -------------- | ---------------------- | | -------------- | --------------- |
| PRIMARY | id | | PRIMARY | id |
| uid_is-default | uid, is-default | | uid_is-default | uid, is-default |
| pub_keywords | FULLTEXT, pub_keywords |
Foreign Keys Foreign Keys
------------ ------------

View file

@ -156,7 +156,7 @@ class UserDefinedChannel extends \Friendica\BaseRepository
return true; return true;
} }
return $this->db->select('check-full-text-search', [], ["`pid` = ? AND MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", getmypid(), $this->escapeKeywords($searchtext)]) !== false; return $this->db->select('check-full-text-search', [], ["`pid` = ? AND MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", getmypid(), Engagement::escapeKeywords($searchtext)]) !== false;
} }
/** /**
@ -310,15 +310,7 @@ class UserDefinedChannel extends \Friendica\BaseRepository
private function inFulltext(string $fullTextSearch): bool private function inFulltext(string $fullTextSearch): bool
{ {
return $this->db->exists('check-full-text-search', ["`pid` = ? AND MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", getmypid(), $this->escapeKeywords($fullTextSearch)]); return $this->db->exists('check-full-text-search', ["`pid` = ? AND MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", getmypid(), Engagement::escapeKeywords($fullTextSearch)]);
}
private function escapeKeywords(string $fullTextSearch): string
{
foreach (Engagement::KEYWORDS as $keyword) {
$fullTextSearch = preg_replace('~(' . $keyword . ':.[\w@\.-]+)~', '"$1"', $fullTextSearch);
}
return $fullTextSearch;
} }
private function getUserCondition() private function getUserCondition()

View file

@ -255,12 +255,19 @@ class BBCode
// Removes attachments // Removes attachments
$text = self::removeAttachment($text); $text = self::removeAttachment($text);
// Add images because of possible alt texts // Add text from attached media
if (!empty($uri_id)) { if (!empty($uri_id)) {
$text = Post\Media::addAttachmentsToBody($uri_id, $text, [Post\Media::IMAGE]); foreach (Post\Media::getByURIId($uri_id) as $media) {
if (!empty($media['description']) && (stripos($text, $media['description']) === false)) {
foreach (Post\Media::getByURIId($uri_id, [Post\Media::HTML]) as $media) { $text .= ' ' . $media['description'];
$text .= ' ' . $media['name'] . ' ' . $media['description']; }
// For HTML and ACTIVITY media, also append the name, author name and publisher name
if (in_array($media['type'], [Post\Media::HTML, Post\Media::ACTIVITY])) {
	foreach (['name', 'author-name', 'publisher-name'] as $key) {
		// empty() must wrap only the array access: wrapping the whole "&&" expression
		// would evaluate $media[$key] outside of empty()'s missing-key protection.
		// A value is only appended when it is not already part of the text (case-insensitive).
		if (!empty($media[$key]) && (stripos($text, $media[$key]) === false)) {
			$text .= ' ' . $media[$key];
		}
	}
}
} }
} }

View file

@ -52,7 +52,7 @@ class PostUpdate
// Needed for the helper function to read from the legacy term table // Needed for the helper function to read from the legacy term table
const OBJECT_TYPE_POST = 1; const OBJECT_TYPE_POST = 1;
const VERSION = 1544; const VERSION = 1547;
/** /**
* Calls the post update functions * Calls the post update functions
@ -128,6 +128,9 @@ class PostUpdate
if (!self::update1544()) { if (!self::update1544()) {
return false; return false;
} }
if (!self::update1547()) {
return false;
}
return true; return true;
} }
@ -1358,4 +1361,62 @@ class PostUpdate
return false; return false;
} }
/**
 * Create "post-searchindex" entries for old entries.
 *
 * Processes one batch of up to 1000 posts (parents and comments) per call.
 * The last processed uri-id is stored in the key-value storage under
 * "post_update_version_1547_id", so that the next call continues with
 * uri-ids below that value. The job is marked as done once a batch
 * contained 100 rows or less.
 *
 * @return bool "true" when the job is done
 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
 * @throws \ImagickException
 */
private static function update1547()
{
	// Was the script completed?
	if (DI::keyValue()->get('post_update_version') >= 1547) {
		return true;
	}

	// Resume from the stored position; on the first run start at the first
	// post returned by the "uri-id" ordering.
	// NOTE(review): the batch condition is "uri-id < $id", so this starting post itself
	// is not part of any batch - confirm that this is acceptable.
	$id = (int)(DI::keyValue()->get('post_update_version_1547_id') ?? 0);
	if ($id == 0) {
		$post = Post::selectFirstPost(['uri-id'], [], ['order' => ['uri-id' => true]]);
		$id = (int)($post['uri-id'] ?? 0);
	}

	Logger::info('Start', ['uri-id' => $id]);

	$rows = 0;

	$condition = ["`uri-id` < ? AND `gravity` IN (?, ?)", $id, Item::GRAVITY_COMMENT, Item::GRAVITY_PARENT];

	$limit = Post\SearchIndex::searchAgeDateLimit();
	if (!empty($limit)) {
		// mergeConditions() returns the merged condition, it does not modify its
		// arguments. The result has to be assigned, otherwise the age limit is
		// silently dropped from the query.
		$condition = DBA::mergeConditions($condition, ["`created` > ?", $limit]);
	}

	$posts = Post::selectPosts(['uri-id', 'network', 'private', 'created'], $condition, ['order' => ['uri-id' => true], 'limit' => 1000]);

	if (DBA::errorNo() != 0) {
		Logger::error('Database error', ['no' => DBA::errorNo(), 'message' => DBA::errorMessage()]);
		return false;
	}

	while ($post = Post::fetch($posts)) {
		// Remember the position of the last processed post for the next batch
		$id = $post['uri-id'];
		// "true" forces the search text to be regenerated instead of reusing a stored one
		Post\SearchIndex::insert($post['uri-id'], $post['network'], $post['private'], $post['created'], true);
		++$rows;
	}
	DBA::close($posts);

	DI::keyValue()->set('post_update_version_1547_id', $id);

	Logger::info('Processed', ['rows' => $rows, 'last' => $id]);

	// A small batch means that there is nothing relevant left to process
	if ($rows <= 100) {
		DI::keyValue()->set('post_update_version', 1547);
		Logger::info('Done');
		return true;
	}

	return false;
}
} }

View file

@ -34,6 +34,7 @@ use Friendica\Core\Protocol;
use Friendica\Core\Renderer; use Friendica\Core\Renderer;
use Friendica\Core\System; use Friendica\Core\System;
use Friendica\Core\Worker; use Friendica\Core\Worker;
use Friendica\Database\Database;
use Friendica\Database\DBA; use Friendica\Database\DBA;
use Friendica\DI; use Friendica\DI;
use Friendica\Model\Post\Category; use Friendica\Model\Post\Category;
@ -243,6 +244,10 @@ class Item
$content_fields['raw-body'] = BBCode::removeAttachment($content_fields['raw-body']); $content_fields['raw-body'] = BBCode::removeAttachment($content_fields['raw-body']);
Post\Content::update($item['uri-id'], $content_fields); Post\Content::update($item['uri-id'], $content_fields);
$searchtext = Post\Engagement::getSearchTextForUriId($item['uri-id'], true);
DBA::update('post-engagement', ['searchtext' => $searchtext], ['uri-id' => $item['uri-id']]);
Post\SearchIndex::update($item['uri-id']);
} }
if (!empty($fields['file'])) { if (!empty($fields['file'])) {
@ -1444,6 +1449,10 @@ class Item
$engagement_uri_id = Post\Engagement::storeFromItem($posted_item); $engagement_uri_id = Post\Engagement::storeFromItem($posted_item);
if (in_array($posted_item['gravity'], [self::GRAVITY_PARENT, self::GRAVITY_COMMENT])) {
Post\SearchIndex::insert($posted_item['uri-id'], $posted_item['network'], $posted_item['private'], $posted_item['created']);
}
if (($posted_item['gravity'] == self::GRAVITY_ACTIVITY) && ($posted_item['verb'] == Activity::ANNOUNCE) && ($posted_item['parent-uri-id'] == $posted_item['thr-parent-id'])) { if (($posted_item['gravity'] == self::GRAVITY_ACTIVITY) && ($posted_item['verb'] == Activity::ANNOUNCE) && ($posted_item['parent-uri-id'] == $posted_item['thr-parent-id'])) {
self::reshareChannelPost($posted_item['thr-parent-id'], $posted_item['author-id']); self::reshareChannelPost($posted_item['thr-parent-id'], $posted_item['author-id']);
} elseif ($engagement_uri_id) { } elseif ($engagement_uri_id) {

View file

@ -22,11 +22,10 @@
namespace Friendica\Model\Post; namespace Friendica\Model\Post;
use \BadMethodCallException; use \BadMethodCallException;
use Friendica\Core\Protocol;
use Friendica\Database\Database; use Friendica\Database\Database;
use Friendica\Database\DBA; use Friendica\Database\DBA;
use Friendica\Database\DBStructure;
use Friendica\DI; use Friendica\DI;
use Friendica\Model\Item;
use Friendica\Model\Post; use Friendica\Model\Post;
class Content class Content
@ -109,9 +108,12 @@ class Content
*/ */
public static function getURIIdListBySearch(string $search, int $uid = 0, int $start = 0, int $limit = 100, int $last_uriid = 0) public static function getURIIdListBySearch(string $search, int $uid = 0, int $start = 0, int $limit = 100, int $last_uriid = 0)
{ {
$condition = ["`uri-id` IN (SELECT `uri-id` FROM `post-content` WHERE MATCH (`title`, `content-warning`, `body`) AGAINST (? IN BOOLEAN MODE)) $search = Post\Engagement::escapeKeywords($search);
AND (`uid` = ? OR (`uid` = ? AND NOT `global`)) AND (`network` IN (?, ?, ?, ?) OR (`uid` = ? AND `uid` != ?))", if ($uid != 0) {
str_replace('@', ' ', $search), 0, $uid, Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, $uid, 0]; $condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and (private = ? OR `uri-id` in (SELECT `uri-id` FROM `post-user` where `uid` = ?))", $search, Item::PUBLIC, $uid];
} else {
$condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and private = ?", $search, Item::PUBLIC];
}
if (!empty($last_uriid)) { if (!empty($last_uriid)) {
$condition = DBA::mergeConditions($condition, ["`uri-id` < ?", $last_uriid]); $condition = DBA::mergeConditions($condition, ["`uri-id` < ?", $last_uriid]);
@ -122,7 +124,7 @@ class Content
'limit' => [$start, $limit] 'limit' => [$start, $limit]
]; ];
$tags = Post::select(['uri-id'], $condition, $params); $tags = DBA::select('post-searchindex', ['uri-id'], $condition, $params);
$uriids = []; $uriids = [];
while ($tag = DBA::fetch($tags)) { while ($tag = DBA::fetch($tags)) {
@ -135,9 +137,12 @@ class Content
public static function countBySearch(string $search, int $uid = 0) public static function countBySearch(string $search, int $uid = 0)
{ {
$condition = ["`uri-id` IN (SELECT `uri-id` FROM `post-content` WHERE MATCH (`title`, `content-warning`, `body`) AGAINST (? IN BOOLEAN MODE)) $search = Post\Engagement::escapeKeywords($search);
AND (`uid` = ? OR (`uid` = ? AND NOT `global`)) AND (`network` IN (?, ?, ?, ?) OR (`uid` = ? AND `uid` != ?))", if ($uid != 0) {
str_replace('@', ' ', $search), 0, $uid, Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, $uid, 0]; $condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and (private = ? OR `uri-id` in (SELECT `uri-id` FROM `post-user` where `uid` = ?))", $search, Item::PUBLIC, $uid];
return Post::count($condition); } else {
$condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and private = ?", $search, Item::PUBLIC];
}
return DBA::count('post-searchindex', $condition);
} }
} }

View file

@ -24,7 +24,6 @@ namespace Friendica\Model\Post;
use Friendica\Content\Text\BBCode; use Friendica\Content\Text\BBCode;
use Friendica\Core\Logger; use Friendica\Core\Logger;
use Friendica\Core\Protocol; use Friendica\Core\Protocol;
use Friendica\Database\Database;
use Friendica\Database\DBA; use Friendica\Database\DBA;
use Friendica\DI; use Friendica\DI;
use Friendica\Model\Contact; use Friendica\Model\Contact;
@ -39,7 +38,7 @@ use Friendica\Util\DateTimeFormat;
class Engagement class Engagement
{ {
const KEYWORDS = ['source', 'server', 'from', 'to', 'group', 'tag', 'network', 'platform', 'visibility']; const KEYWORDS = ['source', 'server', 'from', 'to', 'group', 'tag', 'network', 'platform', 'visibility', 'language'];
/** /**
* Store engagement data from an item array * Store engagement data from an item array
@ -146,7 +145,7 @@ class Engagement
'owner-contact-type' => $author['contact-type'], 'owner-contact-type' => $author['contact-type'],
'owner-nick' => $author['nick'], 'owner-nick' => $author['nick'],
'owner-addr' => $author['addr'], 'owner-addr' => $author['addr'],
'author-gsid' => $author['gsid'], 'owner-gsid' => $author['gsid'],
]; ];
foreach ($receivers as $receiver) { foreach ($receivers as $receiver) {
@ -158,6 +157,24 @@ class Engagement
return self::getSearchText($item, $receivers, $tags); return self::getSearchText($item, $receivers, $tags);
} }
/**
 * Returns the search text for the post with the given uri-id.
 *
 * Unless a refresh is requested, a non-empty "searchtext" stored in the
 * "post-engagement" table is returned as is. Otherwise the text is
 * generated from the post data.
 *
 * NOTE(review): "language" is not part of the selected fields, while the
 * text generation reads $item['language'] - confirm whether it should be fetched here.
 *
 * @param int  $uri_id  uri-id of the post
 * @param bool $refresh "true" skips the stored search text and always regenerates it
 * @return string The search text, or an empty string when the post wasn't found
 */
public static function getSearchTextForUriId(int $uri_id, bool $refresh = false): string
{
	$stored = $refresh ? [] : DBA::selectFirst('post-engagement', ['searchtext'], ['uri-id' => $uri_id]);
	if (!empty($stored['searchtext'])) {
		return $stored['searchtext'];
	}

	$fields = [
		'uri-id', 'network', 'title', 'content-warning', 'body', 'private',
		'author-id', 'author-contact-type', 'author-nick', 'author-addr', 'author-gsid',
		'owner-id', 'owner-contact-type', 'owner-nick', 'owner-addr', 'owner-gsid',
	];

	$item = Post::selectFirstPost($fields, ['uri-id' => $uri_id]);

	return empty($item['uri-id']) ? '' : self::getSearchTextForItem($item);
}
private static function getSearchTextForItem(array $item): string private static function getSearchTextForItem(array $item): string
{ {
$receivers = array_column(Tag::getByURIId($item['uri-id'], [Tag::MENTION, Tag::IMPLICIT_MENTION, Tag::EXCLUSIVE_MENTION, Tag::AUDIENCE]), 'url'); $receivers = array_column(Tag::getByURIId($item['uri-id'], [Tag::MENTION, Tag::IMPLICIT_MENTION, Tag::EXCLUSIVE_MENTION, Tag::AUDIENCE]), 'url');
@ -167,61 +184,61 @@ class Engagement
private static function getSearchText(array $item, array $receivers, array $tags): string private static function getSearchText(array $item, array $receivers, array $tags): string
{ {
$body = '[nosmile]network:' . $item['network']; $body = '[nosmile]network_' . $item['network'];
if (!empty($item['author-gsid'])) { if (!empty($item['author-gsid'])) {
$gserver = DBA::selectFirst('gserver', ['platform', 'nurl'], ['id' => $item['author-gsid']]); $gserver = DBA::selectFirst('gserver', ['platform', 'nurl'], ['id' => $item['author-gsid']]);
$platform = preg_replace( '/[\W]/', '', $gserver['platform'] ?? ''); $platform = preg_replace( '/[\W]/', '', $gserver['platform'] ?? '');
if (!empty($platform)) { if (!empty($platform)) {
$body .= ' platform:' . $platform; $body .= ' platform_' . $platform;
} }
$body .= ' server:' . parse_url($gserver['nurl'], PHP_URL_HOST); $body .= ' server_' . parse_url($gserver['nurl'], PHP_URL_HOST);
} }
if (($item['owner-contact-type'] == Contact::TYPE_COMMUNITY) && !empty($item['owner-gsid']) && ($item['owner-gsid'] != ($item['author-gsid'] ?? 0))) { if (($item['owner-contact-type'] == Contact::TYPE_COMMUNITY) && !empty($item['owner-gsid']) && ($item['owner-gsid'] != ($item['author-gsid'] ?? 0))) {
$gserver = DBA::selectFirst('gserver', ['platform', 'nurl'], ['id' => $item['owner-gsid']]); $gserver = DBA::selectFirst('gserver', ['platform', 'nurl'], ['id' => $item['owner-gsid']]);
$platform = preg_replace( '/[\W]/', '', $gserver['platform'] ?? ''); $platform = preg_replace( '/[\W]/', '', $gserver['platform'] ?? '');
if (!empty($platform) && !strpos($body, 'platform:' . $platform)) { if (!empty($platform) && !strpos($body, 'platform_' . $platform)) {
$body .= ' platform:' . $platform; $body .= ' platform_' . $platform;
} }
$body .= ' server:' . parse_url($gserver['nurl'], PHP_URL_HOST); $body .= ' server_' . parse_url($gserver['nurl'], PHP_URL_HOST);
} }
switch ($item['private']) { switch ($item['private']) {
case Item::PUBLIC: case Item::PUBLIC:
$body .= ' visibility:public'; $body .= ' visibility_public';
break; break;
case Item::UNLISTED: case Item::UNLISTED:
$body .= ' visibility:unlisted'; $body .= ' visibility_unlisted';
break; break;
case Item::PRIVATE: case Item::PRIVATE:
$body .= ' visibility:private'; $body .= ' visibility_private';
break; break;
} }
if (in_array(Contact::TYPE_COMMUNITY, [$item['author-contact-type'], $item['owner-contact-type']])) { if (in_array(Contact::TYPE_COMMUNITY, [$item['author-contact-type'], $item['owner-contact-type']])) {
$body .= ' source:group'; $body .= ' source_group';
} elseif ($item['author-contact-type'] == Contact::TYPE_PERSON) { } elseif ($item['author-contact-type'] == Contact::TYPE_PERSON) {
$body .= ' source:person'; $body .= ' source_person';
} elseif ($item['author-contact-type'] == Contact::TYPE_NEWS) { } elseif ($item['author-contact-type'] == Contact::TYPE_NEWS) {
$body .= ' source:service'; $body .= ' source_service';
} elseif ($item['author-contact-type'] == Contact::TYPE_ORGANISATION) { } elseif ($item['author-contact-type'] == Contact::TYPE_ORGANISATION) {
$body .= ' source:organization'; $body .= ' source_organization';
} elseif ($item['author-contact-type'] == Contact::TYPE_RELAY) { } elseif ($item['author-contact-type'] == Contact::TYPE_RELAY) {
$body .= ' source:application'; $body .= ' source_application';
} }
if ($item['author-contact-type'] == Contact::TYPE_COMMUNITY) { if ($item['author-contact-type'] == Contact::TYPE_COMMUNITY) {
$body .= ' group:' . $item['author-nick'] . ' group:' . $item['author-addr']; $body .= ' group_' . $item['author-nick'] . ' group_' . $item['author-addr'];
} elseif (in_array($item['author-contact-type'], [Contact::TYPE_PERSON, Contact::TYPE_NEWS, Contact::TYPE_ORGANISATION])) { } elseif (in_array($item['author-contact-type'], [Contact::TYPE_PERSON, Contact::TYPE_NEWS, Contact::TYPE_ORGANISATION])) {
$body .= ' from:' . $item['author-nick'] . ' from:' . $item['author-addr']; $body .= ' from_' . $item['author-nick'] . ' from_' . $item['author-addr'];
} }
if ($item['author-id'] != $item['owner-id']) { if ($item['author-id'] != $item['owner-id']) {
if ($item['owner-contact-type'] == Contact::TYPE_COMMUNITY) { if ($item['owner-contact-type'] == Contact::TYPE_COMMUNITY) {
$body .= ' group:' . $item['owner-nick'] . ' group:' . $item['owner-addr']; $body .= ' group_' . $item['owner-nick'] . ' group_' . $item['owner-addr'];
} elseif (in_array($item['owner-contact-type'], [Contact::TYPE_PERSON, Contact::TYPE_NEWS, Contact::TYPE_ORGANISATION])) { } elseif (in_array($item['owner-contact-type'], [Contact::TYPE_PERSON, Contact::TYPE_NEWS, Contact::TYPE_ORGANISATION])) {
$body .= ' from:' . $item['owner-nick'] . ' from:' . $item['owner-addr']; $body .= ' from_' . $item['owner-nick'] . ' from_' . $item['owner-addr'];
} }
} }
@ -231,15 +248,20 @@ class Engagement
continue; continue;
} }
if (($contact['contact-type'] == Contact::TYPE_COMMUNITY) && !strpos($body, 'group:' . $contact['addr'])) { if (($contact['contact-type'] == Contact::TYPE_COMMUNITY) && !strpos($body, 'group_' . $contact['addr'])) {
$body .= ' group:' . $contact['nick'] . ' group:' . $contact['addr']; $body .= ' group_' . $contact['nick'] . ' group_' . $contact['addr'];
} elseif (in_array($contact['contact-type'], [Contact::TYPE_PERSON, Contact::TYPE_NEWS, Contact::TYPE_ORGANISATION])) { } elseif (in_array($contact['contact-type'], [Contact::TYPE_PERSON, Contact::TYPE_NEWS, Contact::TYPE_ORGANISATION])) {
$body .= ' to:' . $contact['nick'] . ' to:' . $contact['addr']; $body .= ' to_' . $contact['nick'] . ' to_' . $contact['addr'];
} }
} }
foreach ($tags as $tag) { foreach ($tags as $tag) {
$body .= ' tag:' . $tag; $body .= ' tag_' . $tag;
}
if (!empty($item['language'])) {
$languages = json_decode($item['language'], true);
$body .= ' language_' . array_key_first($languages);
} }
$body .= ' ' . $item['title'] . ' ' . $item['content-warning'] . ' ' . $item['body']; $body .= ' ' . $item['title'] . ' ' . $item['content-warning'] . ' ' . $item['body'];
@ -293,4 +315,12 @@ class Engagement
return DateTimeFormat::utc('now - ' . DI::config()->get('channel', 'engagement_hours') . ' hour'); return DateTimeFormat::utc('now - ' . DI::config()->get('channel', 'engagement_hours') . ' hour');
} }
/**
 * Rewrites "keyword:value" search terms into the quoted form "keyword_value".
 *
 * This is done for every entry of Engagement::KEYWORDS. The underscore form
 * corresponds to the prefixes (like "network_", "from_" or "tag_") that
 * getSearchText() writes into the search text.
 *
 * @param string $fullTextSearch Search expression as entered
 * @return string Search expression with the keyword terms rewritten
 */
public static function escapeKeywords(string $fullTextSearch): string
{
	foreach (Engagement::KEYWORDS as $keyword) {
		// The leading \b makes sure that only whole keywords are rewritten. Without it
		// a keyword at the end of a longer word is matched as well, e.g. "photo:abc"
		// would become 'pho"to_abc"' because of the keyword "to".
		$escaped = preg_replace('~\b(' . $keyword . '):(.[\w\*@\.-]+)~', '"$1_$2"', $fullTextSearch);

		// preg_replace() returns null on a PCRE error, which would violate the
		// declared return type. Keep the last valid string in that case.
		if (is_string($escaped)) {
			$fullTextSearch = $escaped;
		}
	}
	return $fullTextSearch;
}
} }

View file

@ -0,0 +1,93 @@
<?php
/**
* @copyright Copyright (C) 2010-2024, the Friendica project
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
namespace Friendica\Model\Post;
use Friendica\Core\Logger;
use Friendica\Database\Database;
use Friendica\Database\DBA;
use Friendica\DI;
use Friendica\Model\Post;
use Friendica\Util\DateTimeFormat;
/**
 * Maintains the rows of the "post-searchindex" table.
 */
class SearchIndex
{
	/**
	 * Stores (inserts or updates) the search index row of a post.
	 *
	 * Nothing is stored when an age limit is configured and the post was
	 * created before that limit.
	 *
	 * @param int    $uri_id  uri-id of the post
	 * @param string $network network of the post
	 * @param int    $private visibility of the post
	 * @param string $created creation date of the post
	 * @param bool   $refresh "true" regenerates the search text instead of reusing a stored one
	 */
	public static function insert(int $uri_id, string $network, int $private, string $created, bool $refresh = false)
	{
		$oldest = self::searchAgeDateLimit();

		$expired = !empty($oldest) && (strtotime($created) < strtotime($oldest));
		if ($expired) {
			return;
		}

		$searchtext = Post\Engagement::getSearchTextForUriId($uri_id, $refresh);

		return DBA::insert(
			'post-searchindex',
			[
				'uri-id'     => $uri_id,
				'network'    => $network,
				'private'    => $private,
				'created'    => $created,
				'searchtext' => $searchtext,
			],
			Database::INSERT_UPDATE
		);
	}

	/**
	 * Regenerates the search text of an existing search index row.
	 *
	 * @param int $uri_id uri-id of the post
	 */
	public static function update(int $uri_id)
	{
		$fields = ['searchtext' => Post\Engagement::getSearchTextForUriId($uri_id, true)];

		return DBA::update('post-searchindex', $fields, ['uri-id' => $uri_id]);
	}

	/**
	 * Deletes all search index rows that are older than the configured age limit.
	 * Does nothing when no limit is configured.
	 *
	 * @return void
	 */
	public static function expire()
	{
		$oldest = self::searchAgeDateLimit();

		if (!empty($oldest)) {
			DBA::delete('post-searchindex', ["`created` < ?", $oldest]);
			Logger::notice('Cleared expired searchindex entries', ['limit' => $oldest, 'rows' => DBA::affectedRows()]);
		}
	}

	/**
	 * Returns the date limit derived from the "system.search_age_days" config value.
	 *
	 * @return string Date that lies the configured number of days in the past,
	 *                or an empty string when no limit is configured
	 */
	public static function searchAgeDateLimit(): string
	{
		$age_days = DI::config()->get('system', 'search_age_days');

		return empty($age_days) ? '' : DateTimeFormat::utc('now - ' . $age_days . ' day');
	}
}

View file

@ -140,6 +140,7 @@ class Site extends BaseAdmin
$temppath = (!empty($_POST['temppath']) ? trim($_POST['temppath']) : ''); $temppath = (!empty($_POST['temppath']) ? trim($_POST['temppath']) : '');
$singleuser = (!empty($_POST['singleuser']) ? trim($_POST['singleuser']) : ''); $singleuser = (!empty($_POST['singleuser']) ? trim($_POST['singleuser']) : '');
$only_tag_search = !empty($_POST['only_tag_search']); $only_tag_search = !empty($_POST['only_tag_search']);
$search_age_days = (!empty($_POST['search_age_days']) ? intval($_POST['search_age_days']) : 0);
$compute_circle_counts = !empty($_POST['compute_circle_counts']); $compute_circle_counts = !empty($_POST['compute_circle_counts']);
$process_view = !empty($_POST['process_view']); $process_view = !empty($_POST['process_view']);
$archival_days = (!empty($_POST['archival_days']) ? intval($_POST['archival_days']) : 0); $archival_days = (!empty($_POST['archival_days']) ? intval($_POST['archival_days']) : 0);
@ -314,7 +315,8 @@ class Site extends BaseAdmin
$transactionConfig->set('system', 'temppath', $temppath); $transactionConfig->set('system', 'temppath', $temppath);
$transactionConfig->set('system', 'only_tag_search' , $only_tag_search); $transactionConfig->set('system', 'only_tag_search', $only_tag_search);
$transactionConfig->set('system', 'search_age_days', $search_age_days);
$transactionConfig->set('system', 'compute_circle_counts', $compute_circle_counts); $transactionConfig->set('system', 'compute_circle_counts', $compute_circle_counts);
$transactionConfig->set('system', 'process_view', $process_view); $transactionConfig->set('system', 'process_view', $process_view);
$transactionConfig->set('system', 'archival_days', $archival_days); $transactionConfig->set('system', 'archival_days', $archival_days);
@ -567,6 +569,7 @@ class Site extends BaseAdmin
'$itemspage_network_mobile' => ['itemspage_network_mobile', DI::l10n()->t('Items per page for mobile devices'), DI::config()->get('system', 'itemspage_network_mobile'), DI::l10n()->t('Number of items per page in stream pages (network, community, profile/contact statuses, search) for mobile devices.')], '$itemspage_network_mobile' => ['itemspage_network_mobile', DI::l10n()->t('Items per page for mobile devices'), DI::config()->get('system', 'itemspage_network_mobile'), DI::l10n()->t('Number of items per page in stream pages (network, community, profile/contact statuses, search) for mobile devices.')],
'$temppath' => ['temppath', DI::l10n()->t('Temp path'), DI::config()->get('system', 'temppath'), DI::l10n()->t('If you have a restricted system where the webserver can\'t access the system temp path, enter another path here.')], '$temppath' => ['temppath', DI::l10n()->t('Temp path'), DI::config()->get('system', 'temppath'), DI::l10n()->t('If you have a restricted system where the webserver can\'t access the system temp path, enter another path here.')],
'$only_tag_search' => ['only_tag_search', DI::l10n()->t('Only search in tags'), DI::config()->get('system', 'only_tag_search'), DI::l10n()->t('On large systems the text search can slow down the system extremely.')], '$only_tag_search' => ['only_tag_search', DI::l10n()->t('Only search in tags'), DI::config()->get('system', 'only_tag_search'), DI::l10n()->t('On large systems the text search can slow down the system extremely.')],
'$search_age_days' => ['search_age_days', DI::l10n()->t('Maximum age of items in the search table'), DI::config()->get('system', 'search_age_days'), DI::l10n()->t('Maximum age of items in the search table in days. Lower values will increase the performance and reduce disk usage. 0 means no age restriction.')],
'$compute_circle_counts' => ['compute_circle_counts', DI::l10n()->t('Generate counts per contact circle when calculating network count'), DI::config()->get('system', 'compute_circle_counts'), DI::l10n()->t('On systems with users that heavily use contact circles the query can be very expensive.')], '$compute_circle_counts' => ['compute_circle_counts', DI::l10n()->t('Generate counts per contact circle when calculating network count'), DI::config()->get('system', 'compute_circle_counts'), DI::l10n()->t('On systems with users that heavily use contact circles the query can be very expensive.')],
'$process_view' => ['process_view', DI::l10n()->t('Process "view" activities'), DI::config()->get('system', 'process_view'), DI::l10n()->t('"view" activities are mostly geberated by Peertube systems. Per default they are not processed for performance reasons. Only activate this option on performant system.')], '$process_view' => ['process_view', DI::l10n()->t('Process "view" activities'), DI::config()->get('system', 'process_view'), DI::l10n()->t('"view" activities are mostly geberated by Peertube systems. Per default they are not processed for performance reasons. Only activate this option on performant system.')],
'$archival_days' => ['archival_days', DI::l10n()->t('Days, after which a contact is archived'), DI::config()->get('system', 'archival_days'), DI::l10n()->t('Number of days that we try to deliver content or to update the contact data before we archive a contact.')], '$archival_days' => ['archival_days', DI::l10n()->t('Days, after which a contact is archived'), DI::config()->get('system', 'archival_days'), DI::l10n()->t('Number of days that we try to deliver content or to update the contact data before we archive a contact.')],

View file

@ -23,7 +23,6 @@ namespace Friendica\Module\Api\Mastodon;
use Friendica\Core\Logger; use Friendica\Core\Logger;
use Friendica\Core\Protocol; use Friendica\Core\Protocol;
use Friendica\Core\System;
use Friendica\Database\DBA; use Friendica\Database\DBA;
use Friendica\DI; use Friendica\DI;
use Friendica\Model\Contact; use Friendica\Model\Contact;
@ -154,10 +153,9 @@ class Search extends BaseApi
substr($q, 1), 0, $uid, Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, $uid, 0]; substr($q, 1), 0, $uid, Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, $uid, 0];
$table = 'tag-search-view'; $table = 'tag-search-view';
} else { } else {
$condition = ["`uri-id` IN (SELECT `uri-id` FROM `post-content` WHERE MATCH (`title`, `content-warning`, `body`) AGAINST (? IN BOOLEAN MODE)) $q = Post\Engagement::escapeKeywords($q);
AND (`uid` = ? OR (`uid` = ? AND NOT `global`)) AND (`network` IN (?, ?, ?, ?) OR (`uid` = ? AND `uid` != ?))", $condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and (private = ? OR `uri-id` in (SELECT `uri-id` FROM `post-user` where `uid` = ?))", $q, Item::PUBLIC, $uid];
str_replace('@', ' ', $q), 0, $uid, Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, $uid, 0]; $table = 'post-searchindex';
$table = 'post-user-view';
} }
if (!empty($max_id)) { if (!empty($max_id)) {

View file

@ -398,11 +398,7 @@ class Timeline extends BaseModule
} }
if (!empty($channel->fullTextSearch)) { if (!empty($channel->fullTextSearch)) {
$search = $channel->fullTextSearch; $condition = DBA::mergeConditions($condition, ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", Engagement::escapeKeywords($channel->fullTextSearch)]);
foreach (Engagement::KEYWORDS as $keyword) {
$search = preg_replace('~(' . $keyword . ':.[\w@\.-]+)~', '"$1"', $search);
}
$condition = DBA::mergeConditions($condition, ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", $search]);
} }
if (!empty($channel->includeTags)) { if (!empty($channel->includeTags)) {

View file

@ -123,6 +123,8 @@ class Cron
Worker::add(Worker::PRIORITY_LOW, 'ExpireActivities'); Worker::add(Worker::PRIORITY_LOW, 'ExpireActivities');
Worker::add(Worker::PRIORITY_LOW, 'ExpireSearchIndex');
Worker::add(Worker::PRIORITY_LOW, 'RemoveUnusedTags'); Worker::add(Worker::PRIORITY_LOW, 'RemoveUnusedTags');
Worker::add(Worker::PRIORITY_LOW, 'RemoveUnusedContacts'); Worker::add(Worker::PRIORITY_LOW, 'RemoveUnusedContacts');

View file

@ -0,0 +1,35 @@
<?php
/**
* @copyright Copyright (C) 2010-2024, the Friendica project
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
namespace Friendica\Worker;
use Friendica\Model\Post;
/**
* Expire old search index entries
*
* Worker class whose only job is to trigger the expiry of the post search index.
* The expiry logic itself lives in Post\SearchIndex::expire() and is not visible here.
* NOTE(review): the age limit is presumably taken from the "search_age_days"
* system setting - confirm against Post\SearchIndex::expire().
*/
class ExpireSearchIndex
{
/**
* Runs the search index expiry by delegating to Post\SearchIndex::expire().
*
* Neither parameter is read by this method; both default to an empty string.
*
* @param string $param         Unused in this method
* @param string $hook_function Unused in this method
*
* @return void
*/
public static function execute($param = '', $hook_function = '')
{
// All of the work happens in the model class; nothing is returned to the caller.
Post\SearchIndex::expire();
}
}

View file

@ -56,7 +56,7 @@ use Friendica\Database\DBA;
// This file is required several times during the test in DbaDefinition which justifies this condition // This file is required several times during the test in DbaDefinition which justifies this condition
if (!defined('DB_UPDATE_VERSION')) { if (!defined('DB_UPDATE_VERSION')) {
define('DB_UPDATE_VERSION', 1546); define('DB_UPDATE_VERSION', 1547);
} }
return [ return [
@ -1319,7 +1319,6 @@ return [
"PRIMARY" => ["uri-id"], "PRIMARY" => ["uri-id"],
"plink" => ["plink(191)"], "plink" => ["plink(191)"],
"resource-id" => ["resource-id"], "resource-id" => ["resource-id"],
"title-content-warning-body" => ["FULLTEXT", "title", "content-warning", "body"],
"quote-uri-id" => ["quote-uri-id"], "quote-uri-id" => ["quote-uri-id"],
] ]
], ],
@ -1480,6 +1479,21 @@ return [
"PRIMARY" => ["uri-id", "id"], "PRIMARY" => ["uri-id", "id"],
] ]
], ],
"post-searchindex" => [
"comment" => "Content for all posts",
"fields" => [
"uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"],
"network" => ["type" => "char(4)", "comment" => ""],
"private" => ["type" => "tinyint unsigned", "comment" => "0=public, 1=private, 2=unlisted"],
"searchtext" => ["type" => "mediumtext", "comment" => "Simplified text for the full text search"],
"created" => ["type" => "datetime", "comment" => ""],
],
"indexes" => [
"PRIMARY" => ["uri-id"],
"created" => ["created"],
"searchtext" => ["FULLTEXT", "searchtext"],
]
],
"post-tag" => [ "post-tag" => [
"comment" => "post relation to tags", "comment" => "post relation to tags",
"fields" => [ "fields" => [
@ -1708,7 +1722,6 @@ return [
"indexes" => [ "indexes" => [
"PRIMARY" => ["id"], "PRIMARY" => ["id"],
"uid_is-default" => ["uid", "is-default"], "uid_is-default" => ["uid", "is-default"],
"pub_keywords" => ["FULLTEXT", "pub_keywords"],
] ]
], ],
"profile_field" => [ "profile_field" => [

File diff suppressed because it is too large Load diff

View file

@ -116,6 +116,7 @@
<h2>{{$performance}}</h2> <h2>{{$performance}}</h2>
{{include file="field_checkbox.tpl" field=$compute_circle_counts}} {{include file="field_checkbox.tpl" field=$compute_circle_counts}}
{{include file="field_checkbox.tpl" field=$only_tag_search}} {{include file="field_checkbox.tpl" field=$only_tag_search}}
{{include file="field_input.tpl" field=$search_age_days}}
{{include file="field_input.tpl" field=$max_comments}} {{include file="field_input.tpl" field=$max_comments}}
{{include file="field_input.tpl" field=$max_display_comments}} {{include file="field_input.tpl" field=$max_display_comments}}
{{include file="field_input.tpl" field=$itemspage_network}} {{include file="field_input.tpl" field=$itemspage_network}}

View file

@ -250,6 +250,7 @@
<div class="panel-body"> <div class="panel-body">
{{include file="field_checkbox.tpl" field=$compute_circle_counts}} {{include file="field_checkbox.tpl" field=$compute_circle_counts}}
{{include file="field_checkbox.tpl" field=$only_tag_search}} {{include file="field_checkbox.tpl" field=$only_tag_search}}
{{include file="field_input.tpl" field=$search_age_days}}
{{include file="field_input.tpl" field=$max_comments}} {{include file="field_input.tpl" field=$max_comments}}
{{include file="field_input.tpl" field=$max_display_comments}} {{include file="field_input.tpl" field=$max_display_comments}}
{{include file="field_input.tpl" field=$itemspage_network}} {{include file="field_input.tpl" field=$itemspage_network}}