Expiry post search index entries

This commit is contained in:
Michael 2024-01-21 16:24:59 +00:00
parent 919f97c9a0
commit 6389133575
12 changed files with 397 additions and 299 deletions

View file

@ -261,8 +261,12 @@ class BBCode
if (!empty($media['description']) && (stripos($text, $media['description']) === false)) {
$text .= ' ' . $media['description'];
}
if (in_array($media['type'], [Post\Media::HTML, Post\Media::ACTIVITY]) && !empty($media['name']) && (stripos($text, $media['name']) === false)) {
$text .= ' ' . $media['name'];
if (in_array($media['type'], [Post\Media::HTML, Post\Media::ACTIVITY])) {
	// Append every descriptive field that is present and not yet contained in the text.
	foreach (['name', 'author-name', 'publisher-name'] as $key) {
		// empty() has to wrap only the array access: that way a missing or null
		// field is skipped silently instead of being evaluated (and handed to
		// stripos()) as part of a larger boolean expression.
		if (!empty($media[$key]) && (stripos($text, $media[$key]) === false)) {
			$text .= ' ' . $media[$key];
		}
	}
}
}
}

View file

@ -1386,7 +1386,14 @@ class PostUpdate
$rows = 0;
$posts = Post::selectPosts(['uri-id', 'network', 'private'], ["`uri-id` < ? AND `gravity` IN (?, ?)", $id, Item::GRAVITY_COMMENT, Item::GRAVITY_PARENT], ['order' => ['uri-id' => true], 'limit' => 1000]);
$condition = ["`uri-id` < ? AND `gravity` IN (?, ?)", $id, Item::GRAVITY_COMMENT, Item::GRAVITY_PARENT];
$limit = Post\SearchIndex::searchAgeDateLimit();
if (!empty($limit)) {
DBA::mergeConditions($condition, ["`created` > ?", $limit]);
}
$posts = Post::selectPosts(['uri-id', 'network', 'private', 'created'], $condition, ['order' => ['uri-id' => true], 'limit' => 1000]);
if (DBA::errorNo() != 0) {
Logger::error('Database error', ['no' => DBA::errorNo(), 'message' => DBA::errorMessage()]);
@ -1395,7 +1402,7 @@ class PostUpdate
while ($post = Post::fetch($posts)) {
$id = $post['uri-id'];
Post\SearchIndex::insert($post['uri-id'], $post['network'], $post['private']);
Post\SearchIndex::insert($post['uri-id'], $post['network'], $post['private'], $post['created'], true);
++$rows;
}
DBA::close($posts);

View file

@ -1450,7 +1450,7 @@ class Item
$engagement_uri_id = Post\Engagement::storeFromItem($posted_item);
if (in_array($posted_item['gravity'], [self::GRAVITY_PARENT, self::GRAVITY_COMMENT])) {
Post\SearchIndex::insert($posted_item['uri-id'], $posted_item['network'], $posted_item['private']);
Post\SearchIndex::insert($posted_item['uri-id'], $posted_item['network'], $posted_item['private'], $posted_item['created']);
}
if (($posted_item['gravity'] == self::GRAVITY_ACTIVITY) && ($posted_item['verb'] == Activity::ANNOUNCE) && ($posted_item['parent-uri-id'] == $posted_item['thr-parent-id'])) {

View file

@ -169,6 +169,9 @@ class Engagement
$post = Post::selectFirstPost(['uri-id', 'network', 'title', 'content-warning', 'body', 'private',
'author-id', 'author-contact-type', 'author-nick', 'author-addr', 'author-gsid',
'owner-id', 'owner-contact-type', 'owner-nick', 'owner-addr', 'owner-gsid'], ['uri-id' => $uri_id]);
if (empty($post['uri-id'])) {
return '';
}
return self::getSearchTextForItem($post);
}
@ -316,7 +319,7 @@ class Engagement
/**
 * Rewrites "keyword:value" terms of a full text search string for every
 * keyword listed in Engagement::KEYWORDS.
 *
 * The value part matched by the pattern is one arbitrary character followed
 * by one or more word characters, "*", "@", "." or "-".
 *
 * @param string $fullTextSearch Search string that may contain "keyword:value" terms
 * @return string The search string after the replacements
 */
public static function escapeKeywords(string $fullTextSearch): string
{
foreach (Engagement::KEYWORDS as $keyword) {
// Replaces "keyword:value" with "keyword_value" (colon becomes underscore, no quoting).
$fullTextSearch = preg_replace('~(' . $keyword . '):(.[\w\*@\.-]+)~', '$1_$2', $fullTextSearch);
// Same pattern, but the replacement is additionally wrapped in double quotes.
// NOTE(review): the call above has already rewritten the colon form, so this call
// mostly sees input without "keyword:" left in it. The two lines look like the
// before/after variants of a single change - confirm which replacement is intended.
$fullTextSearch = preg_replace('~(' . $keyword . '):(.[\w\*@\.-]+)~', '"$1_$2"', $fullTextSearch);
}
return $fullTextSearch;
}

View file

@ -21,9 +21,12 @@
namespace Friendica\Model\Post;
use Friendica\Core\Logger;
use Friendica\Database\Database;
use Friendica\Database\DBA;
use Friendica\DI;
use Friendica\Model\Post;
use Friendica\Util\DateTimeFormat;
class SearchIndex
{
@ -33,14 +36,22 @@ class SearchIndex
* @param int $uri_id
* @param string $network
* @param int $private
* @param string $created
* @param bool $refresh
*/
public static function insert(int $uri_id, string $network, int $private)
public static function insert(int $uri_id, string $network, int $private, string $created, bool $refresh = false)
{
$limit = self::searchAgeDateLimit();
if (!empty($limit) && (strtotime($created) < strtotime($limit))) {
return;
}
$search = [
'uri-id' => $uri_id,
'network' => $network,
'private' => $private,
'searchtext' => Post\Engagement::getSearchTextForUriId($uri_id),
'created' => $created,
'searchtext' => Post\Engagement::getSearchTextForUriId($uri_id, $refresh),
];
return DBA::insert('post-searchindex', $search, Database::INSERT_UPDATE);
}
@ -55,4 +66,23 @@ class SearchIndex
$searchtext = Post\Engagement::getSearchTextForUriId($uri_id, true);
return DBA::update('post-searchindex', ['searchtext' => $searchtext], ['uri-id' => $uri_id]);
}
/**
 * Deletes all search index rows whose creation date lies before the
 * configured age limit and logs how many rows were removed.
 *
 * Nothing is deleted when no age limit is configured.
 */
public static function expire()
{
	$threshold = self::searchAgeDateLimit();

	// An empty threshold means "no age restriction": keep everything.
	if (!empty($threshold)) {
		DBA::delete('post-searchindex', ["`created` < ?", $threshold]);
		Logger::notice('Cleared expired searchindex entries', ['limit' => $threshold, 'rows' => DBA::affectedRows()]);
	}
}
/**
 * Returns the oldest creation date that is still kept in the search index.
 *
 * The limit is derived from the "system.search_age_days" config value.
 *
 * @return string Date as returned by DateTimeFormat::utc(), or an empty string
 *                when no (valid) age restriction is configured
 */
public static function searchAgeDateLimit(): string
{
	// Normalise to an integer: the value may come from a hand-edited config
	// and the admin form's intval() does not reject negative numbers.
	$days = (int)DI::config()->get('system', 'search_age_days');

	// Zero, negative and non-numeric values all mean "no restriction". Without
	// this guard such a value would be concatenated into the relative date
	// expression below and could produce a bogus cutoff (e.g. one in the
	// future), which would make expire() wipe the whole index.
	if ($days <= 0) {
		return '';
	}

	return DateTimeFormat::utc('now - ' . $days . ' day');
}
}

View file

@ -140,6 +140,7 @@ class Site extends BaseAdmin
$temppath = (!empty($_POST['temppath']) ? trim($_POST['temppath']) : '');
$singleuser = (!empty($_POST['singleuser']) ? trim($_POST['singleuser']) : '');
$only_tag_search = !empty($_POST['only_tag_search']);
$search_age_days = (!empty($_POST['search_age_days']) ? intval($_POST['search_age_days']) : 0);
$compute_circle_counts = !empty($_POST['compute_circle_counts']);
$process_view = !empty($_POST['process_view']);
$archival_days = (!empty($_POST['archival_days']) ? intval($_POST['archival_days']) : 0);
@ -315,6 +316,7 @@ class Site extends BaseAdmin
$transactionConfig->set('system', 'temppath', $temppath);
$transactionConfig->set('system', 'only_tag_search', $only_tag_search);
$transactionConfig->set('system', 'search_age_days', $search_age_days);
$transactionConfig->set('system', 'compute_circle_counts', $compute_circle_counts);
$transactionConfig->set('system', 'process_view', $process_view);
$transactionConfig->set('system', 'archival_days', $archival_days);
@ -567,6 +569,7 @@ class Site extends BaseAdmin
'$itemspage_network_mobile' => ['itemspage_network_mobile', DI::l10n()->t('Items per page for mobile devices'), DI::config()->get('system', 'itemspage_network_mobile'), DI::l10n()->t('Number of items per page in stream pages (network, community, profile/contact statuses, search) for mobile devices.')],
'$temppath' => ['temppath', DI::l10n()->t('Temp path'), DI::config()->get('system', 'temppath'), DI::l10n()->t('If you have a restricted system where the webserver can\'t access the system temp path, enter another path here.')],
'$only_tag_search' => ['only_tag_search', DI::l10n()->t('Only search in tags'), DI::config()->get('system', 'only_tag_search'), DI::l10n()->t('On large systems the text search can slow down the system extremely.')],
'$search_age_days' => ['search_age_days', DI::l10n()->t('Maximum age of items in the search table'), DI::config()->get('system', 'search_age_days'), DI::l10n()->t('Maximum age of items in the search table in days. Lower values will increase the performance and reduce disk usage. 0 means no age restriction.')],
'$compute_circle_counts' => ['compute_circle_counts', DI::l10n()->t('Generate counts per contact circle when calculating network count'), DI::config()->get('system', 'compute_circle_counts'), DI::l10n()->t('On systems with users that heavily use contact circles the query can be very expensive.')],
'$process_view' => ['process_view', DI::l10n()->t('Process "view" activities'), DI::config()->get('system', 'process_view'), DI::l10n()->t('"view" activities are mostly geberated by Peertube systems. Per default they are not processed for performance reasons. Only activate this option on performant system.')],
'$archival_days' => ['archival_days', DI::l10n()->t('Days, after which a contact is archived'), DI::config()->get('system', 'archival_days'), DI::l10n()->t('Number of days that we try to deliver content or to update the contact data before we archive a contact.')],

View file

@ -123,6 +123,8 @@ class Cron
Worker::add(Worker::PRIORITY_LOW, 'ExpireActivities');
Worker::add(Worker::PRIORITY_LOW, 'ExpireSearchIndex');
Worker::add(Worker::PRIORITY_LOW, 'RemoveUnusedTags');
Worker::add(Worker::PRIORITY_LOW, 'RemoveUnusedContacts');

View file

@ -0,0 +1,35 @@
<?php
/**
* @copyright Copyright (C) 2010-2024, the Friendica project
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
namespace Friendica\Worker;
use Friendica\Model\Post;
/**
 * Worker task that removes outdated entries from the post search index.
 */
class ExpireSearchIndex
{
	/**
	 * Entry point of the worker task.
	 *
	 * Both parameters are accepted but not used by this task; the work is
	 * delegated to Post\SearchIndex::expire().
	 *
	 * @param mixed $param
	 * @param mixed $hook_function
	 */
	public static function execute($param = '', $hook_function = '')
	{
		Post\SearchIndex::expire();
	}
}

View file

@ -1487,9 +1487,11 @@ return [
"network" => ["type" => "char(4)", "comment" => ""],
"private" => ["type" => "tinyint unsigned", "comment" => "0=public, 1=private, 2=unlisted"],
"searchtext" => ["type" => "mediumtext", "comment" => "Simplified text for the full text search"],
"created" => ["type" => "datetime", "comment" => ""],
],
"indexes" => [
"PRIMARY" => ["uri-id"],
"created" => ["created"],
"searchtext" => ["FULLTEXT", "searchtext"],
]
],

File diff suppressed because it is too large Load diff

View file

@ -116,6 +116,7 @@
<h2>{{$performance}}</h2>
{{include file="field_checkbox.tpl" field=$compute_circle_counts}}
{{include file="field_checkbox.tpl" field=$only_tag_search}}
{{include file="field_input.tpl" field=$search_age_days}}
{{include file="field_input.tpl" field=$max_comments}}
{{include file="field_input.tpl" field=$max_display_comments}}
{{include file="field_input.tpl" field=$itemspage_network}}

View file

@ -250,6 +250,7 @@
<div class="panel-body">
{{include file="field_checkbox.tpl" field=$compute_circle_counts}}
{{include file="field_checkbox.tpl" field=$only_tag_search}}
{{include file="field_input.tpl" field=$search_age_days}}
{{include file="field_input.tpl" field=$max_comments}}
{{include file="field_input.tpl" field=$max_display_comments}}
{{include file="field_input.tpl" field=$itemspage_network}}