Merge pull request #11842 from annando/update-contact

Improved performance when updating contacts
This commit is contained in:
Hypolite Petovan 2022-08-13 10:42:39 -04:00 committed by GitHub
commit 23cf850198
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 113 additions and 67 deletions

View file

@ -1,6 +1,6 @@
-- ------------------------------------------
-- Friendica 2022.09-dev (Giant Rhubarb)
-- DB_UPDATE_VERSION 1479
-- DB_UPDATE_VERSION 1480
-- ------------------------------------------
@ -141,12 +141,14 @@ CREATE TABLE IF NOT EXISTS `contact` (
`poll` varchar(255) COMMENT '',
`subscribe` varchar(255) COMMENT '',
`last-update` datetime NOT NULL DEFAULT '0001-01-01 00:00:00' COMMENT 'Date of the last try to update the contact info',
`next-update` datetime COMMENT 'Next connection request',
`success_update` datetime NOT NULL DEFAULT '0001-01-01 00:00:00' COMMENT 'Date of the last successful contact update',
`failure_update` datetime NOT NULL DEFAULT '0001-01-01 00:00:00' COMMENT 'Date of the last failed update',
`failed` boolean COMMENT 'Connection failed',
`term-date` datetime NOT NULL DEFAULT '0001-01-01 00:00:00' COMMENT '',
`last-item` datetime NOT NULL DEFAULT '0001-01-01 00:00:00' COMMENT 'date of the last post',
`last-discovery` datetime NOT NULL DEFAULT '0001-01-01 00:00:00' COMMENT 'date of the last follower discovery',
`local-data` boolean COMMENT 'Is true when there are posts with this contact on the system',
`blocked` boolean NOT NULL DEFAULT '1' COMMENT 'Node-wide block status',
`block_reason` text COMMENT 'Node-wide block reason',
`readonly` boolean NOT NULL DEFAULT '0' COMMENT 'posts of the contact are readonly',
@ -213,6 +215,8 @@ CREATE TABLE IF NOT EXISTS `contact` (
INDEX `attag_uid` (`attag`(96),`uid`),
INDEX `network_uid_lastupdate` (`network`,`uid`,`last-update`),
INDEX `uid_network_self_lastupdate` (`uid`,`network`,`self`,`last-update`),
INDEX `next-update` (`next-update`),
INDEX `local-data-next-update` (`local-data`,`next-update`),
INDEX `uid_lastitem` (`uid`,`last-item`),
INDEX `baseurl` (`baseurl`(64)),
INDEX `uid_contact-type` (`uid`,`contact-type`),

View file

@ -34,12 +34,14 @@ Fields
| poll | | varchar(255) | YES | | NULL | |
| subscribe | | varchar(255) | YES | | NULL | |
| last-update | Date of the last try to update the contact info | datetime | NO | | 0001-01-01 00:00:00 | |
| next-update | Next connection request | datetime | YES | | NULL | |
| success_update | Date of the last successful contact update | datetime | NO | | 0001-01-01 00:00:00 | |
| failure_update | Date of the last failed update | datetime | NO | | 0001-01-01 00:00:00 | |
| failed | Connection failed | boolean | YES | | NULL | |
| term-date | | datetime | NO | | 0001-01-01 00:00:00 | |
| last-item | date of the last post | datetime | NO | | 0001-01-01 00:00:00 | |
| last-discovery | date of the last follower discovery | datetime | NO | | 0001-01-01 00:00:00 | |
| local-data | Is true when there are posts with this contact on the system | boolean | YES | | NULL | |
| blocked | Node-wide block status | boolean | NO | | 1 | |
| block_reason | Node-wide block reason | text | YES | | NULL | |
| readonly | posts of the contact are readonly | boolean | NO | | 0 | |
@ -112,6 +114,8 @@ Indexes
| attag_uid | attag(96), uid |
| network_uid_lastupdate | network, uid, last-update |
| uid_network_self_lastupdate | uid, network, self, last-update |
| next-update | next-update |
| local-data-next-update | local-data, next-update |
| uid_lastitem | uid, last-item |
| baseurl | baseurl(64) |
| uid_contact-type | uid, contact-type |

View file

@ -2339,6 +2339,47 @@ class Contact
return self::updateFromProbeArray($id, $ret);
}
/**
 * Determines whether anything stored locally refers to the given contact
 *
 * The lookups run in a fixed order (user contacts, post tags, user-contact
 * rows, posts) and the method returns on the first hit.
 *
 * @param int   $id      Contact id
 * @param array $contact Contact record; the "uri-id" and "url" entries are read
 *
 * @return boolean "true" as soon as one of the lookups finds a matching row
 */
private static function hasLocalData(int $id, array $contact): bool
{
	// User contacts (uid != 0) that share the uri-id
	$uri_id_known = !empty($contact['uri-id']);
	if ($uri_id_known && DBA::exists('contact', ["`uri-id` = ? AND `uid` != ?", $contact['uri-id'], 0])) {
		return true;
	}

	// User contacts that share the normalised URL
	// (compatibility mode for systems with missing uri-id values)
	if (DBA::exists('contact', ["`nurl` = ? AND `uid` != ?", Strings::normaliseLink($contact['url']), 0])) {
		return true;
	}

	// The contact is tagged in a post or has got user-contact data
	foreach (['post-tag', 'user-contact'] as $table) {
		if (DBA::exists($table, ['cid' => $id])) {
			return true;
		}
	}

	// Posts exist where the contact is the author, the owner or the causer
	foreach (['author-id', 'owner-id', 'causer-id'] as $field) {
		if (Post::exists([$field => $id])) {
			return true;
		}
	}

	// None of the lookups matched: nothing local refers to this contact
	return false;
}
/**
* Updates contact record by provided id and probed data
*
@ -2360,7 +2401,8 @@ class Contact
$fields = ['uid', 'uri-id', 'avatar', 'header', 'name', 'nick', 'location', 'keywords', 'about', 'subscribe',
'manually-approve', 'unsearchable', 'url', 'addr', 'batch', 'notify', 'poll', 'request', 'confirm', 'poco',
'network', 'alias', 'baseurl', 'gsid', 'forum', 'prv', 'contact-type', 'pubkey', 'last-item', 'xmpp', 'matrix'];
'network', 'alias', 'baseurl', 'gsid', 'forum', 'prv', 'contact-type', 'pubkey', 'last-item', 'xmpp', 'matrix',
'created', 'last-update'];
$contact = DBA::selectFirst('contact', $fields, ['id' => $id]);
if (!DBA::isResult($contact)) {
return false;
@ -2393,14 +2435,34 @@ class Contact
$pubkey = $contact['pubkey'];
unset($contact['pubkey']);
$created = $contact['created'];
unset($contact['created']);
$last_update = $contact['last-update'];
unset($contact['last-update']);
$contact['photo'] = $contact['avatar'];
unset($contact['avatar']);
$updated = DateTimeFormat::utcNow();
$has_local_data = self::hasLocalData($id, $contact);
if (!in_array($ret['network'], array_merge(Protocol::FEDERATED, [Protocol::ZOT, Protocol::PHANTOM]))) {
// Periodical checks are only done on federated contacts
$failed_next_update = null;
$success_next_update = null;
} elseif ($has_local_data) {
$failed_next_update = GServer::getNextUpdateDate(false, $created, $last_update, !in_array($contact['network'], Protocol::FEDERATED));
$success_next_update = GServer::getNextUpdateDate(true, $created, $last_update, !in_array($contact['network'], Protocol::FEDERATED));
} else {
$failed_next_update = DateTimeFormat::utc('now +6 month');
$success_next_update = DateTimeFormat::utc('now +1 month');
}
if (Strings::normaliseLink($contact['url']) != Strings::normaliseLink($ret['url'])) {
Logger::notice('New URL differs from old URL', ['id' => $id, 'uid' => $uid, 'old' => $contact['url'], 'new' => $ret['url']]);
self::updateContact($id, $uid, $contact['url'], $ret['url'], ['failed' => true, 'last-update' => $updated, 'failure_update' => $updated]);
self::updateContact($id, $uid, $contact['url'], $ret['url'], ['failed' => true, 'local-data' => $has_local_data, 'last-update' => $updated, 'next-update' => $failed_next_update, 'failure_update' => $updated]);
return false;
}
@ -2408,14 +2470,14 @@ class Contact
// We check after the probing to be able to correct falsely detected contact types.
if (($contact['contact-type'] == self::TYPE_RELAY) &&
(!Strings::compareLink($ret['url'], $contact['url']) || in_array($ret['network'], [Protocol::FEED, Protocol::PHANTOM]))) {
self::updateContact($id, $uid, $contact['url'], $contact['url'], ['failed' => false, 'last-update' => $updated, 'success_update' => $updated]);
self::updateContact($id, $uid, $contact['url'], $contact['url'], ['failed' => false, 'local-data' => $has_local_data, 'last-update' => $updated, 'next-update' => $success_next_update, 'success_update' => $updated]);
Logger::info('Not updating relais', ['id' => $id, 'url' => $contact['url']]);
return true;
}
// If Probe::uri fails the network code will be different ("feed" or "unkn")
if (($ret['network'] == Protocol::PHANTOM) || (($ret['network'] == Protocol::FEED) && ($ret['network'] != $contact['network']))) {
self::updateContact($id, $uid, $contact['url'], $ret['url'], ['failed' => true, 'last-update' => $updated, 'failure_update' => $updated]);
self::updateContact($id, $uid, $contact['url'], $ret['url'], ['failed' => true, 'local-data' => $has_local_data, 'last-update' => $updated, 'next-update' => $failed_next_update, 'failure_update' => $updated]);
return false;
}
@ -2487,7 +2549,7 @@ class Contact
$uriid = ItemURI::insert(['uri' => $ret['url'], 'guid' => $guid]);
if (!$update) {
self::updateContact($id, $uid, $contact['url'], $ret['url'], ['failed' => false, 'last-update' => $updated, 'success_update' => $updated]);
self::updateContact($id, $uid, $contact['url'], $ret['url'], ['failed' => false, 'local-data' => $has_local_data, 'last-update' => $updated, 'next-update' => $success_next_update, 'success_update' => $updated]);
if (Contact\Relation::isDiscoverable($ret['url'])) {
Worker::add(PRIORITY_LOW, 'ContactDiscovery', $ret['url']);
@ -2504,10 +2566,12 @@ class Contact
return true;
}
$ret['uri-id'] = $uriid;
$ret['nurl'] = Strings::normaliseLink($ret['url']);
$ret['updated'] = $updated;
$ret['failed'] = false;
$ret['uri-id'] = $uriid;
$ret['nurl'] = Strings::normaliseLink($ret['url']);
$ret['updated'] = $updated;
$ret['failed'] = false;
$ret['next-update'] = $success_next_update;
$ret['local-data'] = $has_local_data;
// Only fill the pubkey if it had been empty before. We have to prevent identity theft.
if (empty($pubkey) && !empty($new_pubkey)) {

View file

@ -1948,15 +1948,15 @@ class Item
} else {
$condition = ['id' => $arr['contact-id'], 'self' => false];
}
Contact::update(['failed' => false, 'success_update' => $arr['received'], 'last-item' => $arr['received']], $condition);
Contact::update(['failed' => false, 'local-data' => true, 'success_update' => $arr['received'], 'last-item' => $arr['received']], $condition);
}
// Now do the same for the system wide contacts with uid=0
if ($arr['private'] != self::PRIVATE) {
Contact::update(['failed' => false, 'success_update' => $arr['received'], 'last-item' => $arr['received']],
Contact::update(['failed' => false, 'local-data' => true, 'success_update' => $arr['received'], 'last-item' => $arr['received']],
['id' => $arr['owner-id']]);
if ($arr['owner-id'] != $arr['author-id']) {
Contact::update(['failed' => false, 'success_update' => $arr['received'], 'last-item' => $arr['received']],
Contact::update(['failed' => false, 'local-data' => true, 'success_update' => $arr['received'], 'last-item' => $arr['received']],
['id' => $arr['author-id']]);
}
}

View file

@ -565,6 +565,12 @@ class Post
$posts = DBA::select('post-user-view', ['uri-id'], $condition, ['group_by' => ['uri-id']]);
while ($rows = DBA::toArray($posts, false, 100)) {
$uriids = array_column($rows, 'uri-id');
// Only delete the "post" entry when all "post-user" entries are deleted
if (!empty($update_fields['deleted']) && DBA::exists('post-user', ['uri-id' => $uriids, 'deleted' => false])) {
unset($update_fields['deleted']);
}
if (!DBA::update('post', $update_fields, ['uri-id' => $uriids])) {
DBA::rollback();
Logger::notice('Updating post failed', ['fields' => $update_fields, 'condition' => $condition]);

View file

@ -35,8 +35,6 @@ class UpdateContacts
{
public static function execute()
{
$base_condition = ['network' => array_merge(Protocol::FEDERATED, [Protocol::ZOT, Protocol::PHANTOM]), 'self' => false];
$update_limit = DI::config()->get('system', 'contact_update_limit');
if (empty($update_limit)) {
return;
@ -49,66 +47,24 @@ class UpdateContacts
return;
}
$condition = DBA::mergeConditions($base_condition,
["`uid` != ? AND (`last-update` < ? OR (NOT `failed` AND `last-update` < ?))",
0, DateTimeFormat::utc('now - 1 month'), DateTimeFormat::utc('now - 1 week')]);
$ids = self::getContactsToUpdate($condition, $limit, []);
Logger::info('Fetched federated user contacts', ['count' => count($ids)]);
Logger::info('Updating contact', ['count' => $limit]);
$conditions = ["`id` IN (SELECT `author-id` FROM `post`)",
"`id` IN (SELECT `owner-id` FROM `post`)",
"`id` IN (SELECT `causer-id` FROM `post` WHERE NOT `causer-id` IS NULL)",
"`id` IN (SELECT `cid` FROM `post-tag`)",
"`id` IN (SELECT `cid` FROM `user-contact`)"];
$condition = ['self' => false];
foreach ($conditions as $contact_condition) {
$condition = DBA::mergeConditions($base_condition,
[$contact_condition . " AND (`last-update` < ? OR (NOT `failed` AND `last-update` < ?))",
DateTimeFormat::utc('now - 1 month'), DateTimeFormat::utc('now - 1 week')]);
$ids = self::getContactsToUpdate($condition, $limit, $ids);
Logger::info('Fetched interacting federated contacts', ['count' => count($ids), 'condition' => $contact_condition]);
}
if (count($ids) > $limit) {
$ids = array_slice($ids, 0, $limit, true);
}
if (!DI::config()->get('system', 'update_active_contacts')) {
// Add every contact (mostly failed ones) that hadn't been updated for six months
// and every non failed contact that hadn't been updated for a month
$condition = DBA::mergeConditions($base_condition,
["(`last-update` < ? OR (NOT `failed` AND `last-update` < ?))",
DateTimeFormat::utc('now - 6 month'), DateTimeFormat::utc('now - 1 month')]);
$previous = count($ids);
$ids = self::getContactsToUpdate($condition, $limit - $previous, $ids);
Logger::info('Fetched federated contacts', ['count' => count($ids) - $previous]);
if (DI::config()->get('system', 'update_active_contacts')) {
$condition = array_merge(['local-data' => true], $condition);
}
$condition = array_merge(["`next-update` < ?", DateTimeFormat::utcNow()], $condition);
$contacts = DBA::select('contact', ['id'], $condition, ['order' => ['next-update'], 'limit' => $limit]);
$count = 0;
foreach ($ids as $id) {
if (Worker::add(PRIORITY_LOW, "UpdateContact", $id)) {
while ($contact = DBA::fetch($contacts)) {
if (Worker::add(['priority' => PRIORITY_LOW, 'dont_fork' => true], "UpdateContact", $contact['id'])) {
++$count;
}
}
DBA::close($contacts);
Logger::info('Initiated update for federated contacts', ['count' => $count]);
}
/**
 * Collects the ids of contacts that match a condition
 *
 * Ids found by the query are added to the provided list, which is keyed by
 * contact id, so an id that is already present is not added twice.
 *
 * @param array $condition Condition for the query on the "contact" table
 * @param int   $limit     Maximum number of rows fetched by this query
 * @param array $ids       Already collected contact ids (id => id)
 * @return array contact ids (id => id)
 */
private static function getContactsToUpdate(array $condition, int $limit, array $ids = []): array
{
	$params = ['limit' => $limit];
	$result = DBA::select('contact', ['id'], $condition, $params);

	for ($row = DBA::fetch($result); $row; $row = DBA::fetch($result)) {
		$contact_id = $row['id'];
		// Keyed by id, so repeated ids collapse into one entry
		$ids[$contact_id] = $contact_id;
	}

	DBA::close($result);

	return $ids;
}
}

View file

@ -55,7 +55,7 @@
use Friendica\Database\DBA;
if (!defined('DB_UPDATE_VERSION')) {
define('DB_UPDATE_VERSION', 1479);
define('DB_UPDATE_VERSION', 1480);
}
return [
@ -198,12 +198,14 @@ return [
"poll" => ["type" => "varchar(255)", "comment" => ""],
"subscribe" => ["type" => "varchar(255)", "comment" => ""],
"last-update" => ["type" => "datetime", "not null" => "1", "default" => DBA::NULL_DATETIME, "comment" => "Date of the last try to update the contact info"],
"next-update" => ["type" => "datetime", "comment" => "Next connection request"],
"success_update" => ["type" => "datetime", "not null" => "1", "default" => DBA::NULL_DATETIME, "comment" => "Date of the last successful contact update"],
"failure_update" => ["type" => "datetime", "not null" => "1", "default" => DBA::NULL_DATETIME, "comment" => "Date of the last failed update"],
"failed" => ["type" => "boolean", "comment" => "Connection failed"],
"term-date" => ["type" => "datetime", "not null" => "1", "default" => DBA::NULL_DATETIME, "comment" => ""],
"last-item" => ["type" => "datetime", "not null" => "1", "default" => DBA::NULL_DATETIME, "comment" => "date of the last post"],
"last-discovery" => ["type" => "datetime", "not null" => "1", "default" => DBA::NULL_DATETIME, "comment" => "date of the last follower discovery"],
"local-data" => ["type" => "boolean", "comment" => "Is true when there are posts with this contact on the system"],
"blocked" => ["type" => "boolean", "not null" => "1", "default" => "1", "comment" => "Node-wide block status"],
"block_reason" => ["type" => "text", "comment" => "Node-wide block reason"],
"readonly" => ["type" => "boolean", "not null" => "1", "default" => "0", "comment" => "posts of the contact are readonly"],
@ -275,6 +277,8 @@ return [
"attag_uid" => ["attag(96)", "uid"],
"network_uid_lastupdate" => ["network", "uid", "last-update"],
"uid_network_self_lastupdate" => ["uid", "network", "self", "last-update"],
"next-update" => ["next-update"],
"local-data-next-update" => ["local-data", "next-update"],
"uid_lastitem" => ["uid", "last-item"],
"baseurl" => ["baseurl(64)"],
"uid_contact-type" => ["uid", "contact-type"],

View file

@ -42,6 +42,7 @@
use Friendica\Core\Config\ValueObject\Cache;
use Friendica\Core\Logger;
use Friendica\Core\Protocol;
use Friendica\Core\Storage\Capability\ICanReadFromStorage;
use Friendica\Core\Storage\Type\Database as DatabaseStorage;
use Friendica\Core\Update;
@ -1107,3 +1108,10 @@ function update_1457()
return Update::SUCCESS;
}
/**
 * Post-update step for DB_UPDATE_VERSION 1480
 *
 * - Seeds the new "next-update" field of existing contacts with DBA::NULL_DATETIME.
 * - Resets "deleted" on every "post" row that still has a non-deleted "post-user" row.
 *
 * Always returns Update::SUCCESS; the results of the two DBA::update() calls are not checked.
 */
function update_1480()
{
	// The contact update worker selects by "`next-update` < now", which a NULL value never
	// satisfies. So every network that gets periodic checks needs a seeded date, not only
	// the federated ones: ZOT and PHANTOM contacts are checked periodically as well.
	$networks = array_merge(Protocol::FEDERATED, [Protocol::ZOT, Protocol::PHANTOM]);
	DBA::update('contact', ['next-update' => DBA::NULL_DATETIME], ['network' => $networks]);

	// A "post" must only be flagged as deleted when all of its "post-user" rows are deleted
	DBA::update('post', ['deleted' => false], ["`uri-id` IN (SELECT `uri-id` FROM `post-user` WHERE NOT `deleted`)"]);

	return Update::SUCCESS;
}