From 84b6c18b94551e2d21c89a5c44867dbdc5b6c15f Mon Sep 17 00:00:00 2001 From: Michael Date: Thu, 15 Oct 2020 06:02:17 +0000 Subject: [PATCH 1/4] Replace "dbclean" with new expiry functionality --- src/Worker/Cron.php | 8 +- src/Worker/DBClean.php | 230 ---------------------------------- src/Worker/ExpirePosts.php | 73 +++++++++++ src/Worker/RepairDatabase.php | 44 ++----- 4 files changed, 86 insertions(+), 269 deletions(-) delete mode 100644 src/Worker/DBClean.php create mode 100644 src/Worker/ExpirePosts.php diff --git a/src/Worker/Cron.php b/src/Worker/Cron.php index 8ede97c2e4..5e2a6a0695 100644 --- a/src/Worker/Cron.php +++ b/src/Worker/Cron.php @@ -72,9 +72,6 @@ class Cron // Call possible post update functions Worker::add(PRIORITY_LOW, 'PostUpdate'); - // Repair entries in the database - Worker::add(PRIORITY_LOW, 'RepairDatabase'); - // Hourly cron calls if (DI::config()->get('system', 'last_cron_hourly', 0) + 3600 < time()) { @@ -93,6 +90,9 @@ class Cron // Clear cache entries Worker::add(PRIORITY_LOW, 'ClearCache'); + // Repair entries in the database + Worker::add(PRIORITY_LOW, 'RepairDatabase'); + DI::config()->set('system', 'last_cron_hourly', time()); } @@ -110,7 +110,7 @@ class Cron Worker::add(PRIORITY_LOW, 'Expire'); - Worker::add(PRIORITY_MEDIUM, 'DBClean'); + Worker::add(PRIORITY_LOW, 'ExpirePosts'); Worker::add(PRIORITY_LOW, 'ExpireConversations'); diff --git a/src/Worker/DBClean.php b/src/Worker/DBClean.php deleted file mode 100644 index 77eaa8de65..0000000000 --- a/src/Worker/DBClean.php +++ /dev/null @@ -1,230 +0,0 @@ -. - * - */ - -namespace Friendica\Worker; - -use Friendica\Core\Logger; -use Friendica\Core\Worker; -use Friendica\Database\DBA; -use Friendica\DI; - -/** - * The script is called from time to time to clean the database entries and remove orphaned data. 
- */ -class DBClean { - public static function execute($stage = 0) { - - if (!DI::config()->get('system', 'dbclean', false)) { - return; - } - - if ($stage == 0) { - self::forkCleanProcess(); - } else { - self::removeOrphans($stage); - } - } - - /** - * Fork the different DBClean processes - */ - private static function forkCleanProcess() { - // Get the expire days for step 8 and 9 - $days = DI::config()->get('system', 'dbclean-expire-days', 0); - - for ($i = 1; $i <= 9; $i++) { - // Execute the background script for a step when it isn't finished. - // Execute step 8 and 9 only when $days is defined. - if (!DI::config()->get('system', 'finished-dbclean-'.$i, false) && (($i < 8) || ($i > 9) || ($days > 0))) { - Worker::add(PRIORITY_LOW, 'DBClean', $i); - } - } - } - - /** - * Remove orphaned database entries - * - * @param integer $stage What should be deleted? - * - * Values for $stage: - * ------------------ - * 1: Old global item entries from item table without user copy. - * 2: Items without parents. - * 3: Legacy functionality (removed) - * 4: Orphaned data from notify table. - * 5: Legacy functionality (removed) - * 6: Legacy functionality (removed) - * 7: Legacy functionality (removed) - * 8: Expired threads. - * 9: Old global item entries from expired threads. 
- * @throws \Friendica\Network\HTTPException\InternalServerErrorException - */ - private static function removeOrphans($stage) { - // We split the deletion in many small tasks - $limit = DI::config()->get('system', 'dbclean-expire-limit', 1000); - - // Get the expire days for step 8 and 9 - $days = DI::config()->get('system', 'dbclean-expire-days', 0); - $days_unclaimed = DI::config()->get('system', 'dbclean-expire-unclaimed', 90); - - if ($days_unclaimed == 0) { - $days_unclaimed = $days; - } - - if ($stage == 1) { - if ($days_unclaimed <= 0) { - return; - } - - $last_id = DI::config()->get('system', 'dbclean-last-id-1', 0); - - Logger::log("Deleting old global item entries from item table without user copy. Last ID: ".$last_id); - $r = DBA::p("SELECT `id`, `guid` FROM `item` WHERE `uid` = 0 AND - NOT EXISTS (SELECT `guid` FROM `item` AS `i` WHERE `item`.`guid` = `i`.`guid` AND `i`.`uid` != 0) AND - `received` < UTC_TIMESTAMP() - INTERVAL ? DAY AND `id` >= ? - ORDER BY `id` LIMIT ?", $days_unclaimed, $last_id, $limit); - $count = DBA::numRows($r); - if ($count > 0) { - Logger::log("found global item orphans: ".$count); - while ($orphan = DBA::fetch($r)) { - $last_id = $orphan["id"]; - Logger::info('Delete global orphan item', ['id' => $orphan['id'], 'guid' => $orphan['guid']]); - DBA::delete('item', ['id' => $orphan["id"]]); - } - Worker::add(PRIORITY_MEDIUM, 'DBClean', 1, $last_id); - } else { - Logger::log("No global item orphans found"); - } - DBA::close($r); - Logger::log("Done deleting ".$count." old global item entries from item table without user copy. Last ID: ".$last_id); - - DI::config()->set('system', 'dbclean-last-id-1', $last_id); - } elseif ($stage == 2) { - $last_id = DI::config()->get('system', 'dbclean-last-id-2', 0); - - Logger::log("Deleting items without parents. Last ID: ".$last_id); - $r = DBA::p("SELECT `id`, `guid` FROM `item` - WHERE NOT EXISTS (SELECT `id` FROM `item` AS `i` WHERE `item`.`parent` = `i`.`id`) - AND `id` >= ? 
ORDER BY `id` LIMIT ?", $last_id, $limit); - $count = DBA::numRows($r); - if ($count > 0) { - Logger::log("found item orphans without parents: ".$count); - while ($orphan = DBA::fetch($r)) { - $last_id = $orphan["id"]; - Logger::info('Delete orphan item', ['id' => $orphan['id'], 'guid' => $orphan['guid']]); - DBA::delete('item', ['id' => $orphan["id"]]); - } - Worker::add(PRIORITY_MEDIUM, 'DBClean', 2, $last_id); - } else { - Logger::log("No item orphans without parents found"); - } - DBA::close($r); - Logger::log("Done deleting ".$count." items without parents. Last ID: ".$last_id); - - DI::config()->set('system', 'dbclean-last-id-2', $last_id); - - if ($count < $limit) { - DI::config()->set('system', 'finished-dbclean-2', true); - } - } elseif ($stage == 3) { - // The legacy functionality had been removed - DI::config()->set('system', 'finished-dbclean-3', true); - } elseif ($stage == 4) { - DBA::p("DELETE FROM `notify` WHERE NOT `type` IN (1, 2, 16, 32, 512) AND NOT `iid` IN (SELECT `id` FROM `item`)"); - - Logger::notice("Deleted orphaned data from notify table."); - DI::config()->set('system', 'finished-dbclean-4', true); - } elseif ($stage == 5) { - // The legacy functionality had been removed - DI::config()->set('system', 'finished-dbclean-5', true); - } elseif ($stage == 6) { - // The legacy functionality had been removed - DI::config()->set('system', 'finished-dbclean-6', true); - } elseif ($stage == 7) { - // The legacy functionality had been removed - DI::config()->set('system', 'finished-dbclean-7', true); - } elseif ($stage == 8) { - if ($days <= 0) { - return; - } - - $last_id = DI::config()->get('system', 'dbclean-last-id-8', 0); - - Logger::log("Deleting expired threads. Last ID: ".$last_id); - $r = DBA::p("SELECT `thread`.`iid` FROM `thread` - INNER JOIN `contact` ON `thread`.`contact-id` = `contact`.`id` AND NOT `notify_new_posts` - WHERE `thread`.`received` < UTC_TIMESTAMP() - INTERVAL ? 
DAY - AND NOT `thread`.`mention` AND NOT `thread`.`starred` - AND NOT `thread`.`wall` AND NOT `thread`.`origin` - AND `thread`.`uid` != 0 AND `thread`.`iid` >= ? - AND NOT `thread`.`iid` IN (SELECT `parent` FROM `item` - WHERE (`item`.`starred` OR (`item`.`resource-id` != '') - OR (`item`.`file` != '') OR (`item`.`event-id` != '') - OR (`item`.`attach` != '') OR `item`.`wall` OR `item`.`origin`) - AND `item`.`parent` = `thread`.`iid`) - ORDER BY `thread`.`iid` LIMIT ?", $days, $last_id, $limit); - $count = DBA::numRows($r); - if ($count > 0) { - Logger::log("found expired threads: ".$count); - while ($thread = DBA::fetch($r)) { - $last_id = $thread["iid"]; - DBA::delete('item', ['parent' => $thread["iid"]]); - } - Worker::add(PRIORITY_MEDIUM, 'DBClean', 8, $last_id); - } else { - Logger::log("No expired threads found"); - } - DBA::close($r); - Logger::log("Done deleting ".$count." expired threads. Last ID: ".$last_id); - - DI::config()->set('system', 'dbclean-last-id-8', $last_id); - } elseif ($stage == 9) { - if ($days <= 0) { - return; - } - - $last_id = DI::config()->get('system', 'dbclean-last-id-9', 0); - $till_id = DI::config()->get('system', 'dbclean-last-id-8', 0); - - Logger::log("Deleting old global item entries from expired threads from ID ".$last_id." to ID ".$till_id); - $r = DBA::p("SELECT `id`, `guid` FROM `item` WHERE `uid` = 0 AND - NOT EXISTS (SELECT `guid` FROM `item` AS `i` WHERE `item`.`guid` = `i`.`guid` AND `i`.`uid` != 0) AND - `received` < UTC_TIMESTAMP() - INTERVAL 90 DAY AND `id` >= ? AND `id` <= ? 
- ORDER BY `id` LIMIT ?", $last_id, $till_id, $limit); - $count = DBA::numRows($r); - if ($count > 0) { - Logger::log("found global item entries from expired threads: ".$count); - while ($orphan = DBA::fetch($r)) { - $last_id = $orphan["id"]; - Logger::info('Delete expired thread item', ['id' => $orphan['id'], 'guid' => $orphan['guid']]); - DBA::delete('item', ['id' => $orphan["id"]]); - } - Worker::add(PRIORITY_MEDIUM, 'DBClean', 9, $last_id); - } else { - Logger::log("No global item entries from expired threads"); - } - DBA::close($r); - Logger::log("Done deleting ".$count." old global item entries from expired threads. Last ID: ".$last_id); - - DI::config()->set('system', 'dbclean-last-id-9', $last_id); - } - } -} diff --git a/src/Worker/ExpirePosts.php b/src/Worker/ExpirePosts.php new file mode 100644 index 0000000000..22136467b7 --- /dev/null +++ b/src/Worker/ExpirePosts.php @@ -0,0 +1,73 @@ +. + * + */ + +namespace Friendica\Worker; + +use Friendica\Core\Logger; +use Friendica\Database\DBA; +use Friendica\DI; +use Friendica\Util\DateTimeFormat; + +class ExpirePosts +{ + /** + * Delete old post entries + */ + public static function execute() + { + $expire_days = DI::config()->get('system', 'dbclean-expire-days'); + $expire_days_unclaimed = DI::config()->get('system', 'dbclean-expire-unclaimed'); + if (empty($expire_days_unclaimed)) { + $expire_days_unclaimed = $expire_days; + } + + if (!empty($expire_days)) { + Logger::notice('Start deleting expired threads', ['expiry_days' => $expire_days, 'count' => DBA::count('item')]); + $ret = DBA::e("DELETE FROM `item-uri` WHERE `id` IN + (SELECT `uri-id` FROM `thread` + INNER JOIN `contact` ON `id` = `contact-id` AND NOT `notify_new_posts` + WHERE `received` < UTC_TIMESTAMP() - INTERVAL ? 
DAY + AND NOT `mention` AND NOT `starred` AND NOT `wall` AND NOT `origin` + AND `thread`.`uid` != 0 AND NOT `iid` IN (SELECT `parent` FROM `item` + WHERE (`item`.`starred` OR (`item`.`resource-id` != '') + OR (`item`.`event-id` != '') OR (`item`.`attach` != '') + OR `item`.`wall` OR `item`.`origin` + OR `uri-id` IN (SELECT `uri-id` FROM `post-category` + WHERE `uri-id` = `item`.`uri-id`)) + AND `item`.`parent` = `thread`.`iid`))", $expire_days); + + Logger::notice('Deleted expired threads', ['result' => $ret, 'rows' => DBA::affectedRows(), 'count' => DBA::count('item')]); + } + + if (!empty($expire_days_unclaimed)) { + $expiry_date = DateTimeFormat::utc('now - ' . $expire_days_unclaimed . ' days', DateTimeFormat::MYSQL); + + Logger::notice('Start deleting unclaimed public items', ['expiry_days' => $expire_days_unclaimed, 'expired' => $expiry_date, 'count' => DBA::count('item')]); + $ret = DBA::e("DELETE FROM `item-uri` WHERE `id` IN + (SELECT `uri-id` FROM `item` WHERE `gravity` = ? AND `uid` = ? AND `received` < ? + AND NOT `uri-id` IN (SELECT `parent-uri-id` FROM `item` WHERE `uid` != ?) + AND NOT `uri-id` IN (SELECT `parent-uri-id` FROM `item` WHERE `uid` = ? 
AND `received` > ?))", + GRAVITY_PARENT, 0, $expiry_date, 0, 0, $expiry_date); + + Logger::notice('Deleted unclaimed public items', ['result' => $ret, 'rows' => DBA::affectedRows(), 'count' => DBA::count('item')]); + } + } +} diff --git a/src/Worker/RepairDatabase.php b/src/Worker/RepairDatabase.php index 076974d095..655e99520b 100644 --- a/src/Worker/RepairDatabase.php +++ b/src/Worker/RepairDatabase.php @@ -21,12 +21,8 @@ namespace Friendica\Worker; -use Friendica\Core\Logger; -use Friendica\Core\Protocol; use Friendica\Database\DBA; -use Friendica\Model\Contact; use Friendica\Model\ItemURI; -use Friendica\Util\Strings; /** * Do some repairs in database entries @@ -36,37 +32,6 @@ class RepairDatabase { public static function execute() { - // Sometimes there seem to be issues where the "self" contact vanishes. - // We haven't found the origin of the problem by now. - - $users = DBA::select('user', ['uid'], ["NOT EXISTS (SELECT `uid` FROM `contact` WHERE `contact`.`uid` = `user`.`uid` AND `contact`.`self`)"]); - while ($user = DBA::fetch($users)) { - Logger::notice('Create missing self contact', ['user'=> $user['uid']]); - Contact::createSelfFromUserId($user['uid']); - } - DBA::close($users); - - // There was an issue where the nick vanishes from the contact table - DBA::e("UPDATE `contact` INNER JOIN `user` ON `contact`.`uid` = `user`.`uid` SET `nick` = `nickname` WHERE `self` AND `nick`=''"); - - /// @todo - /// - remove thread entries without item - /// - remove sign entries without item - /// - remove children when parent got lost - /// - set contact-id in item when not present - - // Add intro entries for pending contacts - // We don't do this for DFRN entries since such revived contact requests seem to mostly fail. - $pending_contacts = DBA::p("SELECT `uid`, `id`, `url`, `network`, `created` FROM `contact` - WHERE `pending` AND `rel` IN (?, ?) AND `network` != ? AND `uid` != ? 
- AND NOT EXISTS (SELECT `id` FROM `intro` WHERE `contact-id` = `contact`.`id`)", - 0, Contact::FOLLOWER, Protocol::DFRN, 0); - while ($contact = DBA::fetch($pending_contacts)) { - DBA::insert('intro', ['uid' => $contact['uid'], 'contact-id' => $contact['id'], 'blocked' => false, - 'hash' => Strings::getRandomHex(), 'datetime' => $contact['created']]); - } - DBA::close($pending_contacts); - // Ensure that there are no "uri-id", "parent-uri-id" or "thr-parent-id" fields that are NULL $items = DBA::select('item', ['id', 'uri', 'guid'], ["`uri-id` IS NULL"]); while ($item = DBA::fetch($items)) { @@ -88,5 +53,14 @@ class RepairDatabase DBA::update('item', ['thr-parent-id' => $uriid], ['id' => $item['id']]); } DBA::close($items); + + // Ensure that all uri-id are set correctly + DBA::e("UPDATE `item` INNER JOIN `item-uri` ON `item-uri`.`uri` = `item`.`uri` + SET `uri-id` = `item-uri`.`id` WHERE `item`.`uri-id` != `item-uri`.`id`"); + DBA::e("UPDATE `item` INNER JOIN `item-uri` ON `item-uri`.`uri` = `item`.`parent-uri` + SET `parent-uri-id` = `item-uri`.`id` WHERE `item`.`parent-uri-id` != `item-uri`.`id`"); + + // Delete orphaned data from notify table. 
+ DBA::e("DELETE FROM `notify` WHERE NOT `type` IN (1, 2, 16, 32, 512) AND NOT `iid` IN (SELECT `id` FROM `item`)"); } } From 9704a9bf25b8b072c2b25fd0655a989172c099a7 Mon Sep 17 00:00:00 2001 From: Michael Date: Thu, 15 Oct 2020 07:34:21 +0000 Subject: [PATCH 2/4] Move the repair functionality to the daily cron --- src/Worker/Cron.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Worker/Cron.php b/src/Worker/Cron.php index 5e2a6a0695..90042e30f6 100644 --- a/src/Worker/Cron.php +++ b/src/Worker/Cron.php @@ -90,9 +90,6 @@ class Cron // Clear cache entries Worker::add(PRIORITY_LOW, 'ClearCache'); - // Repair entries in the database - Worker::add(PRIORITY_LOW, 'RepairDatabase'); - DI::config()->set('system', 'last_cron_hourly', time()); } @@ -108,6 +105,9 @@ class Cron Worker::add(PRIORITY_LOW, 'UpdateGServers'); + // Repair entries in the database + Worker::add(PRIORITY_LOW, 'RepairDatabase'); + Worker::add(PRIORITY_LOW, 'Expire'); Worker::add(PRIORITY_LOW, 'ExpirePosts'); From 53a9c65daa8e24c7fcff803331142b8877448301 Mon Sep 17 00:00:00 2001 From: Michael Date: Thu, 15 Oct 2020 07:40:24 +0000 Subject: [PATCH 3/4] Only set values if the uri isn't empty --- src/Worker/RepairDatabase.php | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Worker/RepairDatabase.php b/src/Worker/RepairDatabase.php index 655e99520b..69c667df42 100644 --- a/src/Worker/RepairDatabase.php +++ b/src/Worker/RepairDatabase.php @@ -56,9 +56,11 @@ class RepairDatabase // Ensure that all uri-id are set correctly DBA::e("UPDATE `item` INNER JOIN `item-uri` ON `item-uri`.`uri` = `item`.`uri` - SET `uri-id` = `item-uri`.`id` WHERE `item`.`uri-id` != `item-uri`.`id`"); + SET `uri-id` = `item-uri`.`id` WHERE `item`.`uri-id` != `item-uri`.`id` AND `uri` != ?", ''); DBA::e("UPDATE `item` INNER JOIN `item-uri` ON `item-uri`.`uri` = `item`.`parent-uri` - SET `parent-uri-id` = `item-uri`.`id` WHERE `item`.`parent-uri-id` != `item-uri`.`id`"); + SET 
`parent-uri-id` = `item-uri`.`id` WHERE `item`.`parent-uri-id` != `item-uri`.`id` AND `parent-uri` != ?", ''); + DBA::e("UPDATE `item` INNER JOIN `item-uri` ON `item-uri`.`uri` = `item`.`thr-parent` + SET `thr-parent-id` = `item-uri`.`id` WHERE `item`.`thr-parent-id` != `item-uri`.`id` AND `thr-parent` != ?", ''); // Delete orphaned data from notify table. DBA::e("DELETE FROM `notify` WHERE NOT `type` IN (1, 2, 16, 32, 512) AND NOT `iid` IN (SELECT `id` FROM `item`)"); From 2db7a4971c4af77de8ae6634455909a515dd60f8 Mon Sep 17 00:00:00 2001 From: Michael Date: Thu, 15 Oct 2020 09:42:18 +0000 Subject: [PATCH 4/4] Count removed, added logging for item uri --- src/Worker/CleanItemUri.php | 4 +++- src/Worker/ExpirePosts.php | 8 ++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Worker/CleanItemUri.php b/src/Worker/CleanItemUri.php index 60a9835062..0f24e549f7 100644 --- a/src/Worker/CleanItemUri.php +++ b/src/Worker/CleanItemUri.php @@ -21,6 +21,7 @@ namespace Friendica\Worker; +use Friendica\Core\Logger; use Friendica\Database\DBA; class CleanItemUri @@ -30,8 +31,9 @@ class CleanItemUri */ public static function execute() { - DBA::p("DELETE FROM `item-uri` WHERE NOT `id` IN (SELECT `uri-id` FROM `item`) + $ret = DBA::e("DELETE FROM `item-uri` WHERE NOT `id` IN (SELECT `uri-id` FROM `item`) AND NOT `id` IN (SELECT `parent-uri-id` FROM `item`) AND NOT `id` IN (SELECT `thr-parent-id` FROM `item`)"); + Logger::notice('Orphaned URI-ID entries removed', ['result' => $ret, 'rows' => DBA::affectedRows()]); } } diff --git a/src/Worker/ExpirePosts.php b/src/Worker/ExpirePosts.php index 22136467b7..f8d2432679 100644 --- a/src/Worker/ExpirePosts.php +++ b/src/Worker/ExpirePosts.php @@ -40,7 +40,7 @@ class ExpirePosts } if (!empty($expire_days)) { - Logger::notice('Start deleting expired threads', ['expiry_days' => $expire_days, 'count' => DBA::count('item')]); + Logger::notice('Start deleting expired threads', ['expiry_days' => $expire_days]); $ret = 
DBA::e("DELETE FROM `item-uri` WHERE `id` IN (SELECT `uri-id` FROM `thread` INNER JOIN `contact` ON `id` = `contact-id` AND NOT `notify_new_posts` @@ -54,20 +54,20 @@ class ExpirePosts WHERE `uri-id` = `item`.`uri-id`)) AND `item`.`parent` = `thread`.`iid`))", $expire_days); - Logger::notice('Deleted expired threads', ['result' => $ret, 'rows' => DBA::affectedRows(), 'count' => DBA::count('item')]); + Logger::notice('Deleted expired threads', ['result' => $ret, 'rows' => DBA::affectedRows()]); } if (!empty($expire_days_unclaimed)) { $expiry_date = DateTimeFormat::utc('now - ' . $expire_days_unclaimed . ' days', DateTimeFormat::MYSQL); - Logger::notice('Start deleting unclaimed public items', ['expiry_days' => $expire_days_unclaimed, 'expired' => $expiry_date, 'count' => DBA::count('item')]); + Logger::notice('Start deleting unclaimed public items', ['expiry_days' => $expire_days_unclaimed, 'expired' => $expiry_date]); $ret = DBA::e("DELETE FROM `item-uri` WHERE `id` IN (SELECT `uri-id` FROM `item` WHERE `gravity` = ? AND `uid` = ? AND `received` < ? AND NOT `uri-id` IN (SELECT `parent-uri-id` FROM `item` WHERE `uid` != ?) AND NOT `uri-id` IN (SELECT `parent-uri-id` FROM `item` WHERE `uid` = ? AND `received` > ?))", GRAVITY_PARENT, 0, $expiry_date, 0, 0, $expiry_date); - Logger::notice('Deleted unclaimed public items', ['result' => $ret, 'rows' => DBA::affectedRows(), 'count' => DBA::count('item')]); + Logger::notice('Deleted unclaimed public items', ['result' => $ret, 'rows' => DBA::affectedRows()]); } } }