1
0
Fork 0

Merge pull request #9424 from annando/no-dbclean

Replace "dbclean" with new expiry functionality
This commit is contained in:
Hypolite Petovan 2020-10-15 10:33:10 -04:00 committed by GitHub
commit 472cc10ac5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 91 additions and 270 deletions

View file

@ -21,6 +21,7 @@
namespace Friendica\Worker; namespace Friendica\Worker;
use Friendica\Core\Logger;
use Friendica\Database\DBA; use Friendica\Database\DBA;
class CleanItemUri class CleanItemUri
@ -30,8 +31,9 @@ class CleanItemUri
*/ */
public static function execute() public static function execute()
{ {
DBA::p("DELETE FROM `item-uri` WHERE NOT `id` IN (SELECT `uri-id` FROM `item`) $ret = DBA::e("DELETE FROM `item-uri` WHERE NOT `id` IN (SELECT `uri-id` FROM `item`)
AND NOT `id` IN (SELECT `parent-uri-id` FROM `item`) AND NOT `id` IN (SELECT `parent-uri-id` FROM `item`)
AND NOT `id` IN (SELECT `thr-parent-id` FROM `item`)"); AND NOT `id` IN (SELECT `thr-parent-id` FROM `item`)");
Logger::notice('Orphaned URI-ID entries removed', ['result' => $ret, 'rows' => DBA::affectedRows()]);
} }
} }

View file

@ -72,9 +72,6 @@ class Cron
// Call possible post update functions // Call possible post update functions
Worker::add(PRIORITY_LOW, 'PostUpdate'); Worker::add(PRIORITY_LOW, 'PostUpdate');
// Repair entries in the database
Worker::add(PRIORITY_LOW, 'RepairDatabase');
// Hourly cron calls // Hourly cron calls
if (DI::config()->get('system', 'last_cron_hourly', 0) + 3600 < time()) { if (DI::config()->get('system', 'last_cron_hourly', 0) + 3600 < time()) {
@ -108,9 +105,12 @@ class Cron
Worker::add(PRIORITY_LOW, 'UpdateGServers'); Worker::add(PRIORITY_LOW, 'UpdateGServers');
// Repair entries in the database
Worker::add(PRIORITY_LOW, 'RepairDatabase');
Worker::add(PRIORITY_LOW, 'Expire'); Worker::add(PRIORITY_LOW, 'Expire');
Worker::add(PRIORITY_MEDIUM, 'DBClean'); Worker::add(PRIORITY_LOW, 'ExpirePosts');
Worker::add(PRIORITY_LOW, 'ExpireConversations'); Worker::add(PRIORITY_LOW, 'ExpireConversations');

View file

@ -1,230 +0,0 @@
<?php
/**
* @copyright Copyright (C) 2020, Friendica
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
namespace Friendica\Worker;
use Friendica\Core\Logger;
use Friendica\Core\Worker;
use Friendica\Database\DBA;
use Friendica\DI;
/**
 * Periodic database cleanup worker.
 *
 * Removes orphaned and expired rows in limited batches. Called with stage 0 it
 * queues one worker task per unfinished stage; called with any other stage it
 * runs that single cleanup step.
 */
class DBClean
{
	/**
	 * Worker entry point.
	 *
	 * Returns immediately unless the "system" / "dbclean" config value is set.
	 *
	 * @param integer $stage 0 queues the individual stages, any other value runs that stage
	 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
	 */
	public static function execute($stage = 0)
	{
		$enabled = DI::config()->get('system', 'dbclean', false);
		if (!$enabled) {
			return;
		}

		if ($stage == 0) {
			self::forkCleanProcess();
			return;
		}

		self::removeOrphans($stage);
	}

	/**
	 * Queue a low priority DBClean task for every stage (1 to 9) that is not flagged as finished.
	 *
	 * Stages 8 and 9 are only queued when "dbclean-expire-days" is greater than zero.
	 */
	private static function forkCleanProcess()
	{
		// Expiry period that stages 8 and 9 depend on
		$expire_days = DI::config()->get('system', 'dbclean-expire-days', 0);

		foreach (range(1, 9) as $step) {
			// Skip every step that already flagged itself as finished
			if (DI::config()->get('system', 'finished-dbclean-' . $step, false)) {
				continue;
			}

			$depends_on_expiry = ($step >= 8) && ($step <= 9);
			if ($depends_on_expiry && !($expire_days > 0)) {
				continue;
			}

			Worker::add(PRIORITY_LOW, 'DBClean', $step);
		}
	}

	/**
	 * Run a single cleanup stage.
	 *
	 * The batched stages (1, 2, 8 and 9) select at most "dbclean-expire-limit" rows starting
	 * at the ID stored in "dbclean-last-id-<stage>", delete them, queue themselves again when
	 * anything was found and finally store the last processed ID.
	 *
	 * @param integer $stage What should be deleted?
	 *
	 * Values for $stage:
	 * ------------------
	 * 1: Old global item entries from item table without user copy.
	 * 2: Items without parents.
	 * 3: Legacy functionality (removed)
	 * 4: Orphaned data from notify table.
	 * 5: Legacy functionality (removed)
	 * 6: Legacy functionality (removed)
	 * 7: Legacy functionality (removed)
	 * 8: Expired threads.
	 * 9: Old global item entries from expired threads.
	 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
	 */
	private static function removeOrphans($stage)
	{
		// Upper bound of rows handled by one task
		$limit = DI::config()->get('system', 'dbclean-expire-limit', 1000);

		// Expiry periods used by the stages 1, 8 and 9
		$days = DI::config()->get('system', 'dbclean-expire-days', 0);
		$days_unclaimed = DI::config()->get('system', 'dbclean-expire-unclaimed', 90);
		if ($days_unclaimed == 0) {
			// No separate period for unclaimed items: use the general one
			$days_unclaimed = $days;
		}

		switch ($stage) {
			case 1:
				if ($days_unclaimed <= 0) {
					return;
				}

				$cursor_id = DI::config()->get('system', 'dbclean-last-id-1', 0);
				Logger::log("Deleting old global item entries from item table without user copy. Last ID: {$cursor_id}");

				$result = DBA::p(
					"SELECT `id`, `guid` FROM `item` WHERE `uid` = 0 AND\n"
					. "NOT EXISTS (SELECT `guid` FROM `item` AS `i` WHERE `item`.`guid` = `i`.`guid` AND `i`.`uid` != 0) AND\n"
					. "`received` < UTC_TIMESTAMP() - INTERVAL ? DAY AND `id` >= ?\n"
					. "ORDER BY `id` LIMIT ?",
					$days_unclaimed,
					$cursor_id,
					$limit
				);
				$found = DBA::numRows($result);

				if ($found > 0) {
					Logger::log("found global item orphans: {$found}");
					while ($row = DBA::fetch($result)) {
						$cursor_id = $row['id'];
						Logger::info('Delete global orphan item', ['id' => $row['id'], 'guid' => $row['guid']]);
						DBA::delete('item', ['id' => $row['id']]);
					}
					// There may be more rows: continue with the next batch
					Worker::add(PRIORITY_MEDIUM, 'DBClean', 1, $cursor_id);
				} else {
					Logger::log("No global item orphans found");
				}
				DBA::close($result);

				Logger::log("Done deleting {$found} old global item entries from item table without user copy. Last ID: {$cursor_id}");
				DI::config()->set('system', 'dbclean-last-id-1', $cursor_id);
				break;

			case 2:
				$cursor_id = DI::config()->get('system', 'dbclean-last-id-2', 0);
				Logger::log("Deleting items without parents. Last ID: {$cursor_id}");

				$result = DBA::p(
					"SELECT `id`, `guid` FROM `item`\n"
					. "WHERE NOT EXISTS (SELECT `id` FROM `item` AS `i` WHERE `item`.`parent` = `i`.`id`)\n"
					. "AND `id` >= ? ORDER BY `id` LIMIT ?",
					$cursor_id,
					$limit
				);
				$found = DBA::numRows($result);

				if ($found > 0) {
					Logger::log("found item orphans without parents: {$found}");
					while ($row = DBA::fetch($result)) {
						$cursor_id = $row['id'];
						Logger::info('Delete orphan item', ['id' => $row['id'], 'guid' => $row['guid']]);
						DBA::delete('item', ['id' => $row['id']]);
					}
					Worker::add(PRIORITY_MEDIUM, 'DBClean', 2, $cursor_id);
				} else {
					Logger::log("No item orphans without parents found");
				}
				DBA::close($result);

				Logger::log("Done deleting {$found} items without parents. Last ID: {$cursor_id}");
				DI::config()->set('system', 'dbclean-last-id-2', $cursor_id);

				// A batch smaller than the limit means that nothing is left for this stage
				if ($found < $limit) {
					DI::config()->set('system', 'finished-dbclean-2', true);
				}
				break;

			case 3:
				// The legacy functionality had been removed
				DI::config()->set('system', 'finished-dbclean-3', true);
				break;

			case 4:
				DBA::p("DELETE FROM `notify` WHERE NOT `type` IN (1, 2, 16, 32, 512) AND NOT `iid` IN (SELECT `id` FROM `item`)");
				Logger::notice("Deleted orphaned data from notify table.");
				DI::config()->set('system', 'finished-dbclean-4', true);
				break;

			case 5:
				// The legacy functionality had been removed
				DI::config()->set('system', 'finished-dbclean-5', true);
				break;

			case 6:
				// The legacy functionality had been removed
				DI::config()->set('system', 'finished-dbclean-6', true);
				break;

			case 7:
				// The legacy functionality had been removed
				DI::config()->set('system', 'finished-dbclean-7', true);
				break;

			case 8:
				if ($days <= 0) {
					return;
				}

				$cursor_id = DI::config()->get('system', 'dbclean-last-id-8', 0);
				Logger::log("Deleting expired threads. Last ID: {$cursor_id}");

				$result = DBA::p(
					"SELECT `thread`.`iid` FROM `thread`\n"
					. "INNER JOIN `contact` ON `thread`.`contact-id` = `contact`.`id` AND NOT `notify_new_posts`\n"
					. "WHERE `thread`.`received` < UTC_TIMESTAMP() - INTERVAL ? DAY\n"
					. "AND NOT `thread`.`mention` AND NOT `thread`.`starred`\n"
					. "AND NOT `thread`.`wall` AND NOT `thread`.`origin`\n"
					. "AND `thread`.`uid` != 0 AND `thread`.`iid` >= ?\n"
					. "AND NOT `thread`.`iid` IN (SELECT `parent` FROM `item`\n"
					. "WHERE (`item`.`starred` OR (`item`.`resource-id` != '')\n"
					. "OR (`item`.`file` != '') OR (`item`.`event-id` != '')\n"
					. "OR (`item`.`attach` != '') OR `item`.`wall` OR `item`.`origin`)\n"
					. "AND `item`.`parent` = `thread`.`iid`)\n"
					. "ORDER BY `thread`.`iid` LIMIT ?",
					$days,
					$cursor_id,
					$limit
				);
				$found = DBA::numRows($result);

				if ($found > 0) {
					Logger::log("found expired threads: {$found}");
					while ($row = DBA::fetch($result)) {
						$cursor_id = $row['iid'];
						// Removes every item that belongs to the expired thread
						DBA::delete('item', ['parent' => $row['iid']]);
					}
					Worker::add(PRIORITY_MEDIUM, 'DBClean', 8, $cursor_id);
				} else {
					Logger::log("No expired threads found");
				}
				DBA::close($result);

				Logger::log("Done deleting {$found} expired threads. Last ID: {$cursor_id}");
				DI::config()->set('system', 'dbclean-last-id-8', $cursor_id);
				break;

			case 9:
				if ($days <= 0) {
					return;
				}

				$cursor_id = DI::config()->get('system', 'dbclean-last-id-9', 0);
				// Stage 9 never goes beyond the ID that stage 8 has reached
				$upper_id = DI::config()->get('system', 'dbclean-last-id-8', 0);
				Logger::log("Deleting old global item entries from expired threads from ID {$cursor_id} to ID {$upper_id}");

				$result = DBA::p(
					"SELECT `id`, `guid` FROM `item` WHERE `uid` = 0 AND\n"
					. "NOT EXISTS (SELECT `guid` FROM `item` AS `i` WHERE `item`.`guid` = `i`.`guid` AND `i`.`uid` != 0) AND\n"
					. "`received` < UTC_TIMESTAMP() - INTERVAL 90 DAY AND `id` >= ? AND `id` <= ?\n"
					. "ORDER BY `id` LIMIT ?",
					$cursor_id,
					$upper_id,
					$limit
				);
				$found = DBA::numRows($result);

				if ($found > 0) {
					Logger::log("found global item entries from expired threads: {$found}");
					while ($row = DBA::fetch($result)) {
						$cursor_id = $row['id'];
						Logger::info('Delete expired thread item', ['id' => $row['id'], 'guid' => $row['guid']]);
						DBA::delete('item', ['id' => $row['id']]);
					}
					Worker::add(PRIORITY_MEDIUM, 'DBClean', 9, $cursor_id);
				} else {
					Logger::log("No global item entries from expired threads");
				}
				DBA::close($result);

				Logger::log("Done deleting {$found} old global item entries from expired threads. Last ID: {$cursor_id}");
				DI::config()->set('system', 'dbclean-last-id-9', $cursor_id);
				break;
		}
	}
}

View file

@ -0,0 +1,73 @@
<?php
/**
* @copyright Copyright (C) 2020, Friendica
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
namespace Friendica\Worker;
use Friendica\Core\Logger;
use Friendica\Database\DBA;
use Friendica\DI;
use Friendica\Util\DateTimeFormat;
/**
 * Worker that removes expired threads and unclaimed public items by deleting
 * their `item-uri` rows.
 */
class ExpirePosts
{
	/**
	 * Delete old post entries
	 *
	 * Reads two values from the "system" config section:
	 * - "dbclean-expire-days": age in days after which threads matching the query below are removed
	 * - "dbclean-expire-unclaimed": age in days for public items (uid = 0); when empty
	 *   the value of "dbclean-expire-days" is used instead
	 *
	 * Each deletion only runs when its period is a positive whole number. A negative
	 * or non-numeric value would otherwise move the cutoff date to "now" or into the
	 * future and by that remove practically all matching rows.
	 */
	public static function execute()
	{
		$expire_days = DI::config()->get('system', 'dbclean-expire-days');
		$expire_days_unclaimed = DI::config()->get('system', 'dbclean-expire-unclaimed');

		if (empty($expire_days_unclaimed)) {
			$expire_days_unclaimed = $expire_days;
		}

		if (self::isValidExpiry($expire_days)) {
			$expire_days = (int)$expire_days;

			Logger::notice('Start deleting expired threads', ['expiry_days' => $expire_days]);
			$ret = DBA::e("DELETE FROM `item-uri` WHERE `id` IN
				(SELECT `uri-id` FROM `thread`
				INNER JOIN `contact` ON `id` = `contact-id` AND NOT `notify_new_posts`
				WHERE `received` < UTC_TIMESTAMP() - INTERVAL ? DAY
					AND NOT `mention` AND NOT `starred` AND NOT `wall` AND NOT `origin`
					AND `thread`.`uid` != 0 AND NOT `iid` IN (SELECT `parent` FROM `item`
						WHERE (`item`.`starred` OR (`item`.`resource-id` != '')
							OR (`item`.`event-id` != '') OR (`item`.`attach` != '')
							OR `item`.`wall` OR `item`.`origin`
							OR `uri-id` IN (SELECT `uri-id` FROM `post-category`
								WHERE `uri-id` = `item`.`uri-id`))
							AND `item`.`parent` = `thread`.`iid`))", $expire_days);

			Logger::notice('Deleted expired threads', ['result' => $ret, 'rows' => DBA::affectedRows()]);
		}

		if (self::isValidExpiry($expire_days_unclaimed)) {
			$expire_days_unclaimed = (int)$expire_days_unclaimed;

			// The cutoff is calculated once so that both sub queries compare against the same date
			$expiry_date = DateTimeFormat::utc('now - ' . $expire_days_unclaimed . ' days', DateTimeFormat::MYSQL);

			Logger::notice('Start deleting unclaimed public items', ['expiry_days' => $expire_days_unclaimed, 'expired' => $expiry_date]);
			$ret = DBA::e("DELETE FROM `item-uri` WHERE `id` IN
				(SELECT `uri-id` FROM `item` WHERE `gravity` = ? AND `uid` = ? AND `received` < ?
					AND NOT `uri-id` IN (SELECT `parent-uri-id` FROM `item` WHERE `uid` != ?)
					AND NOT `uri-id` IN (SELECT `parent-uri-id` FROM `item` WHERE `uid` = ? AND `received` > ?))",
				GRAVITY_PARENT, 0, $expiry_date, 0, 0, $expiry_date);

			Logger::notice('Deleted unclaimed public items', ['result' => $ret, 'rows' => DBA::affectedRows()]);
		}
	}

	/**
	 * Check whether a config value is usable as an expiry period.
	 *
	 * @param mixed $days Raw config value
	 * @return boolean true when the value is numeric and at least one whole day
	 */
	private static function isValidExpiry($days)
	{
		return is_numeric($days) && ((int)$days > 0);
	}
}

View file

@ -21,12 +21,8 @@
namespace Friendica\Worker; namespace Friendica\Worker;
use Friendica\Core\Logger;
use Friendica\Core\Protocol;
use Friendica\Database\DBA; use Friendica\Database\DBA;
use Friendica\Model\Contact;
use Friendica\Model\ItemURI; use Friendica\Model\ItemURI;
use Friendica\Util\Strings;
/** /**
* Do some repairs in database entries * Do some repairs in database entries
@ -36,37 +32,6 @@ class RepairDatabase
{ {
public static function execute() public static function execute()
{ {
// Sometimes there seem to be issues where the "self" contact vanishes.
// We haven't found the origin of the problem by now.
$users = DBA::select('user', ['uid'], ["NOT EXISTS (SELECT `uid` FROM `contact` WHERE `contact`.`uid` = `user`.`uid` AND `contact`.`self`)"]);
while ($user = DBA::fetch($users)) {
Logger::notice('Create missing self contact', ['user'=> $user['uid']]);
Contact::createSelfFromUserId($user['uid']);
}
DBA::close($users);
// There was an issue where the nick vanishes from the contact table
DBA::e("UPDATE `contact` INNER JOIN `user` ON `contact`.`uid` = `user`.`uid` SET `nick` = `nickname` WHERE `self` AND `nick`=''");
/// @todo
/// - remove thread entries without item
/// - remove sign entries without item
/// - remove children when parent got lost
/// - set contact-id in item when not present
// Add intro entries for pending contacts
// We don't do this for DFRN entries since such revived contact requests seem to mostly fail.
$pending_contacts = DBA::p("SELECT `uid`, `id`, `url`, `network`, `created` FROM `contact`
WHERE `pending` AND `rel` IN (?, ?) AND `network` != ? AND `uid` != ?
AND NOT EXISTS (SELECT `id` FROM `intro` WHERE `contact-id` = `contact`.`id`)",
0, Contact::FOLLOWER, Protocol::DFRN, 0);
while ($contact = DBA::fetch($pending_contacts)) {
DBA::insert('intro', ['uid' => $contact['uid'], 'contact-id' => $contact['id'], 'blocked' => false,
'hash' => Strings::getRandomHex(), 'datetime' => $contact['created']]);
}
DBA::close($pending_contacts);
// Ensure that there are no "uri-id", "parent-uri-id" or "thr-parent-id" fields that are NULL // Ensure that there are no "uri-id", "parent-uri-id" or "thr-parent-id" fields that are NULL
$items = DBA::select('item', ['id', 'uri', 'guid'], ["`uri-id` IS NULL"]); $items = DBA::select('item', ['id', 'uri', 'guid'], ["`uri-id` IS NULL"]);
while ($item = DBA::fetch($items)) { while ($item = DBA::fetch($items)) {
@ -88,5 +53,16 @@ class RepairDatabase
DBA::update('item', ['thr-parent-id' => $uriid], ['id' => $item['id']]); DBA::update('item', ['thr-parent-id' => $uriid], ['id' => $item['id']]);
} }
DBA::close($items); DBA::close($items);
// Ensure that all uri-id are set correctly
DBA::e("UPDATE `item` INNER JOIN `item-uri` ON `item-uri`.`uri` = `item`.`uri`
SET `uri-id` = `item-uri`.`id` WHERE `item`.`uri-id` != `item-uri`.`id` AND `uri` != ?", '');
DBA::e("UPDATE `item` INNER JOIN `item-uri` ON `item-uri`.`uri` = `item`.`parent-uri`
SET `parent-uri-id` = `item-uri`.`id` WHERE `item`.`parent-uri-id` != `item-uri`.`id` AND `parent-uri` != ?", '');
DBA::e("UPDATE `item` INNER JOIN `item-uri` ON `item-uri`.`uri` = `item`.`thr-parent`
SET `thr-parent-id` = `item-uri`.`id` WHERE `item`.`thr-parent-id` != `item-uri`.`id` AND `thr-parent` != ?", '');
// Delete orphaned data from notify table.
DBA::e("DELETE FROM `notify` WHERE NOT `type` IN (1, 2, 16, 32, 512) AND NOT `iid` IN (SELECT `id` FROM `item`)");
} }
} }