Merge pull request #9113 from annando/split-cron

Split cron tasks in several worker tasks
This commit is contained in:
Hypolite Petovan 2020-09-01 10:08:55 -04:00 committed by GitHub
commit 4dda0b01cb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 373 additions and 227 deletions

View File

@ -0,0 +1,41 @@
<?php
/**
* @copyright Copyright (C) 2020, Friendica
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
namespace Friendica\Worker;
use Friendica\Core\Worker;
use Friendica\Database\DBA;
/**
* Checks for contacts that are about to be deleted and ensures that they are removed.
* This should be done automatically in the "remove" function. This here is a cleanup job.
*/
class CheckDeletedContacts
{
    /**
     * Hands every contact row flagged as deleted to the "RemoveContact" worker task.
     *
     * Selects the `id` of all `contact` rows where `deleted` is true and calls
     * Worker::add() once per row with medium priority.
     *
     * @return void
     */
    public static function execute()
    {
        $deleted_contacts = DBA::select('contact', ['id'], ['deleted' => true]);

        for ($row = DBA::fetch($deleted_contacts); $row; $row = DBA::fetch($deleted_contacts)) {
            Worker::add(PRIORITY_MEDIUM, 'RemoveContact', $row['id']);
        }

        DBA::close($deleted_contacts);
    }
}

View File

@ -0,0 +1,50 @@
<?php
/**
* @copyright Copyright (C) 2020, Friendica
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
namespace Friendica\Worker;
use Friendica\Core\Worker;
use Friendica\Database\DBA;
use Friendica\DI;
/**
* Delete all done workerqueue entries
*/
class CleanWorkerQueue
{
    /**
     * Deletes finished workerqueue entries and, if configured, optimizes the worker tables.
     *
     * Rows of `workerqueue` that are flagged `done` and whose `executed` timestamp is older
     * than one hour are removed. When the `system.optimize_tables` config value is truthy,
     * the `workerqueue` and `process` tables are optimized while both worker locks are held.
     *
     * @return void
     */
    public static function execute()
    {
        DBA::delete('workerqueue', ['`done` AND `executed` < UTC_TIMESTAMP() - INTERVAL 1 HOUR']);

        if (!DI::config()->get('system', 'optimize_tables')) {
            return;
        }

        // Optimizing these tables only takes seconds.
        // We are acquiring the two locks from the worker to avoid locking problems.
        // NOTE(review): the second acquire() argument (10) is presumably a timeout in seconds - confirm.
        if (!DI::lock()->acquire(Worker::LOCK_PROCESS, 10)) {
            return;
        }

        try {
            if (DI::lock()->acquire(Worker::LOCK_WORKER, 10)) {
                try {
                    DBA::e("OPTIMIZE TABLE `workerqueue`");
                    DBA::e("OPTIMIZE TABLE `process`");
                } finally {
                    // Release even when a statement throws, so the lock is not left held
                    DI::lock()->release(Worker::LOCK_WORKER);
                }
            }
        } finally {
            // Locks are released in reverse order of acquisition
            DI::lock()->release(Worker::LOCK_PROCESS);
        }
    }
}

View File

@ -37,9 +37,6 @@ class ClearCache
// clear old cache
DI::cache()->clear();
if (DI::config()->get('system', 'optimize_tables')) {
DBA::e("OPTIMIZE TABLE `cache`");
}
// clear old item cache files
clear_cache();
@ -66,14 +63,8 @@ class ClearCache
// Delete the cached OEmbed entries that are older than three month
DBA::delete('oembed', ["`created` < NOW() - INTERVAL 3 MONTH"]);
if (DI::config()->get('system', 'optimize_tables')) {
DBA::e("OPTIMIZE TABLE `oembed`");
}
// Delete the cached "parse_url" entries that are older than three month
DBA::delete('parsed_url', ["`created` < NOW() - INTERVAL 3 MONTH"]);
if (DI::config()->get('system', 'optimize_tables')) {
DBA::e("OPTIMIZE TABLE `parsed_url`");
}
}
}

View File

@ -21,15 +21,10 @@
namespace Friendica\Worker;
use Friendica\Core\Addon;
use Friendica\Core\Hook;
use Friendica\Core\Logger;
use Friendica\Core\Protocol;
use Friendica\Core\Worker;
use Friendica\Database\DBA;
use Friendica\DI;
use Friendica\Model\Contact;
use Friendica\Util\DateTimeFormat;
class Cron
{
@ -44,15 +39,28 @@ class Cron
if ($last) {
$next = $last + ($poll_interval * 60);
if ($next > time()) {
Logger::log('cron intervall not reached');
Logger::notice('cron intervall not reached');
return;
}
}
Logger::log('cron: start');
Logger::notice('start');
// Ensure to have a .htaccess file.
// this is a precaution for systems that update automatically
$basepath = $a->getBasePath();
if (!file_exists($basepath . '/.htaccess') && is_writable($basepath)) {
copy($basepath . '/.htaccess-dist', $basepath . '/.htaccess');
}
// Fork the cron jobs in separate parts to avoid problems when one of them is crashing
Hook::fork($a->queue['priority'], "cron");
Hook::fork($a->queue['priority'], 'cron');
// Poll contacts
Worker::add(PRIORITY_MEDIUM, 'PollContacts');
// Update contact information
Worker::add(PRIORITY_LOW, 'UpdatePublicContacts');
// run the process to update server directories in the background
Worker::add(PRIORITY_LOW, 'UpdateServerDirectories');
@ -66,12 +74,25 @@ class Cron
// Repair entries in the database
Worker::add(PRIORITY_LOW, 'RepairDatabase');
// once daily run birthday_updates and then expire in background
$d1 = DI::config()->get('system', 'last_expire_day');
$d2 = intval(DateTimeFormat::utcNow('d'));
// Hourly cron calls
if (DI::config()->get('system', 'last_cron_hourly', 0) + 3600 < time()) {
// Search for new contacts in the directory
if (DI::config()->get('system', 'synchronize_directory')) {
Worker::add(PRIORITY_LOW, 'PullDirectory');
}
// Delete all done workerqueue entries
Worker::add(PRIORITY_LOW, 'CleanWorkerQueue');
// Clear cache entries
Worker::add(PRIORITY_LOW, 'ClearCache');
DI::config()->set('system', 'last_cron_hourly', time());
}
// Daily cron calls
if ($d2 != intval($d1)) {
if (DI::config()->get('system', 'last_cron_daily', 0) + 86400 < time()) {
Worker::add(PRIORITY_LOW, 'UpdateContactBirthdays');
@ -93,219 +114,17 @@ class Cron
// check upstream version?
Worker::add(PRIORITY_LOW, 'CheckVersion');
self::checkdeletedContacts();
Worker::add(PRIORITY_LOW, 'CheckDeletedContacts');
if (DI::config()->get('system', 'optimize_tables')) {
self::optimizeTables();
Worker::add(PRIORITY_LOW, 'OptimizeTables');
}
DI::config()->set('system', 'last_expire_day', $d2);
DI::config()->set('system', 'last_cron_daily', time());
}
// Hourly cron calls
if (DI::config()->get('system', 'last_cron_hourly', 0) + 3600 < time()) {
// Search for new contacts in the directory
if (DI::config()->get('system', 'synchronize_directory')) {
Worker::add(PRIORITY_LOW, 'PullDirectory');
}
// Delete all done workerqueue entries
DBA::delete('workerqueue', ['`done` AND `executed` < UTC_TIMESTAMP() - INTERVAL 1 HOUR']);
// Optimizing this table only last seconds
if (DI::config()->get('system', 'optimize_tables')) {
// We are acquiring the two locks from the worker to avoid locking problems
if (DI::lock()->acquire(Worker::LOCK_PROCESS, 10)) {
if (DI::lock()->acquire(Worker::LOCK_WORKER, 10)) {
DBA::e("OPTIMIZE TABLE `workerqueue`");
DBA::e("OPTIMIZE TABLE `process`");
DI::lock()->release(Worker::LOCK_WORKER);
}
DI::lock()->release(Worker::LOCK_PROCESS);
}
}
// Clear cache entries
Worker::add(PRIORITY_LOW, 'ClearCache');
DI::config()->set('system', 'last_cron_hourly', time());
}
// Ensure to have a .htaccess file.
// this is a precaution for systems that update automatically
$basepath = $a->getBasePath();
if (!file_exists($basepath . '/.htaccess') && is_writable($basepath)) {
copy($basepath . '/.htaccess-dist', $basepath . '/.htaccess');
}
// Poll contacts
self::pollContacts();
// Update contact information
self::updatePublicContacts();
Logger::log('cron: end');
Logger::notice('end');
DI::config()->set('system', 'last_cron', time());
return;
}
/**
* Optimize tables that are known to grow and shrink all the time
*
* @return void
*/
private static function optimizeTables()
{
Logger::info('Optimize start');
DBA::e("OPTIMIZE TABLE `auth_codes`");
DBA::e("OPTIMIZE TABLE `challenge`");
DBA::e("OPTIMIZE TABLE `locks`");
DBA::e("OPTIMIZE TABLE `profile_check`");
DBA::e("OPTIMIZE TABLE `session`");
DBA::e("OPTIMIZE TABLE `tokens`");
DI::lock()->release('optimize_tables');
}
/**
* Checks for contacts that are about to be deleted and ensures that they are removed.
* This should be done automatically in the "remove" function. This here is a cleanup job.
*/
private static function checkdeletedContacts()
{
$contacts = DBA::select('contact', ['id'], ['deleted' => true]);
while ($contact = DBA::fetch($contacts)) {
Worker::add(PRIORITY_MEDIUM, 'RemoveContact', $contact['id']);
}
DBA::close($contacts);
}
/**
* Update public contacts
*
* @throws \Friendica\Network\HTTPException\InternalServerErrorException
*/
private static function updatePublicContacts() {
$count = 0;
$last_updated = DateTimeFormat::utc('now - 1 week');
$condition = ["`network` IN (?, ?, ?, ?) AND `uid` = ? AND NOT `self` AND `last-update` < ?",
Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, 0, $last_updated];
$oldest_date = '';
$oldest_id = '';
$contacts = DBA::select('contact', ['id', 'last-update'], $condition, ['limit' => 100, 'order' => ['last-update']]);
while ($contact = DBA::fetch($contacts)) {
if (empty($oldest_id)) {
$oldest_id = $contact['id'];
$oldest_date = $contact['last-update'];
}
Worker::add(PRIORITY_LOW, "UpdateContact", $contact['id']);
++$count;
}
Logger::info('Initiated update for public contacts', ['interval' => $count, 'id' => $oldest_id, 'oldest' => $oldest_date]);
DBA::close($contacts);
}
/**
* Poll contacts for unreceived messages
*
* @throws \Friendica\Network\HTTPException\InternalServerErrorException
*/
private static function pollContacts() {
Addon::reload();
$sql = "SELECT `contact`.`id`, `contact`.`nick`, `contact`.`name`, `contact`.`network`, `contact`.`archive`,
`contact`.`last-update`, `contact`.`priority`, `contact`.`rating`, `contact`.`rel`, `contact`.`subhub`
FROM `user`
STRAIGHT_JOIN `contact`
ON `contact`.`uid` = `user`.`uid` AND `contact`.`poll` != ''
AND `contact`.`network` IN (?, ?, ?, ?, ?)
AND NOT `contact`.`self` AND NOT `contact`.`blocked`
AND `contact`.`rel` != ?
WHERE NOT `user`.`account_expired` AND NOT `user`.`account_removed`";
$parameters = [Protocol::DFRN, Protocol::ACTIVITYPUB, Protocol::OSTATUS, Protocol::FEED, Protocol::MAIL, Contact::FOLLOWER];
// Only poll from those with suitable relationships,
// and which have a polling address and ignore Diaspora since
// we are unable to match those posts with a Diaspora GUID and prevent duplicates.
$abandon_days = intval(DI::config()->get('system', 'account_abandon_days'));
if ($abandon_days < 1) {
$abandon_days = 0;
}
if (!empty($abandon_days)) {
$sql .= " AND `user`.`login_date` > UTC_TIMESTAMP() - INTERVAL ? DAY";
$parameters[] = $abandon_days;
}
$contacts = DBA::p($sql, $parameters);
if (!DBA::isResult($contacts)) {
return;
}
while ($contact = DBA::fetch($contacts)) {
$ratings = [0, 3, 7, 8, 9, 10];
if (DI::config()->get('system', 'adjust_poll_frequency') && ($contact['network'] == Protocol::FEED)) {
$rating = $contact['rating'];
} elseif (array_key_exists($contact['priority'], $ratings)) {
$rating = $ratings[$contact['priority']];
} else {
$rating = -1;
}
// Friendica and OStatus are checked once a day
if (in_array($contact['network'], [Protocol::DFRN, Protocol::OSTATUS])) {
$rating = 8;
}
// ActivityPub is checked once a week
if ($contact['network'] == Protocol::ACTIVITYPUB) {
$rating = 9;
}
// Check archived contacts once a month
if ($contact['archive']) {
$rating = 10;
}
if ($rating < 0) {
continue;
}
/*
* Based on $contact['priority'], should we poll this site now? Or later?
*/
$min_poll_interval = DI::config()->get('system', 'min_poll_interval');
$poll_intervals = [$min_poll_interval . ' minute', '15 minute', '30 minute',
'1 hour', '2 hour', '3 hour', '6 hour', '12 hour' ,'1 day', '1 week', '1 month'];
$now = DateTimeFormat::utcNow();
$next_update = DateTimeFormat::utc($contact['last-update'] . ' + ' . $poll_intervals[$rating]);
if (empty($poll_intervals[$rating]) || ($now < $next_update)) {
Logger::debug('No update', ['cid' => $contact['id'], 'rating' => $rating, 'next' => $next_update, 'now' => $now]);
continue;
}
if ((($contact['network'] == Protocol::FEED) && ($contact['priority'] <= 3)) || ($contact['network'] == Protocol::MAIL)) {
$priority = PRIORITY_MEDIUM;
} elseif ($contact['archive']) {
$priority = PRIORITY_NEGLIGIBLE;
} else {
$priority = PRIORITY_LOW;
}
Logger::log("Polling " . $contact["network"] . " " . $contact["id"] . " " . $contact['priority'] . " " . $contact["nick"] . " " . $contact["name"]);
Worker::add(['priority' => $priority, 'dont_fork' => true, 'force_priority' => true], 'OnePoll', (int)$contact['id']);
}
DBA::close($contacts);
}
}

View File

@ -0,0 +1,57 @@
<?php
/**
* @copyright Copyright (C) 2020, Friendica
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
namespace Friendica\Worker;
use Friendica\Core\Logger;
use Friendica\Database\DBA;
use Friendica\DI;
/**
* Optimize tables that are known to grow and shrink all the time
*/
class OptimizeTables
{
    /**
     * Runs OPTIMIZE TABLE on a fixed list of tables while holding the "optimize_tables" lock.
     *
     * If the lock cannot be acquired, a warning is logged and nothing is optimized.
     * The lock is released in a "finally" block so that it is not left held when one
     * of the statements or logger calls throws.
     *
     * @return void
     */
    public static function execute()
    {
        if (!DI::lock()->acquire('optimize_tables', 0)) {
            Logger::warning('Lock could not be acquired');
            return;
        }

        // Tables that are known to grow and shrink all the time
        $tables = [
            'auth_codes',
            'cache',
            'challenge',
            'locks',
            'oembed',
            'parsed_url',
            'profile_check',
            'session',
            'tokens',
        ];

        try {
            Logger::info('Optimize start');

            foreach ($tables as $table) {
                DBA::e("OPTIMIZE TABLE `" . $table . "`");
            }

            Logger::info('Optimize end');
        } finally {
            DI::lock()->release('optimize_tables');
        }
    }
}

132
src/Worker/PollContacts.php Normal file
View File

@ -0,0 +1,132 @@
<?php
/**
* @copyright Copyright (C) 2020, Friendica
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
namespace Friendica\Worker;
use Friendica\Core\Addon;
use Friendica\Core\Logger;
use Friendica\Core\Protocol;
use Friendica\Core\Worker;
use Friendica\Database\DBA;
use Friendica\DI;
use Friendica\Model\Contact;
use Friendica\Util\DateTimeFormat;
/**
* Poll contacts for unreceived messages
*/
class PollContacts
{
    /**
     * Selects the contacts that are due for polling and adds an "OnePoll" worker task for each.
     *
     * For every contact returned by the query a "rating" is determined, which is used as
     * index into a list of poll intervals. A contact is polled when its `last-update` plus
     * that interval lies in the past. Contacts without a usable rating or interval are skipped.
     *
     * @return void
     */
    public static function execute()
    {
        Addon::reload();

        // Only poll from those with suitable relationships,
        // and which have a polling address and ignore Diaspora since
        // we are unable to match those posts with a Diaspora GUID and prevent duplicates.
        $sql = "SELECT `contact`.`id`, `contact`.`nick`, `contact`.`name`, `contact`.`network`, `contact`.`archive`,
                    `contact`.`last-update`, `contact`.`priority`, `contact`.`rating`, `contact`.`rel`, `contact`.`subhub`
                FROM `user`
                STRAIGHT_JOIN `contact`
                ON `contact`.`uid` = `user`.`uid` AND `contact`.`poll` != ''
                    AND `contact`.`network` IN (?, ?, ?, ?, ?)
                    AND NOT `contact`.`self` AND NOT `contact`.`blocked`
                    AND `contact`.`rel` != ?
                WHERE NOT `user`.`account_expired` AND NOT `user`.`account_removed`";

        $parameters = [Protocol::DFRN, Protocol::ACTIVITYPUB, Protocol::OSTATUS, Protocol::FEED, Protocol::MAIL, Contact::FOLLOWER];

        // Restrict to users that logged in within the configured number of days (values below 1 disable the check)
        $abandon_days = intval(DI::config()->get('system', 'account_abandon_days'));
        if ($abandon_days >= 1) {
            $sql .= " AND `user`.`login_date` > UTC_TIMESTAMP() - INTERVAL ? DAY";
            $parameters[] = $abandon_days;
        }

        $contacts = DBA::p($sql, $parameters);
        if (!DBA::isResult($contacts)) {
            return;
        }

        // Loop invariant values are fetched once instead of once per contact
        $adjust_poll_frequency = DI::config()->get('system', 'adjust_poll_frequency');
        $min_poll_interval     = DI::config()->get('system', 'min_poll_interval');

        // Maps the contact priority (array index) to a rating
        $ratings = [0, 3, 7, 8, 9, 10];

        // Maps the rating (array index) to the interval between two polls
        $poll_intervals = [$min_poll_interval . ' minute', '15 minute', '30 minute',
            '1 hour', '2 hour', '3 hour', '6 hour', '12 hour', '1 day', '1 week', '1 month'];

        while ($contact = DBA::fetch($contacts)) {
            if ($adjust_poll_frequency && ($contact['network'] == Protocol::FEED)) {
                $rating = $contact['rating'];
            } elseif (array_key_exists($contact['priority'], $ratings)) {
                $rating = $ratings[$contact['priority']];
            } else {
                $rating = -1;
            }

            // Friendica and OStatus are checked once a day
            if (in_array($contact['network'], [Protocol::DFRN, Protocol::OSTATUS])) {
                $rating = 8;
            }

            // ActivityPub is checked once a week
            if ($contact['network'] == Protocol::ACTIVITYPUB) {
                $rating = 9;
            }

            // Check archived contacts once a month
            if ($contact['archive']) {
                $rating = 10;
            }

            if ($rating < 0) {
                continue;
            }

            $now = DateTimeFormat::utcNow();

            // The interval has to be validated before it is used: a rating without a matching
            // interval would otherwise raise an "undefined index" notice and produce a bogus date.
            if (empty($poll_intervals[$rating])) {
                Logger::debug('No update', ['cid' => $contact['id'], 'rating' => $rating, 'next' => null, 'now' => $now]);
                continue;
            }

            /*
             * Based on the rating, should we poll this site now? Or later?
             */
            $next_update = DateTimeFormat::utc($contact['last-update'] . ' + ' . $poll_intervals[$rating]);

            if ($now < $next_update) {
                Logger::debug('No update', ['cid' => $contact['id'], 'rating' => $rating, 'next' => $next_update, 'now' => $now]);
                continue;
            }

            if ((($contact['network'] == Protocol::FEED) && ($contact['priority'] <= 3)) || ($contact['network'] == Protocol::MAIL)) {
                $priority = PRIORITY_MEDIUM;
            } elseif ($contact['archive']) {
                $priority = PRIORITY_NEGLIGIBLE;
            } else {
                $priority = PRIORITY_LOW;
            }

            Logger::notice("Polling " . $contact["network"] . " " . $contact["id"] . " " . $contact['priority'] . " " . $contact["nick"] . " " . $contact["name"]);

            Worker::add(['priority' => $priority, 'dont_fork' => true, 'force_priority' => true], 'OnePoll', (int)$contact['id']);
        }

        DBA::close($contacts);
    }
}

View File

@ -0,0 +1,56 @@
<?php
/**
* @copyright Copyright (C) 2020, Friendica
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
namespace Friendica\Worker;
use Friendica\Core\Logger;
use Friendica\Core\Protocol;
use Friendica\Core\Worker;
use Friendica\Database\DBA;
use Friendica\Util\DateTimeFormat;
/**
* Update public contacts
*/
class UpdatePublicContacts
{
    /**
     * Adds an "UpdateContact" worker task for public contacts that were not updated for a week.
     *
     * At most 100 `contact` rows with `uid` = 0 from the ActivityPub, DFRN, Diaspora and
     * OStatus networks are selected, ordered by `last-update`. The number of added tasks
     * and the id and date of the first fetched row are written to the log.
     *
     * @return void
     */
    public static function execute()
    {
        $threshold = DateTimeFormat::utc('now - 1 week');

        $condition = ["`network` IN (?, ?, ?, ?) AND `uid` = ? AND NOT `self` AND `last-update` < ?",
            Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, 0, $threshold];

        $params = ['limit' => 100, 'order' => ['last-update']];

        $result = DBA::select('contact', ['id', 'last-update'], $condition, $params);

        $queued     = 0;
        $first_id   = '';
        $first_date = '';

        for ($row = DBA::fetch($result); $row; $row = DBA::fetch($result)) {
            // Remember the first row; due to the ordering it carries the oldest "last-update"
            if (empty($first_id)) {
                $first_id   = $row['id'];
                $first_date = $row['last-update'];
            }

            Worker::add(PRIORITY_LOW, "UpdateContact", $row['id']);
            $queued++;
        }

        Logger::info('Initiated update for public contacts', ['interval' => $queued, 'id' => $first_id, 'oldest' => $first_date]);

        DBA::close($result);
    }
}