From 74cc5ade0c54aa396f3a9fe213a017bdff1c123e Mon Sep 17 00:00:00 2001 From: Michael Date: Tue, 21 Mar 2017 07:57:09 +0000 Subject: [PATCH] Worker processes are split into many more separate tasks --- include/cron.php | 236 ++++------------------------------- include/cronjobs.php | 253 +++++++++++++++++++++++++++++++++++++- include/discover_poco.php | 25 +++- include/socgraph.php | 6 +- 4 files changed, 299 insertions(+), 221 deletions(-) diff --git a/include/cron.php b/include/cron.php index ca9b5dff2..1607d4d95 100644 --- a/include/cron.php +++ b/include/cron.php @@ -1,30 +1,26 @@ 1) { + cron_poll_contacts($argc, $argv); + return; + } $last = get_config('system','last_cron'); $poll_interval = intval(get_config('system','cron_interval')); - if(! $poll_interval) + if (! $poll_interval) { $poll_interval = 10; - - if($last) { + } + if ($last) { $next = $last + ($poll_interval * 60); - if($next > time()) { + if ($next > time()) { logger('cron intervall not reached'); return; } @@ -33,19 +29,16 @@ function cron_run(&$argv, &$argc){ logger('cron: start'); // run queue delivery process in the background - proc_run(PRIORITY_NEGLIGIBLE, "include/queue.php"); // run the process to discover global contacts in the background - proc_run(PRIORITY_LOW, "include/discover_poco.php"); // run the process to update locally stored global contacts in the background - proc_run(PRIORITY_LOW, "include/discover_poco.php", "checkcontact"); // Expire and remove user entries - cron_expire_and_remove_users(); + proc_run(PRIORITY_MEDIUM, "include/cronjobs.php", "expire_and_remove_users"); // Check OStatus conversations proc_run(PRIORITY_MEDIUM, "include/cronjobs.php", "ostatus_mentions"); @@ -59,14 +52,22 @@ function cron_run(&$argv, &$argc){ // update nodeinfo data proc_run(PRIORITY_LOW, "include/cronjobs.php", "nodeinfo"); - // once daily run birthday_updates and then expire in background + // Clear cache entries + proc_run(PRIORITY_LOW, "include/cronjobs.php", "clear_cache"); + // Repair 
missing Diaspora values in contacts + proc_run(PRIORITY_LOW, "include/cronjobs.php", "repair_diaspora"); + + // Repair entries in the database + proc_run(PRIORITY_LOW, "include/cronjobs.php", "repair_database"); + + // once daily run birthday_updates and then expire in background $d1 = get_config('system','last_expire_day'); $d2 = intval(datetime_convert('UTC','UTC','now','d')); if($d2 != intval($d1)) { - update_contact_birthdays(); + proc_run(PRIORITY_LOW, "include/cronjobs.php", "update_contact_birthdays"); proc_run(PRIORITY_LOW, "include/discover_poco.php", "update_server"); @@ -78,18 +79,9 @@ function cron_run(&$argv, &$argc){ proc_run(PRIORITY_MEDIUM, 'include/dbclean.php'); - cron_update_photo_albums(); + proc_run(PRIORITY_LOW, "include/cronjobs.php", "update_photo_albums"); } - // Clear cache entries - cron_clear_cache($a); - - // Repair missing Diaspora values in contacts - cron_repair_diaspora($a); - - // Repair entries in the database - cron_repair_database(); - // Poll contacts cron_poll_contacts($argc, $argv); @@ -100,39 +92,6 @@ function cron_run(&$argv, &$argc){ return; } -/** - * @brief Update the cached values for the number of photo albums per user - */ -function cron_update_photo_albums() { - $r = q("SELECT `uid` FROM `user` WHERE NOT `account_expired` AND NOT `account_removed`"); - if (!dbm::is_result($r)) { - return; - } - - foreach ($r AS $user) { - photo_albums($user['uid'], true); - } -} - -/** - * @brief Expire and remove user entries - */ -function cron_expire_and_remove_users() { - // expire any expired accounts - q("UPDATE user SET `account_expired` = 1 where `account_expired` = 0 - AND `account_expires_on` > '%s' - AND `account_expires_on` < UTC_TIMESTAMP()", dbesc(NULL_DATE)); - - // delete user and contact records for recently removed accounts - $r = q("SELECT * FROM `user` WHERE `account_removed` AND `account_expires_on` < UTC_TIMESTAMP() - INTERVAL 3 DAY"); - if ($r) { - foreach($r as $user) { - q("DELETE FROM `contact` WHERE `uid` = 
%d", intval($user['uid'])); - q("DELETE FROM `user` WHERE `uid` = %d", intval($user['uid'])); - } - } -} - /** * @brief Poll contacts for unreceived messages * @@ -272,157 +231,10 @@ function cron_poll_contacts($argc, $argv) { logger("Polling ".$contact["network"]." ".$contact["id"]." ".$contact["nick"]." ".$contact["name"]); if (($contact['network'] == NETWORK_FEED) AND ($contact['priority'] <= 3)) { - proc_run(PRIORITY_MEDIUM, 'include/onepoll.php', $contact['id']); + proc_run(PRIORITY_MEDIUM, 'include/onepoll.php', intval($contact['id'])); } else { - proc_run(PRIORITY_LOW, 'include/onepoll.php', $contact['id']); + proc_run(PRIORITY_LOW, 'include/onepoll.php', intval($contact['id'])); } } } } - -/** - * @brief Clear cache entries - * - * @param App $a - */ -function cron_clear_cache(App $a) { - - $last = get_config('system','cache_last_cleared'); - - if($last) { - $next = $last + (3600); // Once per hour - $clear_cache = ($next <= time()); - } else - $clear_cache = true; - - if (!$clear_cache) - return; - - // clear old cache - Cache::clear(); - - // clear old item cache files - clear_cache(); - - // clear cache for photos - clear_cache($a->get_basepath(), $a->get_basepath()."/photo"); - - // clear smarty cache - clear_cache($a->get_basepath()."/view/smarty3/compiled", $a->get_basepath()."/view/smarty3/compiled"); - - // clear cache for image proxy - if (!get_config("system", "proxy_disabled")) { - clear_cache($a->get_basepath(), $a->get_basepath()."/proxy"); - - $cachetime = get_config('system','proxy_cache_time'); - if (!$cachetime) $cachetime = PROXY_DEFAULT_TIME; - - q('DELETE FROM `photo` WHERE `uid` = 0 AND `resource-id` LIKE "pic:%%" AND `created` < NOW() - INTERVAL %d SECOND', $cachetime); - } - - // Delete the cached OEmbed entries that are older than one year - q("DELETE FROM `oembed` WHERE `created` < NOW() - INTERVAL 3 MONTH"); - - // Delete the cached "parse_url" entries that are older than one year - q("DELETE FROM `parsed_url` WHERE `created` < 
NOW() - INTERVAL 3 MONTH"); - - // Maximum table size in megabyte - $max_tablesize = intval(get_config('system','optimize_max_tablesize')) * 1000000; - if ($max_tablesize == 0) - $max_tablesize = 100 * 1000000; // Default are 100 MB - - if ($max_tablesize > 0) { - // Minimum fragmentation level in percent - $fragmentation_level = intval(get_config('system','optimize_fragmentation')) / 100; - if ($fragmentation_level == 0) - $fragmentation_level = 0.3; // Default value is 30% - - // Optimize some tables that need to be optimized - $r = q("SHOW TABLE STATUS"); - foreach($r as $table) { - - // Don't optimize tables that are too large - if ($table["Data_length"] > $max_tablesize) - continue; - - // Don't optimize empty tables - if ($table["Data_length"] == 0) - continue; - - // Calculate fragmentation - $fragmentation = $table["Data_free"] / ($table["Data_length"] + $table["Index_length"]); - - logger("Table ".$table["Name"]." - Fragmentation level: ".round($fragmentation * 100, 2), LOGGER_DEBUG); - - // Don't optimize tables that needn't to be optimized - if ($fragmentation < $fragmentation_level) - continue; - - // So optimize it - logger("Optimize Table ".$table["Name"], LOGGER_DEBUG); - q("OPTIMIZE TABLE `%s`", dbesc($table["Name"])); - } - } - - set_config('system','cache_last_cleared', time()); -} - -/** - * @brief Repair missing values in Diaspora contacts - * - * @param App $a - */ -function cron_repair_diaspora(App $a) { - $r = q("SELECT `id`, `url` FROM `contact` - WHERE `network` = '%s' AND (`batch` = '' OR `notify` = '' OR `poll` = '' OR pubkey = '') - ORDER BY RAND() LIMIT 50", dbesc(NETWORK_DIASPORA)); - if (dbm::is_result($r)) { - foreach ($r AS $contact) { - if (poco_reachable($contact["url"])) { - $data = probe_url($contact["url"]); - if ($data["network"] == NETWORK_DIASPORA) { - logger("Repair contact ".$contact["id"]." 
".$contact["url"], LOGGER_DEBUG); - q("UPDATE `contact` SET `batch` = '%s', `notify` = '%s', `poll` = '%s', pubkey = '%s' WHERE `id` = %d", - dbesc($data["batch"]), dbesc($data["notify"]), dbesc($data["poll"]), dbesc($data["pubkey"]), - intval($contact["id"])); - } - } - } - } -} - -/** - * @brief Do some repairs in database entries - * - */ -function cron_repair_database() { - - // Sometimes there seem to be issues where the "self" contact vanishes. - // We haven't found the origin of the problem by now. - $r = q("SELECT `uid` FROM `user` WHERE NOT EXISTS (SELECT `uid` FROM `contact` WHERE `contact`.`uid` = `user`.`uid` AND `contact`.`self`)"); - if (dbm::is_result($r)) { - foreach ($r AS $user) { - logger('Create missing self contact for user '.$user['uid']); - user_create_self_contact($user['uid']); - } - } - - // Set the parent if it wasn't set. (Shouldn't happen - but does sometimes) - // This call is very "cheap" so we can do it at any time without a problem - q("UPDATE `item` INNER JOIN `item` AS `parent` ON `parent`.`uri` = `item`.`parent-uri` AND `parent`.`uid` = `item`.`uid` SET `item`.`parent` = `parent`.`id` WHERE `item`.`parent` = 0"); - - // There was an issue where the nick vanishes from the contact table - q("UPDATE `contact` INNER JOIN `user` ON `contact`.`uid` = `user`.`uid` SET `nick` = `nickname` WHERE `self` AND `nick`=''"); - - // Update the global contacts for local users - $r = q("SELECT `uid` FROM `user` WHERE `verified` AND NOT `blocked` AND NOT `account_removed` AND NOT `account_expired`"); - if (dbm::is_result($r)) - foreach ($r AS $user) - update_gcontact_for_user($user["uid"]); - - /// @todo - /// - remove thread entries without item - /// - remove sign entries without item - /// - remove children when parent got lost - /// - set contact-id in item when not present -} diff --git a/include/cronjobs.php b/include/cronjobs.php index 5cc2bf132..4c41f182f 100644 --- a/include/cronjobs.php +++ b/include/cronjobs.php @@ -8,10 +8,16 @@ 
function cronjobs_run(&$argv, &$argc){ require_once('include/ostatus.php'); require_once('include/post_update.php'); require_once('mod/nodeinfo.php'); + require_once('include/photos.php'); + require_once('include/user.php'); + require_once('include/socgraph.php'); // No parameter set? So return - if ($argc <= 1) + if ($argc <= 1) { return; + } + + logger("Starting cronjob ".$argv[1], LOGGER_DEBUG); // Check OStatus conversations // Check only conversations with mentions (for a longer time) @@ -39,5 +45,250 @@ function cronjobs_run(&$argv, &$argc){ return; } + // Expire and remove user entries + if ($argv[1] == 'expire_and_remove_users') { + cron_expire_and_remove_users(); + return; + } + + if ($argv[1] == 'update_contact_birthdays') { + update_contact_birthdays(); + return; + } + + if ($argv[1] == 'update_photo_albums') { + cron_update_photo_albums(); + return; + } + + // Clear cache entries + if ($argv[1] == 'clear_cache') { + cron_clear_cache($a); + return; + } + + // Repair missing Diaspora values in contacts + if ($argv[1] == 'repair_diaspora') { + cron_repair_diaspora($a); + return; + } + + // Repair entries in the database + if ($argv[1] == 'repair_database') { + cron_repair_database(); + return; + } + + logger("Cronjob ".$argv[1]." 
is unknown.", LOGGER_DEBUG); + return; } + +/** + * @brief Update the cached values for the number of photo albums per user + */ +function cron_update_photo_albums() { + $r = q("SELECT `uid` FROM `user` WHERE NOT `account_expired` AND NOT `account_removed`"); + if (!dbm::is_result($r)) { + return; + } + + foreach ($r AS $user) { + photo_albums($user['uid'], true); + } +} + +/** + * @brief Expire and remove user entries + */ +function cron_expire_and_remove_users() { + // expire any expired accounts + q("UPDATE user SET `account_expired` = 1 where `account_expired` = 0 + AND `account_expires_on` > '%s' + AND `account_expires_on` < UTC_TIMESTAMP()", dbesc(NULL_DATE)); + + // delete user and contact records for recently removed accounts + $r = q("SELECT * FROM `user` WHERE `account_removed` AND `account_expires_on` < UTC_TIMESTAMP() - INTERVAL 3 DAY"); + if (dbm::is_result($r)) { + foreach($r as $user) { + q("DELETE FROM `contact` WHERE `uid` = %d", intval($user['uid'])); + q("DELETE FROM `user` WHERE `uid` = %d", intval($user['uid'])); + } + } +} + +/** + * @brief Clear cache entries + * + * @param App $a + */ +function cron_clear_cache(App $a) { + + $last = get_config('system','cache_last_cleared'); + + if ($last) { + $next = $last + (3600); // Once per hour + $clear_cache = ($next <= time()); + } else { + $clear_cache = true; + } + + if (!$clear_cache) { + return; + } + + // clear old cache + Cache::clear(); + + // clear old item cache files + clear_cache(); + + // clear cache for photos + clear_cache($a->get_basepath(), $a->get_basepath()."/photo"); + + // clear smarty cache + clear_cache($a->get_basepath()."/view/smarty3/compiled", $a->get_basepath()."/view/smarty3/compiled"); + + // clear cache for image proxy + if (!get_config("system", "proxy_disabled")) { + clear_cache($a->get_basepath(), $a->get_basepath()."/proxy"); + + $cachetime = get_config('system','proxy_cache_time'); + if (!$cachetime) { + $cachetime = PROXY_DEFAULT_TIME; + } + q('DELETE FROM `photo` 
WHERE `uid` = 0 AND `resource-id` LIKE "pic:%%" AND `created` < NOW() - INTERVAL %d SECOND', $cachetime); + } + + // Delete the cached OEmbed entries that are older than three months + q("DELETE FROM `oembed` WHERE `created` < NOW() - INTERVAL 3 MONTH"); + + // Delete the cached "parse_url" entries that are older than three months + q("DELETE FROM `parsed_url` WHERE `created` < NOW() - INTERVAL 3 MONTH"); + + // Maximum table size in megabytes + $max_tablesize = intval(get_config('system','optimize_max_tablesize')) * 1000000; + if ($max_tablesize == 0) { + $max_tablesize = 100 * 1000000; // Default is 100 MB + } + if ($max_tablesize > 0) { + // Minimum fragmentation level in percent + $fragmentation_level = intval(get_config('system','optimize_fragmentation')) / 100; + if ($fragmentation_level == 0) { + $fragmentation_level = 0.3; // Default value is 30% + } + + // Optimize some tables that need to be optimized + $r = q("SHOW TABLE STATUS"); + foreach ($r as $table) { + + // Don't optimize tables that are too large + if ($table["Data_length"] > $max_tablesize) { + continue; + } + + // Don't optimize empty tables + if ($table["Data_length"] == 0) { + continue; + } + + // Calculate fragmentation + $fragmentation = $table["Data_free"] / ($table["Data_length"] + $table["Index_length"]); + + logger("Table ".$table["Name"]." 
- Fragmentation level: ".round($fragmentation * 100, 2), LOGGER_DEBUG); + + // Don't optimize tables that needn't to be optimized + if ($fragmentation < $fragmentation_level) { + continue; + } + + // So optimize it + logger("Optimize Table ".$table["Name"], LOGGER_DEBUG); + q("OPTIMIZE TABLE `%s`", dbesc($table["Name"])); + } + } + + set_config('system','cache_last_cleared', time()); +} + +/** + * @brief Repair missing values in Diaspora contacts + * + * @param App $a + */ +function cron_repair_diaspora(App $a) { + + $starttime = time(); + + $r = q("SELECT `id`, `url` FROM `contact` + WHERE `network` = '%s' AND (`batch` = '' OR `notify` = '' OR `poll` = '' OR pubkey = '') + ORDER BY RAND() LIMIT 50", dbesc(NETWORK_DIASPORA)); + if (!dbm::is_result($r)) { + return; + } + + foreach ($r AS $contact) { + // Quit the loop after 3 minutes + if (time() > ($starttime + 180)) { + return; + } + + $server_url = poco_detect_server($contact["url"]); + + if (($server_url != "") AND !poco_check_server($server_url)) { + continue; + } + + if (!poco_reachable($contact["url"])) { + continue; + } + + $data = probe_url($contact["url"]); + if ($data["network"] != NETWORK_DIASPORA) { + continue; + } + + logger("Repair contact ".$contact["id"]." ".$contact["url"], LOGGER_DEBUG); + q("UPDATE `contact` SET `batch` = '%s', `notify` = '%s', `poll` = '%s', pubkey = '%s' WHERE `id` = %d", + dbesc($data["batch"]), dbesc($data["notify"]), dbesc($data["poll"]), dbesc($data["pubkey"]), + intval($contact["id"])); + } +} + +/** + * @brief Do some repairs in database entries + * + */ +function cron_repair_database() { + + // Sometimes there seem to be issues where the "self" contact vanishes. + // We haven't found the origin of the problem by now. 
+ $r = q("SELECT `uid` FROM `user` WHERE NOT EXISTS (SELECT `uid` FROM `contact` WHERE `contact`.`uid` = `user`.`uid` AND `contact`.`self`)"); + if (dbm::is_result($r)) { + foreach ($r AS $user) { + logger('Create missing self contact for user '.$user['uid']); + user_create_self_contact($user['uid']); + } + } + + // Set the parent if it wasn't set. (Shouldn't happen - but does sometimes) + // This call is very "cheap" so we can do it at any time without a problem + q("UPDATE `item` INNER JOIN `item` AS `parent` ON `parent`.`uri` = `item`.`parent-uri` AND `parent`.`uid` = `item`.`uid` SET `item`.`parent` = `parent`.`id` WHERE `item`.`parent` = 0"); + + // There was an issue where the nick vanishes from the contact table + q("UPDATE `contact` INNER JOIN `user` ON `contact`.`uid` = `user`.`uid` SET `nick` = `nickname` WHERE `self` AND `nick`=''"); + + // Update the global contacts for local users + $r = q("SELECT `uid` FROM `user` WHERE `verified` AND NOT `blocked` AND NOT `account_removed` AND NOT `account_expired`"); + if (dbm::is_result($r)) { + foreach ($r AS $user) { + update_gcontact_for_user($user["uid"]); + } + } + + /// @todo + /// - remove thread entries without item + /// - remove sign entries without item + /// - remove children when parent got lost + /// - set contact-id in item when not present +} diff --git a/include/discover_poco.php b/include/discover_poco.php index 2923cd01f..d203cfd6b 100644 --- a/include/discover_poco.php +++ b/include/discover_poco.php @@ -16,6 +16,7 @@ function discover_poco_run(&$argv, &$argc) { - update_server: Frequently check the first 250 servers for vitality. 
- update_server_directory: Discover the given server id for their contacts - poco_load: Load POCO data from a given POCO address + - check_profile: Update remote profile data */ if (($argc > 2) && ($argv[1] == "dirsearch")) { @@ -33,6 +34,8 @@ function discover_poco_run(&$argv, &$argc) { $mode = 6; } elseif (($argc > 5) && ($argv[1] == "poco_load")) { $mode = 7; + } elseif (($argc == 3) && ($argv[1] == "check_profile")) { + $mode = 8; } elseif ($argc == 1) { $search = ""; $mode = 0; @@ -42,7 +45,12 @@ function discover_poco_run(&$argv, &$argc) { logger('start '.$search); - if ($mode == 7) { + if ($mode == 8) { + $profile_url = base64_decode($argv[2]); + if ($profile_url != "") { + poco_last_updated($profile_url, true); + } + } elseif ($mode == 7) { if ($argc == 6) { $url = base64_decode($argv[5]); } else { @@ -121,7 +129,9 @@ function update_server() { function discover_users() { logger("Discover users", LOGGER_DEBUG); - $users = q("SELECT `url`, `created`, `updated`, `last_failure`, `last_contact`, `server_url` FROM `gcontact` + $starttime = time(); + + $users = q("SELECT `url`, `created`, `updated`, `last_failure`, `last_contact`, `server_url`, `network` FROM `gcontact` WHERE `last_contact` < UTC_TIMESTAMP - INTERVAL 1 MONTH AND `last_failure` < UTC_TIMESTAMP - INTERVAL 1 MONTH AND `network` IN ('%s', '%s', '%s', '%s', '') ORDER BY rand()", @@ -160,9 +170,9 @@ function discover_users() { } else { $server_url = poco_detect_server($user["url"]); } - if (($server_url == "") OR poco_check_server($server_url, $gcontacts[0]["network"])) { - logger('Check user '.$user["url"]); - poco_last_updated($user["url"], true); + if ((($server_url == "") AND ($user["network"] == NETWORK_FEED)) OR poco_check_server($server_url, $user["network"])) { + logger('Check profile '.$user["url"]); + proc_run(PRIORITY_LOW, "include/discover_poco.php", "check_profile", base64_encode($user["url"])); if (++$checked > 100) { return; @@ -171,6 +181,11 @@ function discover_users() { q("UPDATE 
`gcontact` SET `last_failure` = '%s' WHERE `nurl` = '%s'", dbesc(datetime_convert()), dbesc(normalise_link($user["url"]))); } + + // Quit the loop after 3 minutes + if (time() > ($starttime + 180)) { + return; + } } } diff --git a/include/socgraph.php b/include/socgraph.php index f43ad62d0..6baf22ad3 100644 --- a/include/socgraph.php +++ b/include/socgraph.php @@ -34,7 +34,7 @@ require_once("include/Photo.php"); */ function poco_load($cid, $uid = 0, $zcid = 0, $url = null) { // Call the function "poco_load_worker" via the worker - proc_run(PRIORITY_LOW, "include/discover_poco.php", "poco_load", $cid, $uid, $zcid, base64_encode($url)); + proc_run(PRIORITY_LOW, "include/discover_poco.php", "poco_load", intval($cid), intval($uid), intval($zcid), base64_encode($url)); } /** @@ -1769,7 +1769,7 @@ function poco_discover($complete = false) { } logger('Update directory from server '.$server['url'].' with ID '.$server['id'], LOGGER_DEBUG); - proc_run(PRIORITY_LOW, "include/discover_poco.php", "update_server_directory", $server['id']); + proc_run(PRIORITY_LOW, "include/discover_poco.php", "update_server_directory", intval($server['id'])); if (!$complete AND (--$no_of_queries == 0)) { break; @@ -2018,7 +2018,7 @@ function get_gcontact_id($contact) { if ($doprobing) { logger("Last Contact: ". $last_contact_str." - Last Failure: ".$last_failure_str." - Checking: ".$contact["url"], LOGGER_DEBUG); - proc_run(PRIORITY_LOW, 'include/gprobe.php', bin2hex($contact["url"])); + #proc_run(PRIORITY_LOW, 'include/gprobe.php', bin2hex($contact["url"])); } if ((dbm::is_result($r)) AND (count($r) > 1) AND ($gcontact_id > 0) AND ($contact["url"] != ""))