From b833a8d25556e03b0c56d306798993472a32f0f2 Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Sun, 22 Oct 2017 23:48:00 -0400 Subject: [PATCH] Fix cron_maintain - Update profile before fetching scrape url - Honor $parms['hide'] before validating dfrn site - Fetch maintenance items oldest first - Add backlog size in log - Add pid to logger for easier threaded cron debug --- boot.php | 2 +- include/cron_maintain.php | 21 ++++++++++++++++----- include/submit.php | 24 +++++++++++++++++------- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/boot.php b/boot.php index 0f4f36a4..9944e881 100644 --- a/boot.php +++ b/boot.php @@ -79,7 +79,7 @@ if (!function_exists('logger')) { } require_once('include/datetime.php'); - @file_put_contents($logfile, datetime_convert() . ':' . ' ' . $msg . "\n", FILE_APPEND); + @file_put_contents($logfile, datetime_convert() . ' [#' . getmypid() . '] ' . $msg . "\n", FILE_APPEND); return; } } diff --git a/include/cron_maintain.php b/include/cron_maintain.php index 960fe901..0e1051d4 100644 --- a/include/cron_maintain.php +++ b/include/cron_maintain.php @@ -21,12 +21,23 @@ require_once '.htconfig.php'; require_once 'dba.php'; $db = new dba($db_host, $db_user, $db_pass, $db_data, $install); -//Get our set of items. Youngest items first, after the threshold. -//This may be counter-intuitive, but is to prevent items that fail to update from blocking the rest. + +//Get the maintenance backlog size. +$res = q("SELECT count(*) as `count` +FROM `profile` +WHERE `updated` < '%s'", + dbesc(date('Y-m-d H:i:s', time() - $a->config['maintenance']['min_scrape_delay'])) +); +$maintenance_backlog = 'unknown'; +if (count($res)) { + $maintenance_backlog = $res[0]['count'] . ' entries left'; +} + +//Get our set of items. Oldest items first, after the threshold. $res = q("SELECT `id`, `homepage`, `censored` FROM `profile` WHERE `updated` < '%s' -ORDER BY `updated` DESC +ORDER BY `updated` ASC LIMIT %u", dbesc(date('Y-m-d H:i:s', time() - $a->config['maintenance']['min_scrape_delay'])), intval($a->config['maintenance']['max_scrapes']) @@ -56,9 +67,9 @@ $threads = array(); //Debug... if ($verbose) { - echo "Creating $threadc maintainer threads for $items profiles." . PHP_EOL; + echo "Creating $threadc maintainer threads for $items profiles, $maintenance_backlog" . PHP_EOL; } -logger("Creating $threadc maintainer threads for $items profiles."); +logger("Creating $threadc maintainer threads for $items profiles. $maintenance_backlog"); for ($i = 0; $i < $threadc; $i++) { diff --git a/include/submit.php b/include/submit.php index 255657b4..90876c2d 100644 --- a/include/submit.php +++ b/include/submit.php @@ -29,6 +29,14 @@ function run_submit($url) { if(count($r)) { $profile_exists = true; $profile_id = $r[0]['id']; + + $r = q("UPDATE `profile` SET + `updated` = '%s' + WHERE `id` = %d LIMIT 1", + + dbesc(datetime_convert()), + intval($profile_id) + ); } //Remove duplicates. @@ -86,19 +94,21 @@ function run_submit($url) { nuke_record($url); return true; //This is a good update. } - - //This is most likely a problem with the site configuration. Ignore. - elseif(validate_dfrn($parms)) { - return false; - } - + if((x($parms,'hide')) || (! (x($parms,'fn')) && (x($parms,'photo')))) { if($profile_exists) { + logger('Profile inferred to be opted out of the directory.'); nuke_record($url); } return true; //This is a good update. } - + + //This is most likely a problem with the site configuration. Ignore. + if(validate_dfrn($parms)) { + logger('Site is unavailable'); + return false; + } + $photo = $parms['photo']; dbesc_array($parms);