Merge pull request #31 from MrPetovan/bug/fix-maintenance-cron

Fix maintenance cron
This commit is contained in:
Michael Vogel 2017-10-23 07:37:38 +02:00 committed by GitHub
commit 16158983c0
5 changed files with 74 additions and 50 deletions

View File

@ -79,7 +79,7 @@ if (!function_exists('logger')) {
}
require_once('include/datetime.php');
@file_put_contents($logfile, datetime_convert() . ':' . ' ' . $msg . "\n", FILE_APPEND);
@file_put_contents($logfile, datetime_convert() . ' [#' . getmypid() . '] ' . $msg . "\n", FILE_APPEND);
return;
}
}

View File

@ -21,12 +21,23 @@ require_once '.htconfig.php';
require_once 'dba.php';
$db = new dba($db_host, $db_user, $db_pass, $db_data, $install);
//Get our set of items. Youngest items first, after the threshold.
//This may be counter-intuitive, but is to prevent items that fail to update from blocking the rest.
//Get the maintenance backlog size.
$res = q("SELECT count(*) as `count`
FROM `profile`
WHERE `updated` < '%s'",
dbesc(date('Y-m-d H:i:s', time() - $a->config['maintenance']['min_scrape_delay']))
);
$maintenance_backlog = 'unknown';
if (count($res)) {
$maintenance_backlog = $res[0]['count'] . ' entries left';
}
//Get our set of items. Oldest items first, after the threshold.
$res = q("SELECT `id`, `homepage`, `censored`
FROM `profile`
WHERE `updated` < '%s'
ORDER BY `updated` DESC
ORDER BY `updated` ASC
LIMIT %u",
dbesc(date('Y-m-d H:i:s', time() - $a->config['maintenance']['min_scrape_delay'])),
intval($a->config['maintenance']['max_scrapes'])
@ -56,9 +67,9 @@ $threads = array();
//Debug...
if ($verbose) {
echo "Creating $threadc maintainer threads for $items profiles." . PHP_EOL;
echo "Creating $threadc maintainer threads for $items profiles, $maintenance_backlog" . PHP_EOL;
}
logger("Creating $threadc maintainer threads for $items profiles.");
logger("Creating $threadc maintainer threads for $items profiles. $maintenance_backlog");
for ($i = 0; $i < $threadc; $i++) {

View File

@ -6,17 +6,17 @@ require_once('site-health.php');
function run_submit($url) {
global $a;
if(! strlen($url))
return false;
logger('Updating: ' . $url);
//First run a notice script for the site it is hosted on.
$site_health = notice_site($url, true);
$submit_start = microtime(true);
$nurl = str_replace(array('https:','//www.'), array('http:','//'), $url);
$profile_exists = false;
@ -26,11 +26,20 @@ function run_submit($url) {
dbesc($nurl)
);
if(count($r)) {
if(count($r)) {
$profile_exists = true;
$profile_id = $r[0]['id'];
$r = q("UPDATE `profile` SET
`available` = 0,
`updated` = '%s'
WHERE `id` = %d LIMIT 1",
dbesc(datetime_convert()),
intval($profile_id)
);
}
//Remove duplicates.
if(count($r) > 1){
for($i=1; $i<count($r); $i++){
@ -43,62 +52,65 @@ function run_submit($url) {
);
}
}
require_once('Scrape.php');
//Skip the scrape? :D
$noscrape = $site_health && $site_health['no_scrape_url'];
if($noscrape){
//Find out who to look up.
$which = str_replace($site_health['base_url'], '', $url);
$noscrape = preg_match('~/profile/([^/]+)~', $which, $matches) === 1;
//If that did not fail...
if($noscrape){
$parms = noscrape_dfrn($site_health['no_scrape_url'].'/'.$matches[1]);
$noscrape = !!$parms; //If the result was false, do a scrape after all.
}
}
if(!$noscrape){
$parms = scrape_dfrn($url);
}
//Empty result is due to an offline site.
if(!count($parms)){
if(!count($parms) > 1){
//For large sites this could lower the health too quickly, so don't track health.
//But for sites that are already in bad status, do a cleanup now.
if($profile_exists && $site_health['health_score'] < $a->config['maintenance']['remove_profile_health_threshold']){
logger('Nuked bad health record.');
nuke_record($url);
}
return false;
}
//We don't care about valid dfrn if the user indicates to be hidden.
elseif($parms['explicit-hide'] && $profile_exists) {
logger('User opted out of the directory.');
nuke_record($url);
return true; //This is a good update.
}
//This is most likely a problem with the site configuration. Ignore.
elseif(validate_dfrn($parms)) {
return false;
}
if((x($parms,'hide')) || (! (x($parms,'fn')) && (x($parms,'photo')))) {
if($profile_exists) {
logger('Profile inferred to be opted out of the directory.');
nuke_record($url);
}
return true; //This is a good update.
}
//This is most likely a problem with the site configuration. Ignore.
if(validate_dfrn($parms)) {
logger('Site is unavailable');
return false;
}
$photo = $parms['photo'];
dbesc_array($parms);
@ -107,18 +119,19 @@ function run_submit($url) {
$parms['comm'] = intval($parms['comm']);
if($profile_exists) {
$r = q("UPDATE `profile` SET
`name` = '%s',
$r = q("UPDATE `profile` SET
`name` = '%s',
`pdesc` = '%s',
`locality` = '%s',
`region` = '%s',
`postal-code` = '%s',
`country-name` = '%s',
`locality` = '%s',
`region` = '%s',
`postal-code` = '%s',
`country-name` = '%s',
`homepage` = '%s',
`nurl` = '%s',
`comm` = %d,
`tags` = '%s',
`updated` = '%s'
`available` = 1,
`updated` = '%s'
WHERE `id` = %d LIMIT 1",
$parms['fn'],
@ -130,7 +143,7 @@ function run_submit($url) {
dbesc($url),
dbesc($nurl),
intval($parms['comm']),
$parms['tags'],
$parms['tags'],
dbesc(datetime_convert()),
intval($profile_id)
);
@ -196,15 +209,15 @@ function run_submit($url) {
}
}
}
$submit_photo_start = microtime(true);
require_once("Photo.php");
$photo_failure = false;
$status = false;
if($profile_id) {
$img_str = fetch_url($photo,true);
$img = new Photo($img_str);
@ -222,11 +235,11 @@ function run_submit($url) {
nuke_record($url);
return false;
}
$submit_end = microtime(true);
$photo_time = round(($submit_end - $submit_photo_start) * 1000);
$time = round(($submit_end - $submit_start) * 1000);
//Record the scrape speed in a scrapes table.
if($site_health && $status) q(
"INSERT INTO `site-scrape` (`site_health_id`, `dt_performed`, `request_time`, `scrape_time`, `photo_time`, `total_time`)".
@ -237,7 +250,7 @@ function run_submit($url) {
$photo_time,
$time
);
return $status;
}

View File

@ -46,7 +46,7 @@ function directory_content(App $a)
$sql_extra = str_replace('%', '%%', $sql_extra);
$r = q("SELECT COUNT(*) AS `total` FROM `profile` WHERE `censored` = 0 $sql_extra ");
$r = q("SELECT COUNT(*) AS `total` FROM `profile` WHERE `censored` = 0 AND `available` = 1 $sql_extra ");
if (count($r)) {
$total = $r[0]['total'];
$a->set_pager_total($total);
@ -58,7 +58,7 @@ function directory_content(App $a)
$order = ' ORDER BY `updated` DESC, `id` DESC ';
}
$r = q("SELECT * FROM `profile` WHERE `censored` = 0 $sql_extra $order LIMIT %d , %d ",
$r = q("SELECT * FROM `profile` WHERE `censored` = 0 AND `available` = 1 $sql_extra $order LIMIT %d , %d ",
intval($a->pager['start']),
intval($a->pager['itemspage'])
);

View File

@ -60,7 +60,7 @@ function search_content(App $a)
$sql_extra = str_replace('%', '%%', $sql_extra);
$total = 0;
$r = q("SELECT COUNT(*) AS `total` FROM `profile` WHERE `censored` = 0 $sql_extra ");
$r = q("SELECT COUNT(*) AS `total` FROM `profile` WHERE `censored` = 0 AND `available` = 1 $sql_extra ");
if (count($r)) {
$total = $r[0]['total'];
$a->set_pager_total($total);
@ -72,7 +72,7 @@ function search_content(App $a)
$order = ' ORDER BY `updated` DESC, `id` DESC ';
}
$r = q("SELECT * FROM `profile` WHERE `censored` = 0 $sql_extra $order LIMIT %d , %d ",
$r = q("SELECT * FROM `profile` WHERE `censored` = 0 AND `available` = 1 $sql_extra $order LIMIT %d , %d ",
intval($a->pager['start']),
intval($a->pager['itemspage'])
);