Experimental maintenance features.
This commit is contained in:
parent
7276efc1bc
commit
5e1be6b2fe
3 changed files with 137 additions and 3 deletions
114
include/maintenance.php
Normal file
114
include/maintenance.php
Normal file
|
@ -0,0 +1,114 @@
|
|||
<?php

/*
 * Command-line maintenance runner.
 *
 * Selects the profiles whose `updated` timestamp is older than the configured
 * minimum scrape delay, forks up to $a->config['maintenance']['threads'] worker
 * processes and has every worker re-submit its share of the selected profiles
 * through run_submit().
 *
 * Pass "verbose" as the first argument to echo progress to stdout in addition
 * to writing it to the log.
 */

// Debug stuff.
// ini_set('display_errors', 1);
// ini_set('log_errors','0');
error_reporting(E_ALL^E_NOTICE);

$start_maintain = time();

// $argv[1] does not exist when the script is started without arguments.
$verbose = isset($argv[1]) && $argv[1] === 'verbose';

//Startup.
require_once('boot.php');
$a = new App;

//Config and DB.
require_once(".htconfig.php");
require_once("dba.php");
$db = new dba($db_host, $db_user, $db_pass, $db_data, $install);

//Get our set of items. Youngest items first, after the threshold.
//This may be counter-intuitive, but is to prevent items that fail to update from blocking the rest.
$res = q(
	"SELECT `id`, `homepage`, `censored` FROM `profile` WHERE `updated` < '%s' ORDER BY `updated` DESC LIMIT %u",
	dbesc(date('Y-m-d H:i:s', time()-$a->config['maintenance']['min_scrape_delay'])),
	intval($a->config['maintenance']['max_scrapes'])
);

//Nothing to do (or the query did not return a result set).
if(!is_array($res) || !count($res)){
	exit;
}

//Close DB here. Every forked child opens its own private connection below.
$db->getdb()->close();

//We need the scraper.
require_once('include/submit.php');

//Forking requires the pcntl extension.
if(!function_exists('pcntl_fork')){
	logger('Error: no pcntl_fork support. Are you running a different OS? Report an issue please.');
	die('Error: no pcntl_fork support. Are you running a different OS? Report an issue please.');
}

//Create the threads we need.
$items = count($res);
$threadc = min(intval($a->config['maintenance']['threads']), $items); //Don't need more threads than items.
$threads = array();

//Stays null when no fork is attempted at all ($threadc < 1), so the
//parent branch below is taken without reading an undefined variable.
$pid = null;

//Debug...
if($verbose) echo("Creating $threadc maintainer threads for $items profiles.".PHP_EOL);
logger("Creating $threadc maintainer threads for $items profiles.");

for($i = 0; $i < $threadc; $i++){

	$pid = pcntl_fork();
	if($pid === -1){
		//pcntl_strerror() needs the error number of the failed call.
		$forkError = 'Error: something went wrong with the fork. '.pcntl_strerror(pcntl_get_last_error());
		if($verbose) echo($forkError);
		logger($forkError);
		die($forkError);
	}

	//You're a child, go do some labor! $i keeps this child's index.
	if($pid === 0) break;

	//Store the list of PID's.
	if($pid > 0) $threads[] = $pid;

}

//The work for child processes.
if($pid === 0){

	//Let's be nice, we're only doing maintenance here...
	pcntl_setpriority(5);

	//Get personal DBA's.
	$db = new dba($db_host, $db_user, $db_pass, $db_data, $install);

	//Get our (round-robin) workload from the DB results:
	//child $i takes rows $i, $i+$threadc, $i+2*$threadc, ...
	$workload = array();
	for($row = $i; isset($res[$row]); $row += $threadc){
		$workload[] = $res[$row];
	}

	//Entries are taken from the end of the list, as before.
	while(count($workload)){
		$entry = array_pop($workload);
		set_time_limit(20); //This should work for 1 submit.
		if($verbose) echo "Submitting ".$entry['homepage'].PHP_EOL;
		try{
			run_submit($entry['homepage']);
		}
		catch(Exception $e){
			//One failing profile must not abort the rest of this child's workload.
			if($verbose) echo "Error: failed to submit ".$entry['homepage'].": ".$e->getMessage().PHP_EOL;
			logger("Error: failed to submit ".$entry['homepage'].": ".$e->getMessage());
		}
	}

	exit;

}

//The main process.
else{

	//Wait for every child and report the ones that did not end with status 0.
	foreach($threads as $pid){
		pcntl_waitpid($pid, $status);
		if($status !== 0){
			if($verbose) echo "Bad process return value $pid:$status".PHP_EOL;
			logger("Bad process return value $pid:$status");
		}
	}

	$time = time() - $start_maintain;
	if($verbose) echo("Maintenance completed. Took $time seconds.".PHP_EOL);
	logger("Maintenance completed. Took $time seconds.");

}
|
|
@ -13,9 +13,19 @@ function import_init(&$a)
|
|||
/**
 * Renders the import page.
 *
 * Counts the profiles whose `updated` timestamp is older than the configured
 * minimum scrape delay and passes that number to the template as the
 * maintenance backlog, together with a marker telling whether the
 * `.htimport` batch file is present.
 *
 * @param object $a The application object; its config['maintenance'] settings are read.
 * @return mixed Whatever replace_macros() produces for view/import.tpl.
 */
function import_content(&$a)
{

	$res = q(
		"SELECT count(*) as `count` FROM `profile` WHERE `updated` < '%s'",
		dbesc(date('Y-m-d H:i:s', time()-$a->config['maintenance']['min_scrape_delay']))
	);

	//Keep the 'unknown' placeholder unless the query returned at least one row.
	//The is_array() guard avoids calling count() on a non-array query result.
	$backlog = 'unknown';
	if(is_array($res) && count($res)){
		$backlog = $res[0]['count'].'/'.$a->config['maintenance']['max_scrapes'].' entries';
	}

	$tpl = file_get_contents('view/import.tpl');
	return replace_macros($tpl, array(
		'$present' => is_file('.htimport') ? ' (present)' : '',
		'$backlog' => $backlog
	));

}
|
||||
|
@ -23,6 +33,10 @@ function import_content(&$a)
|
|||
function import_post(&$a)
|
||||
{
|
||||
|
||||
if($_POST['submit_url']){
|
||||
goaway($a->get_baseurl().'/submit?url='.bin2hex($_POST['submit_url']));
|
||||
}
|
||||
|
||||
//Get our input.
|
||||
$url = $_POST['url'];
|
||||
$page = intval($_POST['page']);
|
||||
|
@ -66,7 +80,7 @@ function import_post(&$a)
|
|||
$_SESSION['import_total']++;
|
||||
$_SESSION['import_failed']++;
|
||||
try{
|
||||
if(run_submit($a, $url)){
|
||||
if(run_submit($url)){
|
||||
$_SESSION['import_failed']--;
|
||||
$_SESSION['import_success']++;
|
||||
}
|
||||
|
@ -95,7 +109,7 @@ function import_post(&$a)
|
|||
return;
|
||||
|
||||
}
|
||||
elseif($url && $page){
|
||||
elseif($url){
|
||||
|
||||
$result = fetch_url($url."/lsearch?p=$page&n=$perPage&search=.*");
|
||||
if($result)
|
||||
|
|
|
@ -11,4 +11,10 @@
|
|||
<label>Batch submit from file: $present</label>
|
||||
<input type="submit" name="batch_submit" value="Run batch">
|
||||
</form>
|
||||
<h2>Manual submit</h2>
|
||||
<form method="POST">
|
||||
<input type="text" name="submit_url" placeholder="Profile url" size="35" />
|
||||
<input type="submit" value="Submit">
|
||||
</form>
|
||||
<h3>Maintenance backlog: $backlog</h3>
|
||||
</div>
|
Loading…
Reference in a new issue