Improved stability of the submit feature to sustain large batches.
This commit is contained in:
parent
40202ea948
commit
c7d3173080
3 changed files with 26 additions and 14 deletions
5
boot.php
5
boot.php
|
@ -227,11 +227,12 @@ function t($s) {
|
||||||
|
|
||||||
|
|
||||||
if(! function_exists('fetch_url')) {
|
if(! function_exists('fetch_url')) {
|
||||||
function fetch_url($url,$binary = false) {
|
function fetch_url($url,$binary = false, $timeout=20) {
|
||||||
$ch = curl_init($url);
|
$ch = curl_init($url);
|
||||||
if(! $ch) return false;
|
if(! $ch) return false;
|
||||||
|
|
||||||
curl_setopt($ch, CURLOPT_HEADER, 0);
|
curl_setopt($ch, CURLOPT_HEADER, 0);
|
||||||
|
curl_setopt($ch, CURLOPT_TIMEOUT, max(intval($timeout), 1)); //Minimum of 1 second timeout.
|
||||||
curl_setopt($ch, CURLOPT_FOLLOWLOCATION,true);
|
curl_setopt($ch, CURLOPT_FOLLOWLOCATION,true);
|
||||||
curl_setopt($ch, CURLOPT_MAXREDIRS,8);
|
curl_setopt($ch, CURLOPT_MAXREDIRS,8);
|
||||||
curl_setopt($ch, CURLOPT_RETURNTRANSFER,true);
|
curl_setopt($ch, CURLOPT_RETURNTRANSFER,true);
|
||||||
|
|
|
@ -12,10 +12,13 @@ function attribute_contains($attr,$s) {
|
||||||
|
|
||||||
|
|
||||||
if(! function_exists('scrape_dfrn')) {
|
if(! function_exists('scrape_dfrn')) {
|
||||||
function scrape_dfrn($url) {
|
function scrape_dfrn($url, $max_nodes=5000) {
|
||||||
|
|
||||||
|
$minNodes = 100; //Let's do at least 100 nodes per type.
|
||||||
|
$timeout = 10; //Timeout will affect batch processing.
|
||||||
|
|
||||||
$ret = array();
|
$ret = array();
|
||||||
$s = fetch_url($url);
|
$s = fetch_url($url, $timeout);
|
||||||
|
|
||||||
if(! $s)
|
if(! $s)
|
||||||
return $ret;
|
return $ret;
|
||||||
|
@ -29,7 +32,7 @@ function scrape_dfrn($url) {
|
||||||
$items = $dom->getElementsByTagName('meta');
|
$items = $dom->getElementsByTagName('meta');
|
||||||
|
|
||||||
// get DFRN link elements
|
// get DFRN link elements
|
||||||
|
$nodes_left = max(intval($max_nodes), $minNodes);
|
||||||
foreach($items as $item) {
|
foreach($items as $item) {
|
||||||
$x = $item->getAttribute('name');
|
$x = $item->getAttribute('name');
|
||||||
if($x == 'dfrn-global-visibility') {
|
if($x == 'dfrn-global-visibility') {
|
||||||
|
@ -47,20 +50,26 @@ function scrape_dfrn($url) {
|
||||||
if(strlen($z))
|
if(strlen($z))
|
||||||
$ret['tags'] = $z;
|
$ret['tags'] = $z;
|
||||||
}
|
}
|
||||||
|
$nodes_left--;
|
||||||
|
if($nodes_left <= 0) break;
|
||||||
}
|
}
|
||||||
|
|
||||||
$items = $dom->getElementsByTagName('link');
|
$items = $dom->getElementsByTagName('link');
|
||||||
|
|
||||||
// get DFRN link elements
|
// get DFRN link elements
|
||||||
|
|
||||||
|
$nodes_left = max(intval($max_nodes), $minNodes);
|
||||||
foreach($items as $item) {
|
foreach($items as $item) {
|
||||||
$x = $item->getAttribute('rel');
|
$x = $item->getAttribute('rel');
|
||||||
if(substr($x,0,5) == "dfrn-")
|
if(substr($x,0,5) == "dfrn-")
|
||||||
$ret[$x] = $item->getAttribute('href');
|
$ret[$x] = $item->getAttribute('href');
|
||||||
|
$nodes_left--;
|
||||||
|
if($nodes_left <= 0) break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pull out hCard profile elements
|
// Pull out hCard profile elements
|
||||||
|
|
||||||
|
$nodes_left = max(intval($max_nodes), $minNodes);
|
||||||
$items = $dom->getElementsByTagName('*');
|
$items = $dom->getElementsByTagName('*');
|
||||||
foreach($items as $item) {
|
foreach($items as $item) {
|
||||||
if(attribute_contains($item->getAttribute('class'), 'vcard')) {
|
if(attribute_contains($item->getAttribute('class'), 'vcard')) {
|
||||||
|
@ -89,6 +98,8 @@ function scrape_dfrn($url) {
|
||||||
}
|
}
|
||||||
if(attribute_contains($item->getAttribute('class'),'marital-text'))
|
if(attribute_contains($item->getAttribute('class'),'marital-text'))
|
||||||
$ret['marital'] = $item->textContent;
|
$ret['marital'] = $item->textContent;
|
||||||
|
$nodes_left--;
|
||||||
|
if($nodes_left <= 0) break;
|
||||||
}
|
}
|
||||||
return $ret;
|
return $ret;
|
||||||
}}
|
}}
|
||||||
|
|
|
@ -33,7 +33,7 @@ function import_post(&$a)
|
||||||
|
|
||||||
//Per batch setting.
|
//Per batch setting.
|
||||||
$perPage = 200;
|
$perPage = 200;
|
||||||
$perBatch = 10;
|
$perBatch = 2;
|
||||||
|
|
||||||
if($batch){
|
if($batch){
|
||||||
|
|
||||||
|
@ -62,7 +62,7 @@ function import_post(&$a)
|
||||||
|
|
||||||
for($i=0; $i<$perBatch; $i++){
|
for($i=0; $i<$perBatch; $i++){
|
||||||
if($url = array_shift($list)){
|
if($url = array_shift($list)){
|
||||||
set_time_limit(20);
|
set_time_limit(15);
|
||||||
$_SESSION['import_total']++;
|
$_SESSION['import_total']++;
|
||||||
$_SESSION['import_failed']++;
|
$_SESSION['import_failed']++;
|
||||||
try{
|
try{
|
||||||
|
@ -77,16 +77,16 @@ function import_post(&$a)
|
||||||
|
|
||||||
$left = count($list);
|
$left = count($list);
|
||||||
|
|
||||||
|
$s = $_SESSION['import_success'];
|
||||||
|
$total = $_SESSION['import_total'];
|
||||||
|
$errors = $_SESSION['import_failed'];
|
||||||
if($left > 0){
|
if($left > 0){
|
||||||
notice("$left items left in batch.");
|
notice("$left items left in batch.<br>Stats: $s / $total success, $errors errors.");
|
||||||
file_put_contents($file, implode("\r\n", $list));
|
file_put_contents($file, implode("\r\n", $list));
|
||||||
$fid = uniqid('autosubmit_');
|
$fid = uniqid('autosubmit_');
|
||||||
echo '<form method="POST" id="'.$fid.'"><input type="hidden" name="batch_submit" value="1"></form>'.
|
echo '<form method="POST" id="'.$fid.'"><input type="hidden" name="batch_submit" value="1"></form>'.
|
||||||
'<script type="text/javascript">setTimeout(function(){ document.getElementById("'.$fid.'").submit(); }, 500);</script>';
|
'<script type="text/javascript">setTimeout(function(){ document.getElementById("'.$fid.'").submit(); }, 500);</script>';
|
||||||
} else {
|
} else {
|
||||||
$s = $_SESSION['import_success'];
|
|
||||||
$total = $_SESSION['import_total'];
|
|
||||||
$errors = $_SESSION['import_failed'];
|
|
||||||
notice("Completed batch! $s / $total success. $errors errors.");
|
notice("Completed batch! $s / $total success. $errors errors.");
|
||||||
unlink($file);
|
unlink($file);
|
||||||
unset($_SESSION['import_progress']);
|
unset($_SESSION['import_progress']);
|
||||||
|
|
Loading…
Reference in a new issue