diff --git a/boot.php b/boot.php
index de18161e..786f3045 100755
--- a/boot.php
+++ b/boot.php
@@ -227,11 +227,12 @@ function t($s) {
if(! function_exists('fetch_url')) {
-function fetch_url($url,$binary = false) {
+function fetch_url($url,$binary = false, $timeout=20) {
$ch = curl_init($url);
if(! $ch) return false;
- curl_setopt($ch, CURLOPT_HEADER, 0);
+ curl_setopt($ch, CURLOPT_HEADER, 0);
+ curl_setopt($ch, CURLOPT_TIMEOUT, max(intval($timeout), 1)); //Minimum of 1 second timeout.
curl_setopt($ch, CURLOPT_FOLLOWLOCATION,true);
curl_setopt($ch, CURLOPT_MAXREDIRS,8);
curl_setopt($ch, CURLOPT_RETURNTRANSFER,true);
diff --git a/include/Scrape.php b/include/Scrape.php
index 9cbf1ebc..1edd0672 100755
--- a/include/Scrape.php
+++ b/include/Scrape.php
@@ -12,10 +12,13 @@ function attribute_contains($attr,$s) {
if(! function_exists('scrape_dfrn')) {
-function scrape_dfrn($url) {
-
+function scrape_dfrn($url, $max_nodes=5000) {
+
+ $minNodes = 100; //Let's do at least 100 nodes per type.
+ $timeout = 10; //Seconds per fetch; kept short because it affects batch processing.
+
$ret = array();
- $s = fetch_url($url);
+ $s = fetch_url($url, false, $timeout); //2nd argument is $binary, the timeout is the 3rd.
if(! $s)
return $ret;
@@ -29,7 +32,7 @@ function scrape_dfrn($url) {
$items = $dom->getElementsByTagName('meta');
// get DFRN link elements
-
+ $nodes_left = max(intval($max_nodes), $minNodes);
foreach($items as $item) {
$x = $item->getAttribute('name');
if($x == 'dfrn-global-visibility') {
@@ -47,20 +50,26 @@ function scrape_dfrn($url) {
if(strlen($z))
$ret['tags'] = $z;
}
+ $nodes_left--;
+ if($nodes_left <= 0) break;
}
$items = $dom->getElementsByTagName('link');
// get DFRN link elements
-
+
+ $nodes_left = max(intval($max_nodes), $minNodes);
foreach($items as $item) {
$x = $item->getAttribute('rel');
if(substr($x,0,5) == "dfrn-")
$ret[$x] = $item->getAttribute('href');
+ $nodes_left--;
+ if($nodes_left <= 0) break;
}
// Pull out hCard profile elements
-
+
+ $nodes_left = max(intval($max_nodes), $minNodes);
$items = $dom->getElementsByTagName('*');
foreach($items as $item) {
if(attribute_contains($item->getAttribute('class'), 'vcard')) {
@@ -89,6 +98,8 @@ function scrape_dfrn($url) {
}
if(attribute_contains($item->getAttribute('class'),'marital-text'))
$ret['marital'] = $item->textContent;
+ $nodes_left--;
+ if($nodes_left <= 0) break;
}
return $ret;
}}
diff --git a/mod/import.php b/mod/import.php
index 63451c82..b5e09c0f 100644
--- a/mod/import.php
+++ b/mod/import.php
@@ -33,7 +33,7 @@ function import_post(&$a)
//Per batch setting.
$perPage = 200;
- $perBatch = 10;
+ $perBatch = 2;
if($batch){
@@ -62,7 +62,7 @@ function import_post(&$a)
for($i=0; $i<$perBatch; $i++){
if($url = array_shift($list)){
- set_time_limit(20);
+ set_time_limit(15);
$_SESSION['import_total']++;
$_SESSION['import_failed']++;
try{
@@ -77,16 +77,16 @@ function import_post(&$a)
$left = count($list);
+ $s = $_SESSION['import_success'];
+ $total = $_SESSION['import_total'];
+ $errors = $_SESSION['import_failed'];
if($left > 0){
- notice("$left items left in batch.");
+ notice("$left items left in batch.
Stats: $s / $total success, $errors errors.");
file_put_contents($file, implode("\r\n", $list));
$fid = uniqid('autosubmit_');
echo '