Made a first version of import functionality.

* A very crude extraction step that creates a file with URLs.
* A processor that reduces the file to nothing as it imports them.
This commit is contained in:
Beanow 2014-07-01 03:27:56 +02:00
parent fbf58ea0c4
commit 40202ea948
4 changed files with 347 additions and 164 deletions

195
include/submit.php Normal file
View File

@ -0,0 +1,195 @@
<?php
require_once('datetime.php');
/**
 * Scrape a profile URL and insert or refresh its entry in the directory.
 *
 * Looks the URL up in `profile` (by exact and normalised URL), scrapes the
 * remote profile, writes the profile row and its tags, then stores a scaled
 * copy of the profile photo.
 *
 * @param object $a   Application object; only get_baseurl() is used here.
 * @param string $url Profile URL to submit.
 * @return bool true when the profile was stored, false when the URL was empty,
 *              the scrape failed validation, the profile asked to be hidden,
 *              or no profile row could be created.
 */
function run_submit(&$a, $url) {

	if(! strlen($url))
		return false;

	logger('Updating: ' . $url);

	$nurl = str_replace(array('https:','//www.'), array('http:','//'), $url);

	// Initialised up front: a failed INSERT must not leave $profile_id undefined,
	// because callers may run with an error handler that turns notices into exceptions.
	$profile_exists = false;
	$profile_id = 0;

	$r = q("SELECT * FROM `profile` WHERE ( `homepage` = '%s' OR `nurl` = '%s' ) LIMIT 1",
		dbesc($url),
		dbesc($nurl)
	);
	if(is_array($r) && count($r)) {
		$profile_exists = true;
		$profile_id = $r[0]['id'];
	}

	require_once('Scrape.php');

	$parms = scrape_dfrn($url);

	// logger('dir_submit: ' . print_r($parms,true));

	if((! is_array($parms)) || (! count($parms)) || (validate_dfrn($parms))) {
		return false;
	}

	// NOTE(review): as written this reads "(no name) AND (has photo)"; the intent may
	// have been "not (name and photo)" - left unchanged, confirm before altering.
	if((x($parms,'hide')) || (! (x($parms,'fn')) && (x($parms,'photo')))) {
		if($profile_exists) {
			nuke_record($url);
		}
		return false;
	}

	// Keep unescaped copies of the values that are used outside of the profile queries.
	$photo    = isset($parms['photo']) ? $parms['photo'] : '';
	$raw_tags = isset($parms['tags'])  ? $parms['tags']  : '';

	// The profile queries below pass $parms values without dbesc(), relying on this call.
	dbesc_array($parms);

	// Not every scraped profile carries every field; default the missing ones.
	$parms = $parms + array(
		'fn'           => '',
		'pdesc'        => '',
		'locality'     => '',
		'region'       => '',
		'postal-code'  => '',
		'country-name' => '',
		'gender'       => '',
		'marital'      => '',
		'tags'         => '',
		'comm'         => 0
	);
	$parms['comm'] = intval($parms['comm']);

	if($profile_exists) {
		$r = q("UPDATE `profile` SET
			`name` = '%s',
			`pdesc` = '%s',
			`locality` = '%s',
			`region` = '%s',
			`postal-code` = '%s',
			`country-name` = '%s',
			`gender` = '%s',
			`marital` = '%s',
			`homepage` = '%s',
			`nurl` = '%s',
			`comm` = %d,
			`tags` = '%s',
			`updated` = '%s'
			WHERE `id` = %d LIMIT 1",
			$parms['fn'],
			$parms['pdesc'],
			$parms['locality'],
			$parms['region'],
			$parms['postal-code'],
			$parms['country-name'],
			$parms['gender'],
			$parms['marital'],
			dbesc($url),
			dbesc($nurl),
			intval($parms['comm']),
			$parms['tags'],
			dbesc(datetime_convert()),
			intval($profile_id)
		);
		logger('Update returns: ' . $r);
	}
	else {
		$r = q("INSERT INTO `profile` ( `name`, `pdesc`, `locality`, `region`, `postal-code`, `country-name`, `gender`, `marital`, `homepage`, `nurl`, `comm`, `tags`, `created`, `updated` )
			VALUES ( '%s', '%s', '%s', '%s' , '%s', '%s', '%s', '%s', '%s', '%s', %d, '%s', '%s', '%s' )",
			$parms['fn'],
			$parms['pdesc'],
			$parms['locality'],
			$parms['region'],
			$parms['postal-code'],
			$parms['country-name'],
			$parms['gender'],
			$parms['marital'],
			dbesc($url),
			dbesc($nurl),
			intval($parms['comm']),
			$parms['tags'],
			dbesc(datetime_convert()),
			dbesc(datetime_convert())
		);
		logger('Insert returns: ' . $r);

		// Read the id back; rows are ordered oldest first, so the last one is the new row.
		$r = q("SELECT `id` FROM `profile` WHERE ( `homepage` = '%s' or `nurl` = '%s' ) order by id asc",
			dbesc($url),
			dbesc($nurl)
		);

		if(is_array($r) && count($r)) {
			$profile_id = $r[count($r) - 1]['id'];

			// More than one match means a duplicate slipped in; drop the oldest one.
			if(count($r) > 1) {
				q("DELETE FROM `photo` WHERE `profile-id` = %d LIMIT 1",
					intval($r[0]['id'])
				);
				q("DELETE FROM `profile` WHERE `id` = %d LIMIT 1",
					intval($r[0]['id'])
				);
			}
		}
	}

	// Tags are split from the unescaped copy: every term is escaped individually
	// below, so starting from the already-escaped value would escape it twice.
	if($raw_tags) {
		foreach(explode(' ', $raw_tags) as $t) {
			$t = strip_tags(trim($t));
			$t = substr($t,0,254);

			if(! strlen($t))
				continue;

			$r = q("SELECT `id` FROM `tag` WHERE `term` = '%s' and `nurl` = '%s' LIMIT 1",
				dbesc($t),
				dbesc($nurl)
			);
			if(! (is_array($r) && count($r))) {
				$r = q("INSERT INTO `tag` (`term`, `nurl`) VALUES ('%s', '%s') ",
					dbesc($t),
					dbesc($nurl)
				);
			}
		}
	}

	// Without a profile row there is nothing to attach a photo to: clean up and
	// give up before spending a network round trip on the image.
	if(! $profile_id) {
		nuke_record($url);
		return false;
	}

	require_once("Photo.php");

	$img_str = fetch_url($photo,true);
	$img = new Photo($img_str);
	if($img) {
		$img->scaleImageSquare(80);
		$r = $img->store($profile_id);
	}

	$r = q("UPDATE `profile` SET `photo` = '%s' WHERE `id` = %d LIMIT 1",
		dbesc($a->get_baseurl() . '/photo/' . $profile_id . '.jpg'),
		intval($profile_id)
	);

	return true;
}
/**
 * Delete every directory entry that matches a profile URL.
 *
 * Matching is done on both the URL as given and its normalised form
 * (https: replaced by http:, leading "www." dropped). For each matching
 * profile one photo row and the profile row itself are deleted.
 *
 * @param string $url Profile URL whose records should be removed.
 * @return void
 */
function nuke_record($url) {

	$normalised = str_replace(array('https:','//www.'), array('http:','//'), $url);

	$matches = q("SELECT `id` FROM `profile` WHERE ( `homepage` = '%s' OR `nurl` = '%s' ) ",
		dbesc($url),
		dbesc($normalised)
	);

	// Nothing stored for this URL.
	if(! count($matches)) {
		return;
	}

	foreach($matches as $row) {
		$id = intval($row['id']);

		// Photo first, then the profile it belongs to.
		q("DELETE FROM `photo` WHERE `profile-id` = %d LIMIT 1", $id);
		q("DELETE FROM `profile` WHERE `id` = %d LIMIT 1", $id);
	}
}

135
mod/import.php Normal file
View File

@ -0,0 +1,135 @@
<?php
/**
 * Module init hook for the import page: tells visitors without a logged-in
 * session that they lack permission.
 *
 * Only a notice is emitted here; this function does not stop the request.
 *
 * @param object $a Application object (unused).
 * @return void
 */
function import_init(&$a)
{
	// empty() also covers a session that has no 'uid' key at all,
	// which a bare array read would report as an undefined index.
	if(empty($_SESSION['uid'])) {
		notice("Permission denied.");
		return;
	}
}
/**
 * Render the import page.
 *
 * @param object $a Application object (unused).
 * @return string The filled-in view/import.tpl template, or an empty string
 *                when nobody is logged in.
 */
function import_content(&$a)
{
	// import_init() only emits a notice; do not hand the forms to anonymous visitors.
	if(empty($_SESSION['uid'])) {
		return '';
	}

	// Same absolute location import_post() reads and writes, so the
	// "(present)" marker does not depend on the working directory.
	$file = realpath(__DIR__.'/..').'/.htimport';

	$tpl = file_get_contents('view/import.tpl');
	return replace_macros($tpl, array(
		'$present' => is_file($file) ? ' (present)' : ''
	));
}
/**
 * Handle the two forms on the import page.
 *
 * - "batch_submit": take up to $perBatch URLs from the .htimport file, run
 *   run_submit() on each, write the remaining URLs back and emit a
 *   self-submitting form until the file is empty.
 * - "url" + "page": fetch one page of profile URLs from the remote
 *   directory's /lsearch endpoint, append them to .htimport and emit a
 *   self-submitting form for the next page until all URLs are collected.
 *
 * Progress counters are kept in $_SESSION (import_progress, import_success,
 * import_failed, import_total).
 *
 * @param object $a Application object; passed on to run_submit() and used for get_baseurl().
 * @return void
 */
function import_post(&$a)
{
	// import_init() only emits a notice, so the permission check has to be
	// repeated here before anything is fetched, written or deleted.
	if(empty($_SESSION['uid'])) {
		return;
	}

	//Get our input.
	$url      = (isset($_POST['url']) && is_string($_POST['url'])) ? trim($_POST['url']) : '';
	$has_page = isset($_POST['page']);
	$page     = $has_page ? intval($_POST['page']) : 0;
	$batch    = ! empty($_POST['batch_submit']);

	//Directory
	$file = realpath(__DIR__.'/..').'/.htimport';

	//Per batch setting.
	$perPage = 200;
	$perBatch = 10;

	if($batch){

		// Nothing to process; avoid warnings from reading/unlinking a missing file.
		if(! is_file($file)){
			notice("No import file present.");
			unset($_SESSION['import_progress']);
			return;
		}

		require_once('include/submit.php');

		//First get all data from file, ignoring blank lines (the extraction
		//step leaves a trailing line break behind).
		$data = file_get_contents($file);
		$list = array_values(array_filter(
			array_map('trim', explode("\r\n", (string) $data)),
			'strlen'
		));

		//Fresh batch?
		if(!isset($_SESSION['import_progress'])){
			$_SESSION['import_progress'] = true;
			$_SESSION['import_success'] = 0;
			$_SESSION['import_failed'] = 0;
			$_SESSION['import_total'] = 0;
			notice("Started new batch. ");
		}

		//Make sure we can use try catch for all sorts of errors.
		set_error_handler(function($errno, $errstr='', $errfile='', $errline='', $context=array()){
			if((error_reporting() & $errno) == 0){ return; }
			throw new \Exception($errstr, $errno);
		});

		for($i=0; $i<$perBatch; $i++){
			$profile_url = array_shift($list);
			if(! $profile_url)
				break;

			set_time_limit(20);

			//Count the attempt as failed first and correct it on success, so an
			//attempt that never returns is still accounted for.
			$_SESSION['import_total']++;
			$_SESSION['import_failed']++;
			try{
				if(run_submit($a, $profile_url)){
					$_SESSION['import_failed']--;
					$_SESSION['import_success']++;
				}
			}catch(\Exception $ex){
				//Best effort: keep going, but leave a trace of what went wrong.
				logger('import: ' . $profile_url . ' failed: ' . $ex->getMessage());
			}
		}

		//The throwing handler is only meant for the loop above.
		restore_error_handler();

		$left = count($list);
		if($left > 0){
			notice("$left items left in batch.");
			file_put_contents($file, implode("\r\n", $list));
			$fid = uniqid('autosubmit_');
			echo '<form method="POST" id="'.$fid.'"><input type="hidden" name="batch_submit" value="1"></form>'.
				'<script type="text/javascript">setTimeout(function(){ document.getElementById("'.$fid.'").submit(); }, 500);</script>';
		} else {
			$s = $_SESSION['import_success'];
			$total = $_SESSION['import_total'];
			$errors = $_SESSION['import_failed'];
			notice("Completed batch! $s / $total success. $errors errors.");
			unlink($file);
			unset($_SESSION['import_progress']);
		}

		return;

	}

	//Page numbers start at 0 (that is what the form posts), so test for the
	//presence of the field rather than for a truthy value.
	elseif(strlen($url) && $has_page && $page >= 0){

		$result = fetch_url($url."/lsearch?p=$page&n=$perPage&search=.*");
		$data = $result ? json_decode($result) : false;

		if($data && isset($data->results) && isset($data->total)){

			$rows = '';
			foreach($data->results as $profile){
				if(isset($profile->url))
					$rows .= $profile->url."\r\n";
			}

			//Page 0 starts a fresh file, later pages append to it.
			file_put_contents($file, $rows, $page > 0 ? FILE_APPEND : 0);

			$total = intval($data->total);
			$progress = min((($page+1) * $perPage), $total);
			notice("Imported ".$progress."/".$total." URLs.");

			if($progress < $total){
				$fid = uniqid('autosubmit_');
				echo
					'<form method="POST" id="'.$fid.'">'.
						//The URL came from the request; escape it before echoing it back.
						'<input type="hidden" name="url" value="'.htmlspecialchars($url, ENT_QUOTES, 'UTF-8').'">'.
						'<input type="hidden" name="page" value="'.($page+1).'">'.
					'</form>'.
					'<script type="text/javascript">setTimeout(function(){ document.getElementById("'.$fid.'").submit(); }, 500);</script>';
			} else {
				goaway($a->get_baseurl().'/import');
			}

		}

	}
}

View File

@ -1,172 +1,11 @@
<?php
require_once('include/datetime.php');
require_once('include/submit.php');
/**
 * Directory submission endpoint.
 *
 * Decodes the hex-encoded profile URL from the `url` query parameter and
 * hands it to run_submit() (include/submit.php), which does the scraping and
 * storing. The request always ends here; nothing is rendered.
 *
 * @param object $a Application object, passed through to run_submit().
 * @return void This function does not return; it always exits.
 */
function submit_content(&$a) {

	$hex = isset($_GET['url']) ? notags(trim($_GET['url'])) : '';
	$url = strlen($hex) ? hex2bin($hex) : '';

	// hex2bin() yields false for malformed input; treat that like a missing URL.
	if(($url === false) || (! strlen($url)))
		exit;

	// The whole submission is done once, by the shared implementation.
	run_submit($a, $url);
	exit;
}

14
view/import.tpl Normal file
View File

@ -0,0 +1,14 @@
<!-- Import page: two forms that both POST back to the current page. -->
<div class="mirror-wrapper">
<h2>Mirror a directory</h2>
<!-- Step 1: collect profile URLs from a remote directory, starting at page 0. -->
<form method="POST">
<label>Extract URL's:</label>
<input type="text" name="url" value="http://dir.friendica.com">
<input type="hidden" name="page" value="0">
<input type="submit" value="Execute">
</form>
<br>
<!-- Step 2: submit the collected URLs in batches; the label macro is filled in by import_content(). -->
<form method="POST">
<label>Batch submit from file: $present</label>
<input type="submit" name="batch_submit" value="Run batch">
</form>
</div>