Made a first version of import functionality.
* A very crude extraction step that creates a file with URLs. * A processor that reduces the file to nothing as it imports them.
This commit is contained in:
parent
fbf58ea0c4
commit
40202ea948
4 changed files with 347 additions and 164 deletions
195
include/submit.php
Normal file
195
include/submit.php
Normal file
|
@ -0,0 +1,195 @@
|
|||
<?php
|
||||
|
||||
require_once('datetime.php');
|
||||
|
||||
/**
 * Scrape the profile found at $url and create or refresh its directory entry.
 *
 * Steps, in order:
 *  1. look up an existing `profile` row by homepage / normalised URL,
 *  2. scrape the remote profile (scrape_dfrn) and validate it (validate_dfrn),
 *  3. drop hidden profiles (removing any existing record),
 *  4. UPDATE the existing row or INSERT a new one (removing the oldest
 *     duplicate when the INSERT leaves more than one matching row),
 *  5. register each space-separated tag for the normalised URL,
 *  6. fetch the profile photo, scale it to 80px square and store it.
 *
 * @param object $a   Application object; only get_baseurl() is called on it.
 * @param string $url Profile URL to import.
 * @return bool true when the profile row and photo URL were written,
 *              false when the URL is empty, the scrape is unusable, the
 *              profile is hidden, or no profile row id could be obtained.
 */
function run_submit(&$a, $url) {

    if(! strlen($url))
        return false;

    logger('Updating: ' . $url);

    // Normalised form: scheme forced to http and a leading "www." removed.
    $nurl = str_replace(array('https:','//www.'), array('http:','//'), $url);

    $profile_exists = false;
    // Initialised so a failed lookup/insert is detectable further down
    // instead of reading an undefined variable.
    $profile_id = 0;

    $r = q("SELECT * FROM `profile` WHERE ( `homepage` = '%s' OR `nurl` = '%s' ) LIMIT 1",
        dbesc($url),
        dbesc($nurl)
    );

    if(is_array($r) && count($r)) {
        $profile_exists = true;
        $profile_id = $r[0]['id'];
    }

    require_once('Scrape.php');

    $parms = scrape_dfrn($url);

    // logger('dir_submit: ' . print_r($parms,true));

    // A truthy validate_dfrn() result is treated as "invalid".
    if((! is_array($parms)) || (! count($parms)) || (validate_dfrn($parms))) {
        return false;
    }

    // NOTE(review): the negation only applies to the 'fn' test, so this
    // rejects "no name but has a photo" rather than "name or photo missing".
    // Left unchanged because the intended rule cannot be confirmed from here.
    if((x($parms,'hide')) || (! (x($parms,'fn')) && (x($parms,'photo')))) {
        if($profile_exists) {
            nuke_record($url);
        }
        return false;
    }

    // Keep the raw photo URL; $parms is escaped for SQL right below.
    $photo = isset($parms['photo']) ? $parms['photo'] : '';

    dbesc_array($parms);

    // Optional fields may be absent from the scrape. Default them so the
    // queries below never read an undefined index (the batch importer turns
    // such notices into exceptions, which would fail the whole profile).
    foreach(array('fn','pdesc','locality','region','postal-code','country-name','gender','marital','tags') as $field) {
        if(! isset($parms[$field]))
            $parms[$field] = '';
    }
    $parms['comm'] = isset($parms['comm']) ? intval($parms['comm']) : 0;

    if($profile_exists) {
        $r = q("UPDATE `profile` SET
            `name` = '%s',
            `pdesc` = '%s',
            `locality` = '%s',
            `region` = '%s',
            `postal-code` = '%s',
            `country-name` = '%s',
            `gender` = '%s',
            `marital` = '%s',
            `homepage` = '%s',
            `nurl` = '%s',
            `comm` = %d,
            `tags` = '%s',
            `updated` = '%s'
            WHERE `id` = %d LIMIT 1",

            $parms['fn'],
            $parms['pdesc'],
            $parms['locality'],
            $parms['region'],
            $parms['postal-code'],
            $parms['country-name'],
            $parms['gender'],
            $parms['marital'],
            dbesc($url),
            dbesc($nurl),
            intval($parms['comm']),
            $parms['tags'],
            dbesc(datetime_convert()),
            intval($profile_id)
        );
        logger('Update returns: ' . $r);
    }
    else {
        $r = q("INSERT INTO `profile` ( `name`, `pdesc`, `locality`, `region`, `postal-code`, `country-name`, `gender`, `marital`, `homepage`, `nurl`, `comm`, `tags`, `created`, `updated` )
            VALUES ( '%s', '%s', '%s', '%s' , '%s', '%s', '%s', '%s', '%s', '%s', %d, '%s', '%s', '%s' )",
            $parms['fn'],
            $parms['pdesc'],
            $parms['locality'],
            $parms['region'],
            $parms['postal-code'],
            $parms['country-name'],
            $parms['gender'],
            $parms['marital'],
            dbesc($url),
            dbesc($nurl),
            intval($parms['comm']),
            $parms['tags'],
            dbesc(datetime_convert()),
            dbesc(datetime_convert())
        );
        logger('Insert returns: ' . $r);

        // Re-read to learn the new row id; the newest row (highest id) wins.
        $r = q("SELECT `id` FROM `profile` WHERE ( `homepage` = '%s' or `nurl` = '%s' ) order by id asc",
            dbesc($url),
            dbesc($nurl)
        );

        if(is_array($r) && count($r)) {
            $profile_id = $r[count($r) - 1]['id'];

            // More than one match means a duplicate slipped in: drop the oldest.
            if(count($r) > 1) {
                q("DELETE FROM `photo` WHERE `profile-id` = %d LIMIT 1",
                    intval($r[0]['id'])
                );
                q("DELETE FROM `profile` WHERE `id` = %d LIMIT 1",
                    intval($r[0]['id'])
                );
            }
        }
    }

    // Without a row id there is nothing to attach tags or a photo to; clean
    // up and report failure before writing any dependent records.
    if(! $profile_id) {
        nuke_record($url);
        return false;
    }

    if($parms['tags']) {
        $arr = explode(' ', $parms['tags']);
        foreach($arr as $t) {
            $t = strip_tags(trim($t));
            $t = substr($t,0,254);

            if(! strlen($t))
                continue;

            $r = q("SELECT `id` FROM `tag` WHERE `term` = '%s' and `nurl` = '%s' LIMIT 1",
                dbesc($t),
                dbesc($nurl)
            );
            if(! (is_array($r) && count($r))) {
                $r = q("INSERT INTO `tag` (`term`, `nurl`) VALUES ('%s', '%s') ",
                    dbesc($t),
                    dbesc($nurl)
                );
            }
        }
    }

    require_once("Photo.php");

    $img_str = fetch_url($photo,true);
    $img = new Photo($img_str);
    $img->scaleImageSquare(80);
    $r = $img->store($profile_id);

    $r = q("UPDATE `profile` SET `photo` = '%s' WHERE `id` = %d LIMIT 1",
        dbesc($a->get_baseurl() . '/photo/' . $profile_id . '.jpg'),
        intval($profile_id)
    );

    return true;

}
|
||||
|
||||
|
||||
/**
 * Delete every directory entry that matches the given profile URL.
 *
 * Rows are matched on either the literal URL (`homepage`) or its normalised
 * form (`nurl`: scheme forced to http, leading "www." removed). For each
 * matching profile one photo row and the profile row itself are deleted.
 *
 * @param string $url Profile URL whose records should be removed.
 * @return void
 */
function nuke_record($url) {

    $normalized = str_replace(array('https:','//www.'), array('http:','//'), $url);

    $matches = q("SELECT `id` FROM `profile` WHERE ( `homepage` = '%s' OR `nurl` = '%s' ) ",
        dbesc($url),
        dbesc($normalized)
    );

    // Nothing stored for this URL: nothing to remove.
    if(! count($matches))
        return;

    foreach($matches as $row) {
        $id = intval($row['id']);

        // Photo first, then the profile row it belongs to.
        q("DELETE FROM `photo` WHERE `profile-id` = %d LIMIT 1", $id);
        q("DELETE FROM `profile` WHERE `id` = %d LIMIT 1", $id);
    }

    return;
}
|
135
mod/import.php
Normal file
135
mod/import.php
Normal file
|
@ -0,0 +1,135 @@
|
|||
<?php
|
||||
|
||||
/**
 * Module init hook for the import page.
 *
 * Emits a "Permission denied." notice when the session carries no user id.
 * It only notifies; it does not itself stop the request.
 *
 * @param object $a Application object (unused here).
 * @return void
 */
function import_init(&$a)
{

    // empty() also covers a session without a 'uid' key, which previously
    // raised an undefined-index notice for anonymous visitors.
    if(empty($_SESSION['uid'])) {
        notice("Permission denied.");
        return;
    }

}
|
||||
|
||||
/**
 * Render the import page.
 *
 * Loads view/import.tpl and fills the $present macro with ' (present)'
 * when a '.htimport' file exists relative to the working directory,
 * or with an empty string otherwise.
 *
 * @param object $a Application object (unused here).
 * @return mixed Whatever replace_macros() produces for the template.
 */
function import_content(&$a)
{

    $template = file_get_contents('view/import.tpl');

    // Tell the user whether a previously extracted URL list is waiting.
    if(is_file('.htimport')) {
        $marker = ' (present)';
    }
    else {
        $marker = '';
    }

    $macros = array('$present' => $marker);

    return replace_macros($template, $macros);

}
|
||||
|
||||
/**
 * POST handler for the import page. Two modes:
 *
 *  - Batch mode (`batch_submit` posted): takes up to $perBatch URLs off the
 *    top of the import file, runs run_submit() on each, writes the remainder
 *    back and emits a self-submitting form until the file is exhausted.
 *    Success / failure counters are kept in the session.
 *
 *  - Extraction mode (`url` and `page` posted): fetches one page of
 *    "$url/lsearch" results, writes (page 0) or appends (later pages) the
 *    profile URLs to the import file, and emits a self-submitting form for
 *    the next page until the reported total is reached.
 *
 * @param object $a Application object; passed to run_submit() and used for
 *                  get_baseurl().
 * @return void
 */
function import_post(&$a)
{

    // import_init() only shows a notice to anonymous visitors; it does not
    // stop the request, so the permission check has to be enforced here too.
    if(empty($_SESSION['uid'])) {
        return;
    }

    //Get our input.
    $url = isset($_POST['url']) ? $_POST['url'] : '';
    $has_page = isset($_POST['page']);
    $page = $has_page ? intval($_POST['page']) : 0;
    $batch = isset($_POST['batch_submit']) ? $_POST['batch_submit'] : '';

    //Directory
    $file = realpath(__DIR__.'/..').'/.htimport';

    //Per batch setting.
    $perPage = 200;
    $perBatch = 10;

    if($batch){

        require_once('include/submit.php');

        // Without an import file there is nothing to process; say so instead
        // of reporting a bogus "Completed batch".
        if(! is_file($file)) {
            notice("No import file present.");
            return;
        }

        //First get all data from file.
        $data = file_get_contents($file);
        $list = explode("\r\n", (string) $data);

        //Fresh batch?
        if(!isset($_SESSION['import_progress'])){

            $_SESSION['import_progress'] = true;
            $_SESSION['import_success'] = 0;
            $_SESSION['import_failed'] = 0;
            $_SESSION['import_total'] = 0;
            notice("Started new batch. ");

        }

        //Make sure we can use try catch for all sorts of errors.
        set_error_handler(function($errno, $errstr=''){
            // Respect the current error_reporting level (and @-suppression).
            if((error_reporting() & $errno) == 0){ return; }
            throw new \Exception($errstr, $errno);
        });

        for($i=0; $i<$perBatch; $i++){

            $next = array_shift($list);

            // An empty entry (or an exhausted list) ends this batch.
            if(! $next)
                break;

            set_time_limit(20);
            $_SESSION['import_total']++;

            try{
                $ok = run_submit($a, $next);
            }catch(\Exception $ex){
                /* We tried... */
                $ok = false;
            }

            if($ok)
                $_SESSION['import_success']++;
            else
                $_SESSION['import_failed']++;

        }

        // The throwing handler is only meant to guard run_submit().
        restore_error_handler();

        $left = count($list);

        if($left > 0){
            notice("$left items left in batch.");
            file_put_contents($file, implode("\r\n", $list));
            $fid = uniqid('autosubmit_');
            echo '<form method="POST" id="'.$fid.'"><input type="hidden" name="batch_submit" value="1"></form>'.
                '<script type="text/javascript">setTimeout(function(){ document.getElementById("'.$fid.'").submit(); }, 500);</script>';
        } else {
            $s = $_SESSION['import_success'];
            $total = $_SESSION['import_total'];
            $errors = $_SESSION['import_failed'];
            notice("Completed batch! $s / $total success. $errors errors.");
            unlink($file);
            unset($_SESSION['import_progress']);
        }

        return;

    }

    // The form starts at page 0, so test for the presence of the field,
    // not for a truthy page number.
    elseif($url && $has_page){

        $result = fetch_url($url."/lsearch?p=$page&n=$perPage&search=.*");
        $data = $result ? json_decode($result) : false;

        if(is_object($data)){

            $rows = '';
            if(isset($data->results) && (is_array($data->results) || is_object($data->results))){
                foreach($data->results as $profile){
                    if(isset($profile->url))
                        $rows .= $profile->url."\r\n";
                }
            }

            // Page 0 starts a fresh file, later pages append to it.
            file_put_contents($file, $rows, $page > 0 ? FILE_APPEND : 0);

            $total = isset($data->total) ? intval($data->total) : 0;
            $progress = min((($page+1) * $perPage), $total);
            notice("Imported ".$progress."/".$total." URLs.");

            if($progress < $total){
                $fid = uniqid('autosubmit_');
                echo
                    '<form method="POST" id="'.$fid.'">'.
                        // $url is user input: escape it for the attribute context.
                        '<input type="hidden" name="url" value="'.htmlspecialchars($url, ENT_QUOTES, 'UTF-8').'">'.
                        '<input type="hidden" name="page" value="'.($page+1).'">'.
                    '</form>'.
                    '<script type="text/javascript">setTimeout(function(){ document.getElementById("'.$fid.'").submit(); }, 500);</script>';

            } else {
                goaway($a->get_baseurl().'/import');
            }

        }

    }

}
|
167
mod/submit.php
167
mod/submit.php
|
@ -1,172 +1,11 @@
|
|||
<?php
|
||||
|
||||
require_once('include/datetime.php');
|
||||
require_once('include/submit.php');
|
||||
|
||||
/**
 * Handle a directory submission request.
 *
 * The profile URL arrives hex-encoded in the `url` query parameter. It is
 * trimmed, passed through notags(), decoded with hex2bin() and handed to
 * run_submit(), which performs the whole scrape-and-store procedure.
 * The request always terminates here without rendering a page.
 *
 * The previously inlined copy of that procedure is removed: leaving it next
 * to the run_submit() call processed every submission twice.
 *
 * @param object $a Application object, forwarded to run_submit().
 * @return void This function does not return; it ends the request.
 */
function submit_content(&$a) {

    // A missing parameter becomes an empty URL, which run_submit() rejects.
    $raw = isset($_GET['url']) ? $_GET['url'] : '';
    $url = hex2bin(notags(trim($raw)));

    run_submit($a, $url);
    exit;

}
|
||||
|
|
14
view/import.tpl
Normal file
14
view/import.tpl
Normal file
|
@ -0,0 +1,14 @@
|
|||
<div class="mirror-wrapper">
|
||||
<h2>Mirror a directory</h2>
|
||||
<form method="POST">
|
||||
<label>Extract URL's:</label>
|
||||
<input type="text" name="url" value="http://dir.friendica.com">
|
||||
<input type="hidden" name="page" value="0">
|
||||
<input type="submit" value="Execute">
|
||||
</form>
|
||||
<br>
|
||||
<form method="POST">
|
||||
<label>Batch submit from file: $present</label>
|
||||
<input type="submit" name="batch_submit" value="Run batch">
|
||||
</form>
|
||||
</div>
|
Loading…
Reference in a new issue