Merge pull request #1396 from annando/1502-poco-generation-counter

Improved probe_url, new generation counter for poco
This commit is contained in:
Tobias Diekershoff 2015-02-17 07:43:09 +01:00
commit 94bbc5605b
8 changed files with 209 additions and 40 deletions

View File

@ -18,7 +18,7 @@ define ( 'FRIENDICA_PLATFORM', 'Friendica');
define ( 'FRIENDICA_CODENAME', 'Ginger');
define ( 'FRIENDICA_VERSION', '3.3.3-RC' );
define ( 'DFRN_PROTOCOL_VERSION', '2.23' );
define ( 'DB_UPDATE_VERSION', 1179 );
define ( 'DB_UPDATE_VERSION', 1180 );
define ( 'EOL', "<br />\r\n" );
define ( 'ATOM_TIME', 'Y-m-d\TH:i:s\Z' );

View File

@ -374,6 +374,7 @@ function probe_url($url, $mode = PROBE_NORMAL) {
$network = NETWORK_APPNET;
}
// Twitter is deactivated since twitter closed its old API
//$twitter = ((strpos($url,'twitter.com') !== false) ? true : false);
$lastfm = ((strpos($url,'last.fm/user') !== false) ? true : false);
@ -526,8 +527,8 @@ function probe_url($url, $mode = PROBE_NORMAL) {
if($j) {
$network = NETWORK_ZOT;
$vcard = array(
'fn' => $j->fullname,
'nick' => $j->nickname,
'fn' => $j->fullname,
'nick' => $j->nickname,
'photo' => $j->photo
);
$profile = $j->url;
@ -569,6 +570,10 @@ function probe_url($url, $mode = PROBE_NORMAL) {
$network = NETWORK_DIASPORA;
elseif($has_lrdd)
$network = NETWORK_OSTATUS;
if(strpos($url,'@'))
$addr = str_replace('acct:', '', $url);
$priority = 0;
if($hcard && ! $vcard) {
@ -762,6 +767,22 @@ function probe_url($url, $mode = PROBE_NORMAL) {
if(($network === NETWORK_FEED) && ($poll) && (! x($vcard,'fn')))
$vcard['fn'] = $url;
if (($notify != "") AND ($poll != "")) {
$baseurl = matching($notify, $poll);
$baseurl2 = matching($baseurl, $profile);
if ($baseurl2 != "")
$baseurl = $baseurl2;
}
if (($baseurl == "") AND ($notify != ""))
$baseurl = matching($profile, $notify);
if (($baseurl == "") AND ($poll != ""))
$baseurl = matching($profile, $poll);
$baseurl = rtrim($baseurl, "/");
$vcard['fn'] = notags($vcard['fn']);
$vcard['nick'] = str_replace(' ','',notags($vcard['nick']));
@ -780,11 +801,12 @@ function probe_url($url, $mode = PROBE_NORMAL) {
$result['network'] = $network;
$result['alias'] = $alias;
$result['pubkey'] = $pubkey;
$result['baseurl'] = $baseurl;
logger('probe_url: ' . print_r($result,true), LOGGER_DEBUG);
// Trying if it maybe a diaspora account
if ($result['network'] == NETWORK_FEED) {
if (($result['network'] == NETWORK_FEED) OR ($result['addr'] == "")) {
require_once('include/bbcode.php');
$address = GetProfileUsername($url, "", true);
$result2 = probe_url($address, $mode);
@ -796,3 +818,20 @@ function probe_url($url, $mode = PROBE_NORMAL) {
return $result;
}
/**
 * Returns the common leading part (prefix) of two strings.
 *
 * The comparison is done byte by byte and stops at the first position where
 * the strings differ or where the shorter string ends.
 *
 * @param string $part1 First string
 * @param string $part2 Second string
 * @return string The shared prefix, or an empty string if there is none
 */
function matching($part1, $part2) {
	$part1 = (string)$part1;
	$part2 = (string)$part2;

	$len = min(strlen($part1), strlen($part2));

	// Stay strictly below the length of the shorter string. Reading one position
	// further (as "<=" would do) compares a character with the "end of string"
	// value of substr(), which depends on the PHP version and matched "0" on old ones.
	$i = 0;
	while (($i < $len) AND ($part1[$i] === $part2[$i]))
		$i++;

	// substr() on an empty string doesn't reliably return "" on old PHP versions
	if ($i == 0)
		return "";

	return substr($part1, 0, $i);
}

View File

@ -626,6 +626,7 @@ function db_definition() {
"keywords" => array("type" => "text", "not null" => "1"),
"gender" => array("type" => "varchar(32)", "not null" => "1", "default" => ""),
"network" => array("type" => "varchar(255)", "not null" => "1", "default" => ""),
"generation" => array("type" => "tinyint(3)", "not null" => "1", "default" => "0"),
),
"indexes" => array(
"PRIMARY" => array("id"),

View File

@ -2398,7 +2398,7 @@ function diaspora_profile($importer,$xml,$msg) {
if (unxmlify($xml->searchable) == "true") {
require_once('include/socgraph.php');
poco_check($contact['url'], $name, NETWORK_DIASPORA, $images[0], $about, $location, $gender, $keywords, "",
datetime_convert(), $contact['id'], $importer['uid']);
datetime_convert(), 2, $contact['id'], $importer['uid']);
}
$profileurl = "";

View File

@ -1376,16 +1376,6 @@ function item_store($arr,$force_parent = false, $notify = false, $dontcache = fa
$current_post = $r[0]['id'];
logger('item_store: created item ' . $current_post);
// Add every contact to the global contact table
// Contacts from the statusnet connector are also added since you could add them in OStatus as well.
if (!$arr['private'] AND in_array($arr["network"],
array(NETWORK_DFRN, NETWORK_DIASPORA, NETWORK_OSTATUS, NETWORK_STATUSNET, ""))) {
poco_check($arr["author-link"], $arr["author-name"], $arr["network"], $arr["author-avatar"], "", "", "", "", "", $arr["received"], $arr["contact-id"], $arr["uid"]);
// Maybe its a body with a shared item? Then extract a global contact from it.
poco_contact_from_body($arr["body"], $arr["received"], $arr["contact-id"], $arr["uid"]);
}
// Set "success_update" to the date of the last time we heard from this contact
// This can be used to filter for inactive contacts and poco.
// Only do this for public postings to avoid privacy problems, since poco data is public.
@ -1475,7 +1465,7 @@ function item_store($arr,$force_parent = false, $notify = false, $dontcache = fa
$deleted = tag_deliver($arr['uid'],$current_post);
// current post can be deleted if is for a communuty page and no mention are
// current post can be deleted if is for a community page and no mention are
// in it.
if (!$deleted AND !$dontcache) {
@ -1485,11 +1475,13 @@ function item_store($arr,$force_parent = false, $notify = false, $dontcache = fa
$r = q('SELECT * FROM `item` WHERE id = %d', intval($current_post));
if (count($r) == 1) {
call_hooks('post_remote_end', $r[0]);
} else {
} else
logger('item_store: new item not found in DB, id ' . $current_post);
}
}
// Add every contact of the post to the global contact table
poco_store($arr);
create_tags_from_item($current_post);
create_files_from_item($current_post);
@ -2083,6 +2075,7 @@ function consume_feed($xml,$importer,&$contact, &$hub, $datedir = 0, $pass = 0)
$photo_timestamp = '';
$photo_url = '';
$birthday = '';
$contact_updated = '';
$hubs = $feed->get_links('hub');
logger('consume_feed: hubs: ' . print_r($hubs,true), LOGGER_DATA);
@ -2118,6 +2111,9 @@ function consume_feed($xml,$importer,&$contact, &$hub, $datedir = 0, $pass = 0)
if((is_array($contact)) && ($photo_timestamp) && (strlen($photo_url)) && ($photo_timestamp > $contact['avatar-date'])) {
logger('consume_feed: Updating photo for '.$contact['name'].' from '.$photo_url.' uid: '.$contact['uid']);
$contact_updated = $photo_timestamp;
require_once("include/Photo.php");
$photo_failure = false;
$have_photo = false;
@ -2175,6 +2171,9 @@ function consume_feed($xml,$importer,&$contact, &$hub, $datedir = 0, $pass = 0)
}
if((is_array($contact)) && ($name_updated) && (strlen($new_name)) && ($name_updated > $contact['name-date'])) {
if ($name_updated > $contact_updated)
$contact_updated = $name_updated;
$r = q("select * from contact where uid = %d and id = %d limit 1",
intval($contact['uid']),
intval($contact['id'])
@ -2199,6 +2198,9 @@ function consume_feed($xml,$importer,&$contact, &$hub, $datedir = 0, $pass = 0)
}
}
if ($contact_updated AND $new_name AND $photo_url)
poco_check($contact['url'], $new_name, NETWORK_DFRN, $photo_url, "", "", "", "", "", $contact_updated, 2, $contact['id'], $contact['uid']);
if(strlen($birthday)) {
if(substr($birthday,0,4) != $contact['bdyear']) {
logger('consume_feed: updating birthday: ' . $birthday);
@ -2245,7 +2247,6 @@ function consume_feed($xml,$importer,&$contact, &$hub, $datedir = 0, $pass = 0)
$contact['bdyear'] = substr($birthday,0,4);
}
}
$community_page = 0;
@ -2811,6 +2812,7 @@ function local_delivery($importer,$data) {
$new_name = '';
$photo_timestamp = '';
$photo_url = '';
$contact_updated = '';
$rawtags = $feed->get_feed_tags( NAMESPACE_DFRN, 'owner');
@ -2839,6 +2841,9 @@ function local_delivery($importer,$data) {
}
if(($photo_timestamp) && (strlen($photo_url)) && ($photo_timestamp > $importer['avatar-date'])) {
$contact_updated = $photo_timestamp;
logger('local_delivery: Updating photo for ' . $importer['name']);
require_once("include/Photo.php");
$photo_failure = false;
@ -2897,6 +2902,9 @@ function local_delivery($importer,$data) {
}
if(($name_updated) && (strlen($new_name)) && ($name_updated > $importer['name-date'])) {
if ($name_updated > $contact_updated)
$contact_updated = $name_updated;
$r = q("select * from contact where uid = %d and id = %d limit 1",
intval($importer['importer_uid']),
intval($importer['id'])
@ -2921,7 +2929,8 @@ function local_delivery($importer,$data) {
}
}
if ($contact_updated AND $new_name AND $photo_url)
poco_check($importer['url'], $new_name, NETWORK_DFRN, $photo_url, "", "", "", "", "", $contact_updated, 2, $importer['id'], $importer['importer_uid']);
// Currently unsupported - needs a lot of work
$reloc = $feed->get_feed_tags( NAMESPACE_DFRN, 'relocate' );

View File

@ -42,7 +42,7 @@ function poco_load($cid,$uid = 0,$zcid = 0,$url = null) {
if(! $url)
return;
$url = $url . (($uid) ? '/@me/@all?fields=displayName,urls,photos,updated,network,aboutMe,currentLocation,tags,gender' : '?fields=displayName,urls,photos,updated,network,aboutMe,currentLocation,tags,gender') ;
$url = $url . (($uid) ? '/@me/@all?fields=displayName,urls,photos,updated,network,aboutMe,currentLocation,tags,gender,generation' : '?fields=displayName,urls,photos,updated,network,aboutMe,currentLocation,tags,gender,generation') ;
logger('poco_load: ' . $url, LOGGER_DEBUG);
@ -76,6 +76,7 @@ function poco_load($cid,$uid = 0,$zcid = 0,$url = null) {
$about = '';
$keywords = '';
$gender = '';
$generation = 0;
$name = $entry->displayName;
@ -115,11 +116,18 @@ function poco_load($cid,$uid = 0,$zcid = 0,$url = null) {
if(isset($entry->gender))
$gender = $entry->gender;
if(isset($entry->generation) AND ($entry->generation > 0))
$generation = ++$entry->generation;
if(isset($entry->tags))
foreach($entry->tags as $tag)
$keywords = implode(", ", $tag);
poco_check($profile_url, $name, $network, $profile_photo, $about, $location, $gender, $keywords, $connect_url, $updated, $cid, $uid, $zcid);
// If you query a Friendica server for its profiles, the network has to be Friendica
if ($uid == 0)
$network = NETWORK_DFRN;
poco_check($profile_url, $name, $network, $profile_photo, $about, $location, $gender, $keywords, $connect_url, $updated, $generation, $cid, $uid, $zcid);
// Update the Friendica contacts. Diaspora is doing it via a message. (See include/diaspora.php)
if (($location != "") OR ($about != "") OR ($keywords != "") OR ($gender != ""))
@ -142,24 +150,60 @@ function poco_load($cid,$uid = 0,$zcid = 0,$url = null) {
}
function poco_check($profile_url, $name, $network, $profile_photo, $about, $location, $gender, $keywords, $connect_url, $updated, $cid = 0, $uid = 0, $zcid = 0) {
function poco_check($profile_url, $name, $network, $profile_photo, $about, $location, $gender, $keywords, $connect_url, $updated, $generation, $cid = 0, $uid = 0, $zcid = 0) {
$a = get_app();
// Generation:
// 0: No definition
// 1: Profiles on this server
// 2: Contacts of profiles on this server
// 3: Contacts of contacts of profiles on this server
// 4: ...
$gcid = "";
if ($profile_url == "")
return $gcid;
// Don't store the statusnet connector as network
// We can't simply set this to NETWORK_OSTATUS since the connector could have fetched posts from friendica as well
if ($network == NETWORK_STATUSNET)
$network = "";
// The global contacts should contain the original picture, not the cached one
if (($generation != 1) AND stristr(normalise_link($profile_photo), normalise_link($a->get_baseurl()."/photo/")))
$profile_photo = "";
$r = q("SELECT `network` FROM `contact` WHERE `nurl` = '%s' AND `network` != '' AND `network` != '%s' LIMIT 1",
dbesc(normalise_link($profile_url)), dbesc(NETWORK_STATUSNET)
);
if(count($r))
$network = $r[0]["network"];
if (($network == "") OR ($network == NETWORK_OSTATUS)) {
$r = q("SELECT `network`, `url` FROM `contact` WHERE `alias` IN ('%s', '%s') AND `network` != '' AND `network` != '%s' LIMIT 1",
dbesc($profile_url), dbesc(normalise_link($profile_url)), dbesc(NETWORK_STATUSNET)
);
if(count($r)) {
$network = $r[0]["network"];
$profile_url = $r[0]["url"];
}
}
$x = q("SELECT * FROM `gcontact` WHERE `nurl` = '%s' LIMIT 1",
dbesc(normalise_link($profile_url))
);
if(count($x))
if(count($x) AND ($network == "") AND ($x[0]["network"] != NETWORK_STATUSNET))
$network = $x[0]["network"];
if (($network == "") OR ($name == "") OR ($profile_photo == "")) {
require_once("include/Scrape.php");
$data = probe_url($profile_url, PROBE_DIASPORA);
$data = probe_url($profile_url);
$network = $data["network"];
$name = $data["name"];
$profile_url = $data["url"];
$profile_photo = $data["photo"];
}
@ -173,10 +217,10 @@ function poco_check($profile_url, $name, $network, $profile_photo, $about, $loca
if (($name == "") OR ($profile_photo == ""))
return $gcid;
if (!in_array($network, array(NETWORK_DFRN, NETWORK_OSTATUS, NETWORK_DIASPORA, NETWORK_STATUSNET)))
if (!in_array($network, array(NETWORK_DFRN, NETWORK_OSTATUS, NETWORK_DIASPORA)))
return $gcid;
logger("profile-check URL: ".$profile_url." name: ".$name." avatar: ".$profile_photo, LOGGER_DEBUG);
logger("profile-check generation: ".$generation." Network: ".$network." URL: ".$profile_url." name: ".$name." avatar: ".$profile_photo, LOGGER_DEBUG);
if(count($x)) {
$gcid = $x[0]['id'];
@ -193,10 +237,13 @@ function poco_check($profile_url, $name, $network, $profile_photo, $about, $loca
if (($keywords == "") AND ($x[0]['keywords'] != ""))
$keywords = $x[0]['keywords'];
if (($generation == 0) AND ($x[0]['generation'] > 0))
$generation = $x[0]['generation'];
if($x[0]['name'] != $name || $x[0]['photo'] != $profile_photo || $x[0]['updated'] < $updated) {
q("update gcontact set `name` = '%s', `network` = '%s', `photo` = '%s', `connect` = '%s', `url` = '%s',
`updated` = '%s', `location` = '%s', `about` = '%s', `keywords` = '%s', `gender` = '%s'
where `nurl` = '%s'",
q("UPDATE `gcontact` SET `name` = '%s', `network` = '%s', `photo` = '%s', `connect` = '%s', `url` = '%s',
`updated` = '%s', `location` = '%s', `about` = '%s', `keywords` = '%s', `gender` = '%s', `generation` = %d
WHERE (`generation` >= %d OR `generation` = 0) AND `nurl` = '%s'",
dbesc($name),
dbesc($network),
dbesc($profile_photo),
@ -207,12 +254,14 @@ function poco_check($profile_url, $name, $network, $profile_photo, $about, $loca
dbesc($about),
dbesc($keywords),
dbesc($gender),
intval($generation),
intval($generation),
dbesc(normalise_link($profile_url))
);
}
} else {
q("insert into `gcontact` (`name`,`network`, `url`,`nurl`,`photo`,`connect`, `updated`, `location`, `about`, `keywords`, `gender`)
values ('%s', '%s', '%s', '%s', '%s','%s', '%s', '%s', '%s', '%s', '%s')",
q("INSERT INTO `gcontact` (`name`,`network`, `url`,`nurl`,`photo`,`connect`, `updated`, `location`, `about`, `keywords`, `gender`, `generation`)
VALUES ('%s', '%s', '%s', '%s', '%s','%s', '%s', '%s', '%s', '%s', '%s', %d)",
dbesc($name),
dbesc($network),
dbesc($profile_url),
@ -223,7 +272,8 @@ function poco_check($profile_url, $name, $network, $profile_photo, $about, $loca
dbesc($location),
dbesc($about),
dbesc($keywords),
dbesc($gender)
dbesc($gender),
intval($generation)
);
$x = q("SELECT * FROM `gcontact` WHERE `nurl` = '%s' LIMIT 1",
dbesc(normalise_link($profile_url))
@ -290,7 +340,65 @@ function sub_poco_from_share($share, $created, $cid, $uid) {
return;
logger("prepare poco_check for profile ".$profile, LOGGER_DEBUG);
poco_check($profile, "", "", "", "", "", "", "", "", $created, $cid, $uid);
poco_check($profile, "", "", "", "", "", "", "", "", $created, 3, $cid, $uid);
}
/**
 * Adds the author of a public item to the global contacts via poco_check().
 *
 * Nothing is stored when the item is private, when it comes from a network that
 * isn't in the list below, or when it is neither a global copy (uid = 0) nor a
 * comment on a thread that has a global copy.
 *
 * The "generation" handed to poco_check() is derived from how the author is known here:
 *   1: The author is a user of this server ("self" contact)
 *   2: The author is a contact of a user on this server
 *   3: The author isn't known directly (maybe a reshared item)
 *
 * @param array $item Item data. Keys read here: private, network, uid, uri, parent-uri,
 *                    author-link, author-name, author-avatar, received, contact-id, body
 */
function poco_store($item) {

	// Isn't it public?
	if ($item['private'])
		return;

	// Or is it from a network where we don't store the global contacts?
	if (!in_array($item["network"], array(NETWORK_DFRN, NETWORK_DIASPORA, NETWORK_OSTATUS, NETWORK_STATUSNET, "")))
		return;

	// Is it a global copy?
	$store_gcontact = ($item["uid"] == 0);

	// Is it a comment on a global copy?
	if (!$store_gcontact AND ($item["uri"] != $item["parent-uri"])) {
		// The parent uri is item data and has to be escaped like every other string value.
		// We only need to know if such a row exists, so a single row is enough.
		$q = q("SELECT `id` FROM `item` WHERE `uri`='%s' AND `uid` = 0 LIMIT 1",
			dbesc($item["parent-uri"]));
		$store_gcontact = count($q);
	}

	if (!$store_gcontact)
		return;

	// "3" means: We don't know this contact directly (Maybe a reshared item)
	$generation = 3;
	$network = "";
	$profile_url = $item["author-link"];

	// Is it a user from our server?
	$q = q("SELECT `id` FROM `contact` WHERE `self` AND `nurl` = '%s' LIMIT 1",
		dbesc(normalise_link($item["author-link"])));
	if (count($q)) {
		logger("Our user (generation 1): ".$item["author-link"], LOGGER_DEBUG);
		$generation = 1;
		$network = NETWORK_DFRN;
	} else { // Is it a contact from a user on our server?
		$q = q("SELECT `network`, `url` FROM `contact` WHERE `uid` != 0 AND `network` != ''
			AND (`nurl` = '%s' OR `alias` IN ('%s', '%s')) AND `network` != '%s' LIMIT 1",
			dbesc(normalise_link($item["author-link"])),
			dbesc(normalise_link($item["author-link"])),
			dbesc($item["author-link"]),
			dbesc(NETWORK_STATUSNET));
		if (count($q)) {
			$generation = 2;
			// Use network and profile url of the known contact instead of the item data
			$network = $q[0]["network"];
			$profile_url = $q[0]["url"];
			logger("Known contact (generation 2): ".$profile_url, LOGGER_DEBUG);
		}
	}

	if ($generation == 3)
		logger("Unknown contact (generation 3): ".$item["author-link"], LOGGER_DEBUG);

	poco_check($profile_url, $item["author-name"], $network, $item["author-avatar"], "", "", "", "", "", $item["received"], $generation, $item["contact-id"], $item["uid"]);

	// Maybe it's a body with a shared item? Then extract a global contact from it.
	poco_contact_from_body($item["body"], $item["received"], $item["contact-id"], $item["uid"]);
}
function count_common_friends($uid,$cid) {

View File

@ -135,9 +135,9 @@ function poco_init(&$a) {
if(x($_GET,'updatedSince') AND !$global)
$ret['updatedSince'] = false;
$ret['startIndex'] = (string) $startIndex;
$ret['itemsPerPage'] = (string) $itemsPerPage;
$ret['totalResults'] = (string) $totalResults;
$ret['startIndex'] = (int) $startIndex;
$ret['itemsPerPage'] = (int) $itemsPerPage;
$ret['totalResults'] = (int) $totalResults;
$ret['entry'] = array();
@ -153,7 +153,8 @@ function poco_init(&$a) {
'network' => false,
'gender' => false,
'tags' => false,
'address' => false
'address' => false,
'generation' => false
);
if((! x($_GET,'fields')) || ($_GET['fields'] === '@all'))
@ -168,6 +169,15 @@ function poco_init(&$a) {
if(is_array($r)) {
if(count($r)) {
foreach($r as $rr) {
if (!isset($rr['generation'])) {
if ($global)
$rr['generation'] = 3;
elseif ($system_mode)
$rr['generation'] = 1;
else
$rr['generation'] = 2;
}
if (($rr['about'] == "") AND isset($rr['pabout']))
$rr['about'] = $rr['pabout'];
@ -198,7 +208,7 @@ function poco_init(&$a) {
$entry = array();
if($fields_ret['id'])
$entry['id'] = $rr['id'];
$entry['id'] = (int)$rr['id'];
if($fields_ret['displayName'])
$entry['displayName'] = $rr['name'];
if($fields_ret['aboutMe'])
@ -207,6 +217,8 @@ function poco_init(&$a) {
$entry['currentLocation'] = $rr['location'];
if($fields_ret['gender'])
$entry['gender'] = $rr['gender'];
if($fields_ret['generation'])
$entry['generation'] = (int)$rr['generation'];
if($fields_ret['urls']) {
$entry['urls'] = array(array('value' => $rr['url'], 'type' => 'profile'));
if($rr['addr'] && ($rr['network'] !== NETWORK_MAIL))

View File

@ -1,6 +1,6 @@
<?php
define( 'UPDATE_VERSION' , 1179 );
define( 'UPDATE_VERSION' , 1180 );
/**
*