Browse Source

Improved probe_url, fixed wrong network detection.

pull/1396/head
Michael Vogel 6 years ago
parent
commit
8b1b886797
3 changed files with 88 additions and 26 deletions
  1. +44
    -3
      include/Scrape.php
  2. +5
    -6
      include/items.php
  3. +39
    -17
      include/socgraph.php

+ 44
- 3
include/Scrape.php View File

@ -374,6 +374,7 @@ function probe_url($url, $mode = PROBE_NORMAL) {
$network = NETWORK_APPNET;
}
// Twitter is deactivated since twitter closed its old API
//$twitter = ((strpos($url,'twitter.com') !== false) ? true : false);
$lastfm = ((strpos($url,'last.fm/user') !== false) ? true : false);
@ -526,8 +527,8 @@ function probe_url($url, $mode = PROBE_NORMAL) {
if($j) {
$network = NETWORK_ZOT;
$vcard = array(
'fn' => $j->fullname,
'nick' => $j->nickname,
'fn' => $j->fullname,
'nick' => $j->nickname,
'photo' => $j->photo
);
$profile = $j->url;
@ -569,6 +570,10 @@ function probe_url($url, $mode = PROBE_NORMAL) {
$network = NETWORK_DIASPORA;
elseif($has_lrdd)
$network = NETWORK_OSTATUS;
if(strpos($url,'@'))
$addr = str_replace('acct:', '', $url);
$priority = 0;
if($hcard && ! $vcard) {
@ -762,6 +767,22 @@ function probe_url($url, $mode = PROBE_NORMAL) {
if(($network === NETWORK_FEED) && ($poll) && (! x($vcard,'fn')))
$vcard['fn'] = $url;
if (($notify != "") AND ($poll != "")) {
$baseurl = matching($notify, $poll);
$baseurl2 = matching($baseurl, $profile);
if ($baseurl2 != "")
$baseurl = $baseurl2;
}
if (($baseurl == "") AND ($notify != ""))
$baseurl = matching($profile, $notify);
if (($baseurl == "") AND ($poll != ""))
$baseurl = matching($profile, $poll);
$baseurl = rtrim($baseurl, "/");
$vcard['fn'] = notags($vcard['fn']);
$vcard['nick'] = str_replace(' ','',notags($vcard['nick']));
@ -780,14 +801,17 @@ function probe_url($url, $mode = PROBE_NORMAL) {
$result['network'] = $network;
$result['alias'] = $alias;
$result['pubkey'] = $pubkey;
$result['baseurl'] = $baseurl;
logger('probe_url: ' . print_r($result,true), LOGGER_DEBUG);
// Trying if it maybe a diaspora account
if ($result['network'] == NETWORK_FEED) {
//if (($result['network'] == NETWORK_FEED) OR (($result['addr'] == "") AND ($result['network'] != NETWORK_OSTATUS))) {
if (($result['network'] == NETWORK_FEED) OR ($result['addr'] == "")) {
require_once('include/bbcode.php');
$address = GetProfileUsername($url, "", true);
$result2 = probe_url($address, $mode);
//$result2 = probe_url($address, PROBE_DIASPORA);
if ($result2['network'] != "")
$result = $result2;
}
@ -796,3 +820,20 @@ function probe_url($url, $mode = PROBE_NORMAL) {
return $result;
}
function matching($part1, $part2) {
$len = min(strlen($part1), strlen($part2));
$match = "";
$matching = true;
$i = 0;
while (($i <= $len) AND $matching) {
if (substr($part1, $i, 1) == substr($part2, $i, 1))
$match .= substr($part1, $i, 1);
else
$matching = false;
$i++;
}
return($match);
}

+ 5
- 6
include/items.php View File

@ -1376,9 +1376,6 @@ function item_store($arr,$force_parent = false, $notify = false, $dontcache = fa
$current_post = $r[0]['id'];
logger('item_store: created item ' . $current_post);
// Add every contact to the global contact table
poco_store($arr);
/*
// Is it a global copy?
$store_gcontact = ($arr["uid"] == 0);
@ -1511,7 +1508,7 @@ function item_store($arr,$force_parent = false, $notify = false, $dontcache = fa
$deleted = tag_deliver($arr['uid'],$current_post);
// current post can be deleted if is for a communuty page and no mention are
// current post can be deleted if is for a community page and no mention are
// in it.
if (!$deleted AND !$dontcache) {
@ -1521,11 +1518,13 @@ function item_store($arr,$force_parent = false, $notify = false, $dontcache = fa
$r = q('SELECT * FROM `item` WHERE id = %d', intval($current_post));
if (count($r) == 1) {
call_hooks('post_remote_end', $r[0]);
} else {
} else
logger('item_store: new item not found in DB, id ' . $current_post);
}
}
// Add every contact of the post to the global contact table
poco_store($arr);
create_tags_from_item($current_post);
create_files_from_item($current_post);


+ 39
- 17
include/socgraph.php View File

@ -78,9 +78,6 @@ function poco_load($cid,$uid = 0,$zcid = 0,$url = null) {
$gender = '';
$generation = 0;
if ($uid == 0)
$network = NETWORK_DFRN;
$name = $entry->displayName;
if(isset($entry->urls)) {
@ -126,6 +123,10 @@ function poco_load($cid,$uid = 0,$zcid = 0,$url = null) {
foreach($entry->tags as $tag)
$keywords = implode(", ", $tag);
// If you query a Friendica server for its profiles, the network has to be Friendica
if ($uid == 0)
$network = NETWORK_DFRN;
poco_check($profile_url, $name, $network, $profile_photo, $about, $location, $gender, $keywords, $connect_url, $updated, $generation, $cid, $uid, $zcid);
// Update the Friendica contacts. Diaspora is doing it via a message. (See include/diaspora.php)
@ -151,6 +152,8 @@ function poco_load($cid,$uid = 0,$zcid = 0,$url = null) {
function poco_check($profile_url, $name, $network, $profile_photo, $about, $location, $gender, $keywords, $connect_url, $updated, $generation, $cid = 0, $uid = 0, $zcid = 0) {
$a = get_app();
// Generation:
// 0: No definition
// 1: Profiles on this server
@ -163,15 +166,24 @@ function poco_check($profile_url, $name, $network, $profile_photo, $about, $loca
if ($profile_url == "")
return $gcid;
$r = q("SELECT `network` FROM `contact` WHERE `nurl` = '%s' AND `network` != '' LIMIT 1",
dbesc(normalise_link($profile_url))
// Don't store the statusnet connector as network
// We can't simply set this to NETWORK_OSTATUS since the connector could have fetched posts from friendica as well
if ($network == NETWORK_STATUSNET)
$network = "";
// The global contacts should contain the original picture, not the cached one
if (($generation != 1) AND stristr(normalise_link($profile_photo), normalise_link($a->get_baseurl()."/photo/")))
$profile_photo = "";
$r = q("SELECT `network` FROM `contact` WHERE `nurl` = '%s' AND `network` != '' AND `network` != '%s' LIMIT 1",
dbesc(normalise_link($profile_url)), dbesc(NETWORK_STATUSNET)
);
if(count($r))
$network = $r[0]["network"];
if ($network == "") {
$r = q("SELECT `network`, `url` FROM `contact` WHERE `alias` IN ('%s', '%s') AND `network` != '' LIMIT 1",
dbesc($profile_url), dbesc(normalise_link($profile_url))
if (($network == "") OR ($network == NETWORK_OSTATUS)) {
$r = q("SELECT `network`, `url` FROM `contact` WHERE `alias` IN ('%s', '%s') AND `network` != '' AND `network` != '%s' LIMIT 1",
dbesc($profile_url), dbesc(normalise_link($profile_url)), dbesc(NETWORK_STATUSNET)
);
if(count($r)) {
$network = $r[0]["network"];
@ -182,15 +194,16 @@ function poco_check($profile_url, $name, $network, $profile_photo, $about, $loca
$x = q("SELECT * FROM `gcontact` WHERE `nurl` = '%s' LIMIT 1",
dbesc(normalise_link($profile_url))
);
if(count($x) AND ($network == ""))
if(count($x) AND ($network == "") AND ($x[0]["network"] != NETWORK_STATUSNET))
$network = $x[0]["network"];
if (($network == "") OR ($name == "") OR ($profile_photo == "")) {
require_once("include/Scrape.php");
$data = probe_url($profile_url, PROBE_DIASPORA);
$data = probe_url($profile_url);
$network = $data["network"];
$name = $data["name"];
$profile_url = $data["url"];
$profile_photo = $data["photo"];
}
@ -204,7 +217,7 @@ function poco_check($profile_url, $name, $network, $profile_photo, $about, $loca
if (($name == "") OR ($profile_photo == ""))
return $gcid;
if (!in_array($network, array(NETWORK_DFRN, NETWORK_OSTATUS, NETWORK_DIASPORA, NETWORK_STATUSNET)))
if (!in_array($network, array(NETWORK_DFRN, NETWORK_OSTATUS, NETWORK_DIASPORA)))
return $gcid;
logger("profile-check generation: ".$generation." Network: ".$network." URL: ".$profile_url." name: ".$name." avatar: ".$profile_photo, LOGGER_DEBUG);
@ -333,11 +346,11 @@ function sub_poco_from_share($share, $created, $cid, $uid) {
function poco_store($item) {
// Isn't it public?
if (!$item['private'])
if ($item['private'])
return;
// Or is it from a network where we don't store the global contacts?
if (!in_array($item["network"], array(NETWORK_DFRN, NETWORK_DIASPORA, NETWORK_OSTATUS, "")))
if (!in_array($item["network"], array(NETWORK_DFRN, NETWORK_DIASPORA, NETWORK_OSTATUS, NETWORK_STATUSNET, "")))
return;
// Is it a global copy?
@ -355,25 +368,34 @@ function poco_store($item) {
// "3" means: We don't know this contact directly (Maybe a reshared item)
$generation = 3;
$network = "";
$profile_url = $item["author-link"];
// Is it a user from our server?
$q = q("SELECT `id` FROM `contact` WHERE `self` AND `nurl` = '%s' LIMIT 1",
dbesc(normalise_link($item["author-link"])));
if (count($q)) {
logger("Our user (generation 1): ".$item["author-link"], LOGGER_DEBUG);
$generation = 1;
$network = NETWORK_DFRN;
} else { // Is it a contact from a user on our server?
$q = q("SELECT `network` FROM `contact` WHERE `uid` != 0 AND `network` != ''
AND (`nurl` = '%s' OR `alias` IN ('%s', '%s')) LIMIT 1",
$q = q("SELECT `network`, `url` FROM `contact` WHERE `uid` != 0 AND `network` != ''
AND (`nurl` = '%s' OR `alias` IN ('%s', '%s')) AND `network` != '%s' LIMIT 1",
dbesc(normalise_link($item["author-link"])),
dbesc(normalise_link($item["author-link"])),
dbesc($item["author-link"]));
dbesc($item["author-link"]),
dbesc(NETWORK_STATUSNET));
if (count($q)) {
$generation = 2;
$network = $q[0]["network"];
$profile_url = $q[0]["url"];
logger("Known contact (generation 2): ".$profile_url, LOGGER_DEBUG);
}
}
poco_check($item["author-link"], $item["author-name"], $network, $item["author-avatar"], "", "", "", "", "", $item["received"], $generation, $item["contact-id"], $item["uid"]);
if ($generation == 3)
logger("Unknown contact (generation 3): ".$item["author-link"], LOGGER_DEBUG);
poco_check($profile_url, $item["author-name"], $network, $item["author-avatar"], "", "", "", "", "", $item["received"], $generation, $item["contact-id"], $item["uid"]);
// Maybe its a body with a shared item? Then extract a global contact from it.
poco_contact_from_body($item["body"], $item["received"], $item["contact-id"], $item["uid"]);


Loading…
Cancel
Save