Browse Source

Improvements to how gcontact entries are updated

pull/2349/head
Michael Vogel 5 years ago
parent
commit
2ca6cdf6b6
7 changed files with 136 additions and 217 deletions
  1. +18
    -8
      boot.php
  2. +2
    -16
      database.sql
  3. +13
    -2
      include/Scrape.php
  4. +33
    -17
      include/dfrn.php
  5. +2
    -11
      include/items.php
  6. +63
    -162
      include/socgraph.php
  7. +5
    -1
      mod/noscrape.php

+ 18
- 8
boot.php View File

@ -1037,19 +1037,29 @@ class App {
$this->performance[$value] += (float)$duration;
$this->performance["marktime"] += (float)$duration;
// Trace the different functions with their timestamps
$trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 5);
$callstack = $this->callstack();
array_shift($trace);
$this->callstack[$value][$callstack] += (float)$duration;
$function = array();
foreach ($trace AS $func)
$function[] = $func["function"];
}
$function = implode(", ", $function);
/**
 * @brief Returns a string with a callstack. Can be used for logging.
 *
 * The result lists the function names of the calling frames, nearest caller
 * first, joined by ", ". The frame of callstack() itself and the frame of the
 * function that asked for the callstack are left out.
 *
 * Declared static because it reads no instance state and is invoked both as
 * $this->callstack() and as App::callstack().
 *
 * @return string
 */
public static function callstack() {
	// Six frames are requested so that up to four remain after the two shifts below.
	$trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 6);

	// We remove the first two items from the list since they contain data that we don't need.
	array_shift($trace);
	array_shift($trace);

	$callstack = array();
	foreach ($trace AS $func)
		$callstack[] = $func["function"];

	return implode(", ", $callstack);
}
function mark_timestamp($mark) {


+ 2
- 16
database.sql View File

@ -1,6 +1,6 @@
-- ------------------------------------------
-- Friendica 3.5-dev (Asparagus)
-- DB_UPDATE_VERSION 1193
-- DB_UPDATE_VERSION 1194
-- ------------------------------------------
@ -119,6 +119,7 @@ CREATE TABLE IF NOT EXISTS `contact` (
`keywords` text NOT NULL,
`gender` varchar(32) NOT NULL DEFAULT '',
`attag` varchar(255) NOT NULL DEFAULT '',
`avatar` varchar(255) NOT NULL DEFAULT '',
`photo` text NOT NULL,
`thumb` text NOT NULL,
`micro` text NOT NULL,
@ -411,21 +412,6 @@ CREATE TABLE IF NOT EXISTS `gserver` (
INDEX `nurl` (`nurl`)
) DEFAULT CHARSET=utf8;
--
-- TABLE guid
--
-- NOTE(review): presumably relates an item GUID to its permalink and URI per network; confirm against the code that fills this table
CREATE TABLE IF NOT EXISTS `guid` (
-- Auto-incrementing surrogate key
`id` int(10) unsigned NOT NULL auto_increment,
`guid` varchar(255) NOT NULL DEFAULT '',
`plink` varchar(255) NOT NULL DEFAULT '',
`uri` varchar(255) NOT NULL DEFAULT '',
`network` varchar(32) NOT NULL DEFAULT '',
PRIMARY KEY(`id`),
-- One secondary index each on guid, plink and uri
INDEX `guid` (`guid`),
INDEX `plink` (`plink`),
INDEX `uri` (`uri`)
) DEFAULT CHARSET=utf8;
--
-- TABLE hook
--


+ 13
- 2
include/Scrape.php View File

@ -12,6 +12,18 @@ function scrape_dfrn($url, $dont_probe = false) {
logger('scrape_dfrn: url=' . $url);
// Try to fetch the data from noscrape. This is faster than parsing the HTML
$noscrape = str_replace("/hcard/", "/noscrape/", $url);
$noscrapejson = fetch_url($noscrape);
$noscrapedata = array();
if ($noscrapejson) {
$noscrapedata = json_decode($noscrapejson, true);
if (is_array($noscrapedata))
if ($noscrapedata["nick"] != "")
return($noscrapedata);
}
$s = fetch_url($url);
if(! $s)
@ -91,8 +103,7 @@ function scrape_dfrn($url, $dont_probe = false) {
}
}
}
return $ret;
return array_merge($ret, $noscrapedata);
}}


+ 33
- 17
include/dfrn.php View File

@ -1115,13 +1115,13 @@ class dfrn {
*
* @return Returns an array with relevant data of the author
*/
private function fetchauthor($xpath, $context, $importer, $element, $onlyfetch) {
private function fetchauthor($xpath, $context, $importer, $element, $onlyfetch, $xml = "") {
$author = array();
$author["name"] = $xpath->evaluate($element."/atom:name/text()", $context)->item(0)->nodeValue;
$author["link"] = $xpath->evaluate($element."/atom:uri/text()", $context)->item(0)->nodeValue;
$r = q("SELECT `id`, `uid`, `network`, `avatar-date`, `name-date`, `uri-date`, `addr`,
$r = q("SELECT `id`, `uid`, `url`, `network`, `avatar-date`, `name-date`, `uri-date`, `addr`,
`name`, `nick`, `about`, `location`, `keywords`, `bdyear`, `bd`
FROM `contact` WHERE `uid` = %d AND `nurl` = '%s' AND `network` != '%s'",
intval($importer["uid"]), dbesc(normalise_link($author["link"])), dbesc(NETWORK_STATUSNET));
@ -1130,6 +1130,9 @@ class dfrn {
$author["contact-id"] = $r[0]["id"];
$author["network"] = $r[0]["network"];
} else {
if (!$onlyfetch)
logger("Contact ".$author["link"]." wasn't found for user ".$importer["uid"]." XML: ".$xml, LOGGER_DEBUG);
$author["contact-id"] = $importer["id"];
$author["network"] = $importer["network"];
$onlyfetch = true;
@ -1159,38 +1162,41 @@ class dfrn {
}
if ($r AND !$onlyfetch) {
logger("Check if contact details for contact ".$r[0]["id"]." (".$r[0]["nick"].") have to be updated.", LOGGER_DEBUG);
$poco = array("url" => $contact["url"]);
// When was the last change to name or uri?
$name_element = $xpath->query($element."/atom:name", $context)->item(0);
foreach($name_element->attributes AS $attributes)
if ($attributes->name == "updated")
$contact["name-date"] = $attributes->textContent;
$poco["name-date"] = $attributes->textContent;
$link_element = $xpath->query($element."/atom:link", $context)->item(0);
foreach($link_element->attributes AS $attributes)
if ($attributes->name == "updated")
$contact["uri-date"] = $attributes->textContent;
$poco["uri-date"] = $attributes->textContent;
// Update contact data
$value = $xpath->evaluate($element."/dfrn:handle/text()", $context)->item(0)->nodeValue;
if ($value != "")
$contact["addr"] = $value;
$poco["addr"] = $value;
$value = $xpath->evaluate($element."/poco:displayName/text()", $context)->item(0)->nodeValue;
if ($value != "")
$contact["name"] = $value;
$poco["name"] = $value;
$value = $xpath->evaluate($element."/poco:preferredUsername/text()", $context)->item(0)->nodeValue;
if ($value != "")
$contact["nick"] = $value;
$poco["nick"] = $value;
$value = $xpath->evaluate($element."/poco:note/text()", $context)->item(0)->nodeValue;
if ($value != "")
$contact["about"] = $value;
$poco["about"] = $value;
$value = $xpath->evaluate($element."/poco:address/poco:formatted/text()", $context)->item(0)->nodeValue;
if ($value != "")
$contact["location"] = $value;
$poco["location"] = $value;
/// @todo Add support for the following fields that we don't support by now in the contact table:
/// - poco:utcOffset
@ -1207,7 +1213,7 @@ class dfrn {
$tags[$tag->nodeValue] = $tag->nodeValue;
if (count($tags))
$contact["keywords"] = implode(", ", $tags);
$poco["keywords"] = implode(", ", $tags);
// "dfrn:birthday" contains the birthday converted to UTC
$old_bdyear = $contact["bdyear"];
@ -1217,7 +1223,7 @@ class dfrn {
if (strtotime($birthday) > time()) {
$bd_timestamp = strtotime($birthday);
$contact["bdyear"] = date("Y", $bd_timestamp);
$poco["bdyear"] = date("Y", $bd_timestamp);
}
// "poco:birthday" is the birthday in the format "yyyy-mm-dd"
@ -1232,9 +1238,11 @@ class dfrn {
$bdyear = $bdyear + 1;
}
$contact["bd"] = $value;
$poco["bd"] = $value;
}
$contact = array_merge($contact, $poco);
if ($old_bdyear != $contact["bdyear"])
self::birthday_event($contact, $birthday);
@ -1245,6 +1253,7 @@ class dfrn {
unset($fields["id"]);
unset($fields["uid"]);
unset($fields["url"]);
unset($fields["avatar-date"]);
unset($fields["name-date"]);
unset($fields["uri-date"]);
@ -1264,7 +1273,7 @@ class dfrn {
}
if ($update) {
logger("Update contact data for contact ".$contact["id"], LOGGER_DEBUG);
logger("Update contact data for contact ".$contact["id"]." (".$contact["nick"].")", LOGGER_DEBUG);
q("UPDATE `contact` SET `name` = '%s', `nick` = '%s', `about` = '%s', `location` = '%s',
`addr` = '%s', `keywords` = '%s', `bdyear` = '%s', `bd` = '%s',
@ -1283,9 +1292,10 @@ class dfrn {
// It is used in the socgraph.php to prevent that old contact data
// that was relayed over several servers can overwrite contact
// data that we received directly.
$contact["generation"] = 2;
$contact["photo"] = $author["avatar"];
update_gcontact($contact);
$poco["generation"] = 2;
$poco["photo"] = $author["avatar"];
update_gcontact($poco);
}
return($author);
@ -2369,8 +2379,14 @@ class dfrn {
$header["contact-id"] = $importer["id"];
// Update the contact table if the data has changed
// The "atom:author" is only present in feeds
if ($xpath->query("/atom:feed/atom:author")->length > 0)
self::fetchauthor($xpath, $doc->firstChild, $importer, "atom:author", false, $xml);
// Only the "dfrn:owner" in the head section contains all data
self::fetchauthor($xpath, $doc->firstChild, $importer, "dfrn:owner", false);
if ($xpath->query("/atom:feed/dfrn:owner")->length > 0)
self::fetchauthor($xpath, $doc->firstChild, $importer, "dfrn:owner", false, $xml);
logger("Import DFRN message for user ".$importer["uid"]." from contact ".$importer["id"], LOGGER_DEBUG);


+ 2
- 11
include/items.php View File

@ -500,14 +500,8 @@ function item_store($arr,$force_parent = false, $notify = false, $dontcache = fa
$arr['file'] = ((x($arr,'file')) ? trim($arr['file']) : '');
if (($arr['author-link'] == "") AND ($arr['owner-link'] == "")) {
$trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 5);
foreach ($trace AS $func)
$function[] = $func["function"];
$function = implode(", ", $function);
logger("Both author-link and owner-link are empty. Called by: ".$function, LOGGER_DEBUG);
}
if (($arr['author-link'] == "") AND ($arr['owner-link'] == ""))
logger("Both author-link and owner-link are empty. Called by: ".App::callstack(), LOGGER_DEBUG);
if ($arr['plink'] == "") {
$a = get_app();
@ -888,9 +882,6 @@ function item_store($arr,$force_parent = false, $notify = false, $dontcache = fa
logger('item_store: new item not found in DB, id ' . $current_post);
}
// Add every contact of the post to the global contact table
poco_store($arr);
create_tags_from_item($current_post);
create_files_from_item($current_post);


+ 63
- 162
include/socgraph.php View File

@ -10,6 +10,7 @@
require_once('include/datetime.php');
require_once("include/Scrape.php");
require_once("include/html2bbcode.php");
require_once("include/Contact.php");
/*
@ -428,7 +429,7 @@ function poco_last_updated($profile, $force = false) {
if (($gcontacts[0]["server_url"] != "") AND ($gcontacts[0]["nick"] != "")) {
// Use noscrape if possible
$server = q("SELECT `noscrape` FROM `gserver` WHERE `nurl` = '%s' AND `noscrape` != ''", dbesc(normalise_link($gcontacts[0]["server_url"])));
$server = q("SELECT `noscrape`, `network` FROM `gserver` WHERE `nurl` = '%s' AND `noscrape` != ''", dbesc(normalise_link($gcontacts[0]["server_url"])));
if ($server) {
$noscraperet = z_fetch_url($server[0]["noscrape"]."/".$gcontacts[0]["nick"]);
@ -437,67 +438,42 @@ function poco_last_updated($profile, $force = false) {
$noscrape = json_decode($noscraperet["body"], true);
if (($noscrape["fn"] != "") AND ($noscrape["fn"] != $gcontacts[0]["name"]))
q("UPDATE `gcontact` SET `name` = '%s' WHERE `nurl` = '%s'",
dbesc($noscrape["fn"]), dbesc(normalise_link($profile)));
$contact = array("url" => $profile,
"network" => $server[0]["network"],
"generation" => $gcontacts[0]["generation"]);
if (($noscrape["photo"] != "") AND ($noscrape["photo"] != $gcontacts[0]["photo"]))
q("UPDATE `gcontact` SET `photo` = '%s' WHERE `nurl` = '%s'",
dbesc($noscrape["photo"]), dbesc(normalise_link($profile)));
$contact["name"] = $noscrape["fn"];
$contact["community"] = $noscrape["comm"];
if (($noscrape["updated"] != "") AND ($noscrape["updated"] != $gcontacts[0]["updated"]))
q("UPDATE `gcontact` SET `updated` = '%s' WHERE `nurl` = '%s'",
dbesc($noscrape["updated"]), dbesc(normalise_link($profile)));
if (($noscrape["gender"] != "") AND ($noscrape["gender"] != $gcontacts[0]["gender"]))
q("UPDATE `gcontact` SET `gender` = '%s' WHERE `nurl` = '%s'",
dbesc($noscrape["gender"]), dbesc(normalise_link($profile)));
if (($noscrape["pdesc"] != "") AND ($noscrape["pdesc"] != $gcontacts[0]["about"]))
q("UPDATE `gcontact` SET `about` = '%s' WHERE `nurl` = '%s'",
dbesc($noscrape["pdesc"]), dbesc(normalise_link($profile)));
if (($noscrape["about"] != "") AND ($noscrape["about"] != $gcontacts[0]["about"]))
q("UPDATE `gcontact` SET `about` = '%s' WHERE `nurl` = '%s'",
dbesc($noscrape["about"]), dbesc(normalise_link($profile)));
if (isset($noscrape["comm"]) AND ($noscrape["comm"] != $gcontacts[0]["community"]))
q("UPDATE `gcontact` SET `community` = %d WHERE `nurl` = '%s'",
intval($noscrape["comm"]), dbesc(normalise_link($profile)));
if (isset($noscrape["tags"]))
if (isset($noscrape["tags"])) {
$keywords = implode(" ", $noscrape["tags"]);
else
$keywords = "";
if (($keywords != "") AND ($keywords != $gcontacts[0]["keywords"]))
q("UPDATE `gcontact` SET `keywords` = '%s' WHERE `nurl` = '%s'",
dbesc($keywords), dbesc(normalise_link($profile)));
$location = $noscrape["locality"];
if ($noscrape["region"] != "") {
if ($location != "")
$location .= ", ";
$location .= $noscrape["region"];
}
if ($noscrape["country-name"] != "") {
if ($location != "")
$location .= ", ";
$location .= $noscrape["country-name"];
if ($keywords != "")
$contact["keywords"] = $keywords;
}
if (($location != "") AND ($location != $gcontacts[0]["location"]))
q("UPDATE `gcontact` SET `location` = '%s' WHERE `nurl` = '%s'",
dbesc($location), dbesc(normalise_link($profile)));
// If we got data from noscrape then mark the contact as reachable
if (is_array($noscrape) AND count($noscrape))
q("UPDATE `gcontact` SET `last_contact` = '%s' WHERE `nurl` = '%s'",
dbesc(datetime_convert()), dbesc(normalise_link($profile)));
$location = formatted_location($noscrape);
if ($location)
$contact["location"] = $location;
$contact["notify"] = $noscrape["dfrn-notify"];
// Remove all fields that are not present in the gcontact table
unset($noscrape["fn"]);
unset($noscrape["key"]);
unset($noscrape["homepage"]);
unset($noscrape["comm"]);
unset($noscrape["tags"]);
unset($noscrape["locality"]);
unset($noscrape["region"]);
unset($noscrape["country-name"]);
unset($noscrape["contacts"]);
unset($noscrape["dfrn-request"]);
unset($noscrape["dfrn-confirm"]);
unset($noscrape["dfrn-notify"]);
unset($noscrape["dfrn-poll"]);
$contact = array_merge($contact, $noscrape);
update_gcontact($contact);
return $noscrape["updated"];
}
@ -534,25 +510,22 @@ function poco_last_updated($profile, $force = false) {
return false;
}
if (($data["name"] != "") AND ($data["name"] != $gcontacts[0]["name"]))
q("UPDATE `gcontact` SET `name` = '%s' WHERE `nurl` = '%s'",
dbesc($data["name"]), dbesc(normalise_link($profile)));
$contact = array("generation" => $gcontacts[0]["generation"]);
if (($data["nick"] != "") AND ($data["nick"] != $gcontacts[0]["nick"]))
q("UPDATE `gcontact` SET `nick` = '%s' WHERE `nurl` = '%s'",
dbesc($data["nick"]), dbesc(normalise_link($profile)));
$contact = array_merge($contact, $data);
if (($data["addr"] != "") AND ($data["addr"] != $gcontacts[0]["connect"]))
q("UPDATE `gcontact` SET `connect` = '%s' WHERE `nurl` = '%s'",
dbesc($data["addr"]), dbesc(normalise_link($profile)));
$contact["server_url"] = $data["baseurl"];
if (($data["photo"] != "") AND ($data["photo"] != $gcontacts[0]["photo"]))
q("UPDATE `gcontact` SET `photo` = '%s' WHERE `nurl` = '%s'",
dbesc($data["photo"]), dbesc(normalise_link($profile)));
unset($contact["batch"]);
unset($contact["poll"]);
unset($contact["request"]);
unset($contact["confirm"]);
unset($contact["poco"]);
unset($contact["priority"]);
unset($contact["pubkey"]);
unset($contact["baseurl"]);
if (($data["baseurl"] != "") AND ($data["baseurl"] != $gcontacts[0]["server_url"]))
q("UPDATE `gcontact` SET `server_url` = '%s' WHERE `nurl` = '%s'",
dbesc($data["baseurl"]), dbesc(normalise_link($profile)));
update_gcontact($contact);
$feedret = z_fetch_url($data["poll"]);
@ -921,88 +894,6 @@ function poco_check_server($server_url, $network = "", $force = false) {
return !$failure;
}
/**
 * @brief Hands every [share] block found in a body to sub_poco_from_share()
 *
 * The body itself is never modified; it is only scanned. Each match is passed
 * on in the same shape a regex callback would receive it: index 0 holds the
 * whole [share]...[/share] block, index 1 the attribute string of the opening tag.
 *
 * @param string $body    Text that may contain [share] blocks
 * @param mixed  $created Passed through unchanged to sub_poco_from_share()
 * @param mixed  $cid     Passed through unchanged to sub_poco_from_share()
 * @param mixed  $uid     Passed through unchanged to sub_poco_from_share()
 */
function poco_contact_from_body($body, $created, $cid, $uid) {
	$found = preg_match_all("/\[share(.*?)\].*?\[\/share\]/ism", $body, $shares, PREG_SET_ORDER);

	// No share block (or a regex failure): nothing to hand over.
	if (!$found)
		return;

	foreach ($shares AS $share)
		sub_poco_from_share($share, $created, $cid, $uid);
}
/**
 * @brief Extracts the profile URL from a [share] tag and passes it to poco_check()
 *
 * The attribute string is searched for profile='...' and profile="...".
 * When both forms are present and non-empty, the double-quoted one is used.
 * Nothing happens when no non-empty profile attribute is found.
 *
 * @param array $share   Regex match of a [share] block; index 1 holds the attributes of the opening tag
 * @param mixed $created Passed through unchanged to poco_check()
 * @param mixed $cid     Passed through unchanged to poco_check()
 * @param mixed $uid     Passed through unchanged to poco_check()
 */
function sub_poco_from_share($share, $created, $cid, $uid) {
	$profile = "";

	// The double-quoted form is tried last so that it wins when both are present.
	$patterns = array("/profile='(.*?)'/ism", '/profile="(.*?)"/ism');

	foreach ($patterns AS $pattern) {
		// Only read the capture group when the pattern really matched;
		// otherwise $matches is empty and index 1 does not exist.
		if (preg_match($pattern, $share[1], $matches) AND ($matches[1] != ""))
			$profile = $matches[1];
	}

	if ($profile == "")
		return;

	logger("prepare poco_check for profile ".$profile, LOGGER_DEBUG);

	// The literal 3 is the generation argument (see poco_store(): "We don't know this contact directly").
	poco_check($profile, "", "", "", "", "", "", "", "", $created, 3, $cid, $uid);
}
/**
 * @brief Adds the author of a public item to the global contact table
 *
 * The author is only stored when the item is not private, comes from one of
 * the accepted networks (or has no network set) and is either a global copy
 * (uid 0) or a comment on a global copy. The author is classified into a
 * "generation" before poco_check() is called:
 *  - 1: the author is a user of this server
 *  - 2: the author is a contact of a user of this server
 *  - 3: the author is not known directly (maybe a reshared item)
 *
 * Afterwards the body is scanned for shared items so that their authors can
 * be stored as well.
 *
 * @param array $item Item data; reads "private", "network", "uid", "uri", "parent-uri",
 *                    "author-link", "author-name", "author-avatar", "received",
 *                    "contact-id" and "body"
 */
function poco_store($item) {
	// Isn't it public?
	if ($item['private'])
		return;

	// Or is it from a network where we don't store the global contacts?
	if (!in_array($item["network"], array(NETWORK_DFRN, NETWORK_DIASPORA, NETWORK_OSTATUS, NETWORK_STATUSNET, "")))
		return;

	// Is it a global copy?
	$store_gcontact = ($item["uid"] == 0);

	// Is it a comment on a global copy?
	if (!$store_gcontact AND ($item["uri"] != $item["parent-uri"])) {
		// The parent URI comes from the item data and has to be escaped like every other query value.
		$q = q("SELECT `id` FROM `item` WHERE `uri`='%s' AND `uid` = 0", dbesc($item["parent-uri"]));

		// NOTE(review): guards against q() handing back a non-array (e.g. on a failed query) - confirm against q()
		$store_gcontact = (is_array($q) AND (count($q) > 0));
	}

	if (!$store_gcontact)
		return;

	// "3" means: We don't know this contact directly (Maybe a reshared item)
	$generation = 3;
	$network = "";
	$profile_url = $item["author-link"];

	// Is it a user from our server?
	$q = q("SELECT `id` FROM `contact` WHERE `self` AND `nurl` = '%s' LIMIT 1",
		dbesc(normalise_link($item["author-link"])));

	if (is_array($q) AND count($q)) {
		logger("Our user (generation 1): ".$item["author-link"], LOGGER_DEBUG);
		$generation = 1;
		$network = NETWORK_DFRN;
	} else { // Is it a contact from a user on our server?
		$q = q("SELECT `network`, `url` FROM `contact` WHERE `uid` != 0 AND `network` != ''
			AND (`nurl` = '%s' OR `alias` IN ('%s', '%s')) AND `network` != '%s' LIMIT 1",
			dbesc(normalise_link($item["author-link"])),
			dbesc(normalise_link($item["author-link"])),
			dbesc($item["author-link"]),
			dbesc(NETWORK_STATUSNET));

		if (is_array($q) AND count($q)) {
			$generation = 2;
			$network = $q[0]["network"];
			$profile_url = $q[0]["url"];
			logger("Known contact (generation 2): ".$profile_url, LOGGER_DEBUG);
		}
	}

	if ($generation == 3)
		logger("Unknown contact (generation 3): ".$item["author-link"], LOGGER_DEBUG);

	poco_check($profile_url, $item["author-name"], $network, $item["author-avatar"], "", "", "", "", "", $item["received"], $generation, $item["contact-id"], $item["uid"]);

	// Maybe its a body with a shared item? Then extract a global contact from it.
	poco_contact_from_body($item["body"], $item["received"], $item["contact-id"], $item["uid"]);
}
function count_common_friends($uid,$cid) {
$r = q("SELECT count(*) as `total`
@ -1533,7 +1424,7 @@ function update_gcontact($contact) {
// assign all unassigned fields from the database entry
foreach ($fields AS $field => $data)
if (!isset($contact[$field]))
if (!isset($contact[$field]) OR ($contact[$field] == ""))
$contact[$field] = $r[0][$field];
if ($contact["network"] == NETWORK_STATUSNET)
@ -1546,14 +1437,22 @@ function update_gcontact($contact) {
$update = false;
unset($fields["generation"]);
foreach ($fields AS $field => $data)
if ($contact[$field] != $r[0][$field])
$update = true;
if ((($contact["generation"] > 0) AND ($contact["generation"] <= $r[0]["generation"])) OR ($r[0]["generation"] == 0)) {
foreach ($fields AS $field => $data)
if ($contact[$field] != $r[0][$field]) {
logger("Difference for contact ".$contact["url"]." in field '".$field."'. New value: '".$contact[$field]."', old value '".$r[0][$field]."'", LOGGER_DEBUG);
$update = true;
}
if ($contact["generation"] < $r[0]["generation"])
$update = true;
if ($contact["generation"] < $r[0]["generation"]) {
logger("Difference for contact ".$contact["url"]." in field 'generation'. new value: '".$contact["generation"]."', old value '".$r[0]["generation"]."'", LOGGER_DEBUG);
$update = true;
}
}
if ($update) {
logger("Update gcontact for ".$contact["url"]." Callstack: ".App::callstack(), LOGGER_DEBUG);
q("UPDATE `gcontact` SET `photo` = '%s', `name` = '%s', `nick` = '%s', `addr` = '%s', `network` = '%s',
`birthday` = '%s', `gender` = '%s', `keywords` = '%s', `hide` = %d, `nsfw` = %d,
`alias` = '%s', `notify` = '%s', `url` = '%s',
@ -1581,8 +1480,10 @@ function update_gcontact($contact) {
/**
 * @brief Probes a profile URL and writes the result to the gcontact table
 *
 * The probe result is passed to update_gcontact() exactly once. Results whose
 * network is NETWORK_PHANTOM are not written.
 *
 * @param string $url Profile URL that is handed to probe_url()
 */
function update_gcontact_from_probe($url) {
	$data = probe_url($url);

	// Skip results that the probe flagged as NETWORK_PHANTOM.
	if ($data["network"] == NETWORK_PHANTOM)
		return;

	update_gcontact($data);
}
/**


+ 5
- 1
mod/noscrape.php View File

@ -22,13 +22,17 @@ function noscrape_init(&$a) {
$keywords = str_replace(array('#',',',' ',',,'),array('',' ',',',','),$keywords);
$keywords = explode(',', $keywords);
$r = q("SELECT `photo` FROM `contact` WHERE `self` AND `uid` = %d",
intval($a->profile['uid']));
$json_info = array(
'fn' => $a->profile['name'],
'addr' => $a->profile['addr'],
'nick' => $a->user['nickname'],
'key' => $a->profile['pubkey'],
'homepage' => $a->get_baseurl()."/profile/{$which}",
'comm' => (x($a->profile,'page-flags')) && ($a->profile['page-flags'] == PAGE_COMMUNITY),
'photo' => $a->profile['photo'],
'photo' => $r[0]["photo"],
'tags' => $keywords
);


Loading…
Cancel
Save