Merge pull request #8983 from annando/probe-lock

Reworked "getIdForURL"
This commit is contained in:
Hypolite Petovan 2020-08-07 14:38:08 -04:00 committed by GitHub
commit f810733478
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 97 additions and 206 deletions

View file

@ -88,7 +88,7 @@ function match_content(App $a)
$profile = $msearch->results[$i]; $profile = $msearch->results[$i];
// Already known contact // Already known contact
if (!$profile || Contact::getIdForURL($profile->url, local_user(), false)) { if (!$profile || Contact::getIdForURL($profile->url, local_user())) {
continue; continue;
} }

View file

@ -330,7 +330,7 @@ class Contact
*/ */
public static function isFollowerByURL($url, $uid) public static function isFollowerByURL($url, $uid)
{ {
$cid = self::getIdForURL($url, $uid, false); $cid = self::getIdForURL($url, $uid);
if (empty($cid)) { if (empty($cid)) {
return false; return false;
@ -376,7 +376,7 @@ class Contact
*/ */
public static function isSharingByURL($url, $uid) public static function isSharingByURL($url, $uid)
{ {
$cid = self::getIdForURL($url, $uid, false); $cid = self::getIdForURL($url, $uid);
if (empty($cid)) { if (empty($cid)) {
return false; return false;
@ -471,7 +471,7 @@ class Contact
if (!DBA::isResult($self)) { if (!DBA::isResult($self)) {
return false; return false;
} }
return self::getIdForURL($self['url'], 0, false); return self::getIdForURL($self['url']);
} }
/** /**
@ -508,7 +508,7 @@ class Contact
$ucid = $contact['id']; $ucid = $contact['id'];
} else { } else {
$pcid = $contact['id']; $pcid = $contact['id'];
$ucid = Contact::getIdForURL($contact['url'], $uid, false); $ucid = Contact::getIdForURL($contact['url'], $uid);
} }
return ['public' => $pcid, 'user' => $ucid]; return ['public' => $pcid, 'user' => $ucid];
@ -993,86 +993,6 @@ class Contact
return $menucondensed; return $menucondensed;
} }
/**
 * Have a look at the local contact tables for a given profile url.
 * This function works as a replacement for probing the contact.
 *
 * Lookup order:
 *  1. the "contact" row with the given contact id (when one is provided),
 *  2. a "contact" row whose normalised url matches,
 *  3. a "contact" row whose alias matches the url, its normalised form or its https form,
 *  4. the result of ActivityPub::probeProfile($url, false),
 *  5. an "fcontact" row with exactly this url.
 *
 * @param string  $url Contact URL
 * @param integer $cid Contact ID
 *
 * @return array Contact array in the "probe" structure, empty array when nothing was found
 */
private static function getProbeDataFromDatabase($url, $cid = null)
{
	$fields = ['id', 'uid', 'url', 'addr', 'alias', 'notify', 'poll', 'name', 'nick',
		'photo', 'keywords', 'location', 'about', 'network',
		'priority', 'batch', 'request', 'confirm', 'poco'];

	if (!empty($cid)) {
		$data = DBA::selectFirst('contact', $fields, ['id' => $cid]);
		if (DBA::isResult($data)) {
			return $data;
		}
	}

	$nurl = Strings::normaliseLink($url);

	// The link could be provided as http although we stored it as https
	$ssl_url = str_replace('http://', 'https://', $url);

	$data = DBA::selectFirst('contact', $fields, ['nurl' => $nurl]);
	if (!DBA::isResult($data)) {
		$data = DBA::selectFirst('contact', $fields, ['alias' => [$url, $nurl, $ssl_url]]);
	}

	if (DBA::isResult($data)) {
		// For security reasons we don't fetch key data from our users
		$data['pubkey'] = '';
		return $data;
	}

	// At this point the "contact" table has no row for this url or alias.
	// Earlier revisions repeated the very same alias query on "contact" twice
	// more (with a shorter field list); those lookups could never match after
	// the one above came back empty, so they are not repeated here.

	// NOTE(review): the second parameter presumably prevents a remote fetch - confirm against ActivityPub::probeProfile
	$data = ActivityPub::probeProfile($url, false);
	if (!empty($data)) {
		return $data;
	}

	$fields = ['url', 'addr', 'alias', 'notify', 'poll', 'name', 'nick',
		'photo', 'network', 'priority', 'batch', 'request', 'confirm'];

	$data = DBA::selectFirst('fcontact', $fields, ['url' => $url]);
	if (DBA::isResult($data)) {
		// These keys are not part of the field list queried above,
		// so they are added empty to complete the "probe" structure
		$data['pubkey']   = '';
		$data['keywords'] = '';
		$data['location'] = '';
		$data['about']    = '';
		$data['poco']     = '';
		return $data;
	}

	return [];
}
/** /**
* Fetch the contact id for a given URL and user * Fetch the contact id for a given URL and user
* *
@ -1093,8 +1013,8 @@ class Contact
* *
* @param string $url Contact URL * @param string $url Contact URL
* @param integer $uid The user id for the contact (0 = public contact) * @param integer $uid The user id for the contact (0 = public contact)
* @param boolean $update true = always update, false = never update, null = update when not found or outdated * @param boolean $update true = always update, false = never update, null = update when not found
* @param array $default Default value for creating the contact when every else fails * @param array $default Default value for creating the contact when everything else fails
* *
* @return integer Contact ID * @return integer Contact ID
* @throws HTTPException\InternalServerErrorException * @throws HTTPException\InternalServerErrorException
@ -1102,78 +1022,72 @@ class Contact
*/ */
public static function getIdForURL($url, $uid = 0, $update = null, $default = []) public static function getIdForURL($url, $uid = 0, $update = null, $default = [])
{ {
Logger::info('Get contact data', ['url' => $url, 'user' => $uid]);
$contact_id = 0; $contact_id = 0;
if ($url == '') { if ($url == '') {
Logger::notice('Empty url, quitting', ['url' => $url, 'user' => $uid, 'default' => $default]);
return 0; return 0;
} }
$contact = self::getByURL($url, false, ['id', 'avatar', 'updated', 'network'], $uid); $contact = self::getByURL($url, false, ['id', 'network'], $uid);
if (!empty($contact)) { if (!empty($contact)) {
$contact_id = $contact["id"]; $contact_id = $contact["id"];
if (empty($default) && in_array($contact['network'], [Protocol::MAIL, Protocol::PHANTOM]) && ($uid == 0)) {
// Update public mail accounts via their user's accounts
$fields = ['network', 'addr', 'name', 'nick', 'avatar', 'photo', 'thumb', 'micro'];
$mailcontact = DBA::selectFirst('contact', $fields, ["`addr` = ? AND `network` = ? AND `uid` != 0", $url, Protocol::MAIL]);
if (!DBA::isResult($mailcontact)) {
$mailcontact = DBA::selectFirst('contact', $fields, ["`nurl` = ? AND `network` = ? AND `uid` != 0", $url, Protocol::MAIL]);
}
if (DBA::isResult($mailcontact)) {
DBA::update('contact', $mailcontact, ['id' => $contact_id]);
}
}
if (empty($update)) { if (empty($update)) {
Logger::debug('Contact found', ['url' => $url, 'uid' => $uid, 'update' => $update, 'cid' => $contact_id]);
return $contact_id; return $contact_id;
} }
} elseif ($uid != 0) { } elseif ($uid != 0) {
// Non-existing user-specific contact, exiting Logger::debug('Contact does not exist for the user', ['url' => $url, 'uid' => $uid, 'update' => $update]);
return 0;
} elseif (empty($default) && !is_null($update) && !$update) {
Logger::info('Contact not found, update not desired', ['url' => $url, 'uid' => $uid, 'update' => $update]);
return 0; return 0;
} }
if (!$update && empty($default)) {
// When we don't want to update, we look if we know this contact in any way
$data = self::getProbeDataFromDatabase($url, $contact_id);
$background_update = true;
} elseif (!$update && !empty($default['network'])) {
// If there are default values, take these
$data = $default;
$background_update = false;
} else {
$data = []; $data = [];
$background_update = false;
}
if ((empty($data) && is_null($update)) || $update) { if (empty($default['network']) || $update) {
$data = Probe::uri($url, "", $uid); $data = Probe::uri($url, "", $uid);
$probed = !empty($data['network']) && ($data['network'] != Protocol::PHANTOM);
} else {
$probed = false;
}
// Take the default values when probing failed // Take the default values when probing failed
if (!empty($default) && (empty($data['network']) || !in_array($data["network"], array_merge(Protocol::NATIVE_SUPPORT, [Protocol::PUMPIO])))) { if (!empty($default) && !in_array($data["network"], array_merge(Protocol::NATIVE_SUPPORT, [Protocol::PUMPIO]))) {
$data = array_merge($data, $default); $data = array_merge($data, $default);
} }
} elseif (!empty($default['network'])) {
$data = $default;
}
if (($uid == 0) && (empty($data['network']) || ($data['network'] == Protocol::PHANTOM))) {
// Fetch data for the public contact via the first found personal contact
/// @todo Check if this case can happen at all (possibly with mail accounts?)
$fields = ['name', 'nick', 'url', 'addr', 'alias', 'avatar', 'contact-type',
'keywords', 'location', 'about', 'unsearchable', 'batch', 'notify', 'poll',
'request', 'confirm', 'poco', 'subscribe', 'network', 'baseurl', 'gsid'];
$personal_contact = DBA::selectFirst('contact', $fields, ["`addr` = ? AND `uid` != 0", $url]);
if (!DBA::isResult($personal_contact)) {
$personal_contact = DBA::selectFirst('contact', $fields, ["`nurl` = ? AND `uid` != 0", Strings::normaliseLink($url)]);
}
if (DBA::isResult($personal_contact)) {
Logger::info('Take contact data from personal contact', ['url' => $url, 'update' => $update, 'contact' => $personal_contact, 'callstack' => System::callstack(20)]);
$data = $personal_contact;
$data['photo'] = $personal_contact['avatar'];
$data['account-type'] = $personal_contact['contact-type'];
$data['hide'] = $personal_contact['unsearchable'];
unset($data['avatar']);
unset($data['contact-type']);
unset($data['unsearchable']);
}
}
if (empty($data['network']) || ($data['network'] == Protocol::PHANTOM)) { if (empty($data['network']) || ($data['network'] == Protocol::PHANTOM)) {
Logger::info('No valid network found', ['url' => $url, 'data' => $data, 'callstack' => System::callstack(20)]); Logger::notice('No valid network found', ['url' => $url, 'uid' => $uid, 'default' => $default, 'update' => $update, 'callstack' => System::callstack(20)]);
return 0; return 0;
} }
if (!empty($data['baseurl'])) {
$data['baseurl'] = GServer::cleanURL($data['baseurl']);
}
if (!empty($data['baseurl']) && empty($data['gsid'])) {
$data['gsid'] = GServer::getID($data['baseurl']);
}
if (!$contact_id) { if (!$contact_id) {
$urls = [Strings::normaliseLink($url), Strings::normaliseLink($data['url'])]; $urls = [Strings::normaliseLink($url), Strings::normaliseLink($data['url'])];
if (!empty($data['alias'])) { if (!empty($data['alias'])) {
@ -1187,74 +1101,44 @@ class Contact
} }
if (!$contact_id) { if (!$contact_id) {
// We only insert the basic data. The rest will be done in "updateFromProbeArray"
$fields = [ $fields = [
'uid' => $uid, 'uid' => $uid,
'created' => DateTimeFormat::utcNow(),
'url' => $data['url'], 'url' => $data['url'],
'nurl' => Strings::normaliseLink($data['url']), 'nurl' => Strings::normaliseLink($data['url']),
'addr' => $data['addr'] ?? '',
'alias' => $data['alias'] ?? '',
'notify' => $data['notify'] ?? '',
'poll' => $data['poll'] ?? '',
'name' => $data['name'] ?? '',
'nick' => $data['nick'] ?? '',
'keywords' => $data['keywords'] ?? '',
'location' => $data['location'] ?? '',
'about' => $data['about'] ?? '',
'network' => $data['network'], 'network' => $data['network'],
'pubkey' => $data['pubkey'] ?? '', 'created' => DateTimeFormat::utcNow(),
'rel' => self::SHARING, 'rel' => self::SHARING,
'priority' => $data['priority'] ?? 0,
'batch' => $data['batch'] ?? '',
'request' => $data['request'] ?? '',
'confirm' => $data['confirm'] ?? '',
'poco' => $data['poco'] ?? '',
'baseurl' => $data['baseurl'] ?? '',
'gsid' => $data['gsid'] ?? null,
'name-date' => DateTimeFormat::utcNow(),
'uri-date' => DateTimeFormat::utcNow(),
'avatar-date' => DateTimeFormat::utcNow(),
'writable' => 1, 'writable' => 1,
'blocked' => 0, 'blocked' => 0,
'readonly' => 0, 'readonly' => 0,
'pending' => 0]; 'pending' => 0];
if (($uid == 0) && $probed) {
$fields['last-item'] = Probe::getLastUpdate($data);
Logger::info('Fetched last item', ['url' => $url, 'probed_url' => $data['url'], 'last-item' => $fields['last-item'], 'callstack' => System::callstack(20)]);
}
$condition = ['nurl' => Strings::normaliseLink($data["url"]), 'uid' => $uid, 'deleted' => false]; $condition = ['nurl' => Strings::normaliseLink($data["url"]), 'uid' => $uid, 'deleted' => false];
// Before inserting we do check if the entry does exist now. // Before inserting we do check if the entry does exist now.
DBA::lock('contact');
$contact = DBA::selectFirst('contact', ['id'], $condition, ['order' => ['id']]); $contact = DBA::selectFirst('contact', ['id'], $condition, ['order' => ['id']]);
if (!DBA::isResult($contact)) { if (DBA::isResult($contact)) {
Logger::info('Create new contact', $fields); $contact_id = $contact['id'];
Logger::notice('Contact had been created (shortly) before', ['id' => $contact_id, 'url' => $url, 'uid' => $uid]);
self::insert($fields); } else {
DBA::insert('contact', $fields);
// We intentionally aren't using lastInsertId here. There is a chance for duplicates. $contact_id = DBA::lastInsertId();
$contact = DBA::selectFirst('contact', ['id'], $condition, ['order' => ['id']]); if ($contact_id) {
if (!DBA::isResult($contact)) { Logger::info('Contact inserted', ['id' => $contact_id, 'url' => $url, 'uid' => $uid]);
Logger::info('Contact creation failed', $fields); }
// Shouldn't happen }
DBA::unlock();
if (!$contact_id) {
Logger::info('Contact was not inserted', ['url' => $url, 'uid' => $uid]);
return 0; return 0;
} }
} else { } else {
Logger::info('Contact had been created before', ['id' => $contact["id"], 'url' => $url, 'contact' => $fields]); Logger::info('Contact will be updated', ['url' => $url, 'uid' => $uid, 'update' => $update, 'cid' => $contact_id]);
} }
$contact_id = $contact["id"];
}
if ($background_update && !$probed && in_array($data["network"], array_merge(Protocol::NATIVE_SUPPORT, [Protocol::PUMPIO]))) {
// Update in the background when we fetched the data solely from the database
Worker::add(PRIORITY_MEDIUM, "UpdateContact", $contact_id);
} elseif (!empty($data['network'])) {
self::updateFromProbeArray($contact_id, $data); self::updateFromProbeArray($contact_id, $data);
} else {
Logger::info('Invalid data', ['url' => $url, 'data' => $data]);
}
return $contact_id; return $contact_id;
} }
@ -2565,15 +2449,15 @@ class Contact
return $url ?: $contact_url; // Equivalent to: ($url != '') ? $url : $contact_url; return $url ?: $contact_url; // Equivalent to: ($url != '') ? $url : $contact_url;
} }
$data = self::getProbeDataFromDatabase($contact_url); $contact = self::getByURL($contact_url, false);
if (empty($data)) { if (empty($contact)) {
return $url ?: $contact_url; // Equivalent to: ($url != '') ? $url : $contact_url; return $url ?: $contact_url; // Equivalent to: ($url != '') ? $url : $contact_url;
} }
// Prevents endless loop in case only a non-public contact exists for the contact URL // Prevents endless loop in case only a non-public contact exists for the contact URL
unset($data['uid']); unset($contact['uid']);
return self::magicLinkByContact($data, $url ?: $contact_url); return self::magicLinkByContact($contact, $url ?: $contact_url);
} }
/** /**

View file

@ -3064,7 +3064,7 @@ class Item
if (local_user() == $uid) { if (local_user() == $uid) {
$item_contact_id = $owner_self_contact['id']; $item_contact_id = $owner_self_contact['id'];
} else { } else {
$item_contact_id = Contact::getIdForURL($author_contact['url'], $uid, false); $item_contact_id = Contact::getIdForURL($author_contact['url'], $uid);
$item_contact = DBA::selectFirst('contact', [], ['id' => $item_contact_id]); $item_contact = DBA::selectFirst('contact', [], ['id' => $item_contact_id]);
if (!DBA::isResult($item_contact)) { if (!DBA::isResult($item_contact)) {
Logger::log('like: unknown item contact ' . $item_contact_id); Logger::log('like: unknown item contact ' . $item_contact_id);

View file

@ -185,7 +185,7 @@ class Contact extends BaseModule
} }
// Update the entry in the contact table // Update the entry in the contact table
Model\Contact::updateFromProbe($contact_id, '', true); Model\Contact::updateFromProbe($contact_id);
} }
/** /**

View file

@ -312,6 +312,11 @@ class HTTPRequest implements IHTTPRequest
*/ */
public function finalUrl(string $url, int $depth = 1, bool $fetchbody = false) public function finalUrl(string $url, int $depth = 1, bool $fetchbody = false)
{ {
if (Network::isUrlBlocked($url)) {
$this->logger->info('Domain is blocked.', ['url' => $url]);
return $url;
}
$url = Network::stripTrackingQueryParams($url); $url = Network::stripTrackingQueryParams($url);
if ($depth > 10) { if ($depth > 10) {

View file

@ -226,9 +226,9 @@ class Processor
$item['network'] = Protocol::ACTIVITYPUB; $item['network'] = Protocol::ACTIVITYPUB;
$item['author-link'] = $activity['author']; $item['author-link'] = $activity['author'];
$item['author-id'] = Contact::getIdForURL($activity['author'], 0, false); $item['author-id'] = Contact::getIdForURL($activity['author']);
$item['owner-link'] = $activity['actor']; $item['owner-link'] = $activity['actor'];
$item['owner-id'] = Contact::getIdForURL($activity['actor'], 0, false); $item['owner-id'] = Contact::getIdForURL($activity['actor']);
if (in_array(0, $activity['receiver']) && !empty($activity['unlisted'])) { if (in_array(0, $activity['receiver']) && !empty($activity['unlisted'])) {
$item['private'] = Item::UNLISTED; $item['private'] = Item::UNLISTED;
@ -528,13 +528,13 @@ class Processor
$item['uid'] = $receiver; $item['uid'] = $receiver;
if ($item['isForum'] ?? false) { if ($item['isForum'] ?? false) {
$item['contact-id'] = Contact::getIdForURL($activity['actor'], $receiver, false); $item['contact-id'] = Contact::getIdForURL($activity['actor'], $receiver);
} else { } else {
$item['contact-id'] = Contact::getIdForURL($activity['author'], $receiver, false); $item['contact-id'] = Contact::getIdForURL($activity['author'], $receiver);
} }
if (($receiver != 0) && empty($item['contact-id'])) { if (($receiver != 0) && empty($item['contact-id'])) {
$item['contact-id'] = Contact::getIdForURL($activity['author'], 0, false); $item['contact-id'] = Contact::getIdForURL($activity['author']);
} }
if (!empty($activity['directmessage'])) { if (!empty($activity['directmessage'])) {

View file

@ -150,7 +150,7 @@ class Transmitter
*/ */
public static function getOutbox($owner, $page = null) public static function getOutbox($owner, $page = null)
{ {
$public_contact = Contact::getIdForURL($owner['url'], 0, false); $public_contact = Contact::getIdForURL($owner['url']);
$condition = ['uid' => 0, 'contact-id' => $public_contact, 'author-id' => $public_contact, $condition = ['uid' => 0, 'contact-id' => $public_contact, 'author-id' => $public_contact,
'private' => [Item::PUBLIC, Item::UNLISTED], 'gravity' => [GRAVITY_PARENT, GRAVITY_COMMENT], 'private' => [Item::PUBLIC, Item::UNLISTED], 'gravity' => [GRAVITY_PARENT, GRAVITY_COMMENT],

View file

@ -690,7 +690,7 @@ class Feed
} }
$check_date = empty($last_update) ? '' : DateTimeFormat::utc($last_update); $check_date = empty($last_update) ? '' : DateTimeFormat::utc($last_update);
$authorid = Contact::getIdForURL($owner["url"], 0, false); $authorid = Contact::getIdForURL($owner["url"]);
$condition = ["`uid` = ? AND `received` > ? AND NOT `deleted` AND `gravity` IN (?, ?) $condition = ["`uid` = ? AND `received` > ? AND NOT `deleted` AND `gravity` IN (?, ?)
AND `private` != ? AND `visible` AND `wall` AND `parent-network` IN (?, ?, ?, ?)", AND `private` != ? AND `visible` AND `wall` AND `parent-network` IN (?, ?, ?, ?)",

View file

@ -219,7 +219,7 @@ class OStatus
} }
// Ensure that we are having this contact (with uid=0) // Ensure that we are having this contact (with uid=0)
$cid = Contact::getIdForURL($aliaslink, 0, false); $cid = Contact::getIdForURL($aliaslink);
if ($cid) { if ($cid) {
$fields = ['url', 'nurl', 'name', 'nick', 'alias', 'about', 'location']; $fields = ['url', 'nurl', 'name', 'nick', 'alias', 'about', 'location'];
@ -2122,7 +2122,7 @@ class OStatus
} }
$check_date = DateTimeFormat::utc($last_update); $check_date = DateTimeFormat::utc($last_update);
$authorid = Contact::getIdForURL($owner["url"], 0, false); $authorid = Contact::getIdForURL($owner["url"]);
$condition = ["`uid` = ? AND `received` > ? AND NOT `deleted` $condition = ["`uid` = ? AND `received` > ? AND NOT `deleted`
AND `private` != ? AND `visible` AND `wall` AND `parent-network` IN (?, ?)", AND `private` != ? AND `visible` AND `wall` AND `parent-network` IN (?, ?)",

View file

@ -413,12 +413,15 @@ class HTTPSignature
*/ */
public static function fetchRaw($request, $uid = 0, $binary = false, $opts = []) public static function fetchRaw($request, $uid = 0, $binary = false, $opts = [])
{ {
$headers = [];
if (!empty($uid)) { if (!empty($uid)) {
$owner = User::getOwnerDataById($uid); $owner = User::getOwnerDataById($uid);
if (!$owner) { if (!$owner) {
return; return;
} }
if (!empty($owner['uprvkey'])) {
// Header data that is about to be signed. // Header data that is about to be signed.
$host = parse_url($request, PHP_URL_HOST); $host = parse_url($request, PHP_URL_HOST);
$path = parse_url($request, PHP_URL_PATH); $path = parse_url($request, PHP_URL_PATH);
@ -431,8 +434,7 @@ class HTTPSignature
$signature = base64_encode(Crypto::rsaSign($signed_data, $owner['uprvkey'], 'sha256')); $signature = base64_encode(Crypto::rsaSign($signed_data, $owner['uprvkey'], 'sha256'));
$headers[] = 'Signature: keyId="' . $owner['url'] . '#main-key' . '",algorithm="rsa-sha256",headers="(request-target) date host",signature="' . $signature . '"'; $headers[] = 'Signature: keyId="' . $owner['url'] . '#main-key' . '",algorithm="rsa-sha256",headers="(request-target) date host",signature="' . $signature . '"';
} else { }
$headers = [];
} }
if (!empty($opts['accept_content'])) { if (!empty($opts['accept_content'])) {