Merge pull request 'Normalize HTTP client user agent' (#90) from MrPetovan/friendica-directory:task/89-normalize-user-agent into stable

Reviewed-on: friendica/friendica-directory#90
This commit is contained in:
Tobias Diekershoff 2022-06-07 05:40:29 +00:00
commit 230f17ef36
8 changed files with 685 additions and 225 deletions

View file

@ -22,9 +22,9 @@
"boronczyk/localization-middleware": "^1.4",
"byjg/migration": "^4.0",
"byjg/uri": "^1.0.4",
"byjg/webrequest": "^1.0",
"gettext/gettext": "^4.6",
"gofabian/negotiation-middleware": "^0.1.3",
"guzzlehttp/guzzle": "^6.5",
"laminas/laminas-escaper": "^2.6",
"masterminds/html5": "^2.3",
"monolog/monolog": "^1.17",

641
composer.lock generated
View file

@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "a8bc1750aa6136e10ea28799a0974f6b",
"content-hash": "8966ff83bb5be2c08214c3982105c898",
"packages": [
{
"name": "asika/simple-console",
@ -447,46 +447,6 @@
"description": "An implementation of PSR UriInterface ",
"time": "2018-03-09T01:56:58+00:00"
},
{
"name": "byjg/webrequest",
"version": "1.0.7",
"source": {
"type": "git",
"url": "https://github.com/byjg/webrequest.git",
"reference": "a307e4e21525c9dc0f116b5cf8657318ef2a44e8"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/byjg/webrequest/zipball/a307e4e21525c9dc0f116b5cf8657318ef2a44e8",
"reference": "a307e4e21525c9dc0f116b5cf8657318ef2a44e8",
"shasum": ""
},
"require": {
"ext-curl": "*",
"php": ">=5.4.0"
},
"require-dev": {
"phpunit/phpunit": ">5.7"
},
"type": "library",
"autoload": {
"psr-4": {
"ByJG\\Util\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "João Gilberto Magalhães",
"email": "joao@byjg.com.br"
}
],
"description": "A lightweight and highly customized CURL wrapper for making RESt calls and a wrapper for call dynamically SOAP requests.",
"time": "2018-03-09T02:53:33+00:00"
},
{
"name": "byjg/xmlutil",
"version": "1.0.6",
@ -783,6 +743,303 @@
],
"time": "2017-02-22T18:45:01+00:00"
},
{
"name": "guzzlehttp/guzzle",
"version": "6.5.6",
"source": {
"type": "git",
"url": "https://github.com/guzzle/guzzle.git",
"reference": "f092dd734083473658de3ee4bef093ed77d2689c"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/guzzle/guzzle/zipball/f092dd734083473658de3ee4bef093ed77d2689c",
"reference": "f092dd734083473658de3ee4bef093ed77d2689c",
"shasum": ""
},
"require": {
"ext-json": "*",
"guzzlehttp/promises": "^1.0",
"guzzlehttp/psr7": "^1.6.1",
"php": ">=5.5",
"symfony/polyfill-intl-idn": "^1.17.0"
},
"require-dev": {
"ext-curl": "*",
"phpunit/phpunit": "^4.8.35 || ^5.7 || ^6.4 || ^7.0",
"psr/log": "^1.1"
},
"suggest": {
"psr/log": "Required for using the Log middleware"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "6.5-dev"
}
},
"autoload": {
"files": [
"src/functions_include.php"
],
"psr-4": {
"GuzzleHttp\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Graham Campbell",
"email": "hello@gjcampbell.co.uk",
"homepage": "https://github.com/GrahamCampbell"
},
{
"name": "Michael Dowling",
"email": "mtdowling@gmail.com",
"homepage": "https://github.com/mtdowling"
},
{
"name": "Jeremy Lindblom",
"email": "jeremeamia@gmail.com",
"homepage": "https://github.com/jeremeamia"
},
{
"name": "George Mponos",
"email": "gmponos@gmail.com",
"homepage": "https://github.com/gmponos"
},
{
"name": "Tobias Nyholm",
"email": "tobias.nyholm@gmail.com",
"homepage": "https://github.com/Nyholm"
},
{
"name": "Márk Sági-Kazár",
"email": "mark.sagikazar@gmail.com",
"homepage": "https://github.com/sagikazarmark"
},
{
"name": "Tobias Schultze",
"email": "webmaster@tubo-world.de",
"homepage": "https://github.com/Tobion"
}
],
"description": "Guzzle is a PHP HTTP client library",
"homepage": "http://guzzlephp.org/",
"keywords": [
"client",
"curl",
"framework",
"http",
"http client",
"rest",
"web service"
],
"funding": [
{
"url": "https://github.com/GrahamCampbell",
"type": "github"
},
{
"url": "https://github.com/Nyholm",
"type": "github"
},
{
"url": "https://tidelift.com/funding/github/packagist/guzzlehttp/guzzle",
"type": "tidelift"
}
],
"time": "2022-05-25T13:19:12+00:00"
},
{
"name": "guzzlehttp/promises",
"version": "1.5.1",
"source": {
"type": "git",
"url": "https://github.com/guzzle/promises.git",
"reference": "fe752aedc9fd8fcca3fe7ad05d419d32998a06da"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/guzzle/promises/zipball/fe752aedc9fd8fcca3fe7ad05d419d32998a06da",
"reference": "fe752aedc9fd8fcca3fe7ad05d419d32998a06da",
"shasum": ""
},
"require": {
"php": ">=5.5"
},
"require-dev": {
"symfony/phpunit-bridge": "^4.4 || ^5.1"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "1.5-dev"
}
},
"autoload": {
"files": [
"src/functions_include.php"
],
"psr-4": {
"GuzzleHttp\\Promise\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Graham Campbell",
"email": "hello@gjcampbell.co.uk",
"homepage": "https://github.com/GrahamCampbell"
},
{
"name": "Michael Dowling",
"email": "mtdowling@gmail.com",
"homepage": "https://github.com/mtdowling"
},
{
"name": "Tobias Nyholm",
"email": "tobias.nyholm@gmail.com",
"homepage": "https://github.com/Nyholm"
},
{
"name": "Tobias Schultze",
"email": "webmaster@tubo-world.de",
"homepage": "https://github.com/Tobion"
}
],
"description": "Guzzle promises library",
"keywords": [
"promise"
],
"funding": [
{
"url": "https://github.com/GrahamCampbell",
"type": "github"
},
{
"url": "https://github.com/Nyholm",
"type": "github"
},
{
"url": "https://tidelift.com/funding/github/packagist/guzzlehttp/promises",
"type": "tidelift"
}
],
"time": "2021-10-22T20:56:57+00:00"
},
{
"name": "guzzlehttp/psr7",
"version": "1.8.5",
"source": {
"type": "git",
"url": "https://github.com/guzzle/psr7.git",
"reference": "337e3ad8e5716c15f9657bd214d16cc5e69df268"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/guzzle/psr7/zipball/337e3ad8e5716c15f9657bd214d16cc5e69df268",
"reference": "337e3ad8e5716c15f9657bd214d16cc5e69df268",
"shasum": ""
},
"require": {
"php": ">=5.4.0",
"psr/http-message": "~1.0",
"ralouphie/getallheaders": "^2.0.5 || ^3.0.0"
},
"provide": {
"psr/http-message-implementation": "1.0"
},
"require-dev": {
"ext-zlib": "*",
"phpunit/phpunit": "~4.8.36 || ^5.7.27 || ^6.5.14 || ^7.5.20 || ^8.5.8 || ^9.3.10"
},
"suggest": {
"laminas/laminas-httphandlerrunner": "Emit PSR-7 responses"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "1.7-dev"
}
},
"autoload": {
"files": [
"src/functions_include.php"
],
"psr-4": {
"GuzzleHttp\\Psr7\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Graham Campbell",
"email": "hello@gjcampbell.co.uk",
"homepage": "https://github.com/GrahamCampbell"
},
{
"name": "Michael Dowling",
"email": "mtdowling@gmail.com",
"homepage": "https://github.com/mtdowling"
},
{
"name": "George Mponos",
"email": "gmponos@gmail.com",
"homepage": "https://github.com/gmponos"
},
{
"name": "Tobias Nyholm",
"email": "tobias.nyholm@gmail.com",
"homepage": "https://github.com/Nyholm"
},
{
"name": "Márk Sági-Kazár",
"email": "mark.sagikazar@gmail.com",
"homepage": "https://github.com/sagikazarmark"
},
{
"name": "Tobias Schultze",
"email": "webmaster@tubo-world.de",
"homepage": "https://github.com/Tobion"
}
],
"description": "PSR-7 message implementation that also provides common utility methods",
"keywords": [
"http",
"message",
"psr-7",
"request",
"response",
"stream",
"uri",
"url"
],
"funding": [
{
"url": "https://github.com/GrahamCampbell",
"type": "github"
},
{
"url": "https://github.com/Nyholm",
"type": "github"
},
{
"url": "https://tidelift.com/funding/github/packagist/guzzlehttp/psr7",
"type": "tidelift"
}
],
"time": "2022-03-20T21:51:18+00:00"
},
{
"name": "laminas/laminas-escaper",
"version": "2.6.1",
@ -1581,6 +1838,46 @@
],
"time": "2017-10-23T01:57:42+00:00"
},
{
"name": "ralouphie/getallheaders",
"version": "3.0.3",
"source": {
"type": "git",
"url": "https://github.com/ralouphie/getallheaders.git",
"reference": "120b605dfeb996808c31b6477290a714d356e822"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/ralouphie/getallheaders/zipball/120b605dfeb996808c31b6477290a714d356e822",
"reference": "120b605dfeb996808c31b6477290a714d356e822",
"shasum": ""
},
"require": {
"php": ">=5.6"
},
"require-dev": {
"php-coveralls/php-coveralls": "^2.1",
"phpunit/phpunit": "^5 || ^6.5"
},
"type": "library",
"autoload": {
"files": [
"src/getallheaders.php"
]
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Ralph Khattar",
"email": "ralph.khattar@gmail.com"
}
],
"description": "A polyfill for getallheaders.",
"time": "2019-03-08T08:55:37+00:00"
},
{
"name": "sarahman/simple-filesystem-cache",
"version": "1.0.2",
@ -1797,6 +2094,244 @@
],
"time": "2019-11-28T17:40:33+00:00"
},
{
"name": "symfony/polyfill-intl-idn",
"version": "v1.26.0",
"source": {
"type": "git",
"url": "https://github.com/symfony/polyfill-intl-idn.git",
"reference": "59a8d271f00dd0e4c2e518104cc7963f655a1aa8"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/symfony/polyfill-intl-idn/zipball/59a8d271f00dd0e4c2e518104cc7963f655a1aa8",
"reference": "59a8d271f00dd0e4c2e518104cc7963f655a1aa8",
"shasum": ""
},
"require": {
"php": ">=7.1",
"symfony/polyfill-intl-normalizer": "^1.10",
"symfony/polyfill-php72": "^1.10"
},
"suggest": {
"ext-intl": "For best performance"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-main": "1.26-dev"
},
"thanks": {
"name": "symfony/polyfill",
"url": "https://github.com/symfony/polyfill"
}
},
"autoload": {
"files": [
"bootstrap.php"
],
"psr-4": {
"Symfony\\Polyfill\\Intl\\Idn\\": ""
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Laurent Bassin",
"email": "laurent@bassin.info"
},
{
"name": "Trevor Rowbotham",
"email": "trevor.rowbotham@pm.me"
},
{
"name": "Symfony Community",
"homepage": "https://symfony.com/contributors"
}
],
"description": "Symfony polyfill for intl's idn_to_ascii and idn_to_utf8 functions",
"homepage": "https://symfony.com",
"keywords": [
"compatibility",
"idn",
"intl",
"polyfill",
"portable",
"shim"
],
"funding": [
{
"url": "https://symfony.com/sponsor",
"type": "custom"
},
{
"url": "https://github.com/fabpot",
"type": "github"
},
{
"url": "https://tidelift.com/funding/github/packagist/symfony/symfony",
"type": "tidelift"
}
],
"time": "2022-05-24T11:49:31+00:00"
},
{
"name": "symfony/polyfill-intl-normalizer",
"version": "v1.26.0",
"source": {
"type": "git",
"url": "https://github.com/symfony/polyfill-intl-normalizer.git",
"reference": "219aa369ceff116e673852dce47c3a41794c14bd"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/symfony/polyfill-intl-normalizer/zipball/219aa369ceff116e673852dce47c3a41794c14bd",
"reference": "219aa369ceff116e673852dce47c3a41794c14bd",
"shasum": ""
},
"require": {
"php": ">=7.1"
},
"suggest": {
"ext-intl": "For best performance"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-main": "1.26-dev"
},
"thanks": {
"name": "symfony/polyfill",
"url": "https://github.com/symfony/polyfill"
}
},
"autoload": {
"files": [
"bootstrap.php"
],
"psr-4": {
"Symfony\\Polyfill\\Intl\\Normalizer\\": ""
},
"classmap": [
"Resources/stubs"
]
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Nicolas Grekas",
"email": "p@tchwork.com"
},
{
"name": "Symfony Community",
"homepage": "https://symfony.com/contributors"
}
],
"description": "Symfony polyfill for intl's Normalizer class and related functions",
"homepage": "https://symfony.com",
"keywords": [
"compatibility",
"intl",
"normalizer",
"polyfill",
"portable",
"shim"
],
"funding": [
{
"url": "https://symfony.com/sponsor",
"type": "custom"
},
{
"url": "https://github.com/fabpot",
"type": "github"
},
{
"url": "https://tidelift.com/funding/github/packagist/symfony/symfony",
"type": "tidelift"
}
],
"time": "2022-05-24T11:49:31+00:00"
},
{
"name": "symfony/polyfill-php72",
"version": "v1.26.0",
"source": {
"type": "git",
"url": "https://github.com/symfony/polyfill-php72.git",
"reference": "bf44a9fd41feaac72b074de600314a93e2ae78e2"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/symfony/polyfill-php72/zipball/bf44a9fd41feaac72b074de600314a93e2ae78e2",
"reference": "bf44a9fd41feaac72b074de600314a93e2ae78e2",
"shasum": ""
},
"require": {
"php": ">=7.1"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-main": "1.26-dev"
},
"thanks": {
"name": "symfony/polyfill",
"url": "https://github.com/symfony/polyfill"
}
},
"autoload": {
"files": [
"bootstrap.php"
],
"psr-4": {
"Symfony\\Polyfill\\Php72\\": ""
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Nicolas Grekas",
"email": "p@tchwork.com"
},
{
"name": "Symfony Community",
"homepage": "https://symfony.com/contributors"
}
],
"description": "Symfony polyfill backporting some PHP 7.2+ features to lower PHP versions",
"homepage": "https://symfony.com",
"keywords": [
"compatibility",
"polyfill",
"portable",
"shim"
],
"funding": [
{
"url": "https://symfony.com/sponsor",
"type": "custom"
},
{
"url": "https://github.com/fabpot",
"type": "github"
},
{
"url": "https://tidelift.com/funding/github/packagist/symfony/symfony",
"type": "tidelift"
}
],
"time": "2022-05-24T11:49:31+00:00"
},
{
"name": "willdurand/negotiation",
"version": "v2.3.1",
@ -2167,12 +2702,12 @@
},
"type": "library",
"autoload": {
"psr-4": {
"DeepCopy\\": "src/DeepCopy/"
},
"files": [
"src/DeepCopy/deep_copy.php"
]
],
"psr-4": {
"DeepCopy\\": "src/DeepCopy/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
@ -3019,14 +3554,14 @@
},
"type": "library",
"autoload": {
"psr-4": {
"Sabre\\Event\\": "lib/"
},
"files": [
"lib/coroutine.php",
"lib/Loop/functions.php",
"lib/Promise/functions.php"
]
],
"psr-4": {
"Sabre\\Event\\": "lib/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
@ -3728,12 +4263,12 @@
}
},
"autoload": {
"psr-4": {
"Symfony\\Polyfill\\Ctype\\": ""
},
"files": [
"bootstrap.php"
]
],
"psr-4": {
"Symfony\\Polyfill\\Ctype\\": ""
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [

View file

@ -10,9 +10,9 @@ use Friendica\Directory\Utils\Network;
class Directory
{
/**
* @var \Atlas\Pdo\Connection
* @var \GuzzleHttp\ClientInterface
*/
private $atlas;
private $http;
/**
* @var \Friendica\Directory\Models\ProfilePollQueue
*/
@ -30,12 +30,12 @@ class Directory
];
public function __construct(
\Atlas\Pdo\Connection $atlas,
\GuzzleHttp\ClientInterface $http,
\Friendica\Directory\Models\ProfilePollQueue $profilePollQueueModel,
\Psr\Log\LoggerInterface $logger,
array $settings)
{
$this->atlas = $atlas;
$this->http = $http;
$this->profilePollQueueModel = $profilePollQueueModel;
$this->logger = $logger;
$this->settings = array_merge($this->settings, $settings);
@ -82,35 +82,7 @@ class Directory
$path = '/sync/pull/since/' . $last_polled;
}
//Prepare the CURL call.
$handle = curl_init();
$options = array(
//Timeouts
CURLOPT_TIMEOUT => max($this->settings['probe_timeout'], 1), //Minimum of 1 second timeout.
CURLOPT_CONNECTTIMEOUT => 1,
//Redirecting
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_MAXREDIRS => 8,
//SSL
CURLOPT_SSL_VERIFYPEER => true,
// CURLOPT_VERBOSE => true,
// CURLOPT_CERTINFO => true,
CURLOPT_SSL_VERIFYHOST => 2,
CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
//Basic request
CURLOPT_USERAGENT => Network::USER_AGENT,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_URL => $directory_url . $path
);
curl_setopt_array($handle, $options);
$this->logger->info('Pulling profiles from directory URL: ' . $directory_url . $path);
//Probe the site.
$pull_data = curl_exec($handle);
//Done with CURL now.
curl_close($handle);
$pull_data = $this->http->get($directory_url . $path, ['timeout' => max($this->settings['probe_timeout'], 1)])->getBody()->getContents();
$data = json_decode($pull_data, true);

View file

@ -18,6 +18,11 @@ class Profile
*/
private $atlas;
/**
* @var \GuzzleHttp\ClientInterface
*/
private $http;
/**
* @var \Friendica\Directory\Models\Server
*/
@ -43,6 +48,7 @@ class Profile
public function __construct(
\Atlas\Pdo\Connection $atlas,
\GuzzleHttp\ClientInterface $http,
\Friendica\Directory\Models\Server $serverModel,
\Friendica\Directory\Models\Profile $profileModel,
\Psr\Log\LoggerInterface $logger,
@ -50,6 +56,7 @@ class Profile
)
{
$this->atlas = $atlas;
$this->http = $http;
$this->serverModel = $serverModel;
$this->profileModel = $profileModel;
$this->logger = $logger;
@ -122,25 +129,23 @@ class Profile
);
}
//Skip the profile scrape?
$noscrape = $server['noscrape_url'];
$available = false;
$params = [];
if ($noscrape) {
//Skip the profile scrape?
if ($server['noscrape_url']) {
$this->logger->debug('Calling ' . $server['noscrape_url'] . '/' . $username);
$params = \Friendica\Directory\Utils\Scrape::retrieveNoScrapeData($server['noscrape_url'] . '/' . $username);
$noscrape = !!$params; //If the result was false, do a scrape after all.
$params = \Friendica\Directory\Utils\Scrape::retrieveNoScrapeData($this->http, $server['noscrape_url'] . '/' . $username);
$available = !!$params; //If the result was false, do a scrape after all.
}
$available = true;
if ($noscrape) {
$available = Network::testURL($profile_uri);
$this->logger->debug('Testing ' . $profile_uri . ': ' . ($available?'Success':'Failure'));
} else {
if (!$available) {
$this->logger->notice('Parsing profile page ' . $profile_uri);
$params = \Friendica\Directory\Utils\Scrape::retrieveProfileData($profile_uri);
$params = \Friendica\Directory\Utils\Scrape::retrieveProfileData($this->http, $profile_uri);
$params['language'] = $server['language'];
$available = !empty($params['fn']);
}
// Empty result is due to an offline site.
@ -235,7 +240,7 @@ class Profile
'language' => $params['language'] ?? null,
'filled_fields'=> $filled_fields,
'last_activity'=> $params['last-activity'] ?? null,
'available' => $available,
'available' => [$available, \PDO::PARAM_BOOL],
];
$this->logger->debug(var_export($values, true));
@ -301,7 +306,7 @@ class Profile
$status = false;
if ($profile_id) {
$img_str = \Friendica\Directory\Utils\Network::fetchURL($params['photo'], true);
$img_str = $this->http->get($params['photo'])->getBody()->getContents();
$img = new \Friendica\Directory\Utils\Photo($img_str);
if ($img->getImage()) {
$img->scaleImageSquare(80);

View file

@ -2,8 +2,9 @@
namespace Friendica\Directory\Pollers;
use ByJG\Util\WebRequest;
use Friendica\Directory\Utils\Network;
use GuzzleHttp\Psr7\Uri;
use GuzzleHttp\Exception\RequestException;
use GuzzleHttp\TransferStats;
/**
* @author Hypolite Petovan <hypolite@mrpetovan.com>
@ -14,6 +15,10 @@ class Server
* @var \Atlas\Pdo\Connection
*/
private $atlas;
/**
* @var \GuzzleHttp\ClientInterface
*/
private $http;
/**
* @var \Friendica\Directory\Models\ProfilePollQueue
*/
@ -41,6 +46,7 @@ class Server
public function __construct(
\Atlas\Pdo\Connection $atlas,
\GuzzleHttp\ClientInterface $http,
\Friendica\Directory\Models\ProfilePollQueue $profilePollQueueModel,
\Friendica\Directory\Models\Server $serverModel,
\Psr\SimpleCache\CacheInterface $simplecache,
@ -48,6 +54,7 @@ class Server
array $settings)
{
$this->atlas = $atlas;
$this->http = $http;
$this->profilePollQueueModel = $profilePollQueueModel;
$this->serverModel = $serverModel;
$this->simplecache = $simplecache;
@ -247,65 +254,41 @@ class Server
private function getProbeResult(string $base_url): array
{
//Prepare the CURL call.
$handle = curl_init();
$options = array(
//Timeouts
CURLOPT_TIMEOUT => max($this->settings['probe_timeout'], 1), //Minimum of 1 second timeout.
CURLOPT_CONNECTTIMEOUT => 1,
//Redirecting
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_MAXREDIRS => 8,
//SSL
CURLOPT_SSL_VERIFYPEER => true,
// CURLOPT_VERBOSE => true,
// CURLOPT_CERTINFO => true,
CURLOPT_SSL_VERIFYHOST => 2,
CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
//Basic request
CURLOPT_USERAGENT => Network::USER_AGENT,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_URL => $base_url . '/friendica/json'
);
curl_setopt_array($handle, $options);
$curl_info = null;
$options = [
'timeout' => max($this->settings['probe_timeout'], 1),
'on_stats' => function (TransferStats $transferStats) use (&$curl_info) {
$curl_info = $transferStats->getHandlerStats();
}
];
$sslcert_issues = false;
try {
//Probe the site.
$probe_start = microtime(true);
$probe_data = curl_exec($handle);
$probe_data = $this->http->get($base_url . '/friendica/json', $options)->getBody()->getContents();
$probe_end = microtime(true);
//Check for SSL problems.
$curl_statuscode = curl_errno($handle);
$sslcert_issues = in_array($curl_statuscode, array(
} catch (RequestException $e) {
if (!in_array($e->getHandlerContext()['errno'], [
60, //Could not authenticate certificate with known CA's
83 //Issuer check failed
));
//When it's the certificate that doesn't work.
if ($sslcert_issues) {
//Probe again, without strict SSL.
$options[CURLOPT_SSL_VERIFYPEER] = false;
//Replace the handle.
curl_close($handle);
$handle = curl_init();
curl_setopt_array($handle, $options);
//Probe.
$probe_start = microtime(true);
$probe_data = curl_exec($handle);
$probe_end = microtime(true);
//Store new status.
$curl_statuscode = curl_errno($handle);
])) {
throw $e;
}
//Gather more meta.
$time = round(($probe_end - $probe_start) * 1000);
$curl_info = curl_getinfo($handle);
$sslcert_issues = true;
//Done with CURL now.
curl_close($handle);
//When it's the certificate that doesn't work, we probe again without strict SSL.
$options['verify'] = false;
$probe_start = microtime(true);
$probe_data = $this->http->get($base_url . '/friendica/json', $options)->getBody()->getContents();
$probe_end = microtime(true);
}
$time = round(($probe_end - $probe_start) * 1000);
try {
$data = json_decode($probe_data, true);
@ -419,27 +402,26 @@ class Server
function discoverPoco($base_url): void
{
$pocoUrl = $base_url . '/poco';
$uri = Uri::withQueryValues(new Uri($base_url . '/poco'), ['fields' => 'urls', 'count' => 1000]);
$webrequest = new WebRequest($pocoUrl);
$pocoJsonData = $webrequest->get(['fields' => 'urls', 'count' => 1000]);
$response = $this->http->request('GET', $uri);
$this->logger->debug('WebRequest: ' . $webrequest->getLastFetchedUrl() . ' Status: ' . $webrequest->getLastStatus());
$this->logger->debug('WebRequest: ' . $uri . ' Status: ' . $response->getStatusCode());
if ($webrequest->getLastStatus() != 200) {
$this->logger->info('Unsuccessful poco request: ' . $webrequest->getLastFetchedUrl());
if ($response->getStatusCode() != 200) {
$this->logger->info('Unsuccessful poco request: ' . $uri);
return;
}
try {
$pocoFetchData = json_decode($pocoJsonData);
$pocoFetchData = json_decode($response->getBody()->getContents());
} catch (\Throwable $e) {
$this->logger->notice('Invalid JSON string for PoCo URL: ' . $webrequest->getLastFetchedUrl());
$this->logger->notice('Invalid JSON string for PoCo URL: ' . $uri);
return;
}
if (!isset($pocoFetchData->entry)) {
$this->logger->notice('Invalid JSON structure for PoCo URL: ' . $webrequest->getLastFetchedUrl());
$this->logger->notice('Invalid JSON structure for PoCo URL: ' . $uri);
return;
}
@ -461,26 +443,28 @@ class Server
public function getSubscribeUrl($base_url, $profile)
{
$xrdRequest = new WebRequest($base_url . '/xrd');
$xrdRequest->addRequestHeader('Accept', 'application/jrd+json');
$xrdJsonData = $xrdRequest->get(['uri' => $profile]);
$uri = Uri::withQueryValues(new Uri($base_url . '/xrd'), ['uri' => $profile]);
$this->logger->debug('WebRequest: ' . $xrdRequest->getLastFetchedUrl() . ' Status: ' . $xrdRequest->getLastStatus());
$response = $this->http->request('GET', $uri, ['headers' => ['Accept' => 'application/jrd+json']]);
if ($xrdRequest->getLastStatus() != 200) {
$this->logger->info('Unsuccessful XRD request: ' . $xrdRequest->getLastFetchedUrl());
$xrdJsonData = $response->getBody()->getContents();
$this->logger->debug('WebRequest: ' . $uri . ' Status: ' . $response->getStatusCode());
if ($response->getStatusCode() != 200) {
$this->logger->info('Unsuccessful XRD request: ' . $uri);
return null;
}
try {
$xrdData = json_decode($xrdJsonData);
} catch (\Throwable $e) {
$this->logger->notice('Invalid JSON string for XRD URL: ' . $xrdRequest->getLastFetchedUrl());
$this->logger->notice('Invalid JSON string for XRD URL: ' . $uri);
return null;
}
if (!isset($xrdData->links)) {
$this->logger->notice('Invalid JSON structure for XRD URL: ' . $xrdRequest->getLastFetchedUrl());
$this->logger->notice('Invalid JSON structure for XRD URL: ' . $uri);
return null;
}

View file

@ -15,56 +15,6 @@ namespace Friendica\Directory\Utils;
*/
class Network
{
const USER_AGENT = 'friendica-directory-probe-1.0';
/**
 * Fetches the body of a URL with cURL and returns it as a string.
 *
 * Redirects are followed (up to 8 hops) and the request is sent with the
 * class USER_AGENT constant as its user agent.
 *
 * @param string $url     URL to fetch
 * @param bool   $binary  When true, CURLOPT_BINARYTRANSFER is set on the handle
 * @param int    $timeout Total timeout in seconds; values below 1 are raised to 1
 * @return string The response body, or an empty string when the handle could
 *                not be created or the transfer failed
 */
public static function fetchURL(string $url, bool $binary = false, int $timeout = 20): string
{
    $ch = curl_init($url);
    if (!$ch) {
        // The declared return type is string, so a failure is reported as an empty body
        // instead of the boolean false the previous code tried to return.
        return '';
    }

    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_TIMEOUT, max($timeout, 1)); //Minimum of 1 second timeout.
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_MAXREDIRS, 8);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_USERAGENT, self::USER_AGENT);

    if ($binary) {
        curl_setopt($ch, CURLOPT_BINARYTRANSFER, 1);
    }

    // NOTE(review): peer certificate verification is switched off for this call,
    // unlike testURL() which keeps it on. Confirm this is intentional.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);

    $s = curl_exec($ch);
    curl_close($ch);

    // curl_exec() yields false when the transfer fails; normalise that to an
    // empty string so the string return type is always honoured.
    return is_string($s) ? $s : '';
}
/**
 * Probes a URL with a body-less request and reports whether it looks reachable.
 *
 * The probe succeeds when cURL reports no error and the final HTTP status
 * code is below 400. Redirects are followed (up to 8 hops) and the peer
 * certificate is verified.
 *
 * @param string $url     URL to probe
 * @param int    $timeout Total timeout in seconds; values below 1 are raised to 1
 * @return bool True when the probe succeeded, false otherwise (including when
 *              the cURL handle could not be created)
 */
public static function testURL(string $url, int $timeout = 20): bool
{
    $handle = curl_init($url);
    if (!$handle) {
        return false;
    }

    curl_setopt_array($handle, [
        CURLOPT_HEADER         => 0,
        CURLOPT_TIMEOUT        => max($timeout, 1), // never wait less than one second
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_MAXREDIRS      => 8,
        CURLOPT_SSL_VERIFYPEER => true,
        CURLOPT_NOBODY         => true, // status only, no response body is requested
        CURLOPT_USERAGENT      => self::USER_AGENT,
    ]);

    curl_exec($handle);

    // Both values must be read before the handle is closed.
    $httpStatus = intval(curl_getinfo($handle, CURLINFO_RESPONSE_CODE));
    $reachable = $httpStatus < 400 && curl_errno($handle) === 0;

    curl_close($handle);

    return $reachable;
}
/**
* Check if a hostname is public and non-reserved
*

View file

@ -2,6 +2,8 @@
namespace Friendica\Directory\Utils;
use GuzzleHttp\ClientInterface;
/**
* @author Hypolite Petovan <hypolite@mrpetovan.com>
*/
@ -12,10 +14,10 @@ class Scrape
* @param string $url
* @return array|false
*/
public static function retrieveNoScrapeData(string $url)
public static function retrieveNoScrapeData(ClientInterface $http, string $url)
{
$submit_noscrape_start = microtime(true);
$data = Network::fetchURL($url);
$data = $http->get($url)->getBody()->getContents();
$submit_noscrape_request_end = microtime(true);
if (empty($data)) {
@ -42,7 +44,7 @@ class Scrape
return $params;
}
public static function retrieveProfileData(string $url, int $max_nodes = 3500): array
public static function retrieveProfileData(ClientInterface $http, string $url, int $max_nodes = 3500): array
{
$minNodes = 100; //Lets do at least 100 nodes per type.
@ -56,7 +58,7 @@ class Scrape
$scrape_start = microtime(true);
$params = [];
$html = Network::fetchURL($url, false, $timeout);
$html = $http->get($url, ['timeout' => $timeout])->getBody()->getContents();;
$scrape_fetch_end = microtime(true);

View file

@ -89,6 +89,16 @@ $container['migration'] = function (ContainerInterface $c): ByJG\DbMigration\Mig
return $migration;
};
/**
 * Shared HTTP client used for every outgoing request.
 *
 * Sends a normalized User-Agent of the form
 * "FriendicaDirectory/<version> <Guzzle default user agent>" and applies a
 * default timeout of 20 seconds. The version is read from the VERSION file
 * one directory above this one; when that file is missing, unreadable or does
 * not hold a plain "x.y.z" version, "0.0.0" is advertised instead.
 */
$container['http'] = function (ContainerInterface $c): GuzzleHttp\ClientInterface {
    $versionFile = __DIR__ . '/../VERSION';

    // Guarding with is_readable() avoids the E_WARNING file_get_contents() raises on a missing file.
    $version = is_readable($versionFile) ? trim((string)file_get_contents($versionFile)) : '';

    // Components may have several digits (e.g. "2.10.0"); the previous single-digit
    // pattern rejected such releases and advertised 0.0.0 for them.
    if (!preg_match('/^\d+\.\d+\.\d+$/', $version)) {
        $version = '0.0.0';
    }

    return new GuzzleHttp\Client([
        'timeout' => 20,
        'headers' => [
            'User-Agent' => 'FriendicaDirectory/' . $version . ' ' . \GuzzleHttp\default_user_agent(),
        ],
    ]);
};
// Internal Dependency Injection
$container[\Friendica\Directory\Models\Profile::class] = function (ContainerInterface $c): Friendica\Directory\Models\Profile {
@ -106,7 +116,7 @@ $container[\Friendica\Directory\Models\Server::class] = function (ContainerInter
$container[\Friendica\Directory\Pollers\Directory::class] = function (ContainerInterface $c): Friendica\Directory\Pollers\Directory {
$settings = $c->get('settings')['poller'];
return new Friendica\Directory\Pollers\Directory(
$c->get('atlas'),
$c->get('http'),
$c->get(\Friendica\Directory\Models\ProfilePollQueue::class),
$c->get('logger'),
$settings ?: []
@ -117,6 +127,7 @@ $container[\Friendica\Directory\Pollers\Profile::class] = function (ContainerInt
$settings = $c->get('settings')['poller'];
return new Friendica\Directory\Pollers\Profile(
$c->get('atlas'),
$c->get('http'),
$c->get(\Friendica\Directory\Models\Server::class),
$c->get(\Friendica\Directory\Models\Profile::class),
$c->get('logger'),
@ -128,6 +139,7 @@ $container[\Friendica\Directory\Pollers\Server::class] = function (ContainerInte
$settings = $c->get('settings')['poller'];
return new Friendica\Directory\Pollers\Server(
$c->get('atlas'),
$c->get('http'),
$c->get(\Friendica\Directory\Models\ProfilePollQueue::class),
$c->get(\Friendica\Directory\Models\Server::class),
$c->get('simplecache'),