Merge pull request 'Normalize HTTP client user agent' (#90) from MrPetovan/friendica-directory:task/89-normalize-user-agent into stable

Reviewed-on: #90
This commit is contained in:
Tobias Diekershoff 2022-06-07 05:40:29 +00:00
commit 230f17ef36
8 changed files with 685 additions and 225 deletions

View file

@ -22,9 +22,9 @@
"boronczyk/localization-middleware": "^1.4", "boronczyk/localization-middleware": "^1.4",
"byjg/migration": "^4.0", "byjg/migration": "^4.0",
"byjg/uri": "^1.0.4", "byjg/uri": "^1.0.4",
"byjg/webrequest": "^1.0",
"gettext/gettext": "^4.6", "gettext/gettext": "^4.6",
"gofabian/negotiation-middleware": "^0.1.3", "gofabian/negotiation-middleware": "^0.1.3",
"guzzlehttp/guzzle": "^6.5",
"laminas/laminas-escaper": "^2.6", "laminas/laminas-escaper": "^2.6",
"masterminds/html5": "^2.3", "masterminds/html5": "^2.3",
"monolog/monolog": "^1.17", "monolog/monolog": "^1.17",

641
composer.lock generated
View file

@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically" "This file is @generated automatically"
], ],
"content-hash": "a8bc1750aa6136e10ea28799a0974f6b", "content-hash": "8966ff83bb5be2c08214c3982105c898",
"packages": [ "packages": [
{ {
"name": "asika/simple-console", "name": "asika/simple-console",
@ -447,46 +447,6 @@
"description": "An implementation of PSR UriInterface ", "description": "An implementation of PSR UriInterface ",
"time": "2018-03-09T01:56:58+00:00" "time": "2018-03-09T01:56:58+00:00"
}, },
{
"name": "byjg/webrequest",
"version": "1.0.7",
"source": {
"type": "git",
"url": "https://github.com/byjg/webrequest.git",
"reference": "a307e4e21525c9dc0f116b5cf8657318ef2a44e8"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/byjg/webrequest/zipball/a307e4e21525c9dc0f116b5cf8657318ef2a44e8",
"reference": "a307e4e21525c9dc0f116b5cf8657318ef2a44e8",
"shasum": ""
},
"require": {
"ext-curl": "*",
"php": ">=5.4.0"
},
"require-dev": {
"phpunit/phpunit": ">5.7"
},
"type": "library",
"autoload": {
"psr-4": {
"ByJG\\Util\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "João Gilberto Magalhães",
"email": "joao@byjg.com.br"
}
],
"description": "A lightweight and highly customized CURL wrapper for making RESt calls and a wrapper for call dynamically SOAP requests.",
"time": "2018-03-09T02:53:33+00:00"
},
{ {
"name": "byjg/xmlutil", "name": "byjg/xmlutil",
"version": "1.0.6", "version": "1.0.6",
@ -783,6 +743,303 @@
], ],
"time": "2017-02-22T18:45:01+00:00" "time": "2017-02-22T18:45:01+00:00"
}, },
{
"name": "guzzlehttp/guzzle",
"version": "6.5.6",
"source": {
"type": "git",
"url": "https://github.com/guzzle/guzzle.git",
"reference": "f092dd734083473658de3ee4bef093ed77d2689c"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/guzzle/guzzle/zipball/f092dd734083473658de3ee4bef093ed77d2689c",
"reference": "f092dd734083473658de3ee4bef093ed77d2689c",
"shasum": ""
},
"require": {
"ext-json": "*",
"guzzlehttp/promises": "^1.0",
"guzzlehttp/psr7": "^1.6.1",
"php": ">=5.5",
"symfony/polyfill-intl-idn": "^1.17.0"
},
"require-dev": {
"ext-curl": "*",
"phpunit/phpunit": "^4.8.35 || ^5.7 || ^6.4 || ^7.0",
"psr/log": "^1.1"
},
"suggest": {
"psr/log": "Required for using the Log middleware"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "6.5-dev"
}
},
"autoload": {
"files": [
"src/functions_include.php"
],
"psr-4": {
"GuzzleHttp\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Graham Campbell",
"email": "hello@gjcampbell.co.uk",
"homepage": "https://github.com/GrahamCampbell"
},
{
"name": "Michael Dowling",
"email": "mtdowling@gmail.com",
"homepage": "https://github.com/mtdowling"
},
{
"name": "Jeremy Lindblom",
"email": "jeremeamia@gmail.com",
"homepage": "https://github.com/jeremeamia"
},
{
"name": "George Mponos",
"email": "gmponos@gmail.com",
"homepage": "https://github.com/gmponos"
},
{
"name": "Tobias Nyholm",
"email": "tobias.nyholm@gmail.com",
"homepage": "https://github.com/Nyholm"
},
{
"name": "Márk Sági-Kazár",
"email": "mark.sagikazar@gmail.com",
"homepage": "https://github.com/sagikazarmark"
},
{
"name": "Tobias Schultze",
"email": "webmaster@tubo-world.de",
"homepage": "https://github.com/Tobion"
}
],
"description": "Guzzle is a PHP HTTP client library",
"homepage": "http://guzzlephp.org/",
"keywords": [
"client",
"curl",
"framework",
"http",
"http client",
"rest",
"web service"
],
"funding": [
{
"url": "https://github.com/GrahamCampbell",
"type": "github"
},
{
"url": "https://github.com/Nyholm",
"type": "github"
},
{
"url": "https://tidelift.com/funding/github/packagist/guzzlehttp/guzzle",
"type": "tidelift"
}
],
"time": "2022-05-25T13:19:12+00:00"
},
{
"name": "guzzlehttp/promises",
"version": "1.5.1",
"source": {
"type": "git",
"url": "https://github.com/guzzle/promises.git",
"reference": "fe752aedc9fd8fcca3fe7ad05d419d32998a06da"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/guzzle/promises/zipball/fe752aedc9fd8fcca3fe7ad05d419d32998a06da",
"reference": "fe752aedc9fd8fcca3fe7ad05d419d32998a06da",
"shasum": ""
},
"require": {
"php": ">=5.5"
},
"require-dev": {
"symfony/phpunit-bridge": "^4.4 || ^5.1"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "1.5-dev"
}
},
"autoload": {
"files": [
"src/functions_include.php"
],
"psr-4": {
"GuzzleHttp\\Promise\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Graham Campbell",
"email": "hello@gjcampbell.co.uk",
"homepage": "https://github.com/GrahamCampbell"
},
{
"name": "Michael Dowling",
"email": "mtdowling@gmail.com",
"homepage": "https://github.com/mtdowling"
},
{
"name": "Tobias Nyholm",
"email": "tobias.nyholm@gmail.com",
"homepage": "https://github.com/Nyholm"
},
{
"name": "Tobias Schultze",
"email": "webmaster@tubo-world.de",
"homepage": "https://github.com/Tobion"
}
],
"description": "Guzzle promises library",
"keywords": [
"promise"
],
"funding": [
{
"url": "https://github.com/GrahamCampbell",
"type": "github"
},
{
"url": "https://github.com/Nyholm",
"type": "github"
},
{
"url": "https://tidelift.com/funding/github/packagist/guzzlehttp/promises",
"type": "tidelift"
}
],
"time": "2021-10-22T20:56:57+00:00"
},
{
"name": "guzzlehttp/psr7",
"version": "1.8.5",
"source": {
"type": "git",
"url": "https://github.com/guzzle/psr7.git",
"reference": "337e3ad8e5716c15f9657bd214d16cc5e69df268"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/guzzle/psr7/zipball/337e3ad8e5716c15f9657bd214d16cc5e69df268",
"reference": "337e3ad8e5716c15f9657bd214d16cc5e69df268",
"shasum": ""
},
"require": {
"php": ">=5.4.0",
"psr/http-message": "~1.0",
"ralouphie/getallheaders": "^2.0.5 || ^3.0.0"
},
"provide": {
"psr/http-message-implementation": "1.0"
},
"require-dev": {
"ext-zlib": "*",
"phpunit/phpunit": "~4.8.36 || ^5.7.27 || ^6.5.14 || ^7.5.20 || ^8.5.8 || ^9.3.10"
},
"suggest": {
"laminas/laminas-httphandlerrunner": "Emit PSR-7 responses"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "1.7-dev"
}
},
"autoload": {
"files": [
"src/functions_include.php"
],
"psr-4": {
"GuzzleHttp\\Psr7\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Graham Campbell",
"email": "hello@gjcampbell.co.uk",
"homepage": "https://github.com/GrahamCampbell"
},
{
"name": "Michael Dowling",
"email": "mtdowling@gmail.com",
"homepage": "https://github.com/mtdowling"
},
{
"name": "George Mponos",
"email": "gmponos@gmail.com",
"homepage": "https://github.com/gmponos"
},
{
"name": "Tobias Nyholm",
"email": "tobias.nyholm@gmail.com",
"homepage": "https://github.com/Nyholm"
},
{
"name": "Márk Sági-Kazár",
"email": "mark.sagikazar@gmail.com",
"homepage": "https://github.com/sagikazarmark"
},
{
"name": "Tobias Schultze",
"email": "webmaster@tubo-world.de",
"homepage": "https://github.com/Tobion"
}
],
"description": "PSR-7 message implementation that also provides common utility methods",
"keywords": [
"http",
"message",
"psr-7",
"request",
"response",
"stream",
"uri",
"url"
],
"funding": [
{
"url": "https://github.com/GrahamCampbell",
"type": "github"
},
{
"url": "https://github.com/Nyholm",
"type": "github"
},
{
"url": "https://tidelift.com/funding/github/packagist/guzzlehttp/psr7",
"type": "tidelift"
}
],
"time": "2022-03-20T21:51:18+00:00"
},
{ {
"name": "laminas/laminas-escaper", "name": "laminas/laminas-escaper",
"version": "2.6.1", "version": "2.6.1",
@ -1581,6 +1838,46 @@
], ],
"time": "2017-10-23T01:57:42+00:00" "time": "2017-10-23T01:57:42+00:00"
}, },
{
"name": "ralouphie/getallheaders",
"version": "3.0.3",
"source": {
"type": "git",
"url": "https://github.com/ralouphie/getallheaders.git",
"reference": "120b605dfeb996808c31b6477290a714d356e822"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/ralouphie/getallheaders/zipball/120b605dfeb996808c31b6477290a714d356e822",
"reference": "120b605dfeb996808c31b6477290a714d356e822",
"shasum": ""
},
"require": {
"php": ">=5.6"
},
"require-dev": {
"php-coveralls/php-coveralls": "^2.1",
"phpunit/phpunit": "^5 || ^6.5"
},
"type": "library",
"autoload": {
"files": [
"src/getallheaders.php"
]
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Ralph Khattar",
"email": "ralph.khattar@gmail.com"
}
],
"description": "A polyfill for getallheaders.",
"time": "2019-03-08T08:55:37+00:00"
},
{ {
"name": "sarahman/simple-filesystem-cache", "name": "sarahman/simple-filesystem-cache",
"version": "1.0.2", "version": "1.0.2",
@ -1797,6 +2094,244 @@
], ],
"time": "2019-11-28T17:40:33+00:00" "time": "2019-11-28T17:40:33+00:00"
}, },
{
"name": "symfony/polyfill-intl-idn",
"version": "v1.26.0",
"source": {
"type": "git",
"url": "https://github.com/symfony/polyfill-intl-idn.git",
"reference": "59a8d271f00dd0e4c2e518104cc7963f655a1aa8"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/symfony/polyfill-intl-idn/zipball/59a8d271f00dd0e4c2e518104cc7963f655a1aa8",
"reference": "59a8d271f00dd0e4c2e518104cc7963f655a1aa8",
"shasum": ""
},
"require": {
"php": ">=7.1",
"symfony/polyfill-intl-normalizer": "^1.10",
"symfony/polyfill-php72": "^1.10"
},
"suggest": {
"ext-intl": "For best performance"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-main": "1.26-dev"
},
"thanks": {
"name": "symfony/polyfill",
"url": "https://github.com/symfony/polyfill"
}
},
"autoload": {
"files": [
"bootstrap.php"
],
"psr-4": {
"Symfony\\Polyfill\\Intl\\Idn\\": ""
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Laurent Bassin",
"email": "laurent@bassin.info"
},
{
"name": "Trevor Rowbotham",
"email": "trevor.rowbotham@pm.me"
},
{
"name": "Symfony Community",
"homepage": "https://symfony.com/contributors"
}
],
"description": "Symfony polyfill for intl's idn_to_ascii and idn_to_utf8 functions",
"homepage": "https://symfony.com",
"keywords": [
"compatibility",
"idn",
"intl",
"polyfill",
"portable",
"shim"
],
"funding": [
{
"url": "https://symfony.com/sponsor",
"type": "custom"
},
{
"url": "https://github.com/fabpot",
"type": "github"
},
{
"url": "https://tidelift.com/funding/github/packagist/symfony/symfony",
"type": "tidelift"
}
],
"time": "2022-05-24T11:49:31+00:00"
},
{
"name": "symfony/polyfill-intl-normalizer",
"version": "v1.26.0",
"source": {
"type": "git",
"url": "https://github.com/symfony/polyfill-intl-normalizer.git",
"reference": "219aa369ceff116e673852dce47c3a41794c14bd"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/symfony/polyfill-intl-normalizer/zipball/219aa369ceff116e673852dce47c3a41794c14bd",
"reference": "219aa369ceff116e673852dce47c3a41794c14bd",
"shasum": ""
},
"require": {
"php": ">=7.1"
},
"suggest": {
"ext-intl": "For best performance"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-main": "1.26-dev"
},
"thanks": {
"name": "symfony/polyfill",
"url": "https://github.com/symfony/polyfill"
}
},
"autoload": {
"files": [
"bootstrap.php"
],
"psr-4": {
"Symfony\\Polyfill\\Intl\\Normalizer\\": ""
},
"classmap": [
"Resources/stubs"
]
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Nicolas Grekas",
"email": "p@tchwork.com"
},
{
"name": "Symfony Community",
"homepage": "https://symfony.com/contributors"
}
],
"description": "Symfony polyfill for intl's Normalizer class and related functions",
"homepage": "https://symfony.com",
"keywords": [
"compatibility",
"intl",
"normalizer",
"polyfill",
"portable",
"shim"
],
"funding": [
{
"url": "https://symfony.com/sponsor",
"type": "custom"
},
{
"url": "https://github.com/fabpot",
"type": "github"
},
{
"url": "https://tidelift.com/funding/github/packagist/symfony/symfony",
"type": "tidelift"
}
],
"time": "2022-05-24T11:49:31+00:00"
},
{
"name": "symfony/polyfill-php72",
"version": "v1.26.0",
"source": {
"type": "git",
"url": "https://github.com/symfony/polyfill-php72.git",
"reference": "bf44a9fd41feaac72b074de600314a93e2ae78e2"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/symfony/polyfill-php72/zipball/bf44a9fd41feaac72b074de600314a93e2ae78e2",
"reference": "bf44a9fd41feaac72b074de600314a93e2ae78e2",
"shasum": ""
},
"require": {
"php": ">=7.1"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-main": "1.26-dev"
},
"thanks": {
"name": "symfony/polyfill",
"url": "https://github.com/symfony/polyfill"
}
},
"autoload": {
"files": [
"bootstrap.php"
],
"psr-4": {
"Symfony\\Polyfill\\Php72\\": ""
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Nicolas Grekas",
"email": "p@tchwork.com"
},
{
"name": "Symfony Community",
"homepage": "https://symfony.com/contributors"
}
],
"description": "Symfony polyfill backporting some PHP 7.2+ features to lower PHP versions",
"homepage": "https://symfony.com",
"keywords": [
"compatibility",
"polyfill",
"portable",
"shim"
],
"funding": [
{
"url": "https://symfony.com/sponsor",
"type": "custom"
},
{
"url": "https://github.com/fabpot",
"type": "github"
},
{
"url": "https://tidelift.com/funding/github/packagist/symfony/symfony",
"type": "tidelift"
}
],
"time": "2022-05-24T11:49:31+00:00"
},
{ {
"name": "willdurand/negotiation", "name": "willdurand/negotiation",
"version": "v2.3.1", "version": "v2.3.1",
@ -2167,12 +2702,12 @@
}, },
"type": "library", "type": "library",
"autoload": { "autoload": {
"psr-4": {
"DeepCopy\\": "src/DeepCopy/"
},
"files": [ "files": [
"src/DeepCopy/deep_copy.php" "src/DeepCopy/deep_copy.php"
] ],
"psr-4": {
"DeepCopy\\": "src/DeepCopy/"
}
}, },
"notification-url": "https://packagist.org/downloads/", "notification-url": "https://packagist.org/downloads/",
"license": [ "license": [
@ -3019,14 +3554,14 @@
}, },
"type": "library", "type": "library",
"autoload": { "autoload": {
"psr-4": {
"Sabre\\Event\\": "lib/"
},
"files": [ "files": [
"lib/coroutine.php", "lib/coroutine.php",
"lib/Loop/functions.php", "lib/Loop/functions.php",
"lib/Promise/functions.php" "lib/Promise/functions.php"
] ],
"psr-4": {
"Sabre\\Event\\": "lib/"
}
}, },
"notification-url": "https://packagist.org/downloads/", "notification-url": "https://packagist.org/downloads/",
"license": [ "license": [
@ -3728,12 +4263,12 @@
} }
}, },
"autoload": { "autoload": {
"psr-4": {
"Symfony\\Polyfill\\Ctype\\": ""
},
"files": [ "files": [
"bootstrap.php" "bootstrap.php"
] ],
"psr-4": {
"Symfony\\Polyfill\\Ctype\\": ""
}
}, },
"notification-url": "https://packagist.org/downloads/", "notification-url": "https://packagist.org/downloads/",
"license": [ "license": [

View file

@ -10,9 +10,9 @@ use Friendica\Directory\Utils\Network;
class Directory class Directory
{ {
/** /**
* @var \Atlas\Pdo\Connection * @var \GuzzleHttp\ClientInterface
*/ */
private $atlas; private $http;
/** /**
* @var \Friendica\Directory\Models\ProfilePollQueue * @var \Friendica\Directory\Models\ProfilePollQueue
*/ */
@ -30,12 +30,12 @@ class Directory
]; ];
public function __construct( public function __construct(
\Atlas\Pdo\Connection $atlas, \GuzzleHttp\ClientInterface $http,
\Friendica\Directory\Models\ProfilePollQueue $profilePollQueueModel, \Friendica\Directory\Models\ProfilePollQueue $profilePollQueueModel,
\Psr\Log\LoggerInterface $logger, \Psr\Log\LoggerInterface $logger,
array $settings) array $settings)
{ {
$this->atlas = $atlas; $this->http = $http;
$this->profilePollQueueModel = $profilePollQueueModel; $this->profilePollQueueModel = $profilePollQueueModel;
$this->logger = $logger; $this->logger = $logger;
$this->settings = array_merge($this->settings, $settings); $this->settings = array_merge($this->settings, $settings);
@ -82,35 +82,7 @@ class Directory
$path = '/sync/pull/since/' . $last_polled; $path = '/sync/pull/since/' . $last_polled;
} }
//Prepare the CURL call. $pull_data = $this->http->get($directory_url . $path, ['timeout' => max($this->settings['probe_timeout'], 1)])->getBody()->getContents();
$handle = curl_init();
$options = array(
//Timeouts
CURLOPT_TIMEOUT => max($this->settings['probe_timeout'], 1), //Minimum of 1 second timeout.
CURLOPT_CONNECTTIMEOUT => 1,
//Redirecting
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_MAXREDIRS => 8,
//SSL
CURLOPT_SSL_VERIFYPEER => true,
// CURLOPT_VERBOSE => true,
// CURLOPT_CERTINFO => true,
CURLOPT_SSL_VERIFYHOST => 2,
CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
//Basic request
CURLOPT_USERAGENT => Network::USER_AGENT,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_URL => $directory_url . $path
);
curl_setopt_array($handle, $options);
$this->logger->info('Pulling profiles from directory URL: ' . $directory_url . $path);
//Probe the site.
$pull_data = curl_exec($handle);
//Done with CURL now.
curl_close($handle);
$data = json_decode($pull_data, true); $data = json_decode($pull_data, true);

View file

@ -18,6 +18,11 @@ class Profile
*/ */
private $atlas; private $atlas;
/**
* @var \GuzzleHttp\ClientInterface
*/
private $http;
/** /**
* @var \Friendica\Directory\Models\Server * @var \Friendica\Directory\Models\Server
*/ */
@ -43,6 +48,7 @@ class Profile
public function __construct( public function __construct(
\Atlas\Pdo\Connection $atlas, \Atlas\Pdo\Connection $atlas,
\GuzzleHttp\ClientInterface $http,
\Friendica\Directory\Models\Server $serverModel, \Friendica\Directory\Models\Server $serverModel,
\Friendica\Directory\Models\Profile $profileModel, \Friendica\Directory\Models\Profile $profileModel,
\Psr\Log\LoggerInterface $logger, \Psr\Log\LoggerInterface $logger,
@ -50,6 +56,7 @@ class Profile
) )
{ {
$this->atlas = $atlas; $this->atlas = $atlas;
$this->http = $http;
$this->serverModel = $serverModel; $this->serverModel = $serverModel;
$this->profileModel = $profileModel; $this->profileModel = $profileModel;
$this->logger = $logger; $this->logger = $logger;
@ -122,25 +129,23 @@ class Profile
); );
} }
//Skip the profile scrape? $available = false;
$noscrape = $server['noscrape_url'];
$params = []; $params = [];
if ($noscrape) {
//Skip the profile scrape?
if ($server['noscrape_url']) {
$this->logger->debug('Calling ' . $server['noscrape_url'] . '/' . $username); $this->logger->debug('Calling ' . $server['noscrape_url'] . '/' . $username);
$params = \Friendica\Directory\Utils\Scrape::retrieveNoScrapeData($server['noscrape_url'] . '/' . $username); $params = \Friendica\Directory\Utils\Scrape::retrieveNoScrapeData($this->http, $server['noscrape_url'] . '/' . $username);
$noscrape = !!$params; //If the result was false, do a scrape after all. $available = !!$params; //If the result was false, do a scrape after all.
} }
$available = true; if (!$available) {
if ($noscrape) {
$available = Network::testURL($profile_uri);
$this->logger->debug('Testing ' . $profile_uri . ': ' . ($available?'Success':'Failure'));
} else {
$this->logger->notice('Parsing profile page ' . $profile_uri); $this->logger->notice('Parsing profile page ' . $profile_uri);
$params = \Friendica\Directory\Utils\Scrape::retrieveProfileData($profile_uri); $params = \Friendica\Directory\Utils\Scrape::retrieveProfileData($this->http, $profile_uri);
$params['language'] = $server['language']; $params['language'] = $server['language'];
$available = !empty($params['fn']);
} }
// Empty result is due to an offline site. // Empty result is due to an offline site.
@ -235,7 +240,7 @@ class Profile
'language' => $params['language'] ?? null, 'language' => $params['language'] ?? null,
'filled_fields'=> $filled_fields, 'filled_fields'=> $filled_fields,
'last_activity'=> $params['last-activity'] ?? null, 'last_activity'=> $params['last-activity'] ?? null,
'available' => $available, 'available' => [$available, \PDO::PARAM_BOOL],
]; ];
$this->logger->debug(var_export($values, true)); $this->logger->debug(var_export($values, true));
@ -301,7 +306,7 @@ class Profile
$status = false; $status = false;
if ($profile_id) { if ($profile_id) {
$img_str = \Friendica\Directory\Utils\Network::fetchURL($params['photo'], true); $img_str = $this->http->get($params['photo'])->getBody()->getContents();
$img = new \Friendica\Directory\Utils\Photo($img_str); $img = new \Friendica\Directory\Utils\Photo($img_str);
if ($img->getImage()) { if ($img->getImage()) {
$img->scaleImageSquare(80); $img->scaleImageSquare(80);

View file

@ -2,8 +2,9 @@
namespace Friendica\Directory\Pollers; namespace Friendica\Directory\Pollers;
use ByJG\Util\WebRequest; use GuzzleHttp\Psr7\Uri;
use Friendica\Directory\Utils\Network; use GuzzleHttp\Exception\RequestException;
use GuzzleHttp\TransferStats;
/** /**
* @author Hypolite Petovan <hypolite@mrpetovan.com> * @author Hypolite Petovan <hypolite@mrpetovan.com>
@ -14,6 +15,10 @@ class Server
* @var \Atlas\Pdo\Connection * @var \Atlas\Pdo\Connection
*/ */
private $atlas; private $atlas;
/**
* @var \GuzzleHttp\ClientInterface
*/
private $http;
/** /**
* @var \Friendica\Directory\Models\ProfilePollQueue * @var \Friendica\Directory\Models\ProfilePollQueue
*/ */
@ -41,6 +46,7 @@ class Server
public function __construct( public function __construct(
\Atlas\Pdo\Connection $atlas, \Atlas\Pdo\Connection $atlas,
\GuzzleHttp\ClientInterface $http,
\Friendica\Directory\Models\ProfilePollQueue $profilePollQueueModel, \Friendica\Directory\Models\ProfilePollQueue $profilePollQueueModel,
\Friendica\Directory\Models\Server $serverModel, \Friendica\Directory\Models\Server $serverModel,
\Psr\SimpleCache\CacheInterface $simplecache, \Psr\SimpleCache\CacheInterface $simplecache,
@ -48,6 +54,7 @@ class Server
array $settings) array $settings)
{ {
$this->atlas = $atlas; $this->atlas = $atlas;
$this->http = $http;
$this->profilePollQueueModel = $profilePollQueueModel; $this->profilePollQueueModel = $profilePollQueueModel;
$this->serverModel = $serverModel; $this->serverModel = $serverModel;
$this->simplecache = $simplecache; $this->simplecache = $simplecache;
@ -247,65 +254,41 @@ class Server
private function getProbeResult(string $base_url): array private function getProbeResult(string $base_url): array
{ {
//Prepare the CURL call. $curl_info = null;
$handle = curl_init();
$options = array(
//Timeouts
CURLOPT_TIMEOUT => max($this->settings['probe_timeout'], 1), //Minimum of 1 second timeout.
CURLOPT_CONNECTTIMEOUT => 1,
//Redirecting
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_MAXREDIRS => 8,
//SSL
CURLOPT_SSL_VERIFYPEER => true,
// CURLOPT_VERBOSE => true,
// CURLOPT_CERTINFO => true,
CURLOPT_SSL_VERIFYHOST => 2,
CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
//Basic request
CURLOPT_USERAGENT => Network::USER_AGENT,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_URL => $base_url . '/friendica/json'
);
curl_setopt_array($handle, $options);
$options = [
'timeout' => max($this->settings['probe_timeout'], 1),
'on_stats' => function (TransferStats $transferStats) use (&$curl_info) {
$curl_info = $transferStats->getHandlerStats();
}
];
$sslcert_issues = false;
try {
//Probe the site. //Probe the site.
$probe_start = microtime(true); $probe_start = microtime(true);
$probe_data = curl_exec($handle); $probe_data = $this->http->get($base_url . '/friendica/json', $options)->getBody()->getContents();
$probe_end = microtime(true); $probe_end = microtime(true);
} catch (RequestException $e) {
//Check for SSL problems. if (!in_array($e->getHandlerContext()['errno'], [
$curl_statuscode = curl_errno($handle);
$sslcert_issues = in_array($curl_statuscode, array(
60, //Could not authenticate certificate with known CA's 60, //Could not authenticate certificate with known CA's
83 //Issuer check failed 83 //Issuer check failed
)); ])) {
throw $e;
//When it's the certificate that doesn't work.
if ($sslcert_issues) {
//Probe again, without strict SSL.
$options[CURLOPT_SSL_VERIFYPEER] = false;
//Replace the handle.
curl_close($handle);
$handle = curl_init();
curl_setopt_array($handle, $options);
//Probe.
$probe_start = microtime(true);
$probe_data = curl_exec($handle);
$probe_end = microtime(true);
//Store new status.
$curl_statuscode = curl_errno($handle);
} }
//Gather more meta. $sslcert_issues = true;
$time = round(($probe_end - $probe_start) * 1000);
$curl_info = curl_getinfo($handle);
//Done with CURL now. //When it's the certificate that doesn't work, we probe again without strict SSL.
curl_close($handle); $options['verify'] = false;
$probe_start = microtime(true);
$probe_data = $this->http->get($base_url . '/friendica/json', $options)->getBody()->getContents();
$probe_end = microtime(true);
}
$time = round(($probe_end - $probe_start) * 1000);
try { try {
$data = json_decode($probe_data, true); $data = json_decode($probe_data, true);
@ -419,27 +402,26 @@ class Server
function discoverPoco($base_url): void function discoverPoco($base_url): void
{ {
$pocoUrl = $base_url . '/poco'; $uri = Uri::withQueryValues(new Uri($base_url . '/poco'), ['fields' => 'urls', 'count' => 1000]);
$webrequest = new WebRequest($pocoUrl); $response = $this->http->request('GET', $uri);
$pocoJsonData = $webrequest->get(['fields' => 'urls', 'count' => 1000]);
$this->logger->debug('WebRequest: ' . $webrequest->getLastFetchedUrl() . ' Status: ' . $webrequest->getLastStatus()); $this->logger->debug('WebRequest: ' . $uri . ' Status: ' . $response->getStatusCode());
if ($webrequest->getLastStatus() != 200) { if ($response->getStatusCode() != 200) {
$this->logger->info('Unsuccessful poco request: ' . $webrequest->getLastFetchedUrl()); $this->logger->info('Unsuccessful poco request: ' . $uri);
return; return;
} }
try { try {
$pocoFetchData = json_decode($pocoJsonData); $pocoFetchData = json_decode($response->getBody()->getContents());
} catch (\Throwable $e) { } catch (\Throwable $e) {
$this->logger->notice('Invalid JSON string for PoCo URL: ' . $webrequest->getLastFetchedUrl()); $this->logger->notice('Invalid JSON string for PoCo URL: ' . $uri);
return; return;
} }
if (!isset($pocoFetchData->entry)) { if (!isset($pocoFetchData->entry)) {
$this->logger->notice('Invalid JSON structure for PoCo URL: ' . $webrequest->getLastFetchedUrl()); $this->logger->notice('Invalid JSON structure for PoCo URL: ' . $uri);
return; return;
} }
@ -461,26 +443,28 @@ class Server
public function getSubscribeUrl($base_url, $profile) public function getSubscribeUrl($base_url, $profile)
{ {
$xrdRequest = new WebRequest($base_url . '/xrd'); $uri = Uri::withQueryValues(new Uri($base_url . '/xrd'), ['uri' => $profile]);
$xrdRequest->addRequestHeader('Accept', 'application/jrd+json');
$xrdJsonData = $xrdRequest->get(['uri' => $profile]);
$this->logger->debug('WebRequest: ' . $xrdRequest->getLastFetchedUrl() . ' Status: ' . $xrdRequest->getLastStatus()); $response = $this->http->request('GET', $uri, ['headers' => ['Accept' => 'application/jrd+json']]);
if ($xrdRequest->getLastStatus() != 200) { $xrdJsonData = $response->getBody()->getContents();
$this->logger->info('Unsuccessful XRD request: ' . $xrdRequest->getLastFetchedUrl());
$this->logger->debug('WebRequest: ' . $uri . ' Status: ' . $response->getStatusCode());
if ($response->getStatusCode() != 200) {
$this->logger->info('Unsuccessful XRD request: ' . $uri);
return null; return null;
} }
try { try {
$xrdData = json_decode($xrdJsonData); $xrdData = json_decode($xrdJsonData);
} catch (\Throwable $e) { } catch (\Throwable $e) {
$this->logger->notice('Invalid JSON string for XRD URL: ' . $xrdRequest->getLastFetchedUrl()); $this->logger->notice('Invalid JSON string for XRD URL: ' . $uri);
return null; return null;
} }
if (!isset($xrdData->links)) { if (!isset($xrdData->links)) {
$this->logger->notice('Invalid JSON structure for XRD URL: ' . $xrdRequest->getLastFetchedUrl()); $this->logger->notice('Invalid JSON structure for XRD URL: ' . $uri);
return null; return null;
} }

View file

@ -15,56 +15,6 @@ namespace Friendica\Directory\Utils;
*/ */
class Network class Network
{ {
const USER_AGENT = 'friendica-directory-probe-1.0';
public static function fetchURL(string $url, bool $binary = false, int $timeout = 20): string
{
$ch = curl_init($url);
if (!$ch) {
return false;
}
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_TIMEOUT, max($timeout, 1)); //Minimum of 1 second timeout.
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_MAXREDIRS, 8);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_USERAGENT, self::USER_AGENT);
if ($binary) {
curl_setopt($ch, CURLOPT_BINARYTRANSFER, 1);
}
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
$s = curl_exec($ch);
curl_close($ch);
return $s;
}
public static function testURL(string $url, int $timeout = 20): bool
{
$ch = curl_init($url);
if (!$ch) {
return false;
}
curl_setopt($ch, CURLOPT_HEADER , 0);
curl_setopt($ch, CURLOPT_TIMEOUT , max($timeout, 1)); //Minimum of 1 second timeout.
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_MAXREDIRS , 8);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($ch, CURLOPT_NOBODY , true);
curl_setopt($ch, CURLOPT_USERAGENT , self::USER_AGENT);
curl_exec($ch);
$responseCode = intval(curl_getinfo($ch, CURLINFO_RESPONSE_CODE));
$testSuccess = curl_errno($ch) === 0 && $responseCode < 400;
curl_close($ch);
return $testSuccess;
}
/** /**
* Check if a hostname is public and non-reserved * Check if a hostname is public and non-reserved
* *

View file

@ -2,6 +2,8 @@
namespace Friendica\Directory\Utils; namespace Friendica\Directory\Utils;
use GuzzleHttp\ClientInterface;
/** /**
* @author Hypolite Petovan <hypolite@mrpetovan.com> * @author Hypolite Petovan <hypolite@mrpetovan.com>
*/ */
@ -12,10 +14,10 @@ class Scrape
* @param string $url * @param string $url
* @return array|false * @return array|false
*/ */
public static function retrieveNoScrapeData(string $url) public static function retrieveNoScrapeData(ClientInterface $http, string $url)
{ {
$submit_noscrape_start = microtime(true); $submit_noscrape_start = microtime(true);
$data = Network::fetchURL($url); $data = $http->get($url)->getBody()->getContents();
$submit_noscrape_request_end = microtime(true); $submit_noscrape_request_end = microtime(true);
if (empty($data)) { if (empty($data)) {
@ -42,7 +44,7 @@ class Scrape
return $params; return $params;
} }
public static function retrieveProfileData(string $url, int $max_nodes = 3500): array public static function retrieveProfileData(ClientInterface $http, string $url, int $max_nodes = 3500): array
{ {
$minNodes = 100; //Lets do at least 100 nodes per type. $minNodes = 100; //Lets do at least 100 nodes per type.
@ -56,7 +58,7 @@ class Scrape
$scrape_start = microtime(true); $scrape_start = microtime(true);
$params = []; $params = [];
$html = Network::fetchURL($url, false, $timeout); $html = $http->get($url, ['timeout' => $timeout])->getBody()->getContents();;
$scrape_fetch_end = microtime(true); $scrape_fetch_end = microtime(true);

View file

@ -89,6 +89,16 @@ $container['migration'] = function (ContainerInterface $c): ByJG\DbMigration\Mig
return $migration; return $migration;
}; };
$container['http'] = function (ContainerInterface $c): GuzzleHttp\ClientInterface {
$version = file_get_contents(__DIR__ . '/../VERSION');
if (!$version || !preg_match('/^\s*\d\.\d\.\d\s*$/', $version)) {
$version = '0.0.0';
}
return new GuzzleHttp\Client(['timeout' => 20, 'headers' => ['User-Agent' => 'FriendicaDirectory/' . trim($version) . ' ' . \GuzzleHttp\default_user_agent()]]);
};
// Internal Dependency Injection // Internal Dependency Injection
$container[\Friendica\Directory\Models\Profile::class] = function (ContainerInterface $c): Friendica\Directory\Models\Profile { $container[\Friendica\Directory\Models\Profile::class] = function (ContainerInterface $c): Friendica\Directory\Models\Profile {
@ -106,7 +116,7 @@ $container[\Friendica\Directory\Models\Server::class] = function (ContainerInter
$container[\Friendica\Directory\Pollers\Directory::class] = function (ContainerInterface $c): Friendica\Directory\Pollers\Directory { $container[\Friendica\Directory\Pollers\Directory::class] = function (ContainerInterface $c): Friendica\Directory\Pollers\Directory {
$settings = $c->get('settings')['poller']; $settings = $c->get('settings')['poller'];
return new Friendica\Directory\Pollers\Directory( return new Friendica\Directory\Pollers\Directory(
$c->get('atlas'), $c->get('http'),
$c->get(\Friendica\Directory\Models\ProfilePollQueue::class), $c->get(\Friendica\Directory\Models\ProfilePollQueue::class),
$c->get('logger'), $c->get('logger'),
$settings ?: [] $settings ?: []
@ -117,6 +127,7 @@ $container[\Friendica\Directory\Pollers\Profile::class] = function (ContainerInt
$settings = $c->get('settings')['poller']; $settings = $c->get('settings')['poller'];
return new Friendica\Directory\Pollers\Profile( return new Friendica\Directory\Pollers\Profile(
$c->get('atlas'), $c->get('atlas'),
$c->get('http'),
$c->get(\Friendica\Directory\Models\Server::class), $c->get(\Friendica\Directory\Models\Server::class),
$c->get(\Friendica\Directory\Models\Profile::class), $c->get(\Friendica\Directory\Models\Profile::class),
$c->get('logger'), $c->get('logger'),
@ -128,6 +139,7 @@ $container[\Friendica\Directory\Pollers\Server::class] = function (ContainerInte
$settings = $c->get('settings')['poller']; $settings = $c->get('settings')['poller'];
return new Friendica\Directory\Pollers\Server( return new Friendica\Directory\Pollers\Server(
$c->get('atlas'), $c->get('atlas'),
$c->get('http'),
$c->get(\Friendica\Directory\Models\ProfilePollQueue::class), $c->get(\Friendica\Directory\Models\ProfilePollQueue::class),
$c->get(\Friendica\Directory\Models\Server::class), $c->get(\Friendica\Directory\Models\Server::class),
$c->get('simplecache'), $c->get('simplecache'),