Replace direct curl uses with Guzzle HTTP client
- Add http dependency with custom User Agent
- Simplify profile poll to remove the second profile call to check availability
- Remove obsolete Network::fetchURL and Network::testURL
This commit is contained in:
parent
5829108c0e
commit
0ee5bf55b1
6 changed files with 76 additions and 152 deletions
|
@ -10,9 +10,9 @@ use Friendica\Directory\Utils\Network;
|
|||
class Directory
|
||||
{
|
||||
/**
|
||||
* @var \Atlas\Pdo\Connection
|
||||
* @var \GuzzleHttp\ClientInterface
|
||||
*/
|
||||
private $atlas;
|
||||
private $http;
|
||||
/**
|
||||
* @var \Friendica\Directory\Models\ProfilePollQueue
|
||||
*/
|
||||
|
@ -30,12 +30,12 @@ class Directory
|
|||
];
|
||||
|
||||
public function __construct(
|
||||
\Atlas\Pdo\Connection $atlas,
|
||||
\GuzzleHttp\ClientInterface $http,
|
||||
\Friendica\Directory\Models\ProfilePollQueue $profilePollQueueModel,
|
||||
\Psr\Log\LoggerInterface $logger,
|
||||
array $settings)
|
||||
{
|
||||
$this->atlas = $atlas;
|
||||
$this->http = $http;
|
||||
$this->profilePollQueueModel = $profilePollQueueModel;
|
||||
$this->logger = $logger;
|
||||
$this->settings = array_merge($this->settings, $settings);
|
||||
|
@ -82,35 +82,7 @@ class Directory
|
|||
$path = '/sync/pull/since/' . $last_polled;
|
||||
}
|
||||
|
||||
//Prepare the CURL call.
|
||||
$handle = curl_init();
|
||||
$options = array(
|
||||
//Timeouts
|
||||
CURLOPT_TIMEOUT => max($this->settings['probe_timeout'], 1), //Minimum of 1 second timeout.
|
||||
CURLOPT_CONNECTTIMEOUT => 1,
|
||||
//Redirecting
|
||||
CURLOPT_FOLLOWLOCATION => true,
|
||||
CURLOPT_MAXREDIRS => 8,
|
||||
//SSL
|
||||
CURLOPT_SSL_VERIFYPEER => true,
|
||||
// CURLOPT_VERBOSE => true,
|
||||
// CURLOPT_CERTINFO => true,
|
||||
CURLOPT_SSL_VERIFYHOST => 2,
|
||||
CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
|
||||
//Basic request
|
||||
CURLOPT_USERAGENT => Network::USER_AGENT,
|
||||
CURLOPT_RETURNTRANSFER => true,
|
||||
CURLOPT_URL => $directory_url . $path
|
||||
);
|
||||
curl_setopt_array($handle, $options);
|
||||
|
||||
$this->logger->info('Pulling profiles from directory URL: ' . $directory_url . $path);
|
||||
|
||||
//Probe the site.
|
||||
$pull_data = curl_exec($handle);
|
||||
|
||||
//Done with CURL now.
|
||||
curl_close($handle);
|
||||
$pull_data = $this->http->get($directory_url . $path, ['timeout' => max($this->settings['probe_timeout'], 1)])->getBody()->getContents();
|
||||
|
||||
$data = json_decode($pull_data, true);
|
||||
|
||||
|
|
|
@ -18,6 +18,11 @@ class Profile
|
|||
*/
|
||||
private $atlas;
|
||||
|
||||
/**
|
||||
* @var \GuzzleHttp\ClientInterface
|
||||
*/
|
||||
private $http;
|
||||
|
||||
/**
|
||||
* @var \Friendica\Directory\Models\Server
|
||||
*/
|
||||
|
@ -43,6 +48,7 @@ class Profile
|
|||
|
||||
public function __construct(
|
||||
\Atlas\Pdo\Connection $atlas,
|
||||
\GuzzleHttp\ClientInterface $http,
|
||||
\Friendica\Directory\Models\Server $serverModel,
|
||||
\Friendica\Directory\Models\Profile $profileModel,
|
||||
\Psr\Log\LoggerInterface $logger,
|
||||
|
@ -50,6 +56,7 @@ class Profile
|
|||
)
|
||||
{
|
||||
$this->atlas = $atlas;
|
||||
$this->http = $http;
|
||||
$this->serverModel = $serverModel;
|
||||
$this->profileModel = $profileModel;
|
||||
$this->logger = $logger;
|
||||
|
@ -122,25 +129,23 @@ class Profile
|
|||
);
|
||||
}
|
||||
|
||||
//Skip the profile scrape?
|
||||
$noscrape = $server['noscrape_url'];
|
||||
$available = false;
|
||||
|
||||
$params = [];
|
||||
if ($noscrape) {
|
||||
|
||||
//Skip the profile scrape?
|
||||
if ($server['noscrape_url']) {
|
||||
$this->logger->debug('Calling ' . $server['noscrape_url'] . '/' . $username);
|
||||
$params = \Friendica\Directory\Utils\Scrape::retrieveNoScrapeData($server['noscrape_url'] . '/' . $username);
|
||||
$noscrape = !!$params; //If the result was false, do a scrape after all.
|
||||
$params = \Friendica\Directory\Utils\Scrape::retrieveNoScrapeData($this->http, $server['noscrape_url'] . '/' . $username);
|
||||
$available = !!$params; //If the result was false, do a scrape after all.
|
||||
}
|
||||
|
||||
$available = true;
|
||||
|
||||
if ($noscrape) {
|
||||
$available = Network::testURL($profile_uri);
|
||||
$this->logger->debug('Testing ' . $profile_uri . ': ' . ($available?'Success':'Failure'));
|
||||
} else {
|
||||
if (!$available) {
|
||||
$this->logger->notice('Parsing profile page ' . $profile_uri);
|
||||
$params = \Friendica\Directory\Utils\Scrape::retrieveProfileData($profile_uri);
|
||||
$params = \Friendica\Directory\Utils\Scrape::retrieveProfileData($this->http, $profile_uri);
|
||||
$params['language'] = $server['language'];
|
||||
|
||||
$available = !empty($params['fn']);
|
||||
}
|
||||
|
||||
// Empty result is due to an offline site.
|
||||
|
@ -301,7 +306,7 @@ class Profile
|
|||
$status = false;
|
||||
|
||||
if ($profile_id) {
|
||||
$img_str = \Friendica\Directory\Utils\Network::fetchURL($params['photo'], true);
|
||||
$img_str = $this->http->get($params['photo'])->getBody()->getContents();
|
||||
$img = new \Friendica\Directory\Utils\Photo($img_str);
|
||||
if ($img->getImage()) {
|
||||
$img->scaleImageSquare(80);
|
||||
|
|
|
@ -3,7 +3,8 @@
|
|||
namespace Friendica\Directory\Pollers;
|
||||
|
||||
use ByJG\Util\WebRequest;
|
||||
use Friendica\Directory\Utils\Network;
|
||||
use GuzzleHttp\Exception\RequestException;
|
||||
use GuzzleHttp\TransferStats;
|
||||
|
||||
/**
|
||||
* @author Hypolite Petovan <hypolite@mrpetovan.com>
|
||||
|
@ -14,6 +15,10 @@ class Server
|
|||
* @var \Atlas\Pdo\Connection
|
||||
*/
|
||||
private $atlas;
|
||||
/**
|
||||
* @var \GuzzleHttp\ClientInterface
|
||||
*/
|
||||
private $http;
|
||||
/**
|
||||
* @var \Friendica\Directory\Models\ProfilePollQueue
|
||||
*/
|
||||
|
@ -41,6 +46,7 @@ class Server
|
|||
|
||||
public function __construct(
|
||||
\Atlas\Pdo\Connection $atlas,
|
||||
\GuzzleHttp\ClientInterface $http,
|
||||
\Friendica\Directory\Models\ProfilePollQueue $profilePollQueueModel,
|
||||
\Friendica\Directory\Models\Server $serverModel,
|
||||
\Psr\SimpleCache\CacheInterface $simplecache,
|
||||
|
@ -48,6 +54,7 @@ class Server
|
|||
array $settings)
|
||||
{
|
||||
$this->atlas = $atlas;
|
||||
$this->http = $http;
|
||||
$this->profilePollQueueModel = $profilePollQueueModel;
|
||||
$this->serverModel = $serverModel;
|
||||
$this->simplecache = $simplecache;
|
||||
|
@ -247,65 +254,41 @@ class Server
|
|||
|
||||
private function getProbeResult(string $base_url): array
|
||||
{
|
||||
//Prepare the CURL call.
|
||||
$handle = curl_init();
|
||||
$options = array(
|
||||
//Timeouts
|
||||
CURLOPT_TIMEOUT => max($this->settings['probe_timeout'], 1), //Minimum of 1 second timeout.
|
||||
CURLOPT_CONNECTTIMEOUT => 1,
|
||||
//Redirecting
|
||||
CURLOPT_FOLLOWLOCATION => true,
|
||||
CURLOPT_MAXREDIRS => 8,
|
||||
//SSL
|
||||
CURLOPT_SSL_VERIFYPEER => true,
|
||||
// CURLOPT_VERBOSE => true,
|
||||
// CURLOPT_CERTINFO => true,
|
||||
CURLOPT_SSL_VERIFYHOST => 2,
|
||||
CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
|
||||
//Basic request
|
||||
CURLOPT_USERAGENT => Network::USER_AGENT,
|
||||
CURLOPT_RETURNTRANSFER => true,
|
||||
CURLOPT_URL => $base_url . '/friendica/json'
|
||||
);
|
||||
curl_setopt_array($handle, $options);
|
||||
$curl_info = null;
|
||||
|
||||
//Probe the site.
|
||||
$probe_start = microtime(true);
|
||||
$probe_data = curl_exec($handle);
|
||||
$probe_end = microtime(true);
|
||||
$options = [
|
||||
'timeout' => max($this->settings['probe_timeout'], 1),
|
||||
'on_stats' => function (TransferStats $transferStats) use (&$curl_info) {
|
||||
$curl_info = $transferStats->getHandlerStats();
|
||||
}
|
||||
];
|
||||
|
||||
//Check for SSL problems.
|
||||
$curl_statuscode = curl_errno($handle);
|
||||
$sslcert_issues = in_array($curl_statuscode, array(
|
||||
60, //Could not authenticate certificate with known CA's
|
||||
83 //Issuer check failed
|
||||
));
|
||||
$sslcert_issues = false;
|
||||
|
||||
//When it's the certificate that doesn't work.
|
||||
if ($sslcert_issues) {
|
||||
//Probe again, without strict SSL.
|
||||
$options[CURLOPT_SSL_VERIFYPEER] = false;
|
||||
|
||||
//Replace the handle.
|
||||
curl_close($handle);
|
||||
$handle = curl_init();
|
||||
curl_setopt_array($handle, $options);
|
||||
|
||||
//Probe.
|
||||
try {
|
||||
//Probe the site.
|
||||
$probe_start = microtime(true);
|
||||
$probe_data = curl_exec($handle);
|
||||
$probe_data = $this->http->get($base_url . '/friendica/json', $options)->getBody()->getContents();
|
||||
$probe_end = microtime(true);
|
||||
} catch (RequestException $e) {
|
||||
if (!in_array($e->getHandlerContext()['errno'], [
|
||||
60, //Could not authenticate certificate with known CA's
|
||||
83 //Issuer check failed
|
||||
])) {
|
||||
throw $e;
|
||||
}
|
||||
|
||||
//Store new status.
|
||||
$curl_statuscode = curl_errno($handle);
|
||||
$sslcert_issues = true;
|
||||
|
||||
//When it's the certificate that doesn't work, we probe again without strict SSL.
|
||||
$options['verify'] = false;
|
||||
|
||||
$probe_start = microtime(true);
|
||||
$probe_data = $this->http->get($base_url . '/friendica/json', $options)->getBody()->getContents();
|
||||
$probe_end = microtime(true);
|
||||
}
|
||||
|
||||
//Gather more meta.
|
||||
$time = round(($probe_end - $probe_start) * 1000);
|
||||
$curl_info = curl_getinfo($handle);
|
||||
|
||||
//Done with CURL now.
|
||||
curl_close($handle);
|
||||
|
||||
try {
|
||||
$data = json_decode($probe_data, true);
|
||||
|
|
|
@ -15,56 +15,6 @@ namespace Friendica\Directory\Utils;
|
|||
*/
|
||||
class Network
|
||||
{
|
||||
const USER_AGENT = 'friendica-directory-probe-1.0';
|
||||
|
||||
public static function fetchURL(string $url, bool $binary = false, int $timeout = 20): string
|
||||
{
|
||||
$ch = curl_init($url);
|
||||
if (!$ch) {
|
||||
return false;
|
||||
}
|
||||
|
||||
curl_setopt($ch, CURLOPT_HEADER, 0);
|
||||
curl_setopt($ch, CURLOPT_TIMEOUT, max($timeout, 1)); //Minimum of 1 second timeout.
|
||||
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
|
||||
curl_setopt($ch, CURLOPT_MAXREDIRS, 8);
|
||||
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
||||
curl_setopt($ch, CURLOPT_USERAGENT, self::USER_AGENT);
|
||||
if ($binary) {
|
||||
curl_setopt($ch, CURLOPT_BINARYTRANSFER, 1);
|
||||
}
|
||||
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
|
||||
$s = curl_exec($ch);
|
||||
curl_close($ch);
|
||||
return $s;
|
||||
}
|
||||
|
||||
public static function testURL(string $url, int $timeout = 20): bool
|
||||
{
|
||||
$ch = curl_init($url);
|
||||
if (!$ch) {
|
||||
return false;
|
||||
}
|
||||
|
||||
curl_setopt($ch, CURLOPT_HEADER , 0);
|
||||
curl_setopt($ch, CURLOPT_TIMEOUT , max($timeout, 1)); //Minimum of 1 second timeout.
|
||||
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
|
||||
curl_setopt($ch, CURLOPT_MAXREDIRS , 8);
|
||||
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
|
||||
curl_setopt($ch, CURLOPT_NOBODY , true);
|
||||
curl_setopt($ch, CURLOPT_USERAGENT , self::USER_AGENT);
|
||||
|
||||
curl_exec($ch);
|
||||
|
||||
$responseCode = intval(curl_getinfo($ch, CURLINFO_RESPONSE_CODE));
|
||||
|
||||
$testSuccess = curl_errno($ch) === 0 && $responseCode < 400;
|
||||
|
||||
curl_close($ch);
|
||||
|
||||
return $testSuccess;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a hostname is public and non-reserved
|
||||
*
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
namespace Friendica\Directory\Utils;
|
||||
|
||||
use GuzzleHttp\ClientInterface;
|
||||
|
||||
/**
|
||||
* @author Hypolite Petovan <hypolite@mrpetovan.com>
|
||||
*/
|
||||
|
@ -12,10 +14,10 @@ class Scrape
|
|||
* @param string $url
|
||||
* @return array|false
|
||||
*/
|
||||
public static function retrieveNoScrapeData(string $url)
|
||||
public static function retrieveNoScrapeData(ClientInterface $http, string $url)
|
||||
{
|
||||
$submit_noscrape_start = microtime(true);
|
||||
$data = Network::fetchURL($url);
|
||||
$data = $http->get($url)->getBody()->getContents();
|
||||
$submit_noscrape_request_end = microtime(true);
|
||||
|
||||
if (empty($data)) {
|
||||
|
@ -42,7 +44,7 @@ class Scrape
|
|||
return $params;
|
||||
}
|
||||
|
||||
public static function retrieveProfileData(string $url, int $max_nodes = 3500): array
|
||||
public static function retrieveProfileData(ClientInterface $http, string $url, int $max_nodes = 3500): array
|
||||
{
|
||||
|
||||
$minNodes = 100; //Lets do at least 100 nodes per type.
|
||||
|
@ -56,7 +58,7 @@ class Scrape
|
|||
$scrape_start = microtime(true);
|
||||
|
||||
$params = [];
|
||||
$html = Network::fetchURL($url, false, $timeout);
|
||||
$html = $http->get($url, ['timeout' => $timeout])->getBody()->getContents();;
|
||||
|
||||
$scrape_fetch_end = microtime(true);
|
||||
|
||||
|
|
|
@ -89,6 +89,16 @@ $container['migration'] = function (ContainerInterface $c): ByJG\DbMigration\Mig
|
|||
return $migration;
|
||||
};
|
||||
|
||||
/**
 * Shared HTTP client used by the pollers and scrapers.
 *
 * Builds a Guzzle client with a 20 second default timeout and a User-Agent
 * of the form "FriendicaDirectory/<version> <Guzzle default user agent>",
 * where <version> is read from the VERSION file one directory above this one.
 */
$container['http'] = function (ContainerInterface $c): GuzzleHttp\ClientInterface {
    $versionFile = __DIR__ . '/../VERSION';

    // Guard the read so a missing or unreadable VERSION file falls through to
    // the default below instead of raising an E_WARNING from file_get_contents().
    $version = is_readable($versionFile) ? file_get_contents($versionFile) : false;

    // Accept any dotted three-part numeric version, including multi-digit
    // components such as "1.10.0"; anything else is reported as 0.0.0.
    if (!$version || !preg_match('/^\s*\d+\.\d+\.\d+\s*$/', $version)) {
        $version = '0.0.0';
    }

    return new GuzzleHttp\Client([
        'timeout' => 20,
        'headers' => [
            'User-Agent' => 'FriendicaDirectory/' . trim($version) . ' ' . \GuzzleHttp\default_user_agent(),
        ],
    ]);
};
|
||||
|
||||
// Internal Dependency Injection
|
||||
|
||||
$container[\Friendica\Directory\Models\Profile::class] = function (ContainerInterface $c): Friendica\Directory\Models\Profile {
|
||||
|
@ -106,7 +116,7 @@ $container[\Friendica\Directory\Models\Server::class] = function (ContainerInter
|
|||
$container[\Friendica\Directory\Pollers\Directory::class] = function (ContainerInterface $c): Friendica\Directory\Pollers\Directory {
|
||||
$settings = $c->get('settings')['poller'];
|
||||
return new Friendica\Directory\Pollers\Directory(
|
||||
$c->get('atlas'),
|
||||
$c->get('http'),
|
||||
$c->get(\Friendica\Directory\Models\ProfilePollQueue::class),
|
||||
$c->get('logger'),
|
||||
$settings ?: []
|
||||
|
@ -117,6 +127,7 @@ $container[\Friendica\Directory\Pollers\Profile::class] = function (ContainerInt
|
|||
$settings = $c->get('settings')['poller'];
|
||||
return new Friendica\Directory\Pollers\Profile(
|
||||
$c->get('atlas'),
|
||||
$c->get('http'),
|
||||
$c->get(\Friendica\Directory\Models\Server::class),
|
||||
$c->get(\Friendica\Directory\Models\Profile::class),
|
||||
$c->get('logger'),
|
||||
|
@ -128,6 +139,7 @@ $container[\Friendica\Directory\Pollers\Server::class] = function (ContainerInte
|
|||
$settings = $c->get('settings')['poller'];
|
||||
return new Friendica\Directory\Pollers\Server(
|
||||
$c->get('atlas'),
|
||||
$c->get('http'),
|
||||
$c->get(\Friendica\Directory\Models\ProfilePollQueue::class),
|
||||
$c->get(\Friendica\Directory\Models\Server::class),
|
||||
$c->get('simplecache'),
|
||||
|
|
Loading…
Reference in a new issue