Move "Network::finalUrl" to "HTTPRequest" class
This commit is contained in:
		
					parent
					
						
							
								57587efe58
							
						
					
				
			
			
				commit
				
					
						60e18736b0
					
				
			
		
					 3 changed files with 123 additions and 124 deletions
				
			
		| 
						 | 
				
			
			@ -21,10 +21,13 @@
 | 
			
		|||
 | 
			
		||||
namespace Friendica\Network;
 | 
			
		||||
 | 
			
		||||
use DOMDocument;
 | 
			
		||||
use DomXPath;
 | 
			
		||||
use Friendica\App;
 | 
			
		||||
use Friendica\Core\Config\IConfig;
 | 
			
		||||
use Friendica\Core\Logger;
 | 
			
		||||
use Friendica\Core\System;
 | 
			
		||||
use Friendica\DI;
 | 
			
		||||
use Friendica\Util\Network;
 | 
			
		||||
use Friendica\Util\Profiler;
 | 
			
		||||
use Psr\Log\LoggerInterface;
 | 
			
		||||
| 
						 | 
				
			
			@ -323,6 +326,124 @@ class HTTPRequest
 | 
			
		|||
		return $curlResponse;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/**
 | 
			
		||||
	 * Returns the original URL of the provided URL
 | 
			
		||||
	 *
 | 
			
		||||
	 * This function strips tracking query params and follows redirections, either
 | 
			
		||||
	 * through HTTP code or meta refresh tags. Stops after 10 redirections.
 | 
			
		||||
	 *
 | 
			
		||||
	 * @todo  Remove the $fetchbody parameter that generates an extraneous HEAD request
 | 
			
		||||
	 *
 | 
			
		||||
	 * @see   ParseUrl::getSiteinfo
 | 
			
		||||
	 *
 | 
			
		||||
	 * @param string $url       A user-submitted URL
 | 
			
		||||
	 * @param int    $depth     The current redirection recursion level (internal)
 | 
			
		||||
	 * @param bool   $fetchbody Wether to fetch the body or not after the HEAD requests
 | 
			
		||||
	 * @return string A canonical URL
 | 
			
		||||
	 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
 | 
			
		||||
	 */
 | 
			
		||||
	public static function finalUrl(string $url, int $depth = 1, bool $fetchbody = false)
 | 
			
		||||
	{
 | 
			
		||||
		$url = Network::stripTrackingQueryParams($url);
 | 
			
		||||
 | 
			
		||||
		if ($depth > 10) {
 | 
			
		||||
			return $url;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		$url = trim($url, "'");
 | 
			
		||||
 | 
			
		||||
		$stamp1 = microtime(true);
 | 
			
		||||
 | 
			
		||||
		$ch = curl_init();
 | 
			
		||||
		curl_setopt($ch, CURLOPT_URL, $url);
 | 
			
		||||
		curl_setopt($ch, CURLOPT_HEADER, 1);
 | 
			
		||||
		curl_setopt($ch, CURLOPT_NOBODY, 1);
 | 
			
		||||
		curl_setopt($ch, CURLOPT_TIMEOUT, 10);
 | 
			
		||||
		curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
 | 
			
		||||
		curl_setopt($ch, CURLOPT_USERAGENT, DI::httpRequest()->getUserAgent());
 | 
			
		||||
 | 
			
		||||
		curl_exec($ch);
 | 
			
		||||
		$curl_info = @curl_getinfo($ch);
 | 
			
		||||
		$http_code = $curl_info['http_code'];
 | 
			
		||||
		curl_close($ch);
 | 
			
		||||
 | 
			
		||||
		DI::profiler()->saveTimestamp($stamp1, "network", System::callstack());
 | 
			
		||||
 | 
			
		||||
		if ($http_code == 0) {
 | 
			
		||||
			return $url;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if (in_array($http_code, ['301', '302'])) {
 | 
			
		||||
			if (!empty($curl_info['redirect_url'])) {
 | 
			
		||||
				return self::finalUrl($curl_info['redirect_url'], ++$depth, $fetchbody);
 | 
			
		||||
			} elseif (!empty($curl_info['location'])) {
 | 
			
		||||
				return self::finalUrl($curl_info['location'], ++$depth, $fetchbody);
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// Check for redirects in the meta elements of the body if there are no redirects in the header.
 | 
			
		||||
		if (!$fetchbody) {
 | 
			
		||||
			return self::finalUrl($url, ++$depth, true);
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// if the file is too large then exit
 | 
			
		||||
		if ($curl_info["download_content_length"] > 1000000) {
 | 
			
		||||
			return $url;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// if it isn't a HTML file then exit
 | 
			
		||||
		if (!empty($curl_info["content_type"]) && !strstr(strtolower($curl_info["content_type"]), "html")) {
 | 
			
		||||
			return $url;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		$stamp1 = microtime(true);
 | 
			
		||||
 | 
			
		||||
		$ch = curl_init();
 | 
			
		||||
		curl_setopt($ch, CURLOPT_URL, $url);
 | 
			
		||||
		curl_setopt($ch, CURLOPT_HEADER, 0);
 | 
			
		||||
		curl_setopt($ch, CURLOPT_NOBODY, 0);
 | 
			
		||||
		curl_setopt($ch, CURLOPT_TIMEOUT, 10);
 | 
			
		||||
		curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
 | 
			
		||||
		curl_setopt($ch, CURLOPT_USERAGENT, DI::httpRequest()->getUserAgent());
 | 
			
		||||
 | 
			
		||||
		$body = curl_exec($ch);
 | 
			
		||||
		curl_close($ch);
 | 
			
		||||
 | 
			
		||||
		DI::profiler()->saveTimestamp($stamp1, "network", System::callstack());
 | 
			
		||||
 | 
			
		||||
		if (trim($body) == "") {
 | 
			
		||||
			return $url;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// Check for redirect in meta elements
 | 
			
		||||
		$doc = new DOMDocument();
 | 
			
		||||
		@$doc->loadHTML($body);
 | 
			
		||||
 | 
			
		||||
		$xpath = new DomXPath($doc);
 | 
			
		||||
 | 
			
		||||
		$list = $xpath->query("//meta[@content]");
 | 
			
		||||
		foreach ($list as $node) {
 | 
			
		||||
			$attr = [];
 | 
			
		||||
			if ($node->attributes->length) {
 | 
			
		||||
				foreach ($node->attributes as $attribute) {
 | 
			
		||||
					$attr[$attribute->name] = $attribute->value;
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			if (@$attr["http-equiv"] == 'refresh') {
 | 
			
		||||
				$path = $attr["content"];
 | 
			
		||||
				$pathinfo = explode(";", $path);
 | 
			
		||||
				foreach ($pathinfo as $value) {
 | 
			
		||||
					if (substr(strtolower($value), 0, 4) == "url=") {
 | 
			
		||||
						return self::finalUrl(substr($value, 4), ++$depth);
 | 
			
		||||
					}
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		return $url;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/**
 | 
			
		||||
	 * Curl wrapper
 | 
			
		||||
	 *
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -35,6 +35,7 @@ use Friendica\Model\Contact;
 | 
			
		|||
use Friendica\Model\Item;
 | 
			
		||||
use Friendica\Model\Tag;
 | 
			
		||||
use Friendica\Model\User;
 | 
			
		||||
use Friendica\Network\HTTPRequest;
 | 
			
		||||
use Friendica\Util\DateTimeFormat;
 | 
			
		||||
use Friendica\Util\Network;
 | 
			
		||||
use Friendica\Util\ParseUrl;
 | 
			
		||||
| 
						 | 
				
			
			@ -350,7 +351,7 @@ class Feed
 | 
			
		|||
 | 
			
		||||
			$orig_plink = $item["plink"];
 | 
			
		||||
 | 
			
		||||
			$item["plink"] = Network::finalUrl($item["plink"]);
 | 
			
		||||
			$item["plink"] = HTTPRequest::finalUrl($item["plink"]);
 | 
			
		||||
 | 
			
		||||
			$item["parent-uri"] = $item["uri"];
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -21,11 +21,8 @@
 | 
			
		|||
 | 
			
		||||
namespace Friendica\Util;
 | 
			
		||||
 | 
			
		||||
use DOMDocument;
 | 
			
		||||
use DomXPath;
 | 
			
		||||
use Friendica\Core\Hook;
 | 
			
		||||
use Friendica\Core\Logger;
 | 
			
		||||
use Friendica\Core\System;
 | 
			
		||||
use Friendica\DI;
 | 
			
		||||
 | 
			
		||||
class Network
 | 
			
		||||
| 
						 | 
				
			
			@ -314,126 +311,6 @@ class Network
 | 
			
		|||
		return self::unparseURL($parts);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/**
 | 
			
		||||
	 * Returns the original URL of the provided URL
 | 
			
		||||
	 *
 | 
			
		||||
	 * This function strips tracking query params and follows redirections, either
 | 
			
		||||
	 * through HTTP code or meta refresh tags. Stops after 10 redirections.
 | 
			
		||||
	 *
 | 
			
		||||
	 * @todo  Remove the $fetchbody parameter that generates an extraneous HEAD request
 | 
			
		||||
	 *
 | 
			
		||||
	 * @see   ParseUrl::getSiteinfo
 | 
			
		||||
	 *
 | 
			
		||||
	 * @param string $url       A user-submitted URL
 | 
			
		||||
	 * @param int    $depth     The current redirection recursion level (internal)
 | 
			
		||||
	 * @param bool   $fetchbody Wether to fetch the body or not after the HEAD requests
 | 
			
		||||
	 * @return string A canonical URL
 | 
			
		||||
	 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
 | 
			
		||||
	 */
 | 
			
		||||
	public static function finalUrl(string $url, int $depth = 1, bool $fetchbody = false)
 | 
			
		||||
	{
 | 
			
		||||
		$a = DI::app();
 | 
			
		||||
 | 
			
		||||
		$url = self::stripTrackingQueryParams($url);
 | 
			
		||||
 | 
			
		||||
		if ($depth > 10) {
 | 
			
		||||
			return $url;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		$url = trim($url, "'");
 | 
			
		||||
 | 
			
		||||
		$stamp1 = microtime(true);
 | 
			
		||||
 | 
			
		||||
		$ch = curl_init();
 | 
			
		||||
		curl_setopt($ch, CURLOPT_URL, $url);
 | 
			
		||||
		curl_setopt($ch, CURLOPT_HEADER, 1);
 | 
			
		||||
		curl_setopt($ch, CURLOPT_NOBODY, 1);
 | 
			
		||||
		curl_setopt($ch, CURLOPT_TIMEOUT, 10);
 | 
			
		||||
		curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
 | 
			
		||||
		curl_setopt($ch, CURLOPT_USERAGENT, DI::httpRequest()->getUserAgent());
 | 
			
		||||
 | 
			
		||||
		curl_exec($ch);
 | 
			
		||||
		$curl_info = @curl_getinfo($ch);
 | 
			
		||||
		$http_code = $curl_info['http_code'];
 | 
			
		||||
		curl_close($ch);
 | 
			
		||||
 | 
			
		||||
		DI::profiler()->saveTimestamp($stamp1, "network", System::callstack());
 | 
			
		||||
 | 
			
		||||
		if ($http_code == 0) {
 | 
			
		||||
			return $url;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if (in_array($http_code, ['301', '302'])) {
 | 
			
		||||
			if (!empty($curl_info['redirect_url'])) {
 | 
			
		||||
				return self::finalUrl($curl_info['redirect_url'], ++$depth, $fetchbody);
 | 
			
		||||
			} elseif (!empty($curl_info['location'])) {
 | 
			
		||||
				return self::finalUrl($curl_info['location'], ++$depth, $fetchbody);
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// Check for redirects in the meta elements of the body if there are no redirects in the header.
 | 
			
		||||
		if (!$fetchbody) {
 | 
			
		||||
			return(self::finalUrl($url, ++$depth, true));
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// if the file is too large then exit
 | 
			
		||||
		if ($curl_info["download_content_length"] > 1000000) {
 | 
			
		||||
			return $url;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// if it isn't a HTML file then exit
 | 
			
		||||
		if (!empty($curl_info["content_type"]) && !strstr(strtolower($curl_info["content_type"]), "html")) {
 | 
			
		||||
			return $url;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		$stamp1 = microtime(true);
 | 
			
		||||
 | 
			
		||||
		$ch = curl_init();
 | 
			
		||||
		curl_setopt($ch, CURLOPT_URL, $url);
 | 
			
		||||
		curl_setopt($ch, CURLOPT_HEADER, 0);
 | 
			
		||||
		curl_setopt($ch, CURLOPT_NOBODY, 0);
 | 
			
		||||
		curl_setopt($ch, CURLOPT_TIMEOUT, 10);
 | 
			
		||||
		curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
 | 
			
		||||
		curl_setopt($ch, CURLOPT_USERAGENT, DI::httpRequest()->getUserAgent());
 | 
			
		||||
 | 
			
		||||
		$body = curl_exec($ch);
 | 
			
		||||
		curl_close($ch);
 | 
			
		||||
 | 
			
		||||
		DI::profiler()->saveTimestamp($stamp1, "network", System::callstack());
 | 
			
		||||
 | 
			
		||||
		if (trim($body) == "") {
 | 
			
		||||
			return $url;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// Check for redirect in meta elements
 | 
			
		||||
		$doc = new DOMDocument();
 | 
			
		||||
		@$doc->loadHTML($body);
 | 
			
		||||
 | 
			
		||||
		$xpath = new DomXPath($doc);
 | 
			
		||||
 | 
			
		||||
		$list = $xpath->query("//meta[@content]");
 | 
			
		||||
		foreach ($list as $node) {
 | 
			
		||||
			$attr = [];
 | 
			
		||||
			if ($node->attributes->length) {
 | 
			
		||||
				foreach ($node->attributes as $attribute) {
 | 
			
		||||
					$attr[$attribute->name] = $attribute->value;
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			if (@$attr["http-equiv"] == 'refresh') {
 | 
			
		||||
				$path = $attr["content"];
 | 
			
		||||
				$pathinfo = explode(";", $path);
 | 
			
		||||
				foreach ($pathinfo as $value) {
 | 
			
		||||
					if (substr(strtolower($value), 0, 4) == "url=") {
 | 
			
		||||
						return self::finalUrl(substr($value, 4), ++$depth);
 | 
			
		||||
					}
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		return $url;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/**
 | 
			
		||||
	 * Find the matching part between two url
 | 
			
		||||
	 *
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue