Merge remote-tracking branch 'upstream/develop' into 1501-enhanced-poco

This commit is contained in:
Michael Vogel 2015-01-04 21:07:45 +01:00
commit 47560fc6c7
2 changed files with 48 additions and 14 deletions

View file

@ -1160,12 +1160,7 @@ function original_url($url, $depth=1, $fetchbody = false) {
$ch = curl_init(); $ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url); curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, 1); curl_setopt($ch, CURLOPT_HEADER, 1);
if ($fetchbody)
curl_setopt($ch, CURLOPT_NOBODY, 0);
else
curl_setopt($ch, CURLOPT_NOBODY, 1); curl_setopt($ch, CURLOPT_NOBODY, 1);
curl_setopt($ch, CURLOPT_TIMEOUT, 10); curl_setopt($ch, CURLOPT_TIMEOUT, 10);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent()); curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent());
@ -1183,16 +1178,33 @@ function original_url($url, $depth=1, $fetchbody = false) {
return(original_url($curl_info['location'], ++$depth, $fetchbody)); return(original_url($curl_info['location'], ++$depth, $fetchbody));
} }
$pos = strpos($header, "\r\n\r\n"); // Check for redirects in the meta elements of the body if there are no redirects in the header.
if (!$fetchbody)
if ($pos)
$body = trim(substr($header, $pos));
else
$body = $header;
if (trim($body) == "")
return(original_url($url, ++$depth, true)); return(original_url($url, ++$depth, true));
// if the file is too large then exit
if ($curl_info["download_content_length"] > 1000000)
return($url);
// if it isn't a HTML file then exit
if (($curl_info["content_type"] != "") AND !strstr(strtolower($curl_info["content_type"]),"html"))
return($url);
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_NOBODY, 0);
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent());
$body = curl_exec($ch);
curl_close($ch);
if (trim($body) == "")
return($url);
// Check for redirect in meta elements
$doc = new DOMDocument(); $doc = new DOMDocument();
@$doc->loadHTML($body); @$doc->loadHTML($body);

View file

@ -73,7 +73,7 @@ function parseurl_getsiteinfo($url, $no_guessing = false, $do_oembed = true, $co
$ch = curl_init(); $ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url); curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, 1); curl_setopt($ch, CURLOPT_HEADER, 1);
curl_setopt($ch, CURLOPT_NOBODY, 0); curl_setopt($ch, CURLOPT_NOBODY, 1);
curl_setopt($ch, CURLOPT_TIMEOUT, 3); curl_setopt($ch, CURLOPT_TIMEOUT, 3);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
//curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1); //curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
@ -93,6 +93,28 @@ function parseurl_getsiteinfo($url, $no_guessing = false, $do_oembed = true, $co
return($siteinfo); return($siteinfo);
} }
// if the file is too large then exit
if ($curl_info["download_content_length"] > 1000000)
return($siteinfo);
// if it isn't a HTML file then exit
if (($curl_info["content_type"] != "") AND !strstr(strtolower($curl_info["content_type"]),"html"))
return($siteinfo);
// Now fetch the body as well
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, 1);
curl_setopt($ch, CURLOPT_NOBODY, 0);
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent());
$header = curl_exec($ch);
$curl_info = @curl_getinfo($ch);
$http_code = $curl_info['http_code'];
curl_close($ch);
if ($do_oembed) { if ($do_oembed) {
require_once("include/oembed.php"); require_once("include/oembed.php");