|
|
@ -33,7 +33,7 @@ function completeurl($url, $scheme) { |
|
|
|
|
|
|
|
$complete = $schemearr["scheme"]."://".$schemearr["host"]; |
|
|
|
|
|
|
|
if ($schemearr["port"] != "") |
|
|
|
if (@$schemearr["port"] != "") |
|
|
|
$complete .= ":".$schemearr["port"]; |
|
|
|
|
|
|
|
if(strpos($urlarr['path'],'/') !== 0) |
|
|
@ -41,10 +41,10 @@ function completeurl($url, $scheme) { |
|
|
|
|
|
|
|
$complete .= $urlarr["path"]; |
|
|
|
|
|
|
|
if ($urlarr["query"] != "") |
|
|
|
if (@$urlarr["query"] != "") |
|
|
|
$complete .= "?".$urlarr["query"]; |
|
|
|
|
|
|
|
if ($urlarr["fragment"] != "") |
|
|
|
if (@$urlarr["fragment"] != "") |
|
|
|
$complete .= "#".$urlarr["fragment"]; |
|
|
|
|
|
|
|
return($complete); |
|
|
@ -52,18 +52,29 @@ function completeurl($url, $scheme) { |
|
|
|
|
|
|
|
function parseurl_getsiteinfo($url) { |
|
|
|
$siteinfo = array(); |
|
|
|
|
|
|
|
$ch = curl_init(); |
|
|
|
curl_setopt($ch, CURLOPT_URL, $url); |
|
|
|
curl_setopt($ch, CURLOPT_HEADER, 1); |
|
|
|
curl_setopt($ch, CURLOPT_NOBODY, 0); |
|
|
|
curl_setopt($ch, CURLOPT_TIMEOUT, 3); |
|
|
|
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); |
|
|
|
//curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
|
|
|
|
curl_setopt($ch,CURLOPT_USERAGENT,'Opera/9.64(Windows NT 5.1; U; de) Presto/2.1.1'); |
|
|
|
|
|
|
|
$header = curl_exec($ch); |
|
|
|
$curl_info = @curl_getinfo($ch); |
|
|
|
$http_code = $curl_info['http_code']; |
|
|
|
curl_close($ch); |
|
|
|
|
|
|
|
if ((($curl_info['http_code'] == "301") OR ($curl_info['http_code'] == "302")) |
|
|
|
AND (($curl_info['redirect_url'] != "") OR ($curl_info['location'] != ""))) { |
|
|
|
if ($curl_info['redirect_url'] != "") |
|
|
|
$siteinfo = parseurl_getsiteinfo($curl_info['redirect_url']); |
|
|
|
else |
|
|
|
$siteinfo = parseurl_getsiteinfo($curl_info['location']); |
|
|
|
return($siteinfo); |
|
|
|
} |
|
|
|
|
|
|
|
// Fetch the first mentioned charset. Can be in body or header
|
|
|
|
if (preg_match('/charset=(.*?)['."'".'"\s\n]/', $header, $matches)) |
|
|
|
$charset = trim(array_pop($matches)); |
|
|
@ -173,7 +184,7 @@ function parseurl_getsiteinfo($url) { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if ($siteinfo["image"] == "") { |
|
|
|
if (@$siteinfo["image"] == "") { |
|
|
|
$list = $xpath->query("//img[@src]"); |
|
|
|
foreach ($list as $node) { |
|
|
|
$attr = array(); |
|
|
@ -212,7 +223,7 @@ function parseurl_getsiteinfo($url) { |
|
|
|
"height"=>$photodata[1]); |
|
|
|
} |
|
|
|
|
|
|
|
if ($siteinfo["text"] == "") { |
|
|
|
if (@$siteinfo["text"] == "") { |
|
|
|
$text = ""; |
|
|
|
|
|
|
|
$list = $xpath->query("//div[@class='article']"); |
|
|
|