From 12866209e889cea0e2c3e1659d0845388bc4090b Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Fri, 4 Apr 2014 10:58:31 +0200 Subject: [PATCH] parse url: Emtpy values in meta headers are now ignored. Oembed data has priority. Videos are embedded with some further information. --- mod/parse_url.php | 113 +++++++++++++++++++++++++--------------------- 1 file changed, 62 insertions(+), 51 deletions(-) diff --git a/mod/parse_url.php b/mod/parse_url.php index 5caf830325..a0608f6363 100644 --- a/mod/parse_url.php +++ b/mod/parse_url.php @@ -53,6 +53,9 @@ function completeurl($url, $scheme) { function parseurl_getsiteinfo($url, $no_guessing = false) { $siteinfo = array(); + $url = trim($url, "'"); + $url = trim($url, '"'); + $siteinfo["url"] = $url; $siteinfo["type"] = "link"; $ch = curl_init(); @@ -69,7 +72,7 @@ function parseurl_getsiteinfo($url, $no_guessing = false) { $http_code = $curl_info['http_code']; curl_close($ch); - if ((($curl_info['http_code'] == "301") OR ($curl_info['http_code'] == "302")) + if ((($curl_info['http_code'] == "301") OR ($curl_info['http_code'] == "302") OR ($curl_info['http_code'] == "303") OR ($curl_info['http_code'] == "307")) AND (($curl_info['redirect_url'] != "") OR ($curl_info['location'] != ""))) { if ($curl_info['redirect_url'] != "") $siteinfo = parseurl_getsiteinfo($curl_info['redirect_url']); @@ -85,19 +88,12 @@ function parseurl_getsiteinfo($url, $no_guessing = false) { if ($oembed_data->type != "error") $siteinfo["type"] = $oembed_data->type; - if ($oembed_data->type == "link") { - if (isset($oembed_data->title)) - $siteinfo["title"] = $oembed_data->title; - if (isset($oembed_data->description)) - $siteinfo["text"] = trim($oembed_data->description); - if (isset($oembed_data->thumbnail_url)) - $siteinfo["image"] = $oembed_data->thumbnail_url; - } - // Fetch the first mentioned charset. Can be in body or header + $charset = ""; if (preg_match('/charset=(.*?)['."'".'"\s\n]/', $header, $matches)) $charset = trim(array_pop($matches)); - else + + if ($charset == "") $charset = "utf-8"; $pos = strpos($header, "\r\n\r\n"); @@ -164,33 +160,34 @@ function parseurl_getsiteinfo($url, $no_guessing = false) { $attr["content"] = trim(html_entity_decode($attr["content"], ENT_QUOTES, "UTF-8")); - switch (strtolower($attr["name"])) { - case "fulltitle": - $siteinfo["title"] = $attr["content"]; - break; - case "description": - $siteinfo["text"] = $attr["content"]; - break; - case "twitter:image": - $siteinfo["image"] = $attr["content"]; - break; - case "twitter:card": - if ($siteinfo["type"] == "") - $siteinfo["type"] = $attr["content"]; - break; - case "twitter:description": - $siteinfo["text"] = $attr["content"]; - break; - case "twitter:title": - $siteinfo["title"] = $attr["content"]; - break; - case "dc.title": - $siteinfo["title"] = $attr["content"]; - break; - case "dc.description": - $siteinfo["text"] = $attr["content"]; - break; - } + if ($attr["content"] != "") + switch (strtolower($attr["name"])) { + case "fulltitle": + $siteinfo["title"] = $attr["content"]; + break; + case "description": + $siteinfo["text"] = $attr["content"]; + break; + case "twitter:image": + $siteinfo["image"] = $attr["content"]; + break; + case "twitter:card": + if ($siteinfo["type"] == "") + $siteinfo["type"] = $attr["content"]; + break; + case "twitter:description": + $siteinfo["text"] = $attr["content"]; + break; + case "twitter:title": + $siteinfo["title"] = $attr["content"]; + break; + case "dc.title": + $siteinfo["title"] = $attr["content"]; + break; + case "dc.description": + $siteinfo["text"] = $attr["content"]; + break; + } if ($siteinfo["type"] == "summary") $siteinfo["type"] = "link"; } @@ -205,17 +202,27 @@ function parseurl_getsiteinfo($url, $no_guessing = false) { $attr["content"] = trim(html_entity_decode($attr["content"], ENT_QUOTES, "UTF-8")); - switch (strtolower($attr["property"])) { - case "og:image": - $siteinfo["image"] = $attr["content"]; - break; - case "og:title": - $siteinfo["title"] = $attr["content"]; - break; - case "og:description": - $siteinfo["text"] = $attr["content"]; - break; - } + if ($attr["content"] != "") + switch (strtolower($attr["property"])) { + case "og:image": + $siteinfo["image"] = $attr["content"]; + break; + case "og:title": + $siteinfo["title"] = $attr["content"]; + break; + case "og:description": + $siteinfo["text"] = $attr["content"]; + break; + } + } + + if ($oembed_data->type == "link") { + if (isset($oembed_data->title) AND (trim($oembed_data->title) != "")) + $siteinfo["title"] = $oembed_data->title; + if (isset($oembed_data->description) AND (trim($oembed_data->description) != "")) + $siteinfo["text"] = trim($oembed_data->description); + if (isset($oembed_data->thumbnail_url) AND (trim($oembed_data->thumbnail_url) != "")) + $siteinfo["image"] = $oembed_data->thumbnail_url; } if ((@$siteinfo["image"] == "") AND !$no_guessing) { @@ -290,6 +297,8 @@ function parseurl_getsiteinfo($url, $no_guessing = false) { } } + logger("parseurl_getsiteinfo: Siteinfo for ".$url." ".print_r($siteinfo, true), LOGGER_DEBUG); + return($siteinfo); } @@ -367,6 +376,8 @@ function parse_url_content(&$a) { $siteinfo = parseurl_getsiteinfo($url); + $url= $siteinfo["url"]; + $sitedata = ""; if($siteinfo["title"] == "") { @@ -379,7 +390,7 @@ function parse_url_content(&$a) { $image = ""; - if(sizeof($siteinfo["images"]) > 0){ + if (($siteinfo["type"] != "video") AND (sizeof($siteinfo["images"]) > 0)){ /* Execute below code only if image is present in siteinfo */ $total_images = 0; @@ -421,7 +432,7 @@ function parse_url_content(&$a) { $sitedata .= trim($result); if (($siteinfo["type"] == "video") AND ($url != "")) - echo "[video]".$url."[/video]"; + echo "[class=type-video]".$sitedata."[/class]"; elseif (($siteinfo["type"] != "photo")) echo "[class=type-link]".$sitedata."[/class]"; else