From 886cf400369289d5ef91fabe6d67008209350dca Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Fri, 17 Jul 2020 19:18:27 -0400 Subject: [PATCH] Ensure ParseUrl::getSiteinfo always returns the url and type keys --- src/Util/ParseUrl.php | 51 ++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/src/Util/ParseUrl.php b/src/Util/ParseUrl.php index 62b5d007d..b6d172a3a 100644 --- a/src/Util/ParseUrl.php +++ b/src/Util/ParseUrl.php @@ -55,14 +55,13 @@ class ParseUrl * to avoid endless loops * * @return array which contains needed data for embedding - * string 'url' => The url of the parsed page - * string 'type' => Content type - * string 'title' => The title of the content - * string 'text' => The description for the content - * string 'image' => A preview image of the content (only available - * if $no_geuessing = false - * array'images' = Array of preview pictures - * string 'keywords' => The tags which belong to the content + * string 'url' => The url of the parsed page + * string 'type' => Content type + * string 'title' => (optional) The title of the content + * string 'text' => (optional) The description for the content + * string 'image' => (optional) A preview image of the content (only available if $no_geuessing = false) + * array 'images' => (optional) Array of preview pictures + * string 'keywords' => (optional) The tags which belong to the content * * @throws \Friendica\Network\HTTPException\InternalServerErrorException * @see ParseUrl::getSiteinfo() for more information about scraping @@ -115,14 +114,13 @@ class ParseUrl * @param int $count Internal counter to avoid endless loops * * @return array which contains needed data for embedding - * string 'url' => The url of the parsed page - * string 'type' => Content type - * string 'title' => The title of the content - * string 'text' => The description for the content - * string 'image' => A preview image of the content (only available - * if $no_geuessing = false - * array'images' = Array of preview pictures - * string 'keywords' => The tags which belong to the content + * string 'url' => The url of the parsed page + * string 'type' => Content type + * string 'title' => (optional) The title of the content + * string 'text' => (optional) The description for the content + * string 'image' => (optional) A preview image of the content (only available if $no_guessing = false) + * array 'images' => (optional) Array of preview pictures + * string 'keywords' => (optional) The tags which belong to the content * * @throws \Friendica\Network\HTTPException\InternalServerErrorException * @todo https://developers.google.com/+/plugins/snippet/ @@ -140,28 +138,27 @@ class ParseUrl */ public static function getSiteinfo($url, $no_guessing = false, $do_oembed = true, $count = 1) { - $siteinfo = []; - // Check if the URL does contain a scheme $scheme = parse_url($url, PHP_URL_SCHEME); if ($scheme == '') { - $url = 'http://' . trim($url, '/'); + $url = 'http://' . ltrim($url, '/'); } + $url = trim($url, "'\""); + + $url = Network::stripTrackingQueryParams($url); + + $siteinfo = [ + 'url' => $url, + 'type' => 'link', + ]; + if ($count > 10) { Logger::log('Endless loop detected for ' . $url, Logger::DEBUG); return $siteinfo; } - $url = trim($url, "'"); - $url = trim($url, '"'); - - $url = Network::stripTrackingQueryParams($url); - - $siteinfo['url'] = $url; - $siteinfo['type'] = 'link'; - $curlResult = Network::curl($url); if (!$curlResult->isSuccess()) { return $siteinfo;