From e974e586ccfe72a77b001e9571f623ef91fd72c0 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Thu, 7 Aug 2014 07:58:20 +0200 Subject: [PATCH] Unified user agent string when fetching data. --- include/bbcode.php | 35 ++++++++++++++++++----------------- include/network.php | 10 +++++----- mod/parse_url.php | 3 +-- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/include/bbcode.php b/include/bbcode.php index af14a57560..571809c3b5 100644 --- a/include/bbcode.php +++ b/include/bbcode.php @@ -595,7 +595,7 @@ function bb_RemovePictureLinks($match) { $ch = @curl_init($match[1]); @curl_setopt($ch, CURLOPT_NOBODY, true); @curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - @curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Friendica)"); + @curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; ".FRIENDICA_PLATFORM." ".FRIENDICA_VERSION."-".DB_UPDATE_VERSION.")"); @curl_exec($ch); $curl_info = @curl_getinfo($ch); @@ -643,7 +643,7 @@ function bb_CleanPictureLinksSub($match) { $ch = @curl_init($match[1]); @curl_setopt($ch, CURLOPT_NOBODY, true); @curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - @curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Friendica)"); + @curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; ".FRIENDICA_PLATFORM." ".FRIENDICA_VERSION."-".DB_UPDATE_VERSION.")"); @curl_exec($ch); $curl_info = @curl_getinfo($ch); @@ -1069,25 +1069,26 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal if($saved_image) $Text = bb_replace_images($Text, $saved_image); - // Clean up the HTML by loading and saving the HTML with the DOM - // Only do it when it has to be done - for performance reasons - // Update: Now it is done every time - since bad structured html can break a whole page - //if (!$tryoembed) { - // $doc = new DOMDocument(); - // $doc->preserveWhiteSpace = false; + // Clean up the HTML by loading and saving the HTML with the DOM. 
+ // Badly structured HTML can break a whole page. + // For performance reasons do it only with activated item cache or at export. + if (!$tryoembed OR (get_itemcachepath() != "")) { + $doc = new DOMDocument(); + $doc->preserveWhiteSpace = false; - // $Text = mb_convert_encoding($Text, 'HTML-ENTITIES', "UTF-8"); + //$Text = mb_convert_encoding($Text, 'HTML-ENTITIES', "UTF-8"); - // $doctype = ''; - // @$doc->loadHTML($doctype."".$Text.""); + $doctype = ''; + $encoding = ''; + @$doc->loadHTML($encoding.$doctype."".$Text.""); + $doc->encoding = 'UTF-8'; + $Text = $doc->saveHTML(); + $Text = str_replace(array("", "", $doctype, $encoding), array("", "", "", ""), $Text); - // $Text = $doc->saveHTML(); - // $Text = str_replace(array("", "", $doctype), array("", "", ""), $Text); + $Text = str_replace('
','', $Text); - // $Text = str_replace('
','', $Text); - - // $Text = mb_convert_encoding($Text, "UTF-8", 'HTML-ENTITIES'); - //} + //$Text = mb_convert_encoding($Text, "UTF-8", 'HTML-ENTITIES'); + } // Clean up some useless linebreaks in lists //$Text = str_replace('