Unified user agent string when fetching data.
This commit is contained in:
parent
4d6f28d224
commit
e974e586cc
|
@ -595,7 +595,7 @@ function bb_RemovePictureLinks($match) {
|
||||||
$ch = @curl_init($match[1]);
|
$ch = @curl_init($match[1]);
|
||||||
@curl_setopt($ch, CURLOPT_NOBODY, true);
|
@curl_setopt($ch, CURLOPT_NOBODY, true);
|
||||||
@curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
@curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
||||||
@curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Friendica)");
|
@curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; ".FRIENDICA_PLATFORM." ".FRIENDICA_VERSION."-".DB_UPDATE_VERSION.")");
|
||||||
@curl_exec($ch);
|
@curl_exec($ch);
|
||||||
$curl_info = @curl_getinfo($ch);
|
$curl_info = @curl_getinfo($ch);
|
||||||
|
|
||||||
|
@ -643,7 +643,7 @@ function bb_CleanPictureLinksSub($match) {
|
||||||
$ch = @curl_init($match[1]);
|
$ch = @curl_init($match[1]);
|
||||||
@curl_setopt($ch, CURLOPT_NOBODY, true);
|
@curl_setopt($ch, CURLOPT_NOBODY, true);
|
||||||
@curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
@curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
||||||
@curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Friendica)");
|
@curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; ".FRIENDICA_PLATFORM." ".FRIENDICA_VERSION."-".DB_UPDATE_VERSION.")");
|
||||||
@curl_exec($ch);
|
@curl_exec($ch);
|
||||||
$curl_info = @curl_getinfo($ch);
|
$curl_info = @curl_getinfo($ch);
|
||||||
|
|
||||||
|
@ -1069,25 +1069,26 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal
|
||||||
if($saved_image)
|
if($saved_image)
|
||||||
$Text = bb_replace_images($Text, $saved_image);
|
$Text = bb_replace_images($Text, $saved_image);
|
||||||
|
|
||||||
// Clean up the HTML by loading and saving the HTML with the DOM
|
// Clean up the HTML by loading and saving the HTML with the DOM.
|
||||||
// Only do it when it has to be done - for performance reasons
|
// Badly structured HTML can break a whole page.
|
||||||
// Update: Now it is done every time - since bad structured html can break a whole page
|
// For performance reasons, do it only with an activated item cache or at export.
|
||||||
//if (!$tryoembed) {
|
if (!$tryoembed OR (get_itemcachepath() != "")) {
|
||||||
// $doc = new DOMDocument();
|
$doc = new DOMDocument();
|
||||||
// $doc->preserveWhiteSpace = false;
|
$doc->preserveWhiteSpace = false;
|
||||||
|
|
||||||
// $Text = mb_convert_encoding($Text, 'HTML-ENTITIES', "UTF-8");
|
//$Text = mb_convert_encoding($Text, 'HTML-ENTITIES', "UTF-8");
|
||||||
|
|
||||||
// $doctype = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">';
|
$doctype = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">';
|
||||||
// @$doc->loadHTML($doctype."<html><body>".$Text."</body></html>");
|
$encoding = '<?xml encoding="UTF-8">';
|
||||||
|
@$doc->loadHTML($encoding.$doctype."<html><body>".$Text."</body></html>");
|
||||||
|
$doc->encoding = 'UTF-8';
|
||||||
|
$Text = $doc->saveHTML();
|
||||||
|
$Text = str_replace(array("<html><body>", "</body></html>", $doctype, $encoding), array("", "", "", ""), $Text);
|
||||||
|
|
||||||
// $Text = $doc->saveHTML();
|
$Text = str_replace('<br></li>','</li>', $Text);
|
||||||
// $Text = str_replace(array("<html><body>", "</body></html>", $doctype), array("", "", ""), $Text);
|
|
||||||
|
|
||||||
// $Text = str_replace('<br></li>','</li>', $Text);
|
//$Text = mb_convert_encoding($Text, "UTF-8", 'HTML-ENTITIES');
|
||||||
|
}
|
||||||
// $Text = mb_convert_encoding($Text, "UTF-8", 'HTML-ENTITIES');
|
|
||||||
//}
|
|
||||||
|
|
||||||
// Clean up some useless linebreaks in lists
|
// Clean up some useless linebreaks in lists
|
||||||
//$Text = str_replace('<br /><ul','<ul ', $Text);
|
//$Text = str_replace('<br /><ul','<ul ', $Text);
|
||||||
|
|
|
@ -35,7 +35,7 @@ function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_
|
||||||
}
|
}
|
||||||
|
|
||||||
@curl_setopt($ch, CURLOPT_RETURNTRANSFER,true);
|
@curl_setopt($ch, CURLOPT_RETURNTRANSFER,true);
|
||||||
@curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Friendica)");
|
@curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; ".FRIENDICA_PLATFORM." ".FRIENDICA_VERSION."-".DB_UPDATE_VERSION.")");
|
||||||
|
|
||||||
|
|
||||||
if(intval($timeout)) {
|
if(intval($timeout)) {
|
||||||
|
@ -72,7 +72,8 @@ function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_
|
||||||
$base = $s;
|
$base = $s;
|
||||||
$curl_info = @curl_getinfo($ch);
|
$curl_info = @curl_getinfo($ch);
|
||||||
$http_code = $curl_info['http_code'];
|
$http_code = $curl_info['http_code'];
|
||||||
// logger('fetch_url:' . $http_code . ' data: ' . $s);
|
logger('fetch_url '.$url.': '.$http_code." *".$s."*", LOGGER_DEBUG);
|
||||||
|
//logger('fetch_url:' . $http_code . ' data: ' . $s);
|
||||||
$header = '';
|
$header = '';
|
||||||
|
|
||||||
// Pull out multiple headers, e.g. proxy and continuation headers
|
// Pull out multiple headers, e.g. proxy and continuation headers
|
||||||
|
@ -134,7 +135,7 @@ function post_url($url,$params, $headers = null, &$redirects = 0, $timeout = 0)
|
||||||
curl_setopt($ch, CURLOPT_RETURNTRANSFER,true);
|
curl_setopt($ch, CURLOPT_RETURNTRANSFER,true);
|
||||||
curl_setopt($ch, CURLOPT_POST,1);
|
curl_setopt($ch, CURLOPT_POST,1);
|
||||||
curl_setopt($ch, CURLOPT_POSTFIELDS,$params);
|
curl_setopt($ch, CURLOPT_POSTFIELDS,$params);
|
||||||
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Friendica)");
|
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; ".FRIENDICA_PLATFORM." ".FRIENDICA_VERSION."-".DB_UPDATE_VERSION.")");
|
||||||
|
|
||||||
if(intval($timeout)) {
|
if(intval($timeout)) {
|
||||||
curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
|
curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
|
||||||
|
@ -1163,8 +1164,7 @@ function original_url($url, $depth=1, $fetchbody = false) {
|
||||||
|
|
||||||
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
|
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
|
||||||
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
||||||
//curl_setopt($ch,CURLOPT_USERAGENT,'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:24.0) Gecko/20100101 Firefox/24.0');
|
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; ".FRIENDICA_PLATFORM." ".FRIENDICA_VERSION."-".DB_UPDATE_VERSION.")");
|
||||||
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Friendica)");
|
|
||||||
|
|
||||||
$header = curl_exec($ch);
|
$header = curl_exec($ch);
|
||||||
$curl_info = @curl_getinfo($ch);
|
$curl_info = @curl_getinfo($ch);
|
||||||
|
|
|
@ -71,8 +71,7 @@ function parseurl_getsiteinfo($url, $no_guessing = false, $do_oembed = true, $co
|
||||||
curl_setopt($ch, CURLOPT_TIMEOUT, 3);
|
curl_setopt($ch, CURLOPT_TIMEOUT, 3);
|
||||||
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
||||||
//curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
|
//curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
|
||||||
//curl_setopt($ch,CURLOPT_USERAGENT,' Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:24.0) Gecko/20100101 Firefox/24.0');
|
curl_setopt($ch,CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; ".FRIENDICA_PLATFORM." ".FRIENDICA_VERSION."-".DB_UPDATE_VERSION.")");
|
||||||
curl_setopt($ch,CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Friendica)");
|
|
||||||
|
|
||||||
$header = curl_exec($ch);
|
$header = curl_exec($ch);
|
||||||
$curl_info = @curl_getinfo($ch);
|
$curl_info = @curl_getinfo($ch);
|
||||||
|
|
Loading…
Reference in a new issue