From 52569e536e217c3178f13d2a722eb8ebfd801465 Mon Sep 17 00:00:00 2001
From: Michael Vogel <icarus@dabo.de>
Date: Fri, 29 Jun 2012 21:40:54 +0200
Subject: [PATCH 01/16] Tests concerning removal of the title

---
 include/items.php | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/items.php b/include/items.php
index a0dd1c8159..fbd4af5f3d 100755
--- a/include/items.php
+++ b/include/items.php
@@ -306,6 +306,9 @@ function get_atom_elements($feed,$item) {
 	$res['body'] = unxmlify($item->get_content());
 	$res['plink'] = unxmlify($item->get_link(0));
 
+	//$debugfile = tempnam("/home/ike/log", "item-res-");
+	//file_put_contents($debugfile, serialize($res));
+
 	if($res['plink'])
 		$base_url = implode('/', array_slice(explode('/',$res['plink']),0,3));
 	else
@@ -1645,7 +1648,6 @@ function consume_feed($xml,$importer,&$contact, &$hub, $datedir = 0, $pass = 0)
 				$item_id  = $item->get_id();
 				$datarray = get_atom_elements($feed,$item);
 
-
 				if((! x($datarray,'author-name')) && ($contact['network'] != NETWORK_DFRN))
 					$datarray['author-name'] = $contact['name'];
 				if((! x($datarray,'author-link')) && ($contact['network'] != NETWORK_DFRN))
@@ -3469,4 +3471,4 @@ function posted_date_widget($url,$uid,$wall) {
 		'$dates' => $ret
 	));
 	return $o;
-}
\ No newline at end of file
+}

From bcca817b9ab8f0a9dc0af56d55a3a347056a4494 Mon Sep 17 00:00:00 2001
From: Michael Vogel <icarus@dabo.de>
Date: Sat, 30 Jun 2012 09:25:22 +0200
Subject: [PATCH 02/16] Remove the title if it is the same as (or part of)
 the body. Helps against auto-generated titles from tumblr.

---
 include/items.php | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/include/items.php b/include/items.php
index aed0a8e684..87d6ab4794 100755
--- a/include/items.php
+++ b/include/items.php
@@ -280,6 +280,29 @@ function construct_activity_target($item) {
 	return '';
 } 
 
+function title_is_body($title, $body) {
+
+	$title = strip_tags($title);
+	$title = trim($title);
+	$title = str_replace(array("\n", "\r", "\t", " "), array("","","",""), $title);
+
+	$body = strip_tags($body);
+	$body = trim($body);
+	$body = str_replace(array("\n", "\r", "\t", " "), array("","","",""), $body);
+
+	if (strlen($title) < strlen($body))
+		$body = substr($body, 0, strlen($title));
+
+	if (($title != $body) and (substr($title, -3) == "...")) {
+		$pos = strrpos($title, "...");
+		if ($pos > 0) {
+			$title = substr($title, 0, $pos);
+			$body = substr($body, 0, $pos);
+		}
+	}
+
+	return($title == $body);
+}
 
 
 
@@ -306,8 +329,10 @@ function get_atom_elements($feed,$item) {
 	$res['body'] = unxmlify($item->get_content());
 	$res['plink'] = unxmlify($item->get_link(0));
 
-	//$debugfile = tempnam("/home/ike/log", "item-res-");
-	//file_put_contents($debugfile, serialize($res));
+	// removing the content of the title if its identically to the body
+	// This helps with auto generated titles e.g. from tumblr
+	if (title_is_body($res["title"], $res["body"]))
+		$res['title'] = "";
 
 	if($res['plink'])
 		$base_url = implode('/', array_slice(explode('/',$res['plink']),0,3));

From dafc84390db4b57e5958f1af386c778b481c7d04 Mon Sep 17 00:00:00 2001
From: Michael Vogel <icarus@dabo.de>
Date: Thu, 5 Jul 2012 23:36:00 +0200
Subject: [PATCH 03/16] Improvements to the HTML to make a better bb2markdown
 conversion (nested lists, youtube, vimeo, ...)

---
 include/api.php         |  1 +
 include/bb2diaspora.php |  4 ++--
 include/bbcode.php      | 46 +++++++++++++++++++++++++++++++++--------
 include/items.php       | 19 ++++++++++-------
 4 files changed, 52 insertions(+), 18 deletions(-)

diff --git a/include/api.php b/include/api.php
index d790b4b875..629b5fcc2a 100644
--- a/include/api.php
+++ b/include/api.php
@@ -1726,3 +1726,4 @@ notifications/leave
 blocks/exists
 blocks/blocking
 */
+
diff --git a/include/bb2diaspora.php b/include/bb2diaspora.php
index 77a5f5c2a0..4a82635e57 100644
--- a/include/bb2diaspora.php
+++ b/include/bb2diaspora.php
@@ -112,7 +112,7 @@ function bb2diaspora($Text,$preserve_nl = false) {
 	// Note that to get nested lists to work for Diaspora, we would need
 	// to define the closing tag for the list elements. So nested lists
 	// are going to be flattened out in Diaspora for now
-	$endlessloop = 0;
+/*	$endlessloop = 0;
 	while ((((strpos($Text, "[/list]") !== false) && (strpos($Text, "[list") !== false)) ||
 	       ((strpos($Text, "[/ol]") !== false) && (strpos($Text, "[ol]") !== false)) || 
 	       ((strpos($Text, "[/ul]") !== false) && (strpos($Text, "[ul]") !== false))) && (++$endlessloop < 20)) {
@@ -125,7 +125,7 @@ function bb2diaspora($Text,$preserve_nl = false) {
 		$Text = preg_replace_callback("/\[ul\](.*?)\[\/ul\]/is", 'diaspora_ul', $Text);
 		$Text = preg_replace_callback("/\[ol\](.*?)\[\/ol\]/is", 'diaspora_ol', $Text);
 	}
-
+*/
 	// Convert it to HTML - don't try oembed
 	$Text = bbcode($Text, $preserve_nl, false);
 
diff --git a/include/bbcode.php b/include/bbcode.php
index 9071c767b6..988e75d417 100644
--- a/include/bbcode.php
+++ b/include/bbcode.php
@@ -251,21 +251,25 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true) {
 	$Text = preg_replace("/\[img\](.*?)\[\/img\]/ism", '<img src="$1" alt="' . t('Image/photo') . '" />', $Text);
 
 
-	$Text = preg_replace("/\[video\](.*?\.(ogg|ogv|oga|ogm|webm|mp4))\[\/video\]/ism", '<video src="$1" controls="controls" width="425" height="350"><a href="$1">$1</a></video>', $Text);
-
-	$Text = preg_replace("/\[audio\](.*?\.(ogg|ogv|oga|ogm|webm|mp4|mp3))\[\/audio\]/ism", '<audio src="$1" controls="controls"><a href="$1">$1</a></audio>', $Text);
-
 	// Try to Oembed
 	if ($tryoembed) {
+		$Text = preg_replace("/\[video\](.*?\.(ogg|ogv|oga|ogm|webm|mp4))\[\/video\]/ism", '<video src="$1" controls="controls" width="425" height="350"><a href="$1">$1</a></video>', $Text);
+		$Text = preg_replace("/\[audio\](.*?\.(ogg|ogv|oga|ogm|webm|mp4|mp3))\[\/audio\]/ism", '<audio src="$1" controls="controls"><a href="$1">$1</a></audio>', $Text);
+
 		$Text = preg_replace_callback("/\[video\](.*?)\[\/video\]/ism", 'tryoembed', $Text);
 		$Text = preg_replace_callback("/\[audio\](.*?)\[\/audio\]/ism", 'tryoembed', $Text);
+	} else {
+		$Text = preg_replace("/\[video\](.*?)\[\/video\]/", '$1', $Text);
+		$Text = preg_replace("/\[audio\](.*?)\[\/audio\]/", '$1', $Text);
 	}
 
 	// html5 video and audio
 
 
-	$Text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '<iframe src="$1" width="425" height="350"><a href="$1">$1</a></iframe>', $Text);
-
+	if ($tryoembed)
+		$Text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '<iframe src="$1" width="425" height="350"><a href="$1">$1</a></iframe>', $Text);
+	else
+		$Text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '<a href="$1">$1</a>', $Text);
 
 	// Youtube extensions
 	if ($tryoembed) {
@@ -278,7 +282,10 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true) {
 	$Text = preg_replace("/\[youtube\]https?:\/\/www.youtube.com\/embed\/(.*?)\[\/youtube\]/ism",'[youtube]$1[/youtube]',$Text); 
 	$Text = preg_replace("/\[youtube\]https?:\/\/youtu.be\/(.*?)\[\/youtube\]/ism",'[youtube]$1[/youtube]',$Text);
 
-	$Text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", '<iframe width="425" height="350" src="http://www.youtube.com/embed/$1" frameborder="0" ></iframe>', $Text);
+	if ($tryoembed)
+		$Text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", '<iframe width="425" height="350" src="http://www.youtube.com/embed/$1" frameborder="0" ></iframe>', $Text);
+	else
+		$Text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", "http://www.youtube.com/watch?v=$1", $Text);
 
 
 	if ($tryoembed) {
@@ -287,8 +294,12 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true) {
 	}
 
 	$Text = preg_replace("/\[vimeo\]https?:\/\/player.vimeo.com\/video\/([0-9]+)(.*?)\[\/vimeo\]/ism",'[vimeo]$1[/vimeo]',$Text); 
-	$Text = preg_replace("/\[vimeo\]https?:\/\/vimeo.com\/([0-9]+)(.*?)\[\/vimeo\]/ism",'[vimeo]$1[/vimeo]',$Text); 
-	$Text = preg_replace("/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism", '<iframe width="425" height="350" src="http://player.vimeo.com/video/$1" frameborder="0" ></iframe>', $Text);
+	$Text = preg_replace("/\[vimeo\]https?:\/\/vimeo.com\/([0-9]+)(.*?)\[\/vimeo\]/ism",'[vimeo]$1[/vimeo]',$Text);
+
+	if ($tryoembed)
+		$Text = preg_replace("/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism", '<iframe width="425" height="350" src="http://player.vimeo.com/video/$1" frameborder="0" ></iframe>', $Text);
+	else
+		$Text = preg_replace("/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism", "http://vimeo.com/$1", $Text);
 
 //	$Text = preg_replace("/\[youtube\](.*?)\[\/youtube\]/", '<object width="425" height="350" type="application/x-shockwave-flash" data="http://www.youtube.com/v/$1" ><param name="movie" value="http://www.youtube.com/v/$1"></param><!--[if IE]><embed src="http://www.youtube.com/v/$1" type="application/x-shockwave-flash" width="425" height="350" /><![endif]--></object>', $Text);
 
@@ -327,6 +338,23 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true) {
 	if(strlen($saved_image))
 		$Text = str_replace('[$#saved_image#$]','<img src="' . $saved_image .'" alt="' . t('Image/photo') . '" />',$Text);
 
+	// Clean up the HTML by loading and saving the HTML with the DOM
+	// Only do it when it has to be done - for performance reasons
+	if (!$tryoembed) {
+		$doc = new DOMDocument();
+		$doc->preserveWhiteSpace = false;
+
+		$Text = mb_convert_encoding($Text, 'HTML-ENTITIES', "UTF-8");
+
+		$doctype = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">';
+		@$doc->loadHTML($doctype."<html><body>".$Text."</body></html>");
+
+		$Text = $doc->saveHTML();
+		$Text = str_replace(array("<html><body>", "</body></html>", $doctype), array("", "", ""), $Text);
+
+		$Text = str_replace('<br></li>','</li>', $Text);
+	}
+
 	call_hooks('bbcode',$Text);
 
 	return $Text;
diff --git a/include/items.php b/include/items.php
index 87d6ab4794..6ab681c875 100755
--- a/include/items.php
+++ b/include/items.php
@@ -352,7 +352,7 @@ function get_atom_elements($feed,$item) {
 					$res['author-avatar'] = unxmlify($link['attribs']['']['href']);
 			}
 		}
-	}			
+	}
 
 	$rawactor = $item->get_item_tags(NAMESPACE_ACTIVITY, 'actor');
 
@@ -384,7 +384,7 @@ function get_atom_elements($feed,$item) {
 						$res['author-avatar'] = unxmlify($link['attribs']['']['href']);
 				}
 			}
-		}			
+		}
 
 		$rawactor = $feed->get_feed_tags(NAMESPACE_ACTIVITY, 'subject');
 
@@ -409,7 +409,7 @@ function get_atom_elements($feed,$item) {
 		$res['app'] = strip_tags(unxmlify($apps[0]['attribs']['']['source']));
 		if($res['app'] === 'web')
 			$res['app'] = 'OStatus';
-	}		   
+	}
 
 	// base64 encoded json structure representing Diaspora signature
 
@@ -553,7 +553,7 @@ function get_atom_elements($feed,$item) {
 
 		foreach($base as $link) {
 			if(!x($res, 'owner-avatar') || !$res['owner-avatar']) {
-				if($link['attribs']['']['rel'] === 'photo' || $link['attribs']['']['rel'] === 'avatar')			
+				if($link['attribs']['']['rel'] === 'photo' || $link['attribs']['']['rel'] === 'avatar')
 					$res['owner-avatar'] = unxmlify($link['attribs']['']['href']);
 			}
 		}
@@ -697,6 +697,11 @@ function get_atom_elements($feed,$item) {
 
 	call_hooks('parse_atom', $arr);
 
+	//if (($res["title"] != "") or (strpos($res["body"], "RT @") > 0)) {
+	//	$debugfile = tempnam("/home/ike/log", "item-res2-");
+	//	file_put_contents($debugfile, serialize($res));
+	//}
+
 	return $res;
 }
 
@@ -1644,7 +1649,7 @@ function consume_feed($xml,$importer,&$contact, &$hub, $datedir = 0, $pass = 0)
 
 	// Now process the feed
 
-	if($feed->get_item_quantity()) {		
+	if($feed->get_item_quantity()) {
 
 		logger('consume_feed: feed item count = ' . $feed->get_item_quantity());
 
@@ -1657,7 +1662,7 @@ function consume_feed($xml,$importer,&$contact, &$hub, $datedir = 0, $pass = 0)
 
 		foreach($items as $item) {
 
-			$is_reply = false;		
+			$is_reply = false;
 			$item_id = $item->get_id();
 			$rawthread = $item->get_item_tags( NAMESPACE_THREAD,'in-reply-to');
 			if(isset($rawthread[0]['attribs']['']['ref'])) {
@@ -1671,7 +1676,7 @@ function consume_feed($xml,$importer,&$contact, &$hub, $datedir = 0, $pass = 0)
 					continue;
 
 				// Have we seen it? If not, import it.
-	
+
 				$item_id  = $item->get_id();
 				$datarray = get_atom_elements($feed,$item);
 

From e9d7f852978e7fe4231893680c60c13dc3eb5bd4 Mon Sep 17 00:00:00 2001
From: Michael Vogel <icarus@dabo.de>
Date: Sat, 7 Jul 2012 20:40:15 +0200
Subject: [PATCH 04/16] network: Small changes

---
 include/network.php | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/include/network.php b/include/network.php
index c1a76000ef..500dff08be 100644
--- a/include/network.php
+++ b/include/network.php
@@ -14,15 +14,16 @@ function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_
 		return false;
 
 	@curl_setopt($ch, CURLOPT_HEADER, true);
-	
+
 	if (!is_null($accept_content)){
 		curl_setopt($ch,CURLOPT_HTTPHEADER, array (
 			"Accept: " . $accept_content
 		));
 	}
-	
+
 	@curl_setopt($ch, CURLOPT_RETURNTRANSFER,true);
-	@curl_setopt($ch, CURLOPT_USERAGENT, "Friendica");
+	//@curl_setopt($ch, CURLOPT_USERAGENT, "Friendica");
+	@curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Friendica)");
 
 
 	if(intval($timeout)) {
@@ -59,7 +60,6 @@ function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_
 	$base = $s;
 	$curl_info = @curl_getinfo($ch);
 	$http_code = $curl_info['http_code'];
-
 //	logger('fetch_url:' . $http_code . ' data: ' . $s);
 	$header = '';
 
@@ -73,24 +73,22 @@ function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_
 	}
 
 	if($http_code == 301 || $http_code == 302 || $http_code == 303 || $http_code == 307) {
-        $matches = array();
-        preg_match('/(Location:|URI:)(.*?)\n/', $header, $matches);
-        $newurl = trim(array_pop($matches));
+		$matches = array();
+		preg_match('/(Location:|URI:)(.*?)\n/', $header, $matches);
+		$newurl = trim(array_pop($matches));
 		if(strpos($newurl,'/') === 0)
 			$newurl = $url . $newurl;
-        $url_parsed = @parse_url($newurl);
-        if (isset($url_parsed)) {
-            $redirects++;
-            return fetch_url($newurl,$binary,$redirects,$timeout);
-        }
-    }
+		$url_parsed = @parse_url($newurl);
+		if (isset($url_parsed)) {
+			$redirects++;
+			return fetch_url($newurl,$binary,$redirects,$timeout);
+		}
+	}
 
 	$a->set_curl_code($http_code);
 
 	$body = substr($s,strlen($header));
-
 	$a->set_curl_headers($header);
-
 	@curl_close($ch);
 	return($body);
 }}

From db2f0f79190650e6c63e7c1ac10bf9f3304d2115 Mon Sep 17 00:00:00 2001
From: Michael Vogel <icarus@dabo.de>
Date: Sun, 8 Jul 2012 18:27:11 +0200
Subject: [PATCH 05/16] Bugfix: API: No extra encoding for the status text -
 makes it more compatible with statusnet. Bugfix: BBCode: Fixed charset problems.

---
 include/api.php    | 6 ++++--
 include/bbcode.php | 2 ++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/include/api.php b/include/api.php
index 629b5fcc2a..3858b9fe32 100644
--- a/include/api.php
+++ b/include/api.php
@@ -490,7 +490,8 @@
                 $_REQUEST['type'] = 'wall';
                 $_REQUEST['profile_uid'] = local_user();
                 $_REQUEST['api_source'] = true;
-                $txt = urldecode(requestdata('status'));
+                $txt = requestdata('status');
+                //$txt = urldecode(requestdata('status'));
 
                 require_once('library/HTMLPurifier.auto.php');
                 require_once('include/html2bbcode.php');
@@ -554,7 +555,8 @@
 
 		}
 		else
-			$_REQUEST['body'] = urldecode(requestdata('status'));
+			$_REQUEST['body'] = requestdata('status');
+			//$_REQUEST['body'] = urldecode(requestdata('status'));
 
 		$parent = requestdata('in_reply_to_status_id');
 		if(ctype_digit($parent))
diff --git a/include/bbcode.php b/include/bbcode.php
index 988e75d417..e212ec4aed 100644
--- a/include/bbcode.php
+++ b/include/bbcode.php
@@ -353,6 +353,8 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true) {
 		$Text = str_replace(array("<html><body>", "</body></html>", $doctype), array("", "", ""), $Text);
 
 		$Text = str_replace('<br></li>','</li>', $Text);
+
+		$Text = mb_convert_encoding($Text, "UTF-8", 'HTML-ENTITIES');
 	}
 
 	call_hooks('bbcode',$Text);

From 47c48aa438e30573a4df7ade39ecaacb804f7710 Mon Sep 17 00:00:00 2001
From: Michael Vogel <icarus@dabo.de>
Date: Sun, 8 Jul 2012 21:27:20 +0200
Subject: [PATCH 06/16] DBA: Possibility to log the database speed

---
 include/dba.php     | 18 ++++++++++++++----
 include/network.php |  3 +++
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/include/dba.php b/include/dba.php
index 881097f30c..d37b756aee 100644
--- a/include/dba.php
+++ b/include/dba.php
@@ -71,22 +71,32 @@ class dba {
 	}
 
 	public function q($sql) {
+		global $a;
 
 		if((! $this->db) || (! $this->connected))
 			return false;
 
 		$this->error = '';
 
-		//if (get_config("system", "db_log") != "")
-		//	@file_put_contents(get_config("system", "db_log"), datetime_convert().':'.session_id(). ' Start '.$sql."\n", FILE_APPEND);
+		if ($a->config["system"]["db_log"] != "")
+			$stamp1 = microtime(true);
 
 		if($this->mysqli)
 			$result = @$this->db->query($sql);
 		else
 			$result = @mysql_query($sql,$this->db);
 
-		//if (get_config("system", "db_log") != "")
-		//	@file_put_contents(get_config("system", "db_log"), datetime_convert().':'.session_id(). ' Stop '."\n", FILE_APPEND);
+		if ($a->config["system"]["db_log"] != "") {
+			$stamp2 = microtime(true);
+			$duration = round($stamp2-$stamp1, 3);
+			if ($duration > $a->config["system"]["db_loglimit"]) {
+				$backtrace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS);
+				@file_put_contents($a->config["system"]["db_log"], $duration."\t".
+						basename($backtrace[1]["file"])."\t".
+						$backtrace[1]["line"]."\t".$backtrace[2]["function"]."\t".
+						substr($sql, 0, 2000)."\n", FILE_APPEND);
+			}
+		}
 
 		if($this->mysqli) {
 			if($this->db->errno)
diff --git a/include/network.php b/include/network.php
index 500dff08be..d69454899a 100644
--- a/include/network.php
+++ b/include/network.php
@@ -795,6 +795,9 @@ function scale_external_images($s, $include_link = true, $scale_replace = false)
 
 	$a = get_app();
 
+	// Picture addresses can contain special characters
+	$s = htmlspecialchars_decode($s);
+
 	$matches = null;
 	$c = preg_match_all('/\[img\](.*?)\[\/img\]/ism',$s,$matches,PREG_SET_ORDER);
 	if($c) {

From 48f67ead9bbebe868e0ab585d56538540ba80339 Mon Sep 17 00:00:00 2001
From: Michael Vogel <icarus@dabo.de>
Date: Thu, 12 Jul 2012 01:17:33 +0200
Subject: [PATCH 07/16] parse_url: completely new code for fetching website
 information

---
 mod/parse_url.php | 366 ++++++++++++++++++++++++----------------------
 1 file changed, 193 insertions(+), 173 deletions(-)

diff --git a/mod/parse_url.php b/mod/parse_url.php
index a38f7e2702..09722341cb 100644
--- a/mod/parse_url.php
+++ b/mod/parse_url.php
@@ -1,7 +1,163 @@
 <?php
+if(!function_exists('deletenode')) {
+	function deletenode(&$doc, $node)
+	{
+		$xpath = new DomXPath($doc);
+		$list = $xpath->query("//".$node);
+		foreach ($list as $child)
+			$child->parentNode->removeChild($child);
+	}
+}
 
-require_once('library/HTML5/Parser.php');
-require_once('library/HTMLPurifier.auto.php');
+function parseurl_getsiteinfo($url) {
+	$siteinfo = array();
+
+	$ch = curl_init();
+	curl_setopt($ch, CURLOPT_URL, $url);
+	curl_setopt($ch, CURLOPT_HEADER, 1);
+	curl_setopt($ch, CURLOPT_NOBODY, 0);
+	curl_setopt($ch, CURLOPT_TIMEOUT, 3);
+	curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
+	curl_setopt($ch,CURLOPT_USERAGENT,'Opera/9.64(Windows NT 5.1; U; de) Presto/2.1.1');
+
+	$header = curl_exec($ch);
+	curl_close($ch);
+
+	if (preg_match('/charset=(.*?)\n/', $header, $matches))
+		$charset = trim(array_pop($matches));
+	else
+		$charset = "utf-8";
+
+	$pos = strpos($header, "\r\n\r\n");
+
+	if ($pos)
+		$body = trim(substr($header, $pos));
+	else
+		$body = $header;
+
+	$body = mb_convert_encoding($body, "UTF-8", $charset);
+	$body = mb_convert_encoding($body, 'HTML-ENTITIES', "UTF-8");
+
+	$doc = new DOMDocument();
+	@$doc->loadHTML($body);
+
+	deletenode($doc, 'style');
+	deletenode($doc, 'script');
+	deletenode($doc, 'option');
+	deletenode($doc, 'h1');
+	deletenode($doc, 'h2');
+	deletenode($doc, 'h3');
+	deletenode($doc, 'h4');
+	deletenode($doc, 'h5');
+	deletenode($doc, 'h6');
+	deletenode($doc, 'ol');
+	deletenode($doc, 'ul');
+
+	$xpath = new DomXPath($doc);
+
+	$list = $xpath->query("head/title");
+	foreach ($list as $node)
+		$siteinfo["title"] =  html_entity_decode($node->nodeValue, ENT_QUOTES, "UTF-8");
+
+	$list = $xpath->query("head/meta[@name]");
+	foreach ($list as $node) {
+		$attr = array();
+		if ($node->attributes->length)
+                        foreach ($node->attributes as $attribute)
+                                $attr[$attribute->name] = $attribute->value;
+
+		$attr["content"] = html_entity_decode($attr["content"], ENT_QUOTES, "UTF-8");
+
+		switch (strtolower($attr["name"])) {
+			case "fulltitle":
+				$siteinfo["title"] = $attr["content"];
+				break;
+			case "description":
+				$siteinfo["text"] = $attr["content"];
+				break;
+			case "dc.title":
+				$siteinfo["title"] = $attr["content"];
+				break;
+			case "dc.description":
+				$siteinfo["text"] = $attr["content"];
+				break;
+		}
+	}
+
+	$list = $xpath->query("head/meta[@property]");
+	foreach ($list as $node) {
+		$attr = array();
+		if ($node->attributes->length)
+                        foreach ($node->attributes as $attribute)
+                                $attr[$attribute->name] = $attribute->value;
+
+		$attr["content"] = html_entity_decode($attr["content"], ENT_QUOTES, "UTF-8");
+
+		switch (strtolower($attr["property"])) {
+			case "og:image":
+				$siteinfo["image"] = $attr["content"];
+				break;
+			case "og:title":
+				$siteinfo["title"] = $attr["content"];
+				break;
+			case "og:description":
+				$siteinfo["text"] = $attr["content"];
+				break;
+		}
+	}
+
+	if ($siteinfo["image"] == "") {
+                require_once('include/Photo.php');
+                $list = $xpath->query("//img[@src]");
+                foreach ($list as $node) {
+                        $attr = array();
+                        if ($node->attributes->length)
+                                foreach ($node->attributes as $attribute)
+                                        $attr[$attribute->name] = $attribute->value;
+
+                        // guess mimetype from headers or filename
+                        $type = guess_image_type($attr["src"],true);
+
+                        $i = fetch_url($attr["src"]);
+                        $ph = new Photo($i, $type);
+
+                        if(($ph->getWidth() > 200) and ($ph->getHeight() > 200))
+                                $siteinfo["image"] = $attr["src"];
+                }
+        }
+
+	if ($siteinfo["text"] == "") {
+		$text = "";
+
+		$list = $xpath->query("//div[@class='article']");
+		foreach ($list as $node)
+			$text .= " ".trim($node->nodeValue);
+
+		if ($text == "") {
+			$list = $xpath->query("//div[@class='content']");
+			foreach ($list as $node)
+				$text .= " ".trim($node->nodeValue);
+		}
+
+		// If none text was found then take the paragraph content
+		if ($text == "") {
+			$list = $xpath->query("//p");
+			foreach ($list as $node)
+				$text .= " ".trim($node->nodeValue);
+		}
+
+		if ($text != "") {
+			$text = trim(str_replace(array("\n", "\r"), array(" ", " "), $text));
+
+			while (strpos($text, "  "))
+				$text = trim(str_replace("  ", " ", $text));
+
+			$siteinfo["text"] = html_entity_decode(substr($text,0,350), ENT_QUOTES, "UTF-8").'...';
+		}
+	}
+
+	return($siteinfo);
+}
 
 function arr_add_hashes(&$item,$k) {
 	$item = '#' . $item;
@@ -40,13 +196,11 @@ function parse_url_content(&$a) {
 
 	logger('parse_url: ' . $url);
 
-
 	if($textmode)
 		$template = $br . '[bookmark=%s]%s[/bookmark]%s' . $br;
 	else
 		$template = "<br /><a class=\"bookmark\" href=\"%s\" >%s</a>%s<br />";
 
-
 	$arr = array('url' => $url, 'text' => '');
 
 	call_hooks('parse_link', $arr);
@@ -60,187 +214,53 @@ function parse_url_content(&$a) {
 	if($url && $title && $text) {
 
 		if($textmode)
-			$text = $br . $br . '[quote]' . $text . '[/quote]' . $br;
+			$text = $br . $br . '[quote]' . trim($text) . '[/quote]' . $br;
 		else
-			$text = '<br /><br /><blockquote>' . $text . '</blockquote><br />';
+			$text = '<br /><br /><blockquote>' . trim($text) . '</blockquote><br />';
 
 		$title = str_replace(array("\r","\n"),array('',''),$title);
 
 		$result = sprintf($template,$url,($title) ? $title : $url,$text) . $str_tags;
 
-		logger('parse_url (unparsed): returns: ' . $result); 
+		logger('parse_url (unparsed): returns: ' . $result);
 
 		echo $result;
 		killme();
 	}
 
+	$siteinfo = parseurl_getsiteinfo($url);
 
-	if($url) {
-		$s = fetch_url($url);
+	if($siteinfo["title"] == "") {
+		echo sprintf($template,$url,$url,'') . $str_tags;
+		killme();
 	} else {
-		echo '';
-		killme();
+		$image = $siteinfo["image"];
+		$text = $siteinfo["text"];
+		$title = $siteinfo["title"];
 	}
 
-//	logger('parse_url: data: ' . $s, LOGGER_DATA);
+	if ($image != "") {
+		$i = fetch_url($image);
+		if($i) {
+			require_once('include/Photo.php');
+			// guess mimetype from headers or filename
+			$type = guess_image_type($image,true);
 
-	if(! $s) {
-		echo sprintf($template,$url,$url,'') . $str_tags;
-		killme();
-	}
-
-	$matches = '';
-	$c = preg_match('/\<head(.*?)\>(.*?)\<\/head\>/ism',$s,$matches);
-	if($c) {
-//		logger('parse_url: header: ' . $matches[2], LOGGER_DATA);
-		try {
-			$domhead = HTML5_Parser::parse($matches[2]);
-		} catch (DOMException $e) {
-			logger('scrape_dfrn: parse error: ' . $e);
-		}
-		if($domhead)
-			logger('parsed header');
-	}
-
-	if(! $title) {
-		if(strpos($s,'<title>')) {
-			$title = substr($s,strpos($s,'<title>')+7,64);
-			if(strpos($title,'<') !== false)
-				$title = strip_tags(substr($title,0,strpos($title,'<')));
-		}
-	}
-
-	$config = HTMLPurifier_Config::createDefault();
-	$config->set('Cache.DefinitionImpl', null);
-	$purifier = new HTMLPurifier($config);
-	$s = $purifier->purify($s);
-
-//	logger('purify_output: ' . $s);
-
-	try {
-		$dom = HTML5_Parser::parse($s);
-	} catch (DOMException $e) {
-		logger('scrape_dfrn: parse error: ' . $e);
-	}
-
-	if(! $dom) {
-		echo sprintf($template,$url,$url,'') . $str_tags;
-		killme();
-	}
-
-	$items = $dom->getElementsByTagName('title');
-
-	if($items) {
-		foreach($items as $item) {
-			$title = trim($item->textContent);
-			break;
-		}
-	}
-
-
-	if(! $text) {
-		$divs = $dom->getElementsByTagName('div');
-		if($divs) {
-			foreach($divs as $div) {
-				$class = $div->getAttribute('class');
-				if($class && (stristr($class,'article') || stristr($class,'content'))) {
-					$items = $div->getElementsByTagName('p');
-					if($items) {
-						foreach($items as $item) {
-							$text = $item->textContent;
-							if(stristr($text,'<script')) {
-								$text = '';
-								continue;
-							}
-							$text = strip_tags($text);
-							if(strlen($text) < 100) {
-								$text = '';
-								continue;
-							}
-							$text = substr($text,0,250) . '...' ;
-							break;
-						}
-					}
-				}
-				if($text)
-					break;
-			}
-		}
-
-		if(! $text) {
-			$items = $dom->getElementsByTagName('p');
-			if($items) {
-				foreach($items as $item) {
-					$text = $item->textContent;
-					if(stristr($text,'<script'))
-						continue;
-					$text = strip_tags($text);
-					if(strlen($text) < 100) {
-						$text = '';
-						continue;
-					}
-					$text = substr($text,0,250) . '...' ;
-					break;
-				}
-			}
-		}
-	}
-
-	if(! $text) {
-		logger('parsing meta');
-		$items = (isset($domhead) && is_object($domhead) ? $domhead->getElementsByTagName('meta') : null);
-		if($items) {
-			foreach($items as $item) {
-				$property = $item->getAttribute('property');
-				if($property && (stristr($property,':description'))) {
-
-					$text = $item->getAttribute('content');
-					if(stristr($text,'<script')) {
-						$text = '';
-						continue;
-					}
-					$text = strip_tags($text);
-
-
-					$text = substr($text,0,250) . '...' ;
-				}
-				if($property && (stristr($property,':image'))) {
-
-					$image = $item->getAttribute('content');
-					if(stristr($text,'<script')) {
-						$image = '';
-						continue;
-					}
-					$image = strip_tags($image);
-					
-					$i = fetch_url($image);
-					if($i) {
-						require_once('include/Photo.php');
-						// guess mimetype from headers or filename
-						$type = guess_image_type($image,true);
-
-						$ph = new Photo($i, $type);
-						if($ph->is_valid()) {
-							if($ph->getWidth() > 300 || $ph->getHeight() > 300) {
-								$ph->scaleImage(300);
-								$new_width = $ph->getWidth();
-								$new_height = $ph->getHeight();
-								if($textmode)
-									$image = $br . $br . '[img=' . $new_width . 'x' . $new_height . ']' . $image . '[/img]';
-								else
-									$image = '<br /><br /><img height="' . $new_height . '" width="' . $new_width . '" src="' .$image . '" alt="photo" />';
-							}
-							else {
-								if($textmode) 
-									$image = $br . $br . '[img]' . $image . '[/img]';
-								else
-									$image = '<br /><br /><img src="' . $image . '" alt="photo" />';
-							}
-						}
-						else
-							$image = '';
-					
-					}
+			$ph = new Photo($i, $type);
+			if($ph->is_valid()) {
+				if($ph->getWidth() > 300 || $ph->getHeight() > 300) {
+					$ph->scaleImage(300);
+					$new_width = $ph->getWidth();
+					$new_height = $ph->getHeight();
+					if($textmode)
+						$image = $br . $br . '[img=' . $new_width . 'x' . $new_height . ']' . $image . '[/img]';
+					else
+						$image = '<br /><br /><img height="' . $new_height . '" width="' . $new_width . '" src="' .$image . '" alt="photo" />';
+				} else {
+					if($textmode)
+						$image = $br.$br.'[img]'.$image.'[/img]';
+					else
+						$image = '<br /><br /><img src="'.$image.'" alt="photo" />';
 				}
 			}
 		}
@@ -248,19 +268,19 @@ function parse_url_content(&$a) {
 
 	if(strlen($text)) {
 		if($textmode)
-			$text = $br .$br . '[quote]' . $text . '[/quote]' . $br ;
+			$text = $br.$br.'[quote]'.trim($text).'[/quote]'.$br ;
 		else
-			$text = '<br /><br /><blockquote>' . $text . '</blockquote><br />';
+			$text = '<br /><br /><blockquote>'.trim($text).'</blockquote><br />';
 	}
 
 	if($image) {
-		$text = $image . $br . $text;
+		$text = $image.$br.$text;
 	}
 	$title = str_replace(array("\r","\n"),array('',''),$title);
 
 	$result = sprintf($template,$url,($title) ? $title : $url,$text) . $str_tags;
 
-	logger('parse_url: returns: ' . $result); 
+	logger('parse_url: returns: ' . $result);
 
 	echo $result;
 	killme();

From 09034ce0ee78e2906033b79f7844cd63b9bab18c Mon Sep 17 00:00:00 2001
From: Michael Vogel <icarus@dabo.de>
Date: Thu, 12 Jul 2012 08:20:27 +0200
Subject: [PATCH 08/16] parse_url: Fetch multiple pictures so that the user can
 decide what to take.

---
 mod/parse_url.php | 55 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 43 insertions(+), 12 deletions(-)

diff --git a/mod/parse_url.php b/mod/parse_url.php
index 09722341cb..97e1658c89 100644
--- a/mod/parse_url.php
+++ b/mod/parse_url.php
@@ -1,4 +1,6 @@
 <?php
+require_once('include/Photo.php');
+
 if(!function_exists('deletenode')) {
 	function deletenode(&$doc, $node)
 	{
@@ -107,7 +109,6 @@ function parseurl_getsiteinfo($url) {
 	}
 
 	if ($siteinfo["image"] == "") {
-                require_once('include/Photo.php');
                 $list = $xpath->query("//img[@src]");
                 foreach ($list as $node) {
                         $attr = array();
@@ -121,10 +122,33 @@ function parseurl_getsiteinfo($url) {
                         $i = fetch_url($attr["src"]);
                         $ph = new Photo($i, $type);
 
-                        if(($ph->getWidth() > 200) and ($ph->getHeight() > 200))
-                                $siteinfo["image"] = $attr["src"];
+			if($ph->is_valid() and ($ph->getWidth() > 200) and ($ph->getHeight() > 200)) {
+				if ($siteinfo["image"] == "")
+	                                $siteinfo["image"] = $attr["src"];
+
+				if($ph->getWidth() > 300 || $ph->getHeight() > 300) {
+					$ph->scaleImage(300);
+	                                $siteinfo["images"][] = array("src"=>$attr["src"],
+									"width"=>$ph->getWidth(),
+									"height"=>$ph->getHeight());
+				} else
+	                                $siteinfo["images"][] = array("src"=>$attr["src"],
+									"width"=>$ph->getWidth(),
+									"height"=>$ph->getHeight());
+			}
                 }
-        }
+        } else {
+		// guess mimetype from headers or filename
+                $type = guess_image_type($siteinfo["image"],true);
+
+                $i = fetch_url($siteinfo["image"]);
+                $ph = new Photo($i, $type);
+
+		if($ph->is_valid())
+			$siteinfo["images"][] = array("src"=>$siteinfo["image"],
+							"width"=>$ph->getWidth(),
+							"height"=>$ph->getHeight());
+	}
 
 	if ($siteinfo["text"] == "") {
 		$text = "";
@@ -172,8 +196,8 @@ function parse_url_content(&$a) {
 	if(local_user() && intval(get_pconfig(local_user(),'system','plaintext')))
 		$textmode = true;
 
-	if($textmode)
-	$br = (($textmode) ? "\n" : '<br /?');
+	//if($textmode)
+	$br = (($textmode) ? "\n" : '<br />');
 
 	if(x($_GET,'binurl'))
 		$url = trim(hex2bin($_GET['binurl']));
@@ -234,12 +258,19 @@ function parse_url_content(&$a) {
 		echo sprintf($template,$url,$url,'') . $str_tags;
 		killme();
 	} else {
-		$image = $siteinfo["image"];
 		$text = $siteinfo["text"];
 		$title = $siteinfo["title"];
 	}
 
-	if ($image != "") {
+	$image = "";
+
+	foreach ($siteinfo["images"] as $imagedata)
+		if($textmode)
+			$image .= '[img='.$imagedata["width"].'x'.$imagedata["height"].']'.$imagedata["src"].'[/img]';
+		else
+			$image .= '<img height="'.$imagedata["height"].'" width="'.$imagedata["width"].'" src="'.$imagedata["src"].'" alt="photo" />';
+
+/*	if ($image != "") {
 		$i = fetch_url($image);
 		if($i) {
 			require_once('include/Photo.php');
@@ -264,17 +295,17 @@ function parse_url_content(&$a) {
 				}
 			}
 		}
-	}
+	}*/
 
 	if(strlen($text)) {
 		if($textmode)
-			$text = $br.$br.'[quote]'.trim($text).'[/quote]'.$br ;
+			$text = $br.'[quote]'.trim($text).'[/quote]'.$br ;
 		else
-			$text = '<br /><br /><blockquote>'.trim($text).'</blockquote><br />';
+			$text = '<br /><blockquote>'.trim($text).'</blockquote><br />';
 	}
 
 	if($image) {
-		$text = $image.$br.$text;
+		$text = $br.$br.$image.$br.$text;
 	}
 	$title = str_replace(array("\r","\n"),array('',''),$title);
 

From 02a1fc9cd08fba2168895d1892a91d8143323848 Mon Sep 17 00:00:00 2001
From: Michael Vogel <icarus@dabo.de>
Date: Thu, 12 Jul 2012 23:41:04 +0200
Subject: [PATCH 09/16] parse_url: Further improvements of the new method to
 fetch page data

---
 include/api.php   |   1 +
 mod/parse_url.php | 106 ++++++++++++++++++++++++++++------------------
 2 files changed, 66 insertions(+), 41 deletions(-)

diff --git a/include/api.php b/include/api.php
index 3858b9fe32..e0b788424e 100644
--- a/include/api.php
+++ b/include/api.php
@@ -1727,5 +1727,6 @@ notifications/follow
 notifications/leave
 blocks/exists
 blocks/blocking
+lists
 */
 
diff --git a/mod/parse_url.php b/mod/parse_url.php
index 97e1658c89..4d894969aa 100644
--- a/mod/parse_url.php
+++ b/mod/parse_url.php
@@ -1,6 +1,4 @@
 <?php
-require_once('include/Photo.php');
-
 if(!function_exists('deletenode')) {
 	function deletenode(&$doc, $node)
 	{
@@ -11,6 +9,30 @@ if(!function_exists('deletenode')) {
 	}
 }
 
+// Make $url absolute using the scheme/host/port (and directory) of the base URL passed in $scheme.
+function completeurl($url, $scheme) {
+        $urlarr = parse_url($url);
+        if (!is_array($urlarr) || isset($urlarr["scheme"]))
+                return($url);
+        $schemearr = parse_url($scheme);
+        // A protocol-relative URL ("//host/path") brings its own host and port; keep them.
+        $auth = isset($urlarr["host"]) ? $urlarr : $schemearr;
+        $complete = $schemearr["scheme"]."://".$auth["host"];
+        if (isset($auth["port"]))
+                $complete .= ":".$auth["port"];
+
+        // isset() avoids undefined-index notices; a path without leading "/" is relative to the base directory.
+        $path = isset($urlarr["path"]) ? $urlarr["path"] : "";
+        if (($path != "") && ($path[0] != "/") && !isset($urlarr["host"]))
+                $path = (isset($schemearr["path"]) ? substr($schemearr["path"], 0, strrpos($schemearr["path"], "/") + 1) : "/").$path;
+        $complete .= $path;
+        if (isset($urlarr["query"]) && ($urlarr["query"] != ""))
+                $complete .= "?".$urlarr["query"];
+        if (isset($urlarr["fragment"]) && ($urlarr["fragment"] != ""))
+                $complete .= "#".$urlarr["fragment"];
+        return($complete);
+}
+
 function parseurl_getsiteinfo($url) {
 	$siteinfo = array();
 
@@ -25,7 +47,8 @@ function parseurl_getsiteinfo($url) {
 	$header = curl_exec($ch);
 	curl_close($ch);
 
-	if (preg_match('/charset=(.*?)\n/', $header, $matches))
+	// Fetch the first mentioned charset. Can be in body or header
+	if (preg_match('/charset=(.*?)['."'".'"\s\n]/', $header, $matches))
 		$charset = trim(array_pop($matches));
 	else
 		$charset = "utf-8";
@@ -57,11 +80,13 @@ function parseurl_getsiteinfo($url) {
 
 	$xpath = new DomXPath($doc);
 
-	$list = $xpath->query("head/title");
+	//$list = $xpath->query("head/title");
+	$list = $xpath->query("//title");
 	foreach ($list as $node)
 		$siteinfo["title"] =  html_entity_decode($node->nodeValue, ENT_QUOTES, "UTF-8");
 
-	$list = $xpath->query("head/meta[@name]");
+	//$list = $xpath->query("head/meta[@name]");
+	$list = $xpath->query("//meta[@name]");
 	foreach ($list as $node) {
 		$attr = array();
 		if ($node->attributes->length)
@@ -86,7 +111,8 @@ function parseurl_getsiteinfo($url) {
 		}
 	}
 
-	$list = $xpath->query("head/meta[@property]");
+	//$list = $xpath->query("head/meta[@property]");
+	$list = $xpath->query("//meta[@property]");
 	foreach ($list as $node) {
 		$attr = array();
 		if ($node->attributes->length)
@@ -116,38 +142,32 @@ function parseurl_getsiteinfo($url) {
                                 foreach ($node->attributes as $attribute)
                                         $attr[$attribute->name] = $attribute->value;
 
-                        // guess mimetype from headers or filename
-                        $type = guess_image_type($attr["src"],true);
+			$src = completeurl($attr["src"], $url);
+			$photodata = getimagesize($src);
 
-                        $i = fetch_url($attr["src"]);
-                        $ph = new Photo($i, $type);
-
-			if($ph->is_valid() and ($ph->getWidth() > 200) and ($ph->getHeight() > 200)) {
-				if ($siteinfo["image"] == "")
-	                                $siteinfo["image"] = $attr["src"];
-
-				if($ph->getWidth() > 300 || $ph->getHeight() > 300) {
-					$ph->scaleImage(300);
-	                                $siteinfo["images"][] = array("src"=>$attr["src"],
-									"width"=>$ph->getWidth(),
-									"height"=>$ph->getHeight());
-				} else
-	                                $siteinfo["images"][] = array("src"=>$attr["src"],
-									"width"=>$ph->getWidth(),
-									"height"=>$ph->getHeight());
+			if (($photodata[0] > 150) and ($photodata[1] > 150)) {
+				if ($photodata[0] > 300) {
+					$photodata[1] = $photodata[1] * (300 / $photodata[0]);
+					$photodata[0] = 300;
+				}
+				if ($photodata[1] > 300) {
+					$photodata[0] = $photodata[0] * (300 / $photodata[1]);
+					$photodata[1] = 300;
+				}
+				$siteinfo["images"][] = array("src"=>$src,
+								"width"=>$photodata[0],
+								"height"=>$photodata[1]);
 			}
+
                 }
         } else {
-		// guess mimetype from headers or filename
-                $type = guess_image_type($siteinfo["image"],true);
+		$src = completeurl($siteinfo["image"], $url);
+		$photodata = getimagesize($src);
 
-                $i = fetch_url($siteinfo["image"]);
-                $ph = new Photo($i, $type);
-
-		if($ph->is_valid())
-			$siteinfo["images"][] = array("src"=>$siteinfo["image"],
-							"width"=>$ph->getWidth(),
-							"height"=>$ph->getHeight());
+		if (($photodata[0] > 10) and ($photodata[1] > 10))
+			$siteinfo["images"][] = array("src"=>$src,
+							"width"=>$photodata[0],
+							"height"=>$photodata[1]);
 	}
 
 	if ($siteinfo["text"] == "") {
@@ -155,19 +175,22 @@ function parseurl_getsiteinfo($url) {
 
 		$list = $xpath->query("//div[@class='article']");
 		foreach ($list as $node)
-			$text .= " ".trim($node->nodeValue);
+			if (strlen($node->nodeValue) > 40)
+				$text .= " ".trim($node->nodeValue);
 
 		if ($text == "") {
 			$list = $xpath->query("//div[@class='content']");
 			foreach ($list as $node)
-				$text .= " ".trim($node->nodeValue);
+				if (strlen($node->nodeValue) > 40)
+					$text .= " ".trim($node->nodeValue);
 		}
 
 		// If none text was found then take the paragraph content
 		if ($text == "") {
 			$list = $xpath->query("//p");
 			foreach ($list as $node)
-				$text .= " ".trim($node->nodeValue);
+				if (strlen($node->nodeValue) > 40)
+					$text .= " ".trim($node->nodeValue);
 		}
 
 		if ($text != "") {
@@ -238,9 +261,9 @@ function parse_url_content(&$a) {
 	if($url && $title && $text) {
 
 		if($textmode)
-			$text = $br . $br . '[quote]' . trim($text) . '[/quote]' . $br;
+			$text = $br . '[quote]' . trim($text) . '[/quote]' . $br;
 		else
-			$text = '<br /><br /><blockquote>' . trim($text) . '</blockquote><br />';
+			$text = '<br /><blockquote>' . trim($text) . '</blockquote><br />';
 
 		$title = str_replace(array("\r","\n"),array('',''),$title);
 
@@ -255,7 +278,8 @@ function parse_url_content(&$a) {
 	$siteinfo = parseurl_getsiteinfo($url);
 
 	if($siteinfo["title"] == "") {
-		echo sprintf($template,$url,$url,'') . $str_tags;
+		echo print_r($siteinfo, true);
+		//echo sprintf($template,$url,$url,'') . $str_tags;
 		killme();
 	} else {
 		$text = $siteinfo["text"];
@@ -305,7 +329,7 @@ function parse_url_content(&$a) {
 	}
 
 	if($image) {
-		$text = $br.$br.$image.$br.$text;
+		$text = $br.$br.$image.$text;
 	}
 	$title = str_replace(array("\r","\n"),array('',''),$title);
 
@@ -313,6 +337,6 @@ function parse_url_content(&$a) {
 
 	logger('parse_url: returns: ' . $result);
 
-	echo $result;
+	echo trim($result);
 	killme();
 }

From 5f400c4a9798d04c267cc05f2250a6c810802553 Mon Sep 17 00:00:00 2001
From: Michael Vogel <icarus@dabo.de>
Date: Fri, 13 Jul 2012 23:23:31 +0200
Subject: [PATCH 10/16] API: Now returns a real 404 when a function isn't
 implemented. parse_url: Image handling improved

---
 include/api.php   |  1 +
 include/items.php |  7 ++++---
 mod/parse_url.php | 27 ---------------------------
 3 files changed, 5 insertions(+), 30 deletions(-)

diff --git a/include/api.php b/include/api.php
index e0b788424e..7d230629b0 100644
--- a/include/api.php
+++ b/include/api.php
@@ -156,6 +156,7 @@
 				//echo "<pre>"; var_dump($r); die();
 			}
 		}
+		header("HTTP/1.1 404 Not Found");
 		logger('API call not implemented: '.$a->query_string." - ".print_r($_REQUEST,true));
 		$r = '<status><error>not implemented</error></status>';
 		switch($type){
diff --git a/include/items.php b/include/items.php
index 6ab681c875..d888f314de 100755
--- a/include/items.php
+++ b/include/items.php
@@ -698,9 +698,10 @@ function get_atom_elements($feed,$item) {
 	call_hooks('parse_atom', $arr);
 
 	//if (($res["title"] != "") or (strpos($res["body"], "RT @") > 0)) {
-	//	$debugfile = tempnam("/home/ike/log", "item-res2-");
-	//	file_put_contents($debugfile, serialize($res));
-	//}
+	if (strpos($res["body"], "RT @") !== false) {
+		$debugfile = tempnam("/home/ike/log", "item-res2-");
+		file_put_contents($debugfile, serialize($arr));
+	}
 
 	return $res;
 }
diff --git a/mod/parse_url.php b/mod/parse_url.php
index 4d894969aa..32f28b7b7b 100644
--- a/mod/parse_url.php
+++ b/mod/parse_url.php
@@ -294,33 +294,6 @@ function parse_url_content(&$a) {
 		else
 			$image .= '<img height="'.$imagedata["height"].'" width="'.$imagedata["width"].'" src="'.$imagedata["src"].'" alt="photo" />';
 
-/*	if ($image != "") {
-		$i = fetch_url($image);
-		if($i) {
-			require_once('include/Photo.php');
-			// guess mimetype from headers or filename
-			$type = guess_image_type($image,true);
-
-			$ph = new Photo($i, $type);
-			if($ph->is_valid()) {
-				if($ph->getWidth() > 300 || $ph->getHeight() > 300) {
-					$ph->scaleImage(300);
-					$new_width = $ph->getWidth();
-					$new_height = $ph->getHeight();
-					if($textmode)
-						$image = $br . $br . '[img=' . $new_width . 'x' . $new_height . ']' . $image . '[/img]';
-					else
-						$image = '<br /><br /><img height="' . $new_height . '" width="' . $new_width . '" src="' .$image . '" alt="photo" />';
-				} else {
-					if($textmode)
-						$image = $br.$br.'[img]'.$image.'[/img]';
-					else
-						$image = '<br /><br /><img src="'.$image.'" alt="photo" />';
-				}
-			}
-		}
-	}*/
-
 	if(strlen($text)) {
 		if($textmode)
 			$text = $br.'[quote]'.trim($text).'[/quote]'.$br ;

From 72f430ae58b830e499114241d340f39a228b867c Mon Sep 17 00:00:00 2001
From: Michael Vogel <icarus@dabo.de>
Date: Sat, 14 Jul 2012 13:59:42 +0200
Subject: [PATCH 11/16] parse_url: Problem when resizing images

---
 include/items.php | 8 ++++----
 mod/parse_url.php | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/items.php b/include/items.php
index b81208f366..724e0ac0f5 100755
--- a/include/items.php
+++ b/include/items.php
@@ -791,10 +791,10 @@ function get_atom_elements($feed,$item) {
 	call_hooks('parse_atom', $arr);
 
 	//if (($res["title"] != "") or (strpos($res["body"], "RT @") > 0)) {
-	if (strpos($res["body"], "RT @") !== false) {
-		$debugfile = tempnam("/home/ike/log", "item-res2-");
-		file_put_contents($debugfile, serialize($arr));
-	}
+	//if (strpos($res["body"], "RT @") !== false) {
+	//	$debugfile = tempnam("/home/ike/log", "item-res2-");
+	//	file_put_contents($debugfile, serialize($arr));
+	//}
 
 	return $res;
 }
diff --git a/mod/parse_url.php b/mod/parse_url.php
index 32f28b7b7b..3d4018745c 100644
--- a/mod/parse_url.php
+++ b/mod/parse_url.php
@@ -147,11 +147,11 @@ function parseurl_getsiteinfo($url) {
 
 			if (($photodata[0] > 150) and ($photodata[1] > 150)) {
 				if ($photodata[0] > 300) {
-					$photodata[1] = $photodata[1] * (300 / $photodata[0]);
+					$photodata[1] = round($photodata[1] * (300 / $photodata[0]));
 					$photodata[0] = 300;
 				}
 				if ($photodata[1] > 300) {
-					$photodata[0] = $photodata[0] * (300 / $photodata[1]);
+					$photodata[0] = round($photodata[0] * (300 / $photodata[1]));
 					$photodata[1] = 300;
 				}
 				$siteinfo["images"][] = array("src"=>$src,

From 8d7d46ae7a4a2ff049491f1c4b053fc55e8f0e13 Mon Sep 17 00:00:00 2001
From: Michael Vogel <icarus@dabo.de>
Date: Sat, 14 Jul 2012 19:54:27 +0200
Subject: [PATCH 12/16] Bugfix: multiple linefeeds were generated when items
 were written in the editor. The existing fix didn't really work

---
 include/bbcode.php |  3 +++
 include/items.php  | 25 +++++++++++++++++++++++++
 include/text.php   |  1 +
 3 files changed, 29 insertions(+)

diff --git a/include/bbcode.php b/include/bbcode.php
index 4aac33f112..a90be5de70 100644
--- a/include/bbcode.php
+++ b/include/bbcode.php
@@ -369,6 +369,9 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true) {
 	// oembed tag
 	$Text = oembed_bbcode2html($Text);
 
+	// Avoid triple linefeeds through oembed
+	$Text = str_replace("<br style='clear:left'></span><br /><br />", "<br style='clear:left'></span><br />", $Text);
+
 	// If we found an event earlier, strip out all the event code and replace with a reformatted version.
 	// Replace the event-start section with the entire formatted event. The other bbcode is stripped.
 	// Summary (e.g. title) is required, earlier revisions only required description (in addition to 
diff --git a/include/items.php b/include/items.php
index 724e0ac0f5..fff9e5fcf5 100755
--- a/include/items.php
+++ b/include/items.php
@@ -786,6 +786,31 @@ function get_atom_elements($feed,$item) {
 		$res['target'] .= '</target>' . "\n";
 	}
 
+	// This is some experimental stuff. By now retweets are shown with "RT:"
+	// But: There is data so that the message could be shown similar to native retweets
+	// There is probably a better way to parse this array - but it didn't work for me.
+	$child = $item->feed->data["child"][SIMPLEPIE_NAMESPACE_ATOM_10]["feed"][0]["child"][SIMPLEPIE_NAMESPACE_ATOM_10]["entry"][0]["child"]["http://activitystrea.ms/spec/1.0/"][object][0]["child"];
+	if (is_array($child)) {
+		$message = $child["http://activitystrea.ms/spec/1.0/"]["object"][0]["child"][SIMPLEPIE_NAMESPACE_ATOM_10]["content"][0]["data"];
+		$author = $child[SIMPLEPIE_NAMESPACE_ATOM_10]["author"][0]["child"][SIMPLEPIE_NAMESPACE_ATOM_10];
+		$uri = $author["uri"][0]["data"];
+		$name = $author["name"][0]["data"];
+		$avatar = @array_shift($author["link"][2]["attribs"]);
+		$avatar = $avatar["href"];
+
+		if (($name != "") and ($uri != "") and ($avatar != "") and ($message != "")) {
+			$res["owner-name"] = $res["author-name"];
+			$res["owner-link"] = $res["author-link"];
+			$res["owner-avatar"] = $res["author-avatar"];
+
+			$res["author-name"] = $name;
+			$res["author-link"] = $uri;
+			$res["author-avatar"] = $avatar;
+
+			$res["body"] = html2bbcode($message);
+		}
+	}
+
 	$arr = array('feed' => $feed, 'item' => $item, 'result' => $res);
 
 	call_hooks('parse_atom', $arr);
diff --git a/include/text.php b/include/text.php
index 409d40d59f..c3558c6418 100644
--- a/include/text.php
+++ b/include/text.php
@@ -1537,6 +1537,7 @@ function undo_post_tagging($s) {
 
 function fix_mce_lf($s) {
 	$s = str_replace("\r\n","\n",$s);
+	$s = str_replace("\n\n","\n",$s);
 	return $s;
 }
 

From 4e9086f9b0a07672821c1d2dd72d01fcf639c45d Mon Sep 17 00:00:00 2001
From: Michael Vogel <icarus@dabo.de>
Date: Sun, 15 Jul 2012 12:47:24 +0200
Subject: [PATCH 13/16] Changes in the bbcode to markdown converter to make it
 more compatible with Diaspora

---
 include/bb2diaspora.php             |  9 +++++++++
 include/bbcode.php                  |  4 ++++
 include/markdownify/markdownify.php | 10 +++++++++-
 3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/include/bb2diaspora.php b/include/bb2diaspora.php
index 436412dbd4..25e4640b3a 100644
--- a/include/bb2diaspora.php
+++ b/include/bb2diaspora.php
@@ -196,6 +196,15 @@ function bb2diaspora($Text,$preserve_nl = false) {
 	// The bbcode parser now handles youtube-links (and the other stuff) correctly.
 	// Additionally the html code is now fixed so that lists are now working.
 
+	// Converting images with size parameters to simple images. Markdown doesn't know it.
+	$Text = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $Text);
+
+	// the following was added on 10-January-2012 due to an inability of Diaspora's
+	// new javascript markdown processor to handle links with images as the link "text"
+	// It is not optimal and may be removed if this ability is restored in the future
+	$Text = preg_replace("/\[url\=([^\[\]]*)\]\[img\](.*?)\[\/img\]\[\/url\]/ism",
+				"[img]$2[/img]\n[url]$1[/url]", $Text);
+
 	// Convert it to HTML - don't try oembed
 	$Text = bbcode($Text, $preserve_nl, false);
 
diff --git a/include/bbcode.php b/include/bbcode.php
index a90be5de70..b5f2ec5697 100644
--- a/include/bbcode.php
+++ b/include/bbcode.php
@@ -115,6 +115,10 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true) {
 
 	$a = get_app();
 
+	// Move all spaces out of the tags
+	$Text = preg_replace("/\[(\w*)\](\s*)/ism", '$2[$1]', $Text);
+	$Text = preg_replace("/(\s*)\[\/(\w*)\]/ism", '[/$2]$1', $Text);
+
 	// Hide all [noparse] contained bbtags by spacefying them
 	// POSSIBLE BUG --> Will the 'preg' functions crash if there's an embedded image?
 
diff --git a/include/markdownify/markdownify.php b/include/markdownify/markdownify.php
index 43730cb77f..7bbf1cbbed 100644
--- a/include/markdownify/markdownify.php
+++ b/include/markdownify/markdownify.php
@@ -735,6 +735,13 @@ class Markdownify {
       $this->parser->tagAttributes['src'] = $this->decode($this->parser->tagAttributes['src']);
     }
 
+// ![Alt text](/path/to/img.jpg "Optional title")
+    if ($this->parser->tagAttributes['title'] != "")
+      $this->out('!['.$this->parser->tagAttributes['alt'].']('.$this->parser->tagAttributes['src'].'"'.$this->parser->tagAttributes['title'].'")', true);
+    else
+      $this->out('!['.$this->parser->tagAttributes['alt'].']('.$this->parser->tagAttributes['src'].')', true);
+
+/*
     # [This link][id]
     $link_id = false;
     if (!empty($this->stack['a'])) {
@@ -759,6 +766,7 @@ class Markdownify {
     }
 
     $this->out('!['.$this->parser->tagAttributes['alt'].']['.$link_id.']', true);
+*/
   }
   /**
    * handle <code> tags
@@ -1181,4 +1189,4 @@ class Markdownify {
   function parent() {
     return end($this->parser->openTags);
   }
-}
\ No newline at end of file
+}

From 8aeb3ec2fb2596ddef0200122583c23b0d549351 Mon Sep 17 00:00:00 2001
From: Michael Vogel <icarus@dabo.de>
Date: Sun, 15 Jul 2012 13:12:24 +0200
Subject: [PATCH 14/16] Corrected the fix for images with links

---
 include/bb2diaspora.php | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/bb2diaspora.php b/include/bb2diaspora.php
index 25e4640b3a..1a7d8eaffa 100644
--- a/include/bb2diaspora.php
+++ b/include/bb2diaspora.php
@@ -202,7 +202,7 @@ function bb2diaspora($Text,$preserve_nl = false) {
 	// the following was added on 10-January-2012 due to an inability of Diaspora's
 	// new javascript markdown processor to handle links with images as the link "text"
 	// It is not optimal and may be removed if this ability is restored in the future
-	$Text = preg_replace("/\[url\=([^\[\]]*)\]\[img\](.*?)\[\/img\]\[\/url\]/ism",
+	$Text = preg_replace("/\[url\=([^\[\]]*)\]\s*\[img\](.*?)\[\/img\]\s*\[\/url\]/ism",
 				"[img]$2[/img]\n[url]$1[/url]", $Text);
 
 	// Convert it to HTML - don't try oembed

From 335c15ede55997989647552c05dd74db3431b499 Mon Sep 17 00:00:00 2001
From: Michael Vogel <icarus@dabo.de>
Date: Sun, 15 Jul 2012 13:40:01 +0200
Subject: [PATCH 15/16] Hopefully the final fix for image links in bb2diaspora

---
 include/bb2diaspora.php | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/bb2diaspora.php b/include/bb2diaspora.php
index 1a7d8eaffa..b95dee8f3b 100644
--- a/include/bb2diaspora.php
+++ b/include/bb2diaspora.php
@@ -190,7 +190,7 @@ function diaspora_ol($s) {
 }
 
 
-function bb2diaspora($Text,$preserve_nl = false) {
+function bb2diaspora($Text,$preserve_nl = false, $fordiaspora = true) {
 
 	// Re-enabling the converter again.
 	// The bbcode parser now handles youtube-links (and the other stuff) correctly.
@@ -202,8 +202,9 @@ function bb2diaspora($Text,$preserve_nl = false) {
 	// the following was added on 10-January-2012 due to an inability of Diaspora's
 	// new javascript markdown processor to handle links with images as the link "text"
 	// It is not optimal and may be removed if this ability is restored in the future
-	$Text = preg_replace("/\[url\=([^\[\]]*)\]\s*\[img\](.*?)\[\/img\]\s*\[\/url\]/ism",
-				"[img]$2[/img]\n[url]$1[/url]", $Text);
+	if ($fordiaspora)
+		$Text = preg_replace("/\[url\=([^\[\]]*)\]\s*\[img\](.*?)\[\/img\]\s*\[\/url\]/ism",
+					"[url]$1[/url]\n[img]$2[/img]", $Text);
 
 	// Convert it to HTML - don't try oembed
 	$Text = bbcode($Text, $preserve_nl, false);

From 222635d15afba7d3a7aef002e90d06f549961012 Mon Sep 17 00:00:00 2001
From: Michael Vogel <icarus@dabo.de>
Date: Wed, 18 Jul 2012 21:06:38 +0200
Subject: [PATCH 16/16] parse_url: Just added a comment for future elements
 that should be added as well.

---
 mod/parse_url.php | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/mod/parse_url.php b/mod/parse_url.php
index 3d4018745c..0610531967 100644
--- a/mod/parse_url.php
+++ b/mod/parse_url.php
@@ -1,4 +1,18 @@
 <?php
+/* To-Do
+https://developers.google.com/+/plugins/snippet/
+
+<meta itemprop="name" content="Toller Titel">
+<meta itemprop="description" content="Eine tolle Beschreibung">
+<meta itemprop="image" content="http://maple.libertreeproject.org/images/tree-icon.png">
+
+<body itemscope itemtype="http://schema.org/Product">
+  <h1 itemprop="name">Shiny Trinket</h1>
+  <img itemprop="image" src="{image-url}" />
+  <p itemprop="description">Shiny trinkets are shiny.</p>
+</body>
+*/
+
 if(!function_exists('deletenode')) {
 	function deletenode(&$doc, $node)
 	{