From 5d4719f0096925fc14d8a0504d0045a1b35ef08a Mon Sep 17 00:00:00 2001
From: Michael Vogel <icarus@dabo.de>
Date: Fri, 4 Apr 2014 10:52:53 +0200
Subject: [PATCH] The function to fetch data about a site (via the "parse url"
 function) is moved from diaspora to include/items.php, since it is multi-purpose

---
 include/diaspora.php | 59 ++++++--------------------------------
 include/items.php    | 67 +++++++++++++++++++++++++++++++++-----------
 2 files changed, 60 insertions(+), 66 deletions(-)

diff --git a/include/diaspora.php b/include/diaspora.php
index 7476a5c26d..feb57a0404 100755
--- a/include/diaspora.php
+++ b/include/diaspora.php
@@ -7,47 +7,6 @@ require_once('include/contact_selectors.php');
 require_once('include/queue_fn.php');
 require_once('include/lock.php');
 
-function diaspora_add_page_info($url) {
-	require_once("mod/parse_url.php");
-	$data = parseurl_getsiteinfo($url, true);
-
-	logger('diaspora_add_page_info: fetch page info for '.$url.' '.print_r($data, true), LOGGER_DATA);
-
-	if (($data["type"] != "link") OR ($data["title"] == $url))
-		return("");
-
-	if (is_string($data["title"]))
-		$text .= "[bookmark=".$url."]".trim($data["title"])."[/bookmark]";
-
-	if (sizeof($data["images"]) > 0) {
-		$imagedata = $data["images"][0];
-		$text .= '[img]'.$imagedata["src"].'[/img]';
-	}
-
-	if (is_string($data["text"]))
-		$text .= "[quote]".$data["text"]."[/quote]";
-
-	return("\n[class=type-".$data["type"]."]".$text."[/class]");
-}
-
-function diaspora_add_page_info_to_body($body) {
-
-	logger('diaspora_add_page_info_to_body: fetch page info for body '.$body, LOGGER_DATA);
-
-	$URLSearchString = "^\[\]";
-
-	// Adding these spaces is a quick hack due to my problems with regular expressions :)
-	preg_match("/[^@#]\[url\]([$URLSearchString]*)\[\/url\]/ism", " ".$body, $matches);
-
-	if (!$matches)
-		preg_match("/[^@#]\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", " ".$body, $matches);
-
-	if ($matches)
-		$body .= diaspora_add_page_info($matches[1]);
-
-	return $body;
-}
-
 function diaspora_dispatch_public($msg) {
 
 	$enabled = intval(get_config('system','diaspora_enabled'));
@@ -526,7 +485,7 @@ function diaspora_decode($importer,$xml) {
 		$base = $dom->env;
 	elseif($dom->data)
 		$base = $dom;
-	
+
 	if(! $base) {
 		logger('mod-diaspora: unable to locate salmon data in xml ');
 		http_status_exit(400);
@@ -654,7 +613,7 @@ function diaspora_request($importer,$xml) {
 				$arr['author-avatar'] = $arr['owner-avatar'] = $self[0]['thumb'];
 				$arr['verb'] = ACTIVITY_FRIEND;
 				$arr['object-type'] = ACTIVITY_OBJ_PERSON;
-				
+
 				$A = '[url=' . $self[0]['url'] . ']' . $self[0]['name'] . '[/url]';
 				$B = '[url=' . $contact['url'] . ']' . $contact['name'] . '[/url]';
 				$BPhoto = '[url=' . $contact['url'] . ']' . '[img]' . $contact['thumb'] . '[/img][/url]';
@@ -713,7 +672,7 @@ function diaspora_request($importer,$xml) {
 		1,
 		2
 	);
-		 
+
 	// find the contact record we just created
 
 	$contact_record = diaspora_get_contact_by_handle($importer['uid'],$sender_handle);
@@ -734,7 +693,7 @@ function diaspora_request($importer,$xml) {
 	if($importer['page-flags'] == PAGE_NORMAL) {
 
 		$hash = random_string() . (string) time();   // Generate a confirm_key
-	
+
 		$ret = q("INSERT INTO `intro` ( `uid`, `contact-id`, `blocked`, `knowyou`, `note`, `hash`, `datetime` )
 			VALUES ( %d, %d, %d, %d, '%s', '%s', '%s' )",
 			intval($importer['uid']),
@@ -753,7 +712,7 @@ function diaspora_request($importer,$xml) {
 		require_once('include/Photo.php');
 
 		$photos = import_profile_photo($contact_record['photo'],$importer['uid'],$contact_record['id']);
-		
+
 		// technically they are sharing with us (CONTACT_IS_SHARING), 
 		// but if our page-type is PAGE_COMMUNITY or PAGE_SOAPBOX
 		// we are going to change the relationship and make them a follower.
@@ -868,7 +827,7 @@ function diaspora_post($importer,$xml,$msg) {
 	$body = diaspora2bb($xml->raw_message);
 
 	// Add OEmbed and other information to the body
-	$body = diaspora_add_page_info_to_body($body);
+	$body = add_page_info_to_body($body, false, true);
 
 	$datarray = array();
 
@@ -1025,7 +984,7 @@ function diaspora_reshare($importer,$xml,$msg) {
 		$body = scale_external_images($body);
 
 		// Add OEmbed and other information to the body
-		$body = diaspora_add_page_info_to_body($body);
+		$body = add_page_info_to_body($body, false, true);
 	}
 	else {
 		// Maybe it is a reshare of a photo that will be delivered at a later time (testing)
@@ -1445,12 +1404,12 @@ function diaspora_comment($importer,$xml,$msg) {
 		foreach($myconv as $conv) {
 
 			// now if we find a match, it means we're in this conversation
-	
+
 			if(! link_compare($conv['author-link'],$importer_url))
 				continue;
 
 			require_once('include/enotify.php');
-								
+
 			$conv_parent = $conv['parent'];
 
 			notification(array(
diff --git a/include/items.php b/include/items.php
index ac8c34794a..1516c12015 100755
--- a/include/items.php
+++ b/include/items.php
@@ -856,9 +856,10 @@ function get_atom_elements($feed, $item, $contact = array()) {
 	}
 
 	if (isset($contact["network"]) AND ($contact["network"] == NETWORK_FEED) AND $contact['fetch_further_information']) {
-		$res["body"] = $res["title"]."\n\n[class=type-link]".fetch_siteinfo($res['plink'])."[/class]";
+		$res["body"] = $res["title"].add_page_info($res['plink']);
 		$res["title"] = "";
-	}
+	} elseif (isset($contact["network"]) AND ($contact["network"] == NETWORK_OSTATUS))
+		$res["body"] = add_page_info_to_body($res["body"]);
 
 	$arr = array('feed' => $feed, 'item' => $item, 'result' => $res);
 
@@ -874,29 +875,63 @@ function get_atom_elements($feed, $item, $contact = array()) {
 	return $res;
 }
 
-function fetch_siteinfo($url) {
-	require_once("mod/parse_url.php");
+// Builds a BBCode "[class=type-...]" block (bookmark, image, quote) from the site info of $url.
+// Returns "" for types other than link/video/photo, when the title equals $url, or for photos if $no_photos.
+function add_page_info($url, $no_photos = false) {
+	require_once("mod/parse_url.php");
+	$data = parseurl_getsiteinfo($url, true);

-	// Fetch site infos - but only from the meta data
-	$data = parseurl_getsiteinfo($url, true);
+	logger('add_page_info: fetch page info for '.$url.' '.print_r($data, true), LOGGER_DEBUG);

-	$text = "";
+	// Rich content that has everything a link has (title, text, image) is treated as a link
+	if (($data["type"] == "rich") AND is_string($data["title"]) AND
+		is_string($data["text"]) AND (sizeof($data["images"]) > 0))
+		$data["type"] = "link";

-	if (!is_string($data["text"]) AND (sizeof($data["images"]) == 0) AND ($data["title"] == $url))
+	if ((($data["type"] != "link") AND ($data["type"] != "video") AND ($data["type"] != "photo")) OR ($data["title"] == $url))
+		return("");
+
+	if ($no_photos AND ($data["type"] == "photo"))
 		return("");

-	if (is_string($data["title"]))
-		$text .= "[bookmark=".$url."]".trim($data["title"])."[/bookmark]\n";
+	$text = ""; // must exist before the appends below and the final return
+	if (($data["type"] != "photo") AND is_string($data["title"]))
+		$text .= "[bookmark=".$url."]".trim($data["title"])."[/bookmark]";

-	if (sizeof($data["images"]) > 0) {
-		$imagedata = $data["images"][0];
-		$text .= '[img='.$imagedata["width"].'x'.$imagedata["height"].']'.$imagedata["src"].'[/img]' . "\n";
+	if (($data["type"] != "video") AND (sizeof($data["images"]) > 0))
+		$text .= '[img]'.$data["images"][0]["src"].'[/img]';
+
+	if (($data["type"] != "photo") AND is_string($data["text"]))
+		$text .= "[quote]".$data["text"]."[/quote]";
+
+	return("\n[class=type-".$data["type"]."]".$text."[/class]");
+}
+
+function add_page_info_to_body($body, $texturl = false, $no_photos = false) {
+        // Appends the add_page_info() block for the first URL found in $body; $body is returned unchanged if none is found.
+        logger('add_page_info_to_body: fetch page info for body '.$body, LOGGER_DEBUG);
+
+        $URLSearchString = "^\[\]";
+
+        // The patterns need one character in front of "[url" (anything but "@" or "#"), so a space is prepended to the body
+        preg_match("/[^@#]\[url\]([$URLSearchString]*)\[\/url\]/ism", " ".$body, $matches);
+
+        if (!$matches)
+                preg_match("/[^@#]\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", " ".$body, $matches);
+
+	// Fall back to a plain http(s) URL that is not wrapped in bbcode (only when $texturl is set)
+	if (!$matches AND $texturl) {
+		preg_match("/([^\]\='".'"'."]|^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism", " ".$body, $matches);
+
+		// Here the URL is in group 2; copy it to index 1, which the code below reads
+	        if ($matches)
+        	        $matches[1] = $matches[2];
 	}
 
-	if (is_string($data["text"]))
-		$text .= "[quote]".$data["text"]."[/quote]";
+        if ($matches)
+                $body .= add_page_info($matches[1], $no_photos);
 
-	return($text);
+        return $body;
 }
 
 function encode_rel_links($links) {