Merge pull request #932 from annando/master

Preparation for bidirectional statusnet sync, functions moved from addons into the core and some converting stuff
2014-04-07 09:53:47 +02:00 · 2014-04-07 09:53:47 +02:00 · 84189ac775
commit 84189ac775
parent 7ede281db6 73f0496127
17 changed files with 298 additions and 192 deletions
--- a/include/Scrape.php
+++ b/include/Scrape.php
@ -14,7 +14,7 @@ function scrape_dfrn($url) {

 	$s = fetch_url($url);

-	if(! $s) 
+	if(! $s)
 		return $ret;

 	$headers = $a->get_curl_headers();
@ -23,7 +23,7 @@ function scrape_dfrn($url) {

 	$lines = explode("\n",$headers);
 	if(count($lines)) {
-		foreach($lines as $line) {				
+		foreach($lines as $line) {
 			// don't try and run feeds through the html5 parser
 			if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml'))))
 				return ret;
@ -351,7 +351,8 @@ function probe_url($url, $mode = PROBE_NORMAL) {
 	$has_lrdd = false;
 	$email_conversant = false;

-	$twitter = ((strpos($url,'twitter.com') !== false) ? true : false);
+	// Twitter is deactivated since twitter closed its old API
+	//$twitter = ((strpos($url,'twitter.com') !== false) ? true : false);
 	$lastfm  = ((strpos($url,'last.fm/user') !== false) ? true : false);

 	$at_addr = ((strpos($url,'@') !== false) ? true : false);
@ -561,7 +562,7 @@ function probe_url($url, $mode = PROBE_NORMAL) {
 			$vcard['nick'] = $addr_parts[0];
 		}

-		if($twitter) {
+		/* if($twitter) {
 			logger('twitter: setup');
 			$tid = basename($url);
 			$tapi = 'https://api.twitter.com/1/statuses/user_timeline.rss';
@ -574,7 +575,7 @@ function probe_url($url, $mode = PROBE_NORMAL) {
 			$vcard['photo'] = 'https://api.twitter.com/1/users/profile_image?screen_name=' . $tid . '&size=bigger';
 			$vcard['nick'] = $tid;
 			$vcard['fn'] = $tid;
-		}
+		} */

 		if($lastfm) {
 			$profile = $url;
@ -609,7 +610,7 @@ function probe_url($url, $mode = PROBE_NORMAL) {
 			logger('probe_url: scrape_feed ' . (($poll)? $poll : $url) . ' returns: ' . print_r($feedret,true), LOGGER_DATA);
 			if(count($feedret) && ($feedret['feed_atom'] || $feedret['feed_rss'])) {
 				$poll = ((x($feedret,'feed_atom')) ? unamp($feedret['feed_atom']) : unamp($feedret['feed_rss']));
-				if(! x($vcard)) 
+				if(! x($vcard))
 					$vcard = array();
 			}

--- a/include/api.php
+++ b/include/api.php
@ -2420,10 +2420,11 @@ function api_clean_plain_items($Text) {

 	$Text = bb_CleanPictureLinks($Text);

-	if ($include_entities == "true") {
-		$URLSearchString = "^\[\]";
+	$URLSearchString = "^\[\]";

-		$Text = preg_replace("/#\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism",'#$2',$Text);
+	$Text = preg_replace("/([!#@])\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism",'$1$3',$Text);
+
+	if ($include_entities == "true") {
 		$Text = preg_replace("/\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism",'[url=$1]$1[/url]',$Text);
 	}

--- a/include/bb2diaspora.php
+++ b/include/bb2diaspora.php
@ -27,7 +27,9 @@ function diaspora2bb($s) {
 	$s = str_replace("\r","",$s);

 	// <br/> is invalid. Replace it with the valid expression
-	$s = str_replace("<br/>","<br />",$s);
+	logger("diaspora2bb: 1: ".$s, LOGGER_DATA);
+	$s = str_replace(array("<br/>", "</p>", "<p>"),array("<br />", "<br />", "<br />"),$s);
+	logger("diaspora2bb: 2: ".$s, LOGGER_DATA);

 	$s = preg_replace('/\@\{(.+?)\; (.+?)\@(.+?)\}/','@[url=https://$3/u/$2]$1[/url]',$s);

--- a/include/contact_selectors.php
+++ b/include/contact_selectors.php
@ -86,7 +86,9 @@ function network_to_name($s) {
 		NETWORK_MAIL2    => t('Email'),
 		NETWORK_GPLUS    => t('Google+'),
 		NETWORK_PUMPIO   => t('pump.io'),
-		NETWORK_TWITTER  => t('Twitter')
+		NETWORK_TWITTER  => t('Twitter'),
+		NETWORK_DIASPORA2 => t('Diaspora Connector'),
+		NETWORK_STATUSNET => t('Statusnet')
 	);

 	call_hooks('network_to_name', $nets);
--- a/include/cronhooks.php
+++ b/include/cronhooks.php
@ -9,7 +9,7 @@ function cronhooks_run(&$argv, &$argc){
 	if(is_null($a)) {
 		$a = new App;
 	}
-  
+
 	if(is_null($db)) {
 	    @include(".htconfig.php");
    	require_once("include/dba.php");
@ -24,6 +24,17 @@ function cronhooks_run(&$argv, &$argc){
 	load_config('config');
 	load_config('system');

+	// Skip this run when the 1-minute load average is above system.maxloadavg (default 50)
+	$maxsysload = intval(get_config('system','maxloadavg'));
+	if($maxsysload < 1)
+		$maxsysload = 50;
+	$load = (function_exists('sys_getloadavg') ? sys_getloadavg() : false);
+	// sys_getloadavg() returns an array (or false on failure), so only its first entry is logged
+	if(is_array($load) && (intval($load[0]) > $maxsysload)) {
+		logger('system: load ' . $load[0] . ' too high. Poller deferred to next scheduled run.');
+		return;
+	}
+
 	$lockpath = get_config('system','lockpath');
 	if ($lockpath != '') {
 		$pidfile = new pidfile($lockpath, 'cron.lck');
@ -38,7 +49,7 @@ function cronhooks_run(&$argv, &$argc){
 	load_hooks();

 	logger('cronhooks: start');
-	
+

 	$d = datetime_convert();

--- a/include/diaspora.php
+++ b/include/diaspora.php
@ -7,47 +7,6 @@ require_once('include/contact_selectors.php');
 require_once('include/queue_fn.php');
 require_once('include/lock.php');

-function diaspora_add_page_info($url) {
-	require_once("mod/parse_url.php");
-	$data = parseurl_getsiteinfo($url, true);
-
-	logger('diaspora_add_page_info: fetch page info for '.$url.' '.print_r($data, true), LOGGER_DATA);
-
-	if (($data["type"] != "link") OR ($data["title"] == $url))
-		return("");
-
-	if (is_string($data["title"]))
-		$text .= "[bookmark=".$url."]".trim($data["title"])."[/bookmark]";
-
-	if (sizeof($data["images"]) > 0) {
-		$imagedata = $data["images"][0];
-		$text .= '[img]'.$imagedata["src"].'[/img]';
-	}
-
-	if (is_string($data["text"]))
-		$text .= "[quote]".$data["text"]."[/quote]";
-
-	return("\n[class=type-".$data["type"]."]".$text."[/class]");
-}
-
-function diaspora_add_page_info_to_body($body) {
-
-	logger('diaspora_add_page_info_to_body: fetch page info for body '.$body, LOGGER_DATA);
-
-	$URLSearchString = "^\[\]";
-
-	// Adding these spaces is a quick hack due to my problems with regular expressions :)
-	preg_match("/[^@#]\[url\]([$URLSearchString]*)\[\/url\]/ism", " ".$body, $matches);
-
-	if (!$matches)
-		preg_match("/[^@#]\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", " ".$body, $matches);
-
-	if ($matches)
-		$body .= diaspora_add_page_info($matches[1]);
-
-	return $body;
-}
-
 function diaspora_dispatch_public($msg) {

 	$enabled = intval(get_config('system','diaspora_enabled'));
@ -526,7 +485,7 @@ function diaspora_decode($importer,$xml) {
 		$base = $dom->env;
 	elseif($dom->data)
 		$base = $dom;
-	
+
 	if(! $base) {
 		logger('mod-diaspora: unable to locate salmon data in xml ');
 		http_status_exit(400);
@ -654,7 +613,7 @@ function diaspora_request($importer,$xml) {
 				$arr['author-avatar'] = $arr['owner-avatar'] = $self[0]['thumb'];
 				$arr['verb'] = ACTIVITY_FRIEND;
 				$arr['object-type'] = ACTIVITY_OBJ_PERSON;
-				
+
 				$A = '[url=' . $self[0]['url'] . ']' . $self[0]['name'] . '[/url]';
 				$B = '[url=' . $contact['url'] . ']' . $contact['name'] . '[/url]';
 				$BPhoto = '[url=' . $contact['url'] . ']' . '[img]' . $contact['thumb'] . '[/img][/url]';
@ -713,7 +672,7 @@ function diaspora_request($importer,$xml) {
 		1,
 		2
 	);
-		 
+
 	// find the contact record we just created

 	$contact_record = diaspora_get_contact_by_handle($importer['uid'],$sender_handle);
@ -734,7 +693,7 @@ function diaspora_request($importer,$xml) {
 	if($importer['page-flags'] == PAGE_NORMAL) {

 		$hash = random_string() . (string) time();   // Generate a confirm_key
-	
+
 		$ret = q("INSERT INTO `intro` ( `uid`, `contact-id`, `blocked`, `knowyou`, `note`, `hash`, `datetime` )
 			VALUES ( %d, %d, %d, %d, '%s', '%s', '%s' )",
 			intval($importer['uid']),
@ -753,7 +712,7 @@ function diaspora_request($importer,$xml) {
 		require_once('include/Photo.php');

 		$photos = import_profile_photo($contact_record['photo'],$importer['uid'],$contact_record['id']);
-		
+
 		// technically they are sharing with us (CONTACT_IS_SHARING), 
 		// but if our page-type is PAGE_COMMUNITY or PAGE_SOAPBOX
 		// we are going to change the relationship and make them a follower.
@ -868,7 +827,7 @@ function diaspora_post($importer,$xml,$msg) {
 	$body = diaspora2bb($xml->raw_message);

 	// Add OEmbed and other information to the body
-	$body = diaspora_add_page_info_to_body($body);
+	$body = add_page_info_to_body($body, false, true);

 	$datarray = array();

@ -1025,7 +984,7 @@ function diaspora_reshare($importer,$xml,$msg) {
 		$body = scale_external_images($body);

 		// Add OEmbed and other information to the body
-		$body = diaspora_add_page_info_to_body($body);
+		$body = add_page_info_to_body($body, false, true);
 	}
 	else {
 		// Maybe it is a reshare of a photo that will be delivered at a later time (testing)
@ -1445,12 +1404,12 @@ function diaspora_comment($importer,$xml,$msg) {
 		foreach($myconv as $conv) {

 			// now if we find a match, it means we're in this conversation
-	
+
 			if(! link_compare($conv['author-link'],$importer_url))
 				continue;

 			require_once('include/enotify.php');
-								
+
 			$conv_parent = $conv['parent'];

 			notification(array(
--- a/include/html2bbcode.php
+++ b/include/html2bbcode.php
@ -212,6 +212,7 @@ function html2bbcode($message)
 	node2bbcode($doc, 'h5', array(), "\n\n[size=small][b]", "[/b][/size]\n");
 	node2bbcode($doc, 'h6', array(), "\n\n[size=x-small][b]", "[/b][/size]\n");

+	node2bbcode($doc, 'a', array('href'=>'/mailto:(.+)/'), '[mail=$1]', '[/mail]');
 	node2bbcode($doc, 'a', array('href'=>'/(.+)/'), '[url=$1]', '[/url]');

 	node2bbcode($doc, 'img', array('src'=>'/(.+)/', 'width'=>'/(\d+)/', 'height'=>'/(\d+)/'), '[img=$2x$3]$1', '[/img]');
--- a/include/items.php
+++ b/include/items.php
@ -856,9 +856,10 @@ function get_atom_elements($feed, $item, $contact = array()) {
 	}

 	if (isset($contact["network"]) AND ($contact["network"] == NETWORK_FEED) AND $contact['fetch_further_information']) {
-		$res["body"] = $res["title"]."\n\n[class=type-link]".fetch_siteinfo($res['plink'])."[/class]";
+		$res["body"] = $res["title"].add_page_info($res['plink']);
 		$res["title"] = "";
-	}
+	} elseif (isset($contact["network"]) AND ($contact["network"] == NETWORK_OSTATUS))
+		$res["body"] = add_page_info_to_body($res["body"]);

 	$arr = array('feed' => $feed, 'item' => $item, 'result' => $res);

@ -874,29 +875,63 @@ function get_atom_elements($feed, $item, $contact = array()) {
 	return $res;
 }

-function fetch_siteinfo($url) {
-	require_once("mod/parse_url.php");
+function add_page_info($url, $no_photos = false) {
+        require_once("mod/parse_url.php");
+        $data = parseurl_getsiteinfo($url, true);

-	// Fetch site infos - but only from the meta data
-	$data = parseurl_getsiteinfo($url, true);
+        logger('add_page_info: fetch page info for '.$url.' '.print_r($data, true), LOGGER_DEBUG);

-	$text = "";
+	// It maybe is a rich content, but if it does have everything that a link has,
+	// then treat it that way
+	if (($data["type"] == "rich") AND is_string($data["title"]) AND
+		is_string($data["text"]) AND (sizeof($data["images"]) > 0))
+		$data["type"] = "link";

-	if (!is_string($data["text"]) AND (sizeof($data["images"]) == 0) AND ($data["title"] == $url))
+        if ((($data["type"] != "link") AND ($data["type"] != "video") AND ($data["type"] != "photo")) OR ($data["title"] == $url))
+                return("");
+
+	if ($no_photos AND ($data["type"] == "photo"))
 		return("");

-	if (is_string($data["title"]))
-		$text .= "[bookmark=".$url."]".trim($data["title"])."[/bookmark]\n";
+        if (($data["type"] != "photo") AND is_string($data["title"]))
+                $text .= "[bookmark=".$url."]".trim($data["title"])."[/bookmark]";

-	if (sizeof($data["images"]) > 0) {
-		$imagedata = $data["images"][0];
-		$text .= '[img='.$imagedata["width"].'x'.$imagedata["height"].']'.$imagedata["src"].'[/img]' . "\n";
+        if (($data["type"] != "video") AND (sizeof($data["images"]) > 0)) {
+                $imagedata = $data["images"][0];
+                $text .= '[img]'.$imagedata["src"].'[/img]';
+        }
+
+        if (($data["type"] != "photo") AND is_string($data["text"]))
+                $text .= "[quote]".$data["text"]."[/quote]";
+
+        return("\n[class=type-".$data["type"]."]".$text."[/class]");
+}
+
+function add_page_info_to_body($body, $texturl = false, $no_photos = false) {
+
+        logger('add_page_info_to_body: fetch page info for body '.$body, LOGGER_DEBUG);
+
+        $URLSearchString = "^\[\]";
+
+        // Adding these spaces is a quick hack due to my problems with regular expressions :)
+        preg_match("/[^@#]\[url\]([$URLSearchString]*)\[\/url\]/ism", " ".$body, $matches);
+
+        if (!$matches)
+                preg_match("/[^@#]\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", " ".$body, $matches);
+
+	// Convert urls without bbcode elements
+	if (!$matches AND $texturl) {
+		preg_match("/([^\]\='".'"'."]|^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism", " ".$body, $matches);
+
+		// Yeah, a hack. I really hate regular expressions :)
+	        if ($matches)
+        	        $matches[1] = $matches[2];
 	}

-	if (is_string($data["text"]))
-		$text .= "[quote]".$data["text"]."[/quote]";
+        if ($matches)
+                $body .= add_page_info($matches[1], $no_photos);

-	return($text);
+        return $body;
 }

 function encode_rel_links($links) {
--- a/include/network.php
+++ b/include/network.php
@ -189,7 +189,7 @@ function post_url($url,$params, $headers = null, &$redirects = 0, $timeout = 0)
 		$base = substr($base,strlen($chunk));
 	}

-	if($http_code == 301 || $http_code == 302 || $http_code == 303) {
+	if($http_code == 301 || $http_code == 302 || $http_code == 303 || $http_code == 307) {
        $matches = array();
        preg_match('/(Location:|URI:)(.*?)\n/', $header, $matches);
        $newurl = trim(array_pop($matches));
@ -1105,3 +1105,71 @@ function xml2array($contents, $namespaces = true, $get_attributes=1, $priority =

    return($xml_array);
 }
+
+/**
+ * Follow HTTP redirects and HTML meta refreshes to find the final address of a URL.
+ *
+ * @param string  $url       Address to resolve; surrounding single quotes are stripped
+ * @param int     $depth     Current recursion depth; resolution stops once it exceeds 10
+ * @param boolean $fetchbody Fetch the response body as well instead of only the headers
+ * @return string The last URL that could be reached (the input URL if nothing redirects)
+ */
+function original_url($url, $depth=1, $fetchbody = false) {
+	if ($depth > 10)
+		return($url);
+
+	$url = trim($url, "'");
+
+	$ch = curl_init();
+	curl_setopt($ch, CURLOPT_URL, $url);
+	curl_setopt($ch, CURLOPT_HEADER, 1);
+	// A header-only request is enough to detect HTTP redirects
+	curl_setopt($ch, CURLOPT_NOBODY, ($fetchbody ? 0 : 1));
+	curl_setopt($ch, CURLOPT_TIMEOUT, 10);
+	curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
+	curl_setopt($ch,CURLOPT_USERAGENT,'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:24.0) Gecko/20100101 Firefox/24.0');
+
+	$header = curl_exec($ch);
+	$curl_info = @curl_getinfo($ch);
+	curl_close($ch);
+
+	// The request itself failed, so there is nothing left to follow
+	if ($header === false)
+		return($url);
+
+	// curl_getinfo() reports the redirect target as "redirect_url"; it has no "location" entry
+	$redirect = (isset($curl_info['redirect_url']) ? $curl_info['redirect_url'] : "");
+	if (in_array(intval($curl_info['http_code']), array(301, 302, 303, 307)) AND ($redirect != ""))
+		return(original_url($redirect, $depth + 1, $fetchbody));
+
+	$pos = strpos($header, "\r\n\r\n");
+	$body = (($pos !== false) ? trim(substr($header, $pos)) : trim($header));
+
+	if ($body == "") {
+		// Header-only answer: ask once more with the body. An empty body stays empty, so don't loop.
+		if ($fetchbody)
+			return($url);
+		return(original_url($url, $depth + 1, true));
+	}
+
+	$doc = new DOMDocument();
+	@$doc->loadHTML($body);
+
+	$xpath = new DOMXPath($doc);
+
+	// Look for <meta http-equiv="refresh" content="0; url=..."> style redirects
+	foreach ($xpath->query("//meta[@content]") as $node) {
+		$equiv = strtolower(trim($node->getAttribute("http-equiv")));
+		if ($equiv != 'refresh')
+			continue;
+
+		foreach (explode(";", $node->getAttribute("content")) AS $value) {
+			// The part after the semicolon usually starts with a blank ("0; url=...")
+			$value = trim($value);
+			if (strtolower(substr($value, 0, 4)) == "url=")
+				return(original_url(trim(substr($value, 4), " '\""), $depth + 1));
+		}
+	}
+
+	return($url);
+}
--- a/include/oembed.php
+++ b/include/oembed.php
@ -12,6 +12,9 @@ function oembed_replacecb($matches){

 function oembed_fetch_url($embedurl){

+	$embedurl = trim($embedurl, "'");
+	$embedurl = trim($embedurl, '"');
+
 	$a = get_app();

 	$txt = Cache::get($a->videowidth . $embedurl);
@ -48,9 +51,18 @@ function oembed_fetch_url($embedurl){
 		}

 		if ($txt==false || $txt==""){
-			// try oohembed service
-			$ourl = "http://oohembed.com/oohembed/?url=".urlencode($embedurl).'&maxwidth=' . $a->videowidth;  
-			$txt = fetch_url($ourl);
+			$embedly = get_config("system", "embedly");
+			if ($embedly == "") {
+				// try oohembed service
+				$ourl = "http://oohembed.com/oohembed/?url=".urlencode($embedurl).'&maxwidth=' . $a->videowidth;
+				$txt = fetch_url($ourl);
+			} else {
+				// try embedly service
+				$ourl = "https://api.embed.ly/1/oembed?key=".$embedly."&url=".urlencode($embedurl);
+				$txt = fetch_url($ourl);
+			}
+
+			logger("oembed_fetch_url: ".$txt, LOGGER_DEBUG);
 		}

 		$txt=trim($txt);