From 52569e536e217c3178f13d2a722eb8ebfd801465 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Fri, 29 Jun 2012 21:40:54 +0200 Subject: [PATCH 01/16] Tests considering removing of the title --- include/items.php | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/items.php b/include/items.php index a0dd1c815..fbd4af5f3 100755 --- a/include/items.php +++ b/include/items.php @@ -306,6 +306,9 @@ function get_atom_elements($feed,$item) { $res['body'] = unxmlify($item->get_content()); $res['plink'] = unxmlify($item->get_link(0)); + //$debugfile = tempnam("/home/ike/log", "item-res-"); + //file_put_contents($debugfile, serialize($res)); + if($res['plink']) $base_url = implode('/', array_slice(explode('/',$res['plink']),0,3)); else @@ -1645,7 +1648,6 @@ function consume_feed($xml,$importer,&$contact, &$hub, $datedir = 0, $pass = 0) $item_id = $item->get_id(); $datarray = get_atom_elements($feed,$item); - if((! x($datarray,'author-name')) && ($contact['network'] != NETWORK_DFRN)) $datarray['author-name'] = $contact['name']; if((! x($datarray,'author-link')) && ($contact['network'] != NETWORK_DFRN)) @@ -3469,4 +3471,4 @@ function posted_date_widget($url,$uid,$wall) { '$dates' => $ret )); return $o; -} \ No newline at end of file +} From bcca817b9ab8f0a9dc0af56d55a3a347056a4494 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Sat, 30 Jun 2012 09:25:22 +0200 Subject: [PATCH 02/16] Removing of the title if it is the same (or part) from the body. Helps against auto generated titles from tumblr. --- include/items.php | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/include/items.php b/include/items.php index aed0a8e68..87d6ab479 100755 --- a/include/items.php +++ b/include/items.php @@ -280,6 +280,29 @@ function construct_activity_target($item) { return ''; } +function title_is_body($title, $body) { + + $title = strip_tags($title); + $title = trim($title); + $title = str_replace(array("\n", "\r", "\t", " "), array("","","",""), $title); + + $body = strip_tags($body); + $body = trim($body); + $body = str_replace(array("\n", "\r", "\t", " "), array("","","",""), $body); + + if (strlen($title) < strlen($body)) + $body = substr($body, 0, strlen($title)); + + if (($title != $body) and (substr($title, -3) == "...")) { + $pos = strrpos($title, "..."); + if ($pos > 0) { + $title = substr($title, 0, $pos); + $body = substr($body, 0, $pos); + } + } + + return($title == $body); +} @@ -306,8 +329,10 @@ function get_atom_elements($feed,$item) { $res['body'] = unxmlify($item->get_content()); $res['plink'] = unxmlify($item->get_link(0)); - //$debugfile = tempnam("/home/ike/log", "item-res-"); - //file_put_contents($debugfile, serialize($res)); + // removing the content of the title if its identically to the body + // This helps with auto generated titles e.g. from tumblr + if (title_is_body($res["title"], $res["body"])) + $res['title'] = ""; if($res['plink']) $base_url = implode('/', array_slice(explode('/',$res['plink']),0,3)); From dafc84390db4b57e5958f1af386c778b481c7d04 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Thu, 5 Jul 2012 23:36:00 +0200 Subject: [PATCH 03/16] Improvments of the HTML to make a better bb2markdown conversion (nested lists, youtube, vimeo, ...) --- include/api.php | 1 + include/bb2diaspora.php | 4 ++-- include/bbcode.php | 46 +++++++++++++++++++++++++++++++++-------- include/items.php | 19 ++++++++++------- 4 files changed, 52 insertions(+), 18 deletions(-) diff --git a/include/api.php b/include/api.php index d790b4b87..629b5fcc2 100644 --- a/include/api.php +++ b/include/api.php @@ -1726,3 +1726,4 @@ notifications/leave blocks/exists blocks/blocking */ + diff --git a/include/bb2diaspora.php b/include/bb2diaspora.php index 77a5f5c2a..4a82635e5 100644 --- a/include/bb2diaspora.php +++ b/include/bb2diaspora.php @@ -112,7 +112,7 @@ function bb2diaspora($Text,$preserve_nl = false) { // Note that to get nested lists to work for Diaspora, we would need // to define the closing tag for the list elements. So nested lists // are going to be flattened out in Diaspora for now - $endlessloop = 0; +/* $endlessloop = 0; while ((((strpos($Text, "[/list]") !== false) && (strpos($Text, "[list") !== false)) || ((strpos($Text, "[/ol]") !== false) && (strpos($Text, "[ol]") !== false)) || ((strpos($Text, "[/ul]") !== false) && (strpos($Text, "[ul]") !== false))) && (++$endlessloop < 20)) { @@ -125,7 +125,7 @@ function bb2diaspora($Text,$preserve_nl = false) { $Text = preg_replace_callback("/\[ul\](.*?)\[\/ul\]/is", 'diaspora_ul', $Text); $Text = preg_replace_callback("/\[ol\](.*?)\[\/ol\]/is", 'diaspora_ol', $Text); } - +*/ // Convert it to HTML - don't try oembed $Text = bbcode($Text, $preserve_nl, false); diff --git a/include/bbcode.php b/include/bbcode.php index 9071c767b..988e75d41 100644 --- a/include/bbcode.php +++ b/include/bbcode.php @@ -251,21 +251,25 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true) { $Text = preg_replace("/\[img\](.*?)\[\/img\]/ism", '' . t('Image/photo') . '', $Text); - $Text = preg_replace("/\[video\](.*?\.(ogg|ogv|oga|ogm|webm|mp4))\[\/video\]/ism", '', $Text); - - $Text = preg_replace("/\[audio\](.*?\.(ogg|ogv|oga|ogm|webm|mp4|mp3))\[\/audio\]/ism", '', $Text); - // Try to Oembed if ($tryoembed) { + $Text = preg_replace("/\[video\](.*?\.(ogg|ogv|oga|ogm|webm|mp4))\[\/video\]/ism", '', $Text); + $Text = preg_replace("/\[audio\](.*?\.(ogg|ogv|oga|ogm|webm|mp4|mp3))\[\/audio\]/ism", '', $Text); + $Text = preg_replace_callback("/\[video\](.*?)\[\/video\]/ism", 'tryoembed', $Text); $Text = preg_replace_callback("/\[audio\](.*?)\[\/audio\]/ism", 'tryoembed', $Text); + } else { + $Text = preg_replace("/\[video\](.*?)\[\/video\]/", '$1', $Text); + $Text = preg_replace("/\[audio\](.*?)\[\/audio\]/", '$1', $Text); } // html5 video and audio - $Text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '', $Text); - + if ($tryoembed) + $Text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '', $Text); + else + $Text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '$1', $Text); // Youtube extensions if ($tryoembed) { @@ -278,7 +282,10 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true) { $Text = preg_replace("/\[youtube\]https?:\/\/www.youtube.com\/embed\/(.*?)\[\/youtube\]/ism",'[youtube]$1[/youtube]',$Text); $Text = preg_replace("/\[youtube\]https?:\/\/youtu.be\/(.*?)\[\/youtube\]/ism",'[youtube]$1[/youtube]',$Text); - $Text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", '', $Text); + if ($tryoembed) + $Text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", '', $Text); + else + $Text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", "http://www.youtube.com/watch?v=$1", $Text); if ($tryoembed) { @@ -287,8 +294,12 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true) { } $Text = preg_replace("/\[vimeo\]https?:\/\/player.vimeo.com\/video\/([0-9]+)(.*?)\[\/vimeo\]/ism",'[vimeo]$1[/vimeo]',$Text); - $Text = preg_replace("/\[vimeo\]https?:\/\/vimeo.com\/([0-9]+)(.*?)\[\/vimeo\]/ism",'[vimeo]$1[/vimeo]',$Text); - $Text = preg_replace("/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism", '', $Text); + $Text = preg_replace("/\[vimeo\]https?:\/\/vimeo.com\/([0-9]+)(.*?)\[\/vimeo\]/ism",'[vimeo]$1[/vimeo]',$Text); + + if ($tryoembed) + $Text = preg_replace("/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism", '', $Text); + else + $Text = preg_replace("/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism", "http://vimeo.com/$1", $Text); // $Text = preg_replace("/\[youtube\](.*?)\[\/youtube\]/", '', $Text); @@ -327,6 +338,23 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true) { if(strlen($saved_image)) $Text = str_replace('[$#saved_image#$]','' . t('Image/photo') . '',$Text); + // Clean up the HTML by loading and saving the HTML with the DOM + // Only do it when it has to be done - for performance reasons + if (!$tryoembed) { + $doc = new DOMDocument(); + $doc->preserveWhiteSpace = false; + + $Text = mb_convert_encoding($Text, 'HTML-ENTITIES', "UTF-8"); + + $doctype = ''; + @$doc->loadHTML($doctype."".$Text.""); + + $Text = $doc->saveHTML(); + $Text = str_replace(array("", "", $doctype), array("", "", ""), $Text); + + $Text = str_replace('
','', $Text); + } + call_hooks('bbcode',$Text); return $Text; diff --git a/include/items.php b/include/items.php index 87d6ab479..6ab681c87 100755 --- a/include/items.php +++ b/include/items.php @@ -352,7 +352,7 @@ function get_atom_elements($feed,$item) { $res['author-avatar'] = unxmlify($link['attribs']['']['href']); } } - } + } $rawactor = $item->get_item_tags(NAMESPACE_ACTIVITY, 'actor'); @@ -384,7 +384,7 @@ function get_atom_elements($feed,$item) { $res['author-avatar'] = unxmlify($link['attribs']['']['href']); } } - } + } $rawactor = $feed->get_feed_tags(NAMESPACE_ACTIVITY, 'subject'); @@ -409,7 +409,7 @@ function get_atom_elements($feed,$item) { $res['app'] = strip_tags(unxmlify($apps[0]['attribs']['']['source'])); if($res['app'] === 'web') $res['app'] = 'OStatus'; - } + } // base64 encoded json structure representing Diaspora signature @@ -553,7 +553,7 @@ function get_atom_elements($feed,$item) { foreach($base as $link) { if(!x($res, 'owner-avatar') || !$res['owner-avatar']) { - if($link['attribs']['']['rel'] === 'photo' || $link['attribs']['']['rel'] === 'avatar') + if($link['attribs']['']['rel'] === 'photo' || $link['attribs']['']['rel'] === 'avatar') $res['owner-avatar'] = unxmlify($link['attribs']['']['href']); } } @@ -697,6 +697,11 @@ function get_atom_elements($feed,$item) { call_hooks('parse_atom', $arr); + //if (($res["title"] != "") or (strpos($res["body"], "RT @") > 0)) { + // $debugfile = tempnam("/home/ike/log", "item-res2-"); + // file_put_contents($debugfile, serialize($res)); + //} + return $res; } @@ -1644,7 +1649,7 @@ function consume_feed($xml,$importer,&$contact, &$hub, $datedir = 0, $pass = 0) // Now process the feed - if($feed->get_item_quantity()) { + if($feed->get_item_quantity()) { logger('consume_feed: feed item count = ' . $feed->get_item_quantity()); @@ -1657,7 +1662,7 @@ function consume_feed($xml,$importer,&$contact, &$hub, $datedir = 0, $pass = 0) foreach($items as $item) { - $is_reply = false; + $is_reply = false; $item_id = $item->get_id(); $rawthread = $item->get_item_tags( NAMESPACE_THREAD,'in-reply-to'); if(isset($rawthread[0]['attribs']['']['ref'])) { @@ -1671,7 +1676,7 @@ function consume_feed($xml,$importer,&$contact, &$hub, $datedir = 0, $pass = 0) continue; // Have we seen it? If not, import it. - + $item_id = $item->get_id(); $datarray = get_atom_elements($feed,$item); From e9d7f852978e7fe4231893680c60c13dc3eb5bd4 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Sat, 7 Jul 2012 20:40:15 +0200 Subject: [PATCH 04/16] network: Small changes --- include/network.php | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/include/network.php b/include/network.php index c1a76000e..500dff08b 100644 --- a/include/network.php +++ b/include/network.php @@ -14,15 +14,16 @@ function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_ return false; @curl_setopt($ch, CURLOPT_HEADER, true); - + if (!is_null($accept_content)){ curl_setopt($ch,CURLOPT_HTTPHEADER, array ( "Accept: " . $accept_content )); } - + @curl_setopt($ch, CURLOPT_RETURNTRANSFER,true); - @curl_setopt($ch, CURLOPT_USERAGENT, "Friendica"); + //@curl_setopt($ch, CURLOPT_USERAGENT, "Friendica"); + @curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Friendica)"); if(intval($timeout)) { @@ -59,7 +60,6 @@ function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_ $base = $s; $curl_info = @curl_getinfo($ch); $http_code = $curl_info['http_code']; - // logger('fetch_url:' . $http_code . ' data: ' . $s); $header = ''; @@ -73,24 +73,22 @@ function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_ } if($http_code == 301 || $http_code == 302 || $http_code == 303 || $http_code == 307) { - $matches = array(); - preg_match('/(Location:|URI:)(.*?)\n/', $header, $matches); - $newurl = trim(array_pop($matches)); + $matches = array(); + preg_match('/(Location:|URI:)(.*?)\n/', $header, $matches); + $newurl = trim(array_pop($matches)); if(strpos($newurl,'/') === 0) $newurl = $url . $newurl; - $url_parsed = @parse_url($newurl); - if (isset($url_parsed)) { - $redirects++; - return fetch_url($newurl,$binary,$redirects,$timeout); - } - } + $url_parsed = @parse_url($newurl); + if (isset($url_parsed)) { + $redirects++; + return fetch_url($newurl,$binary,$redirects,$timeout); + } + } $a->set_curl_code($http_code); $body = substr($s,strlen($header)); - $a->set_curl_headers($header); - @curl_close($ch); return($body); }} From db2f0f79190650e6c63e7c1ac10bf9f3304d2115 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Sun, 8 Jul 2012 18:27:11 +0200 Subject: [PATCH 05/16] Bugfix: API: No extra encoding for the status text - makes it more compatible to statusnet Bugfix: BBCode: Fixed charset problems. --- include/api.php | 6 ++++-- include/bbcode.php | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/api.php b/include/api.php index 629b5fcc2..3858b9fe3 100644 --- a/include/api.php +++ b/include/api.php @@ -490,7 +490,8 @@ $_REQUEST['type'] = 'wall'; $_REQUEST['profile_uid'] = local_user(); $_REQUEST['api_source'] = true; - $txt = urldecode(requestdata('status')); + $txt = requestdata('status'); + //$txt = urldecode(requestdata('status')); require_once('library/HTMLPurifier.auto.php'); require_once('include/html2bbcode.php'); @@ -554,7 +555,8 @@ } else - $_REQUEST['body'] = urldecode(requestdata('status')); + $_REQUEST['body'] = requestdata('status'); + //$_REQUEST['body'] = urldecode(requestdata('status')); $parent = requestdata('in_reply_to_status_id'); if(ctype_digit($parent)) diff --git a/include/bbcode.php b/include/bbcode.php index 988e75d41..e212ec4ae 100644 --- a/include/bbcode.php +++ b/include/bbcode.php @@ -353,6 +353,8 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true) { $Text = str_replace(array("", "", $doctype), array("", "", ""), $Text); $Text = str_replace('
','', $Text); + + $Text = mb_convert_encoding($Text, "UTF-8", 'HTML-ENTITIES'); } call_hooks('bbcode',$Text); From 47c48aa438e30573a4df7ade39ecaacb804f7710 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Sun, 8 Jul 2012 21:27:20 +0200 Subject: [PATCH 06/16] DBA: Possibility to log the database speed --- include/dba.php | 18 ++++++++++++++---- include/network.php | 3 +++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/include/dba.php b/include/dba.php index 881097f30..d37b756ae 100644 --- a/include/dba.php +++ b/include/dba.php @@ -71,22 +71,32 @@ class dba { } public function q($sql) { + global $a; if((! $this->db) || (! $this->connected)) return false; $this->error = ''; - //if (get_config("system", "db_log") != "") - // @file_put_contents(get_config("system", "db_log"), datetime_convert().':'.session_id(). ' Start '.$sql."\n", FILE_APPEND); + if ($a->config["system"]["db_log"] != "") + $stamp1 = microtime(true); if($this->mysqli) $result = @$this->db->query($sql); else $result = @mysql_query($sql,$this->db); - //if (get_config("system", "db_log") != "") - // @file_put_contents(get_config("system", "db_log"), datetime_convert().':'.session_id(). ' Stop '."\n", FILE_APPEND); + if ($a->config["system"]["db_log"] != "") { + $stamp2 = microtime(true); + $duration = round($stamp2-$stamp1, 3); + if ($duration > $a->config["system"]["db_loglimit"]) { + $backtrace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS); + @file_put_contents($a->config["system"]["db_log"], $duration."\t". + basename($backtrace[1]["file"])."\t". + $backtrace[1]["line"]."\t".$backtrace[2]["function"]."\t". + substr($sql, 0, 2000)."\n", FILE_APPEND); + } + } if($this->mysqli) { if($this->db->errno) diff --git a/include/network.php b/include/network.php index 500dff08b..d69454899 100644 --- a/include/network.php +++ b/include/network.php @@ -795,6 +795,9 @@ function scale_external_images($s, $include_link = true, $scale_replace = false) $a = get_app(); + // Picture addresses can contain special characters + $s = htmlspecialchars_decode($s); + $matches = null; $c = preg_match_all('/\[img\](.*?)\[\/img\]/ism',$s,$matches,PREG_SET_ORDER); if($c) { From 48f67ead9bbebe868e0ab585d56538540ba80339 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Thu, 12 Jul 2012 01:17:33 +0200 Subject: [PATCH 07/16] parse_url: complete new code for fetching website information --- mod/parse_url.php | 366 ++++++++++++++++++++++++---------------------- 1 file changed, 193 insertions(+), 173 deletions(-) diff --git a/mod/parse_url.php b/mod/parse_url.php index a38f7e270..09722341c 100644 --- a/mod/parse_url.php +++ b/mod/parse_url.php @@ -1,7 +1,163 @@ query("//".$node); + foreach ($list as $child) + $child->parentNode->removeChild($child); + } +} -require_once('library/HTML5/Parser.php'); -require_once('library/HTMLPurifier.auto.php'); +function parseurl_getsiteinfo($url) { + $siteinfo = array(); + + $ch = curl_init(); + curl_setopt($ch, CURLOPT_URL, $url); + curl_setopt($ch, CURLOPT_HEADER, 1); + curl_setopt($ch, CURLOPT_NOBODY, 0); + curl_setopt($ch, CURLOPT_TIMEOUT, 3); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch,CURLOPT_USERAGENT,'Opera/9.64(Windows NT 5.1; U; de) Presto/2.1.1'); + + $header = curl_exec($ch); + curl_close($ch); + + if (preg_match('/charset=(.*?)\n/', $header, $matches)) + $charset = trim(array_pop($matches)); + else + $charset = "utf-8"; + + $pos = strpos($header, "\r\n\r\n"); + + if ($pos) + $body = trim(substr($header, $pos)); + else + $body = $header; + + $body = mb_convert_encoding($body, "UTF-8", $charset); + $body = mb_convert_encoding($body, 'HTML-ENTITIES', "UTF-8"); + + $doc = new DOMDocument(); + @$doc->loadHTML($body); + + deletenode($doc, 'style'); + deletenode($doc, 'script'); + deletenode($doc, 'option'); + deletenode($doc, 'h1'); + deletenode($doc, 'h2'); + deletenode($doc, 'h3'); + deletenode($doc, 'h4'); + deletenode($doc, 'h5'); + deletenode($doc, 'h6'); + deletenode($doc, 'ol'); + deletenode($doc, 'ul'); + + $xpath = new DomXPath($doc); + + $list = $xpath->query("head/title"); + foreach ($list as $node) + $siteinfo["title"] = html_entity_decode($node->nodeValue, ENT_QUOTES, "UTF-8"); + + $list = $xpath->query("head/meta[@name]"); + foreach ($list as $node) { + $attr = array(); + if ($node->attributes->length) + foreach ($node->attributes as $attribute) + $attr[$attribute->name] = $attribute->value; + + $attr["content"] = html_entity_decode($attr["content"], ENT_QUOTES, "UTF-8"); + + switch (strtolower($attr["name"])) { + case "fulltitle": + $siteinfo["title"] = $attr["content"]; + break; + case "description": + $siteinfo["text"] = $attr["content"]; + break; + case "dc.title": + $siteinfo["title"] = $attr["content"]; + break; + case "dc.description": + $siteinfo["text"] = $attr["content"]; + break; + } + } + + $list = $xpath->query("head/meta[@property]"); + foreach ($list as $node) { + $attr = array(); + if ($node->attributes->length) + foreach ($node->attributes as $attribute) + $attr[$attribute->name] = $attribute->value; + + $attr["content"] = html_entity_decode($attr["content"], ENT_QUOTES, "UTF-8"); + + switch (strtolower($attr["property"])) { + case "og:image": + $siteinfo["image"] = $attr["content"]; + break; + case "og:title": + $siteinfo["title"] = $attr["content"]; + break; + case "og:description": + $siteinfo["text"] = $attr["content"]; + break; + } + } + + if ($siteinfo["image"] == "") { + require_once('include/Photo.php'); + $list = $xpath->query("//img[@src]"); + foreach ($list as $node) { + $attr = array(); + if ($node->attributes->length) + foreach ($node->attributes as $attribute) + $attr[$attribute->name] = $attribute->value; + + // guess mimetype from headers or filename + $type = guess_image_type($attr["src"],true); + + $i = fetch_url($attr["src"]); + $ph = new Photo($i, $type); + + if(($ph->getWidth() > 200) and ($ph->getHeight() > 200)) + $siteinfo["image"] = $attr["src"]; + } + } + + if ($siteinfo["text"] == "") { + $text = ""; + + $list = $xpath->query("//div[@class='article']"); + foreach ($list as $node) + $text .= " ".trim($node->nodeValue); + + if ($text == "") { + $list = $xpath->query("//div[@class='content']"); + foreach ($list as $node) + $text .= " ".trim($node->nodeValue); + } + + // If none text was found then take the paragraph content + if ($text == "") { + $list = $xpath->query("//p"); + foreach ($list as $node) + $text .= " ".trim($node->nodeValue); + } + + if ($text != "") { + $text = trim(str_replace(array("\n", "\r"), array(" ", " "), $text)); + + while (strpos($text, " ")) + $text = trim(str_replace(" ", " ", $text)); + + $siteinfo["text"] = html_entity_decode(substr($text,0,350), ENT_QUOTES, "UTF-8").'...'; + } + } + + return($siteinfo); +} function arr_add_hashes(&$item,$k) { $item = '#' . $item; @@ -40,13 +196,11 @@ function parse_url_content(&$a) { logger('parse_url: ' . $url); - if($textmode) $template = $br . '[bookmark=%s]%s[/bookmark]%s' . $br; else $template = "
%s%s
"; - $arr = array('url' => $url, 'text' => ''); call_hooks('parse_link', $arr); @@ -60,187 +214,53 @@ function parse_url_content(&$a) { if($url && $title && $text) { if($textmode) - $text = $br . $br . '[quote]' . $text . '[/quote]' . $br; + $text = $br . $br . '[quote]' . trim($text) . '[/quote]' . $br; else - $text = '

' . $text . '

'; + $text = '

' . trim($text) . '

'; $title = str_replace(array("\r","\n"),array('',''),$title); $result = sprintf($template,$url,($title) ? $title : $url,$text) . $str_tags; - logger('parse_url (unparsed): returns: ' . $result); + logger('parse_url (unparsed): returns: ' . $result); echo $result; killme(); } + $siteinfo = parseurl_getsiteinfo($url); - if($url) { - $s = fetch_url($url); + if($siteinfo["title"] == "") { + echo sprintf($template,$url,$url,'') . $str_tags; + killme(); } else { - echo ''; - killme(); + $image = $siteinfo["image"]; + $text = $siteinfo["text"]; + $title = $siteinfo["title"]; } -// logger('parse_url: data: ' . $s, LOGGER_DATA); + if ($image != "") { + $i = fetch_url($image); + if($i) { + require_once('include/Photo.php'); + // guess mimetype from headers or filename + $type = guess_image_type($image,true); - if(! $s) { - echo sprintf($template,$url,$url,'') . $str_tags; - killme(); - } - - $matches = ''; - $c = preg_match('/\(.*?)\<\/head\>/ism',$s,$matches); - if($c) { -// logger('parse_url: header: ' . $matches[2], LOGGER_DATA); - try { - $domhead = HTML5_Parser::parse($matches[2]); - } catch (DOMException $e) { - logger('scrape_dfrn: parse error: ' . $e); - } - if($domhead) - logger('parsed header'); - } - - if(! $title) { - if(strpos($s,'')) { - $title = substr($s,strpos($s,'<title>')+7,64); - if(strpos($title,'<') !== false) - $title = strip_tags(substr($title,0,strpos($title,'<'))); - } - } - - $config = HTMLPurifier_Config::createDefault(); - $config->set('Cache.DefinitionImpl', null); - $purifier = new HTMLPurifier($config); - $s = $purifier->purify($s); - -// logger('purify_output: ' . $s); - - try { - $dom = HTML5_Parser::parse($s); - } catch (DOMException $e) { - logger('scrape_dfrn: parse error: ' . $e); - } - - if(! $dom) { - echo sprintf($template,$url,$url,'') . $str_tags; - killme(); - } - - $items = $dom->getElementsByTagName('title'); - - if($items) { - foreach($items as $item) { - $title = trim($item->textContent); - break; - } - } - - - if(! $text) { - $divs = $dom->getElementsByTagName('div'); - if($divs) { - foreach($divs as $div) { - $class = $div->getAttribute('class'); - if($class && (stristr($class,'article') || stristr($class,'content'))) { - $items = $div->getElementsByTagName('p'); - if($items) { - foreach($items as $item) { - $text = $item->textContent; - if(stristr($text,'<script')) { - $text = ''; - continue; - } - $text = strip_tags($text); - if(strlen($text) < 100) { - $text = ''; - continue; - } - $text = substr($text,0,250) . '...' ; - break; - } - } - } - if($text) - break; - } - } - - if(! $text) { - $items = $dom->getElementsByTagName('p'); - if($items) { - foreach($items as $item) { - $text = $item->textContent; - if(stristr($text,'<script')) - continue; - $text = strip_tags($text); - if(strlen($text) < 100) { - $text = ''; - continue; - } - $text = substr($text,0,250) . '...' ; - break; - } - } - } - } - - if(! $text) { - logger('parsing meta'); - $items = (isset($domhead) && is_object($domhead) ? $domhead->getElementsByTagName('meta') : null); - if($items) { - foreach($items as $item) { - $property = $item->getAttribute('property'); - if($property && (stristr($property,':description'))) { - - $text = $item->getAttribute('content'); - if(stristr($text,'<script')) { - $text = ''; - continue; - } - $text = strip_tags($text); - - - $text = substr($text,0,250) . '...' ; - } - if($property && (stristr($property,':image'))) { - - $image = $item->getAttribute('content'); - if(stristr($text,'<script')) { - $image = ''; - continue; - } - $image = strip_tags($image); - - $i = fetch_url($image); - if($i) { - require_once('include/Photo.php'); - // guess mimetype from headers or filename - $type = guess_image_type($image,true); - - $ph = new Photo($i, $type); - if($ph->is_valid()) { - if($ph->getWidth() > 300 || $ph->getHeight() > 300) { - $ph->scaleImage(300); - $new_width = $ph->getWidth(); - $new_height = $ph->getHeight(); - if($textmode) - $image = $br . $br . '[img=' . $new_width . 'x' . $new_height . ']' . $image . '[/img]'; - else - $image = '<br /><br /><img height="' . $new_height . '" width="' . $new_width . '" src="' .$image . '" alt="photo" />'; - } - else { - if($textmode) - $image = $br . $br . '[img]' . $image . '[/img]'; - else - $image = '<br /><br /><img src="' . $image . '" alt="photo" />'; - } - } - else - $image = ''; - - } + $ph = new Photo($i, $type); + if($ph->is_valid()) { + if($ph->getWidth() > 300 || $ph->getHeight() > 300) { + $ph->scaleImage(300); + $new_width = $ph->getWidth(); + $new_height = $ph->getHeight(); + if($textmode) + $image = $br . $br . '[img=' . $new_width . 'x' . $new_height . ']' . $image . '[/img]'; + else + $image = '<br /><br /><img height="' . $new_height . '" width="' . $new_width . '" src="' .$image . '" alt="photo" />'; + } else { + if($textmode) + $image = $br.$br.'[img]'.$image.'[/img]'; + else + $image = '<br /><br /><img src="'.$image.'" alt="photo" />'; } } } @@ -248,19 +268,19 @@ function parse_url_content(&$a) { if(strlen($text)) { if($textmode) - $text = $br .$br . '[quote]' . $text . '[/quote]' . $br ; + $text = $br.$br.'[quote]'.trim($text).'[/quote]'.$br ; else - $text = '<br /><br /><blockquote>' . $text . '</blockquote><br />'; + $text = '<br /><br /><blockquote>'.trim($text).'</blockquote><br />'; } if($image) { - $text = $image . $br . $text; + $text = $image.$br.$text; } $title = str_replace(array("\r","\n"),array('',''),$title); $result = sprintf($template,$url,($title) ? $title : $url,$text) . $str_tags; - logger('parse_url: returns: ' . $result); + logger('parse_url: returns: ' . $result); echo $result; killme(); From 09034ce0ee78e2906033b79f7844cd63b9bab18c Mon Sep 17 00:00:00 2001 From: Michael Vogel <icarus@dabo.de> Date: Thu, 12 Jul 2012 08:20:27 +0200 Subject: [PATCH 08/16] parse_url: Fetch multiple pictures so that the user can decide what to take. --- mod/parse_url.php | 55 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 12 deletions(-) diff --git a/mod/parse_url.php b/mod/parse_url.php index 09722341c..97e1658c8 100644 --- a/mod/parse_url.php +++ b/mod/parse_url.php @@ -1,4 +1,6 @@ <?php +require_once('include/Photo.php'); + if(!function_exists('deletenode')) { function deletenode(&$doc, $node) { @@ -107,7 +109,6 @@ function parseurl_getsiteinfo($url) { } if ($siteinfo["image"] == "") { - require_once('include/Photo.php'); $list = $xpath->query("//img[@src]"); foreach ($list as $node) { $attr = array(); @@ -121,10 +122,33 @@ function parseurl_getsiteinfo($url) { $i = fetch_url($attr["src"]); $ph = new Photo($i, $type); - if(($ph->getWidth() > 200) and ($ph->getHeight() > 200)) - $siteinfo["image"] = $attr["src"]; + if($ph->is_valid() and ($ph->getWidth() > 200) and ($ph->getHeight() > 200)) { + if ($siteinfo["image"] == "") + $siteinfo["image"] = $attr["src"]; + + if($ph->getWidth() > 300 || $ph->getHeight() > 300) { + $ph->scaleImage(300); + $siteinfo["images"][] = array("src"=>$attr["src"], + "width"=>$ph->getWidth(), + "height"=>$ph->getHeight()); + } else + $siteinfo["images"][] = array("src"=>$attr["src"], + "width"=>$ph->getWidth(), + "height"=>$ph->getHeight()); + } } - } + } else { + // guess mimetype from headers or filename + $type = guess_image_type($siteinfo["image"],true); + + $i = fetch_url($siteinfo["image"]); + $ph = new Photo($i, $type); + + if($ph->is_valid()) + $siteinfo["images"][] = array("src"=>$siteinfo["image"], + "width"=>$ph->getWidth(), + "height"=>$ph->getHeight()); + } if ($siteinfo["text"] == "") { $text = ""; @@ -172,8 +196,8 @@ function parse_url_content(&$a) { if(local_user() && intval(get_pconfig(local_user(),'system','plaintext'))) $textmode = true; - if($textmode) - $br = (($textmode) ? "\n" : '<br /?'); + //if($textmode) + $br = (($textmode) ? "\n" : '<br />'); if(x($_GET,'binurl')) $url = trim(hex2bin($_GET['binurl'])); @@ -234,12 +258,19 @@ function parse_url_content(&$a) { echo sprintf($template,$url,$url,'') . $str_tags; killme(); } else { - $image = $siteinfo["image"]; $text = $siteinfo["text"]; $title = $siteinfo["title"]; } - if ($image != "") { + $image = ""; + + foreach ($siteinfo["images"] as $imagedata) + if($textmode) + $image .= '[img='.$imagedata["width"].'x'.$imagedata["height"].']'.$imagedata["src"].'[/img]'; + else + $image .= '<img height="'.$imagedata["height"].'" width="'.$imagedata["width"].'" src="'.$imagedata["src"].'" alt="photo" />'; + +/* if ($image != "") { $i = fetch_url($image); if($i) { require_once('include/Photo.php'); @@ -264,17 +295,17 @@ function parse_url_content(&$a) { } } } - } + }*/ if(strlen($text)) { if($textmode) - $text = $br.$br.'[quote]'.trim($text).'[/quote]'.$br ; + $text = $br.'[quote]'.trim($text).'[/quote]'.$br ; else - $text = '<br /><br /><blockquote>'.trim($text).'</blockquote><br />'; + $text = '<br /><blockquote>'.trim($text).'</blockquote><br />'; } if($image) { - $text = $image.$br.$text; + $text = $br.$br.$image.$br.$text; } $title = str_replace(array("\r","\n"),array('',''),$title); From 02a1fc9cd08fba2168895d1892a91d8143323848 Mon Sep 17 00:00:00 2001 From: Michael Vogel <icarus@dabo.de> Date: Thu, 12 Jul 2012 23:41:04 +0200 Subject: [PATCH 09/16] parse_url: Further improvements of the new method to fetch page data --- include/api.php | 1 + mod/parse_url.php | 106 ++++++++++++++++++++++++++++------------------ 2 files changed, 66 insertions(+), 41 deletions(-) diff --git a/include/api.php b/include/api.php index 3858b9fe3..e0b788424 100644 --- a/include/api.php +++ b/include/api.php @@ -1727,5 +1727,6 @@ notifications/follow notifications/leave blocks/exists blocks/blocking +lists */ diff --git a/mod/parse_url.php b/mod/parse_url.php index 97e1658c8..4d894969a 100644 --- a/mod/parse_url.php +++ b/mod/parse_url.php @@ -1,6 +1,4 @@ <?php -require_once('include/Photo.php'); - if(!function_exists('deletenode')) { function deletenode(&$doc, $node) { @@ -11,6 +9,30 @@ if(!function_exists('deletenode')) { } } +function completeurl($url, $scheme) { + $urlarr = parse_url($url); + + if (isset($urlarr["scheme"])) + return($url); + + $schemearr = parse_url($scheme); + + $complete = $schemearr["scheme"]."://".$schemearr["host"]; + + if ($schemearr["port"] != "") + $complete .= ":".$schemearr["port"]; + + $complete .= $urlarr["path"]; + + if ($urlarr["query"] != "") + $complete .= "?".$urlarr["query"]; + + if ($urlarr["fragment"] != "") + $complete .= "#".$urlarr["fragment"]; + + return($complete); +} + function parseurl_getsiteinfo($url) { $siteinfo = array(); @@ -25,7 +47,8 @@ function parseurl_getsiteinfo($url) { $header = curl_exec($ch); curl_close($ch); - if (preg_match('/charset=(.*?)\n/', $header, $matches)) + // Fetch the first mentioned charset. Can be in body or header + if (preg_match('/charset=(.*?)['."'".'"\s\n]/', $header, $matches)) $charset = trim(array_pop($matches)); else $charset = "utf-8"; @@ -57,11 +80,13 @@ function parseurl_getsiteinfo($url) { $xpath = new DomXPath($doc); - $list = $xpath->query("head/title"); + //$list = $xpath->query("head/title"); + $list = $xpath->query("//title"); foreach ($list as $node) $siteinfo["title"] = html_entity_decode($node->nodeValue, ENT_QUOTES, "UTF-8"); - $list = $xpath->query("head/meta[@name]"); + //$list = $xpath->query("head/meta[@name]"); + $list = $xpath->query("//meta[@name]"); foreach ($list as $node) { $attr = array(); if ($node->attributes->length) @@ -86,7 +111,8 @@ function parseurl_getsiteinfo($url) { } } - $list = $xpath->query("head/meta[@property]"); + //$list = $xpath->query("head/meta[@property]"); + $list = $xpath->query("//meta[@property]"); foreach ($list as $node) { $attr = array(); if ($node->attributes->length) @@ -116,38 +142,32 @@ function parseurl_getsiteinfo($url) { foreach ($node->attributes as $attribute) $attr[$attribute->name] = $attribute->value; - // guess mimetype from headers or filename - $type = guess_image_type($attr["src"],true); + $src = completeurl($attr["src"], $url); + $photodata = getimagesize($src); - $i = fetch_url($attr["src"]); - $ph = new Photo($i, $type); - - if($ph->is_valid() and ($ph->getWidth() > 200) and ($ph->getHeight() > 200)) { - if ($siteinfo["image"] == "") - $siteinfo["image"] = $attr["src"]; - - if($ph->getWidth() > 300 || $ph->getHeight() > 300) { - $ph->scaleImage(300); - $siteinfo["images"][] = array("src"=>$attr["src"], - "width"=>$ph->getWidth(), - "height"=>$ph->getHeight()); - } else - $siteinfo["images"][] = array("src"=>$attr["src"], - "width"=>$ph->getWidth(), - "height"=>$ph->getHeight()); + if (($photodata[0] > 150) and ($photodata[1] > 150)) { + if ($photodata[0] > 300) { + $photodata[1] = $photodata[1] * (300 / $photodata[0]); + $photodata[0] = 300; + } + if ($photodata[1] > 300) { + $photodata[0] = $photodata[0] * (300 / $photodata[1]); + $photodata[1] = 300; + } + $siteinfo["images"][] = array("src"=>$src, + "width"=>$photodata[0], + "height"=>$photodata[1]); } + } } else { - // guess mimetype from headers or filename - $type = guess_image_type($siteinfo["image"],true); + $src = completeurl($siteinfo["image"], $url); + $photodata = getimagesize($src); - $i = fetch_url($siteinfo["image"]); - $ph = new Photo($i, $type); - - if($ph->is_valid()) - $siteinfo["images"][] = array("src"=>$siteinfo["image"], - "width"=>$ph->getWidth(), - "height"=>$ph->getHeight()); + if (($photodata[0] > 10) and ($photodata[1] > 10)) + $siteinfo["images"][] = array("src"=>$src, + "width"=>$photodata[0], + "height"=>$photodata[1]); } if ($siteinfo["text"] == "") { @@ -155,19 +175,22 @@ function parseurl_getsiteinfo($url) { $list = $xpath->query("//div[@class='article']"); foreach ($list as $node) - $text .= " ".trim($node->nodeValue); + if (strlen($node->nodeValue) > 40) + $text .= " ".trim($node->nodeValue); if ($text == "") { $list = $xpath->query("//div[@class='content']"); foreach ($list as $node) - $text .= " ".trim($node->nodeValue); + if (strlen($node->nodeValue) > 40) + $text .= " ".trim($node->nodeValue); } // If none text was found then take the paragraph content if ($text == "") { $list = $xpath->query("//p"); foreach ($list as $node) - $text .= " ".trim($node->nodeValue); + if (strlen($node->nodeValue) > 40) + $text .= " ".trim($node->nodeValue); } if ($text != "") { @@ -238,9 +261,9 @@ function parse_url_content(&$a) { if($url && $title && $text) { if($textmode) - $text = $br . $br . '[quote]' . trim($text) . '[/quote]' . $br; + $text = $br . '[quote]' . trim($text) . '[/quote]' . $br; else - $text = '<br /><br /><blockquote>' . trim($text) . '</blockquote><br />'; + $text = '<br /><blockquote>' . trim($text) . '</blockquote><br />'; $title = str_replace(array("\r","\n"),array('',''),$title); @@ -255,7 +278,8 @@ function parse_url_content(&$a) { $siteinfo = parseurl_getsiteinfo($url); if($siteinfo["title"] == "") { - echo sprintf($template,$url,$url,'') . $str_tags; + echo print_r($siteinfo, true); + //echo sprintf($template,$url,$url,'') . $str_tags; killme(); } else { $text = $siteinfo["text"]; @@ -305,7 +329,7 @@ function parse_url_content(&$a) { } if($image) { - $text = $br.$br.$image.$br.$text; + $text = $br.$br.$image.$text; } $title = str_replace(array("\r","\n"),array('',''),$title); @@ -313,6 +337,6 @@ function parse_url_content(&$a) { logger('parse_url: returns: ' . $result); - echo $result; + echo trim($result); killme(); } From 5f400c4a9798d04c267cc05f2250a6c810802553 Mon Sep 17 00:00:00 2001 From: Michael Vogel <icarus@dabo.de> Date: Fri, 13 Jul 2012 23:23:31 +0200 Subject: [PATCH 10/16] API: Now returns a real 404 when a function isn't implemented parse_url: Image handling improved --- include/api.php | 1 + include/items.php | 7 ++++--- mod/parse_url.php | 27 --------------------------- 3 files changed, 5 insertions(+), 30 deletions(-) diff --git a/include/api.php b/include/api.php index e0b788424..7d230629b 100644 --- a/include/api.php +++ b/include/api.php @@ -156,6 +156,7 @@ //echo "<pre>"; var_dump($r); die(); } } + header("HTTP/1.1 404 Not Found"); logger('API call not implemented: '.$a->query_string." - ".print_r($_REQUEST,true)); $r = '<status><error>not implemented</error></status>'; switch($type){ diff --git a/include/items.php b/include/items.php index 6ab681c87..d888f314d 100755 --- a/include/items.php +++ b/include/items.php @@ -698,9 +698,10 @@ function get_atom_elements($feed,$item) { call_hooks('parse_atom', $arr); //if (($res["title"] != "") or (strpos($res["body"], "RT @") > 0)) { - // $debugfile = tempnam("/home/ike/log", "item-res2-"); - // file_put_contents($debugfile, serialize($res)); - //} + if (strpos($res["body"], "RT @") !== false) { + $debugfile = tempnam("/home/ike/log", "item-res2-"); + file_put_contents($debugfile, serialize($arr)); + } return $res; } diff --git a/mod/parse_url.php b/mod/parse_url.php index 4d894969a..32f28b7b7 100644 --- a/mod/parse_url.php +++ b/mod/parse_url.php @@ -294,33 +294,6 @@ function parse_url_content(&$a) { else $image .= '<img height="'.$imagedata["height"].'" width="'.$imagedata["width"].'" src="'.$imagedata["src"].'" alt="photo" />'; -/* if ($image != "") { - $i = fetch_url($image); - if($i) { - require_once('include/Photo.php'); - // guess mimetype from headers or filename - $type = guess_image_type($image,true); - - $ph = new Photo($i, $type); - if($ph->is_valid()) { - if($ph->getWidth() > 300 || $ph->getHeight() > 300) { - $ph->scaleImage(300); - $new_width = $ph->getWidth(); - $new_height = $ph->getHeight(); - if($textmode) - $image = $br . $br . '[img=' . $new_width . 'x' . $new_height . ']' . $image . '[/img]'; - else - $image = '<br /><br /><img height="' . $new_height . '" width="' . $new_width . '" src="' .$image . '" alt="photo" />'; - } else { - if($textmode) - $image = $br.$br.'[img]'.$image.'[/img]'; - else - $image = '<br /><br /><img src="'.$image.'" alt="photo" />'; - } - } - } - }*/ - if(strlen($text)) { if($textmode) $text = $br.'[quote]'.trim($text).'[/quote]'.$br ; From 72f430ae58b830e499114241d340f39a228b867c Mon Sep 17 00:00:00 2001 From: Michael Vogel <icarus@dabo.de> Date: Sat, 14 Jul 2012 13:59:42 +0200 Subject: [PATCH 11/16] parse_url: Problem when resizing images --- include/items.php | 8 ++++---- mod/parse_url.php | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/items.php b/include/items.php index b81208f36..724e0ac0f 100755 --- a/include/items.php +++ b/include/items.php @@ -791,10 +791,10 @@ function get_atom_elements($feed,$item) { call_hooks('parse_atom', $arr); //if (($res["title"] != "") or (strpos($res["body"], "RT @") > 0)) { - if (strpos($res["body"], "RT @") !== false) { - $debugfile = tempnam("/home/ike/log", "item-res2-"); - file_put_contents($debugfile, serialize($arr)); - } + //if (strpos($res["body"], "RT @") !== false) { + // $debugfile = tempnam("/home/ike/log", "item-res2-"); + // file_put_contents($debugfile, serialize($arr)); + //} return $res; } diff --git a/mod/parse_url.php b/mod/parse_url.php index 32f28b7b7..3d4018745 100644 --- a/mod/parse_url.php +++ b/mod/parse_url.php @@ -147,11 +147,11 @@ function parseurl_getsiteinfo($url) { if (($photodata[0] > 150) and ($photodata[1] > 150)) { if ($photodata[0] > 300) { - $photodata[1] = $photodata[1] * (300 / $photodata[0]); + $photodata[1] = round($photodata[1] * (300 / $photodata[0])); $photodata[0] = 300; } if ($photodata[1] > 300) { - $photodata[0] = $photodata[0] * (300 / $photodata[1]); + $photodata[0] = round($photodata[0] * (300 / $photodata[1])); $photodata[1] = 300; } $siteinfo["images"][] = array("src"=>$src, From 8d7d46ae7a4a2ff049491f1c4b053fc55e8f0e13 Mon Sep 17 00:00:00 2001 From: Michael Vogel <icarus@dabo.de> Date: Sat, 14 Jul 2012 19:54:27 +0200 Subject: [PATCH 12/16] Bugfix: multiple linefeeds were generated when items where written in the editor. The existing fix didn't really work --- include/bbcode.php | 3 +++ include/items.php | 25 +++++++++++++++++++++++++ include/text.php | 1 + 3 files changed, 29 insertions(+) diff --git a/include/bbcode.php b/include/bbcode.php index 4aac33f11..a90be5de7 100644 --- a/include/bbcode.php +++ b/include/bbcode.php @@ -369,6 +369,9 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true) { // oembed tag $Text = oembed_bbcode2html($Text); + // Avoid triple linefeeds through oembed + $Text = str_replace("<br style='clear:left'></span><br /><br />", "<br style='clear:left'></span><br />", $Text); + // If we found an event earlier, strip out all the event code and replace with a reformatted version. // Replace the event-start section with the entire formatted event. The other bbcode is stripped. // Summary (e.g. title) is required, earlier revisions only required description (in addition to diff --git a/include/items.php b/include/items.php index 724e0ac0f..fff9e5fcf 100755 --- a/include/items.php +++ b/include/items.php @@ -786,6 +786,31 @@ function get_atom_elements($feed,$item) { $res['target'] .= '</target>' . "\n"; } + // This is some experimental stuff. By now retweets are shown with "RT:" + // But: There is data so that the message could be shown similar to native retweets + // There is some better way to parse this array - but it didn't worked for me. + $child = $item->feed->data["child"][SIMPLEPIE_NAMESPACE_ATOM_10]["feed"][0]["child"][SIMPLEPIE_NAMESPACE_ATOM_10]["entry"][0]["child"]["http://activitystrea.ms/spec/1.0/"][object][0]["child"]; + if (is_array($child)) { + $message = $child["http://activitystrea.ms/spec/1.0/"]["object"][0]["child"][SIMPLEPIE_NAMESPACE_ATOM_10]["content"][0]["data"]; + $author = $child[SIMPLEPIE_NAMESPACE_ATOM_10]["author"][0]["child"][SIMPLEPIE_NAMESPACE_ATOM_10]; + $uri = $author["uri"][0]["data"]; + $name = $author["name"][0]["data"]; + $avatar = @array_shift($author["link"][2]["attribs"]); + $avatar = $avatar["href"]; + + if (($name != "") and ($uri != "") and ($avatar != "") and ($message != "")) { + $res["owner-name"] = $res["author-name"]; + $res["owner-link"] = $res["author-link"]; + $res["owner-avatar"] = $res["author-avatar"]; + + $res["author-name"] = $name; + $res["author-link"] = $uri; + $res["author-avatar"] = $avatar; + + $res["body"] = html2bbcode($message); + } + } + $arr = array('feed' => $feed, 'item' => $item, 'result' => $res); call_hooks('parse_atom', $arr); diff --git a/include/text.php b/include/text.php index 409d40d59..c3558c641 100644 --- a/include/text.php +++ b/include/text.php @@ -1537,6 +1537,7 @@ function undo_post_tagging($s) { function fix_mce_lf($s) { $s = str_replace("\r\n","\n",$s); + $s = str_replace("\n\n","\n",$s); return $s; } From 4e9086f9b0a07672821c1d2dd72d01fcf639c45d Mon Sep 17 00:00:00 2001 From: Michael Vogel <icarus@dabo.de> Date: Sun, 15 Jul 2012 12:47:24 +0200 Subject: [PATCH 13/16] Changes in the bbcode to markdown converter to make it more compatible with Diaspora --- include/bb2diaspora.php | 9 +++++++++ include/bbcode.php | 4 ++++ include/markdownify/markdownify.php | 10 +++++++++- 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/include/bb2diaspora.php b/include/bb2diaspora.php index 436412dbd..25e4640b3 100644 --- a/include/bb2diaspora.php +++ b/include/bb2diaspora.php @@ -196,6 +196,15 @@ function bb2diaspora($Text,$preserve_nl = false) { // The bbcode parser now handles youtube-links (and the other stuff) correctly. // Additionally the html code is now fixed so that lists are now working. + // Converting images with size parameters to simple images. Markdown doesn't know it. + $Text = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $Text); + + // the following was added on 10-January-2012 due to an inability of Diaspora's + // new javascript markdown processor to handle links with images as the link "text" + // It is not optimal and may be removed if this ability is restored in the future + $Text = preg_replace("/\[url\=([^\[\]]*)\]\[img\](.*?)\[\/img\]\[\/url\]/ism", + "[img]$2[/img]\n[url]$1[/url]", $Text); + // Convert it to HTML - don't try oembed $Text = bbcode($Text, $preserve_nl, false); diff --git a/include/bbcode.php b/include/bbcode.php index a90be5de7..b5f2ec569 100644 --- a/include/bbcode.php +++ b/include/bbcode.php @@ -115,6 +115,10 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true) { $a = get_app(); + // Move all spaces out of the tags + $Text = preg_replace("/\[(\w*)\](\s*)/ism", '$2[$1]', $Text); + $Text = preg_replace("/(\s*)\[\/(\w*)\]/ism", '[/$2]$1', $Text); + // Hide all [noparse] contained bbtags by spacefying them // POSSIBLE BUG --> Will the 'preg' functions crash if there's an embedded image? diff --git a/include/markdownify/markdownify.php b/include/markdownify/markdownify.php index 43730cb77..7bbf1cbbe 100644 --- a/include/markdownify/markdownify.php +++ b/include/markdownify/markdownify.php @@ -735,6 +735,13 @@ class Markdownify { $this->parser->tagAttributes['src'] = $this->decode($this->parser->tagAttributes['src']); } +// ![Alt text](/path/to/img.jpg "Optional title") + if ($this->parser->tagAttributes['title'] != "") + $this->out('!['.$this->parser->tagAttributes['alt'].']('.$this->parser->tagAttributes['src'].'"'.$this->parser->tagAttributes['title'].'")', true); + else + $this->out('!['.$this->parser->tagAttributes['alt'].']('.$this->parser->tagAttributes['src'].')', true); + +/* # [This link][id] $link_id = false; if (!empty($this->stack['a'])) { @@ -759,6 +766,7 @@ class Markdownify { } $this->out('!['.$this->parser->tagAttributes['alt'].']['.$link_id.']', true); +*/ } /** * handle <code> tags @@ -1181,4 +1189,4 @@ class Markdownify { function parent() { return end($this->parser->openTags); } -} \ No newline at end of file +} From 8aeb3ec2fb2596ddef0200122583c23b0d549351 Mon Sep 17 00:00:00 2001 From: Michael Vogel <icarus@dabo.de> Date: Sun, 15 Jul 2012 13:12:24 +0200 Subject: [PATCH 14/16] Corrected the fix for images with links --- include/bb2diaspora.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/bb2diaspora.php b/include/bb2diaspora.php index 25e4640b3..1a7d8eaff 100644 --- a/include/bb2diaspora.php +++ b/include/bb2diaspora.php @@ -202,7 +202,7 @@ function bb2diaspora($Text,$preserve_nl = false) { // the following was added on 10-January-2012 due to an inability of Diaspora's // new javascript markdown processor to handle links with images as the link "text" // It is not optimal and may be removed if this ability is restored in the future - $Text = preg_replace("/\[url\=([^\[\]]*)\]\[img\](.*?)\[\/img\]\[\/url\]/ism", + $Text = preg_replace("/\[url\=([^\[\]]*)\]\s*\[img\](.*?)\[\/img\]\s*\[\/url\]/ism", "[img]$2[/img]\n[url]$1[/url]", $Text); // Convert it to HTML - don't try oembed From 335c15ede55997989647552c05dd74db3431b499 Mon Sep 17 00:00:00 2001 From: Michael Vogel <icarus@dabo.de> Date: Sun, 15 Jul 2012 13:40:01 +0200 Subject: [PATCH 15/16] Hopefully the final fix for image links in bb2diaspora --- include/bb2diaspora.php | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/bb2diaspora.php b/include/bb2diaspora.php index 1a7d8eaff..b95dee8f3 100644 --- a/include/bb2diaspora.php +++ b/include/bb2diaspora.php @@ -190,7 +190,7 @@ function diaspora_ol($s) { } -function bb2diaspora($Text,$preserve_nl = false) { +function bb2diaspora($Text,$preserve_nl = false, $fordiaspora = true) { // Re-enabling the converter again. // The bbcode parser now handles youtube-links (and the other stuff) correctly. @@ -202,8 +202,9 @@ function bb2diaspora($Text,$preserve_nl = false) { // the following was added on 10-January-2012 due to an inability of Diaspora's // new javascript markdown processor to handle links with images as the link "text" // It is not optimal and may be removed if this ability is restored in the future - $Text = preg_replace("/\[url\=([^\[\]]*)\]\s*\[img\](.*?)\[\/img\]\s*\[\/url\]/ism", - "[img]$2[/img]\n[url]$1[/url]", $Text); + if ($fordiaspora) + $Text = preg_replace("/\[url\=([^\[\]]*)\]\s*\[img\](.*?)\[\/img\]\s*\[\/url\]/ism", + "[url]$1[/url]\n[img]$2[/img]", $Text); // Convert it to HTML - don't try oembed $Text = bbcode($Text, $preserve_nl, false); From 222635d15afba7d3a7aef002e90d06f549961012 Mon Sep 17 00:00:00 2001 From: Michael Vogel <icarus@dabo.de> Date: Wed, 18 Jul 2012 21:06:38 +0200 Subject: [PATCH 16/16] parse_url: Just added a comment for future elements that should be added as well. --- mod/parse_url.php | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/mod/parse_url.php b/mod/parse_url.php index 3d4018745..061053196 100644 --- a/mod/parse_url.php +++ b/mod/parse_url.php @@ -1,4 +1,18 @@ <?php +/* To-Do +https://developers.google.com/+/plugins/snippet/ + +<meta itemprop="name" content="Toller Titel"> +<meta itemprop="description" content="Eine tolle Beschreibung"> +<meta itemprop="image" content="http://maple.libertreeproject.org/images/tree-icon.png"> + +<body itemscope itemtype="http://schema.org/Product"> + <h1 itemprop="name">Shiny Trinket</h1> + <img itemprop="image" src="{image-url}" /> + <p itemprop="description">Shiny trinkets are shiny.</p> +</body> +*/ + if(!function_exists('deletenode')) { function deletenode(&$doc, $node) {