From dafc84390db4b57e5958f1af386c778b481c7d04 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Thu, 5 Jul 2012 23:36:00 +0200 Subject: [PATCH] Improvements of the HTML to make a better bb2markdown conversion (nested lists, youtube, vimeo, ...) --- include/api.php | 1 + include/bb2diaspora.php | 4 ++-- include/bbcode.php | 46 +++++++++++++++++++++++++++++++++-------- include/items.php | 19 ++++++++++------- 4 files changed, 52 insertions(+), 18 deletions(-) diff --git a/include/api.php b/include/api.php index d790b4b875..629b5fcc2a 100644 --- a/include/api.php +++ b/include/api.php @@ -1726,3 +1726,4 @@ notifications/leave blocks/exists blocks/blocking */ + diff --git a/include/bb2diaspora.php b/include/bb2diaspora.php index 77a5f5c2a0..4a82635e57 100644 --- a/include/bb2diaspora.php +++ b/include/bb2diaspora.php @@ -112,7 +112,7 @@ function bb2diaspora($Text,$preserve_nl = false) { // Note that to get nested lists to work for Diaspora, we would need // to define the closing tag for the list elements.
So nested lists // are going to be flattened out in Diaspora for now - $endlessloop = 0; +/* $endlessloop = 0; while ((((strpos($Text, "[/list]") !== false) && (strpos($Text, "[list") !== false)) || ((strpos($Text, "[/ol]") !== false) && (strpos($Text, "[ol]") !== false)) || ((strpos($Text, "[/ul]") !== false) && (strpos($Text, "[ul]") !== false))) && (++$endlessloop < 20)) { @@ -125,7 +125,7 @@ function bb2diaspora($Text,$preserve_nl = false) { $Text = preg_replace_callback("/\[ul\](.*?)\[\/ul\]/is", 'diaspora_ul', $Text); $Text = preg_replace_callback("/\[ol\](.*?)\[\/ol\]/is", 'diaspora_ol', $Text); } - +*/ // Convert it to HTML - don't try oembed $Text = bbcode($Text, $preserve_nl, false); diff --git a/include/bbcode.php b/include/bbcode.php index 9071c767b6..988e75d417 100644 --- a/include/bbcode.php +++ b/include/bbcode.php @@ -251,21 +251,25 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true) { $Text = preg_replace("/\[img\](.*?)\[\/img\]/ism", '' . t('Image/photo') . 
'', $Text); - $Text = preg_replace("/\[video\](.*?\.(ogg|ogv|oga|ogm|webm|mp4))\[\/video\]/ism", '', $Text); - - $Text = preg_replace("/\[audio\](.*?\.(ogg|ogv|oga|ogm|webm|mp4|mp3))\[\/audio\]/ism", '', $Text); - // Try to Oembed if ($tryoembed) { + $Text = preg_replace("/\[video\](.*?\.(ogg|ogv|oga|ogm|webm|mp4))\[\/video\]/ism", '', $Text); + $Text = preg_replace("/\[audio\](.*?\.(ogg|ogv|oga|ogm|webm|mp4|mp3))\[\/audio\]/ism", '', $Text); + $Text = preg_replace_callback("/\[video\](.*?)\[\/video\]/ism", 'tryoembed', $Text); $Text = preg_replace_callback("/\[audio\](.*?)\[\/audio\]/ism", 'tryoembed', $Text); + } else { + $Text = preg_replace("/\[video\](.*?)\[\/video\]/", '$1', $Text); + $Text = preg_replace("/\[audio\](.*?)\[\/audio\]/", '$1', $Text); } // html5 video and audio - $Text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '', $Text); - + if ($tryoembed) + $Text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '', $Text); + else + $Text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '$1', $Text); // Youtube extensions if ($tryoembed) { @@ -278,7 +282,10 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true) { $Text = preg_replace("/\[youtube\]https?:\/\/www.youtube.com\/embed\/(.*?)\[\/youtube\]/ism",'[youtube]$1[/youtube]',$Text); $Text = preg_replace("/\[youtube\]https?:\/\/youtu.be\/(.*?)\[\/youtube\]/ism",'[youtube]$1[/youtube]',$Text); - $Text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", '', $Text); + if ($tryoembed) + $Text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", '', $Text); + else + $Text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", "http://www.youtube.com/watch?v=$1", $Text); if ($tryoembed) { @@ -287,8 +294,12 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true) { } $Text = preg_replace("/\[vimeo\]https?:\/\/player.vimeo.com\/video\/([0-9]+)(.*?)\[\/vimeo\]/ism",'[vimeo]$1[/vimeo]',$Text); - $Text = 
preg_replace("/\[vimeo\]https?:\/\/vimeo.com\/([0-9]+)(.*?)\[\/vimeo\]/ism",'[vimeo]$1[/vimeo]',$Text); - $Text = preg_replace("/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism", '', $Text); + $Text = preg_replace("/\[vimeo\]https?:\/\/vimeo.com\/([0-9]+)(.*?)\[\/vimeo\]/ism",'[vimeo]$1[/vimeo]',$Text); + + if ($tryoembed) + $Text = preg_replace("/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism", '', $Text); + else + $Text = preg_replace("/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism", "http://vimeo.com/$1", $Text); // $Text = preg_replace("/\[youtube\](.*?)\[\/youtube\]/", '', $Text); @@ -327,6 +338,23 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true) { if(strlen($saved_image)) $Text = str_replace('[$#saved_image#$]','' . t('Image/photo') . '',$Text); + // Clean up the HTML by loading and saving the HTML with the DOM + // Only do it when it has to be done - for performance reasons + if (!$tryoembed) { + $doc = new DOMDocument(); + $doc->preserveWhiteSpace = false; + + $Text = mb_convert_encoding($Text, 'HTML-ENTITIES', "UTF-8"); + + $doctype = ''; + @$doc->loadHTML($doctype."".$Text.""); + + $Text = $doc->saveHTML(); + $Text = str_replace(array("", "", $doctype), array("", "", ""), $Text); + + $Text = str_replace('
','', $Text); + } + call_hooks('bbcode',$Text); return $Text; diff --git a/include/items.php b/include/items.php index 87d6ab4794..6ab681c875 100755 --- a/include/items.php +++ b/include/items.php @@ -352,7 +352,7 @@ function get_atom_elements($feed,$item) { $res['author-avatar'] = unxmlify($link['attribs']['']['href']); } } - } + } $rawactor = $item->get_item_tags(NAMESPACE_ACTIVITY, 'actor'); @@ -384,7 +384,7 @@ function get_atom_elements($feed,$item) { $res['author-avatar'] = unxmlify($link['attribs']['']['href']); } } - } + } $rawactor = $feed->get_feed_tags(NAMESPACE_ACTIVITY, 'subject'); @@ -409,7 +409,7 @@ function get_atom_elements($feed,$item) { $res['app'] = strip_tags(unxmlify($apps[0]['attribs']['']['source'])); if($res['app'] === 'web') $res['app'] = 'OStatus'; - } + } // base64 encoded json structure representing Diaspora signature @@ -553,7 +553,7 @@ function get_atom_elements($feed,$item) { foreach($base as $link) { if(!x($res, 'owner-avatar') || !$res['owner-avatar']) { - if($link['attribs']['']['rel'] === 'photo' || $link['attribs']['']['rel'] === 'avatar') + if($link['attribs']['']['rel'] === 'photo' || $link['attribs']['']['rel'] === 'avatar') $res['owner-avatar'] = unxmlify($link['attribs']['']['href']); } } @@ -697,6 +697,11 @@ function get_atom_elements($feed,$item) { call_hooks('parse_atom', $arr); + //if (($res["title"] != "") or (strpos($res["body"], "RT @") > 0)) { + // $debugfile = tempnam("/home/ike/log", "item-res2-"); + // file_put_contents($debugfile, serialize($res)); + //} + return $res; } @@ -1644,7 +1649,7 @@ function consume_feed($xml,$importer,&$contact, &$hub, $datedir = 0, $pass = 0) // Now process the feed - if($feed->get_item_quantity()) { + if($feed->get_item_quantity()) { logger('consume_feed: feed item count = ' . 
$feed->get_item_quantity()); @@ -1657,7 +1662,7 @@ function consume_feed($xml,$importer,&$contact, &$hub, $datedir = 0, $pass = 0) foreach($items as $item) { - $is_reply = false; + $is_reply = false; $item_id = $item->get_id(); $rawthread = $item->get_item_tags( NAMESPACE_THREAD,'in-reply-to'); if(isset($rawthread[0]['attribs']['']['ref'])) { @@ -1671,7 +1676,7 @@ function consume_feed($xml,$importer,&$contact, &$hub, $datedir = 0, $pass = 0) continue; // Have we seen it? If not, import it. - + $item_id = $item->get_id(); $datarray = get_atom_elements($feed,$item);