From 92db5e5ec295fc83d3fc479f42cf0c4fe1ffaa74 Mon Sep 17 00:00:00 2001 From: Zach Prezkuta Date: Thu, 12 Jul 2012 23:12:48 -0600 Subject: [PATCH] focus preg_replace to prevent it from catching too much --- include/bb2diaspora.php | 68 ++++++++++++++++++++++++++++++++++++++--- mod/babel.php | 19 ++++++++++++ 2 files changed, 83 insertions(+), 4 deletions(-) diff --git a/include/bb2diaspora.php b/include/bb2diaspora.php index d0cb60302..b5feacea8 100644 --- a/include/bb2diaspora.php +++ b/include/bb2diaspora.php @@ -7,6 +7,66 @@ require_once("include/html2bbcode.php"); require_once("include/bbcode.php"); require_once("include/markdownify/markdownify.php"); + +function get_bb_tag_pos($s, $name, $occurance = 1) { + + if($occurance < 1) + $occurance = 1; + + $start_open = -1; + for($i = 1; $i <= $occurance; $i++) { + if( $start_open !== false) + $start_open = strpos($s, '[' . $name, $start_open + 1); // allow [name= type tags + } + + if( $start_open === false) + return false; + + $start_equal = strpos($s, '=', $start_open); + $start_close = strpos($s, ']', $start_open); + + if( $start_close === false) + return false; + + $start_close++; + + $end_open = strpos($s, '[/' . $name . ']', $start_close); + + if( $end_open === false) + return false; + + $res = array( 'start' => array('open' => $start_open, 'close' => $start_close), + 'end' => array('open' => $end_open, 'close' => $end_open + strlen('[/' . $name . ']')) ); + if( $start_equal !== false) + $res['start']['equal'] = $start_equal + 1; + + return $res; +} + +function bb_tag_preg_replace($pattern, $replace, $name, $s) { + + $string = $s; + + $occurance = 1; + $pos = get_bb_tag_pos($string, $name, $occurance); + while($pos !== false && $occurance < 1000) { + + $start = substr($string, 0, $pos['start']['open']); + $subject = substr($string, $pos['start']['open'], $pos['end']['close'] - $pos['start']['open']); + $end = substr($string, $pos['end']['close']); + if($end === false) + $end = ''; + + $subject = preg_replace($pattern, $replace, $subject); + $string = $start . $subject . $end; + + $occurance++; + $pos = get_bb_tag_pos($string, $name, $occurance); + } + + return $string; +} + // we don't want to support a bbcode specific markdown interpreter // and the markdown library we have is pretty good, but provides HTML output. // So we'll use that to convert to HTML, then convert the HTML back to bbcode, @@ -51,10 +111,10 @@ function diaspora2bb($s) { $s = preg_replace("/([^\]\=]|^)(https?\:\/\/)([a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism", '$1[url=$2$3]$2$3[/url]',$s); //$s = preg_replace("/([^\]\=]|^)(https?\:\/\/)(vimeo|youtu|www\.youtube|soundcloud)([a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism", '$1[url=$2$3$4]$2$3$4[/url]',$s); - $s = preg_replace("/\[url\=?(.*?)\]https?:\/\/www.youtube.com\/watch\?v\=(.*?)\[\/url\]/ism",'[youtube]$2[/youtube]',$s); - $s = preg_replace("/\[url\=https?:\/\/www.youtube.com\/watch\?v\=(.*?)\].*?\[\/url\]/ism",'[youtube]$1[/youtube]',$s); - $s = preg_replace("/\[url\=?(.*?)\]https?:\/\/vimeo.com\/([0-9]+)(.*?)\[\/url\]/ism",'[vimeo]$2[/vimeo]',$s); - $s = preg_replace("/\[url\=https?:\/\/vimeo.com\/([0-9]+)\](.*?)\[\/url\]/ism",'[vimeo]$1[/vimeo]',$s); + $s = bb_tag_preg_replace("/\[url\=?(.*?)\]https?:\/\/www.youtube.com\/watch\?v\=(.*?)\[\/url\]/ism",'[youtube]$2[/youtube]','url',$s); + $s = bb_tag_preg_replace("/\[url\=https?:\/\/www.youtube.com\/watch\?v\=(.*?)\].*?\[\/url\]/ism",'[youtube]$1[/youtube]','url',$s); + $s = bb_tag_preg_replace("/\[url\=?(.*?)\]https?:\/ \/vimeo.com\/([0-9]+)(.*?)\[\/url\]/ism",'[vimeo]$2[/vimeo]','url',$s); + $s = bb_tag_preg_replace("/\[url\=https?:\/\/vimeo.com\/([0-9]+)\](.*?)\[\/url\]/ism",'[vimeo]$1[/vimeo]','url',$s); // remove duplicate adjacent code tags $s = preg_replace("/(\[code\])+(.*?)(\[\/code\])+/ism","[code]$2[/code]", $s); diff --git a/mod/babel.php b/mod/babel.php index 1c881a5bd..ca2c068f8 100644 --- a/mod/babel.php +++ b/mod/babel.php @@ -19,6 +19,12 @@ function babel_content(&$a) { $o .= '

'; + $o .= '
'; + $o .= t('Source (Diaspora) text to convert to BBcode:') . EOL . '' . EOL; + $o .= '
'; + + $o .= '

'; + if(x($_REQUEST,'text')) { $text = trim($_REQUEST['text']); @@ -52,5 +58,18 @@ function babel_content(&$a) { } + + if(x($_REQUEST,'d2bbtext')) { + + $d2bbtext = trim($_REQUEST['d2bbtext']); + $o .= t("Source input (Diaspora format): ") . EOL. EOL; + $o .= visible_lf($d2bbtext) . EOL. EOL; + + + $bb = diaspora2bb($d2bbtext); + $o .= t("diaspora2bb: ") . EOL. EOL; + $o .= visible_lf($bb) . EOL. EOL; + } + return $o; }