focus preg_replace to prevent it from catching too much

This commit is contained in:
Zach Prezkuta 2012-07-12 23:12:48 -06:00
parent bd08a6b565
commit 92db5e5ec2
2 changed files with 83 additions and 4 deletions

View file

@ -7,6 +7,66 @@ require_once("include/html2bbcode.php");
require_once("include/bbcode.php"); require_once("include/bbcode.php");
require_once("include/markdownify/markdownify.php"); require_once("include/markdownify/markdownify.php");
/**
 * Locate the Nth occurrence of a bbcode tag pair inside a string.
 *
 * The opening tag is matched by the prefix "[name", so both "[name]" and
 * "[name=value]" forms are found. The closing tag searched for is the first
 * "[/name]" that follows the end of the opening tag.
 *
 * All returned offsets are byte offsets into $s:
 *   ['start']['open']  offset of the "[" of the opening tag
 *   ['start']['close'] offset just past the "]" of the opening tag
 *   ['start']['equal'] offset just past the "=" of the opening tag; only
 *                      present when the "=" lies inside the opening tag
 *   ['end']['open']    offset of the "[" of the closing tag
 *   ['end']['close']   offset just past the "]" of the closing tag
 *
 * @param string $s         Text to search.
 * @param string $name      Tag name without brackets, e.g. 'url'.
 * @param int    $occurance 1-based index of the opening tag to locate;
 *                          values below 1 are treated as 1.
 *
 * @return array|false Offsets as described above, or false when the requested
 *                     opening tag, its "]" or a following closing tag is missing.
 */
function get_bb_tag_pos($s, $name, $occurance = 1) {

	if($occurance < 1)
		$occurance = 1;

	// Walk forward from one "[name" to the next until the requested one is reached.
	$start_open = -1;
	for($i = 1; $i <= $occurance; $i++) {
		$start_open = strpos($s, '[' . $name, $start_open + 1); // allow [name= type tags
		if($start_open === false)
			return false;
	}

	$bracket = strpos($s, ']', $start_open);
	if($bracket === false)
		return false;

	$start_close = $bracket + 1;

	$closing_tag = '[/' . $name . ']';
	$end_open = strpos($s, $closing_tag, $start_close);
	if($end_open === false)
		return false;

	$res = array( 'start' => array('open' => $start_open, 'close' => $start_close),
	              'end' => array('open' => $end_open, 'close' => $end_open + strlen($closing_tag)) );

	// Only report an "=" that belongs to the opening tag itself. An "=" found
	// after the "]" is part of the tag content (or of later text) and must not
	// be mistaken for the tag's parameter separator.
	$start_equal = strpos($s, '=', $start_open);
	if($start_equal !== false && $start_equal < $bracket)
		$res['start']['equal'] = $start_equal + 1;

	return $res;
}
/**
 * Run preg_replace() separately on every "[name...]...[/name]" span of a string.
 *
 * Restricting each replacement to a single tag pair keeps non-greedy patterns
 * such as "\[url\=?(.*?)\]...\[\/url\]" from matching across several tags.
 *
 * The string is consumed from left to right: the first remaining tag pair is
 * located, replaced, moved to the output, and the search continues after it.
 * Searching the remainder each time (instead of counting occurrences in the
 * whole string) ensures that no tag is skipped when a replacement removes the
 * tag that was just processed, e.g. "[url]" being rewritten to "[youtube]".
 *
 * @param string|array $pattern Pattern(s) passed to preg_replace().
 * @param string|array $replace Replacement(s) passed to preg_replace().
 * @param string       $name    Tag name without brackets, e.g. 'url'.
 * @param string       $s       Text to process.
 *
 * @return string The text with the replacement applied inside each tag pair.
 */
function bb_tag_preg_replace($pattern, $replace, $name, $s) {

	$done = '';
	$rest = $s;

	// Safety cap on the number of tag pairs processed, as in the original loop.
	for($pass = 1; $pass < 1000; $pass++) {

		$pos = get_bb_tag_pos($rest, $name, 1);
		if($pos === false)
			break;

		$open = $pos['start']['open'];
		$close = $pos['end']['close'];

		$subject = substr($rest, $open, $close - $open);

		// preg_replace() returns null on a PCRE error; keep the text untouched then.
		$replaced = preg_replace($pattern, $replace, $subject);
		if($replaced === null)
			$replaced = $subject;

		$done .= substr($rest, 0, $open) . $replaced;

		// Older PHP versions return false instead of '' at the end of the string.
		$rest = substr($rest, $close);
		if($rest === false)
			$rest = '';
	}

	return $done . $rest;
}
// we don't want to support a bbcode specific markdown interpreter // we don't want to support a bbcode specific markdown interpreter
// and the markdown library we have is pretty good, but provides HTML output. // and the markdown library we have is pretty good, but provides HTML output.
// So we'll use that to convert to HTML, then convert the HTML back to bbcode, // So we'll use that to convert to HTML, then convert the HTML back to bbcode,
@ -51,10 +111,10 @@ function diaspora2bb($s) {
$s = preg_replace("/([^\]\=]|^)(https?\:\/\/)([a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism", '$1[url=$2$3]$2$3[/url]',$s); $s = preg_replace("/([^\]\=]|^)(https?\:\/\/)([a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism", '$1[url=$2$3]$2$3[/url]',$s);
//$s = preg_replace("/([^\]\=]|^)(https?\:\/\/)(vimeo|youtu|www\.youtube|soundcloud)([a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism", '$1[url=$2$3$4]$2$3$4[/url]',$s); //$s = preg_replace("/([^\]\=]|^)(https?\:\/\/)(vimeo|youtu|www\.youtube|soundcloud)([a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism", '$1[url=$2$3$4]$2$3$4[/url]',$s);
$s = preg_replace("/\[url\=?(.*?)\]https?:\/\/www.youtube.com\/watch\?v\=(.*?)\[\/url\]/ism",'[youtube]$2[/youtube]',$s); $s = bb_tag_preg_replace("/\[url\=?(.*?)\]https?:\/\/www.youtube.com\/watch\?v\=(.*?)\[\/url\]/ism",'[youtube]$2[/youtube]','url',$s);
$s = preg_replace("/\[url\=https?:\/\/www.youtube.com\/watch\?v\=(.*?)\].*?\[\/url\]/ism",'[youtube]$1[/youtube]',$s); $s = bb_tag_preg_replace("/\[url\=https?:\/\/www.youtube.com\/watch\?v\=(.*?)\].*?\[\/url\]/ism",'[youtube]$1[/youtube]','url',$s);
$s = preg_replace("/\[url\=?(.*?)\]https?:\/\/vimeo.com\/([0-9]+)(.*?)\[\/url\]/ism",'[vimeo]$2[/vimeo]',$s); $s = bb_tag_preg_replace("/\[url\=?(.*?)\]https?:\/ \/vimeo.com\/([0-9]+)(.*?)\[\/url\]/ism",'[vimeo]$2[/vimeo]','url',$s);
$s = preg_replace("/\[url\=https?:\/\/vimeo.com\/([0-9]+)\](.*?)\[\/url\]/ism",'[vimeo]$1[/vimeo]',$s); $s = bb_tag_preg_replace("/\[url\=https?:\/\/vimeo.com\/([0-9]+)\](.*?)\[\/url\]/ism",'[vimeo]$1[/vimeo]','url',$s);
// remove duplicate adjacent code tags // remove duplicate adjacent code tags
$s = preg_replace("/(\[code\])+(.*?)(\[\/code\])+/ism","[code]$2[/code]", $s); $s = preg_replace("/(\[code\])+(.*?)(\[\/code\])+/ism","[code]$2[/code]", $s);

View file

@ -19,6 +19,12 @@ function babel_content(&$a) {
$o .= '<br /><br />'; $o .= '<br /><br />';
$o .= '<form action="babel" method="post">';
$o .= t('Source (Diaspora) text to convert to BBcode:') . EOL . '<textarea name="d2bbtext" >' . htmlspecialchars($_REQUEST['d2bbtext']) .'</textarea>' . EOL;
$o .= '<input type="submit" name="submit" value="Submit" /></form>';
$o .= '<br /><br />';
if(x($_REQUEST,'text')) { if(x($_REQUEST,'text')) {
$text = trim($_REQUEST['text']); $text = trim($_REQUEST['text']);
@ -52,5 +58,18 @@ function babel_content(&$a) {
} }
if(x($_REQUEST,'d2bbtext')) {
$d2bbtext = trim($_REQUEST['d2bbtext']);
$o .= t("Source input (Diaspora format): ") . EOL. EOL;
$o .= visible_lf($d2bbtext) . EOL. EOL;
$bb = diaspora2bb($d2bbtext);
$o .= t("diaspora2bb: ") . EOL. EOL;
$o .= visible_lf($bb) . EOL. EOL;
}
return $o; return $o;
} }