1
1
Fork 0

regular expressions were grabbing too much

This commit is contained in:
Zach Prezkuta 2012-08-26 14:56:34 -06:00
parent 17b59a7e3f
commit 72712175b5
3 changed files with 84 additions and 73 deletions

View file

@ -8,65 +8,6 @@ require_once("include/bbcode.php");
require_once("include/markdownify/markdownify.php"); require_once("include/markdownify/markdownify.php");
function get_bb_tag_pos($s, $name, $occurance = 1) {
if($occurance < 1)
$occurance = 1;
$start_open = -1;
for($i = 1; $i <= $occurance; $i++) {
if( $start_open !== false)
$start_open = strpos($s, '[' . $name, $start_open + 1); // allow [name= type tags
}
if( $start_open === false)
return false;
$start_equal = strpos($s, '=', $start_open);
$start_close = strpos($s, ']', $start_open);
if( $start_close === false)
return false;
$start_close++;
$end_open = strpos($s, '[/' . $name . ']', $start_close);
if( $end_open === false)
return false;
$res = array( 'start' => array('open' => $start_open, 'close' => $start_close),
'end' => array('open' => $end_open, 'close' => $end_open + strlen('[/' . $name . ']')) );
if( $start_equal !== false)
$res['start']['equal'] = $start_equal + 1;
return $res;
}
function bb_tag_preg_replace($pattern, $replace, $name, $s) {
$string = $s;
$occurance = 1;
$pos = get_bb_tag_pos($string, $name, $occurance);
while($pos !== false && $occurance < 1000) {
$start = substr($string, 0, $pos['start']['open']);
$subject = substr($string, $pos['start']['open'], $pos['end']['close'] - $pos['start']['open']);
$end = substr($string, $pos['end']['close']);
if($end === false)
$end = '';
$subject = preg_replace($pattern, $replace, $subject);
$string = $start . $subject . $end;
$occurance++;
$pos = get_bb_tag_pos($string, $name, $occurance);
}
return $string;
}
// we don't want to support a bbcode specific markdown interpreter // we don't want to support a bbcode specific markdown interpreter
// and the markdown library we have is pretty good, but provides HTML output. // and the markdown library we have is pretty good, but provides HTML output.
// So we'll use that to convert to HTML, then convert the HTML back to bbcode, // So we'll use that to convert to HTML, then convert the HTML back to bbcode,
@ -113,7 +54,7 @@ function diaspora2bb($s) {
//$s = preg_replace("/([^\]\=]|^)(https?\:\/\/)(vimeo|youtu|www\.youtube|soundcloud)([a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism", '$1[url=$2$3$4]$2$3$4[/url]',$s); //$s = preg_replace("/([^\]\=]|^)(https?\:\/\/)(vimeo|youtu|www\.youtube|soundcloud)([a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism", '$1[url=$2$3$4]$2$3$4[/url]',$s);
$s = bb_tag_preg_replace("/\[url\=?(.*?)\]https?:\/\/www.youtube.com\/watch\?v\=(.*?)\[\/url\]/ism",'[youtube]$2[/youtube]','url',$s); $s = bb_tag_preg_replace("/\[url\=?(.*?)\]https?:\/\/www.youtube.com\/watch\?v\=(.*?)\[\/url\]/ism",'[youtube]$2[/youtube]','url',$s);
$s = bb_tag_preg_replace("/\[url\=https?:\/\/www.youtube.com\/watch\?v\=(.*?)\].*?\[\/url\]/ism",'[youtube]$1[/youtube]','url',$s); $s = bb_tag_preg_replace("/\[url\=https?:\/\/www.youtube.com\/watch\?v\=(.*?)\].*?\[\/url\]/ism",'[youtube]$1[/youtube]','url',$s);
$s = bb_tag_preg_replace("/\[url\=?(.*?)\]https?:\/ \/vimeo.com\/([0-9]+)(.*?)\[\/url\]/ism",'[vimeo]$2[/vimeo]','url',$s); $s = bb_tag_preg_replace("/\[url\=?(.*?)\]https?:\/\/vimeo.com\/([0-9]+)(.*?)\[\/url\]/ism",'[vimeo]$2[/vimeo]','url',$s);
$s = bb_tag_preg_replace("/\[url\=https?:\/\/vimeo.com\/([0-9]+)\](.*?)\[\/url\]/ism",'[vimeo]$1[/vimeo]','url',$s); $s = bb_tag_preg_replace("/\[url\=https?:\/\/vimeo.com\/([0-9]+)\](.*?)\[\/url\]/ism",'[vimeo]$1[/vimeo]','url',$s);
// remove duplicate adjacent code tags // remove duplicate adjacent code tags
$s = preg_replace("/(\[code\])+(.*?)(\[\/code\])+/ism","[code]$2[/code]", $s); $s = preg_replace("/(\[code\])+(.*?)(\[\/code\])+/ism","[code]$2[/code]", $s);

View file

@ -47,6 +47,65 @@ function bb_unspacefy_and_trim($st) {
return $unspacefied; return $unspacefied;
} }
function get_bb_tag_pos($s, $name, $occurance = 1) {
if($occurance < 1)
$occurance = 1;
$start_open = -1;
for($i = 1; $i <= $occurance; $i++) {
if( $start_open !== false)
$start_open = strpos($s, '[' . $name, $start_open + 1); // allow [name= type tags
}
if( $start_open === false)
return false;
$start_equal = strpos($s, '=', $start_open);
$start_close = strpos($s, ']', $start_open);
if( $start_close === false)
return false;
$start_close++;
$end_open = strpos($s, '[/' . $name . ']', $start_close);
if( $end_open === false)
return false;
$res = array( 'start' => array('open' => $start_open, 'close' => $start_close),
'end' => array('open' => $end_open, 'close' => $end_open + strlen('[/' . $name . ']')) );
if( $start_equal !== false)
$res['start']['equal'] = $start_equal + 1;
return $res;
}
function bb_tag_preg_replace($pattern, $replace, $name, $s) {
$string = $s;
$occurance = 1;
$pos = get_bb_tag_pos($string, $name, $occurance);
while($pos !== false && $occurance < 1000) {
$start = substr($string, 0, $pos['start']['open']);
$subject = substr($string, $pos['start']['open'], $pos['end']['close'] - $pos['start']['open']);
$end = substr($string, $pos['end']['close']);
if($end === false)
$end = '';
$subject = preg_replace($pattern, $replace, $subject);
$string = $start . $subject . $end;
$occurance++;
$pos = get_bb_tag_pos($string, $name, $occurance);
}
return $string;
}
if(! function_exists('bb_extract_images')) { if(! function_exists('bb_extract_images')) {
function bb_extract_images($body) { function bb_extract_images($body) {

View file

@ -1,5 +1,8 @@
<?php <?php
require_once("include/bbcode.php");
// Note: the code in 'item_extract_images' and 'item_redir_and_replace_images' // Note: the code in 'item_extract_images' and 'item_redir_and_replace_images'
// is identical to the code in mod/message.php for 'item_extract_images' and // is identical to the code in mod/message.php for 'item_extract_images' and
// 'item_redir_and_replace_images' // 'item_redir_and_replace_images'
@ -51,19 +54,27 @@ function item_redir_and_replace_images($body, $images, $cid) {
$origbody = $body; $origbody = $body;
$newbody = ''; $newbody = '';
for($i = 0; $i < count($images); $i++) { $cnt = 1;
$search = '/\[url\=(.*?)\]\[!#saved_image' . $i . '#!\]\[\/url\]' . '/is'; $pos = get_bb_tag_pos($origbody, 'url', 1);
while($pos !== false && $cnt < 1000) {
$search = '/\[url\=(.*?)\]\[!#saved_image([0-9]*)#!\]\[\/url\]' . '/is';
$replace = '[url=' . z_path() . '/redir/' . $cid $replace = '[url=' . z_path() . '/redir/' . $cid
. '?f=1&url=' . '$1' . '][!#saved_image' . $i . '#!][/url]' ; . '?f=1&url=' . '$1' . '][!#saved_image' . '$2' .'#!][/url]';
$img_end = strpos($origbody, '[!#saved_image' . $i . '#!][/url]') + strlen('[!#saved_image' . $i . '#!][/url]'); $newbody .= substr($origbody, 0, $pos['start']['open']);
$process_part = substr($origbody, 0, $img_end); $subject = substr($origbody, $pos['start']['open'], $pos['end']['close'] - $pos['start']['open']);
$origbody = substr($origbody, $img_end); $origbody = substr($origbody, $pos['end']['close']);
if($origbody === false)
$origbody = '';
$process_part = preg_replace($search, $replace, $process_part); $subject = preg_replace($search, $replace, $subject);
$newbody = $newbody . $process_part; $newbody .= $subject;
$cnt++;
$pos = get_bb_tag_pos($origbody, 'url', 1);
} }
$newbody = $newbody . $origbody; $newbody .= $origbody;
$cnt = 0; $cnt = 0;
foreach($images as $image) { foreach($images as $image) {
@ -293,10 +304,10 @@ function localize_item(&$item){
} }
// add zrl's to public images // add zrl's to public images
if(preg_match_all('/\[url=(.*?)\/photos\/(.*?)\/image\/(.*?)\]\[img(.*?)\]h(.*?)\[\/img\]\[\/url\]/is',$item['body'],$matches,PREG_SET_ORDER)) { $photo_pattern = "/\[url=(.*?)\/photos\/(.*?)\/image\/(.*?)\]\[img(.*?)\]h(.*?)\[\/img\]\[\/url\]/is";
foreach($matches as $mtch) { if(preg_match($photo_pattern,$item['body'])) {
$item['body'] = str_replace($mtch[0],'[url=' . zrl($mtch[1] . '/photos/' . $mtch[2] . '/image/' . $mtch[3] ,true) . '][img' . $mtch[4] . ']h' . $mtch[5] . '[/img][/url]',$item['body']); $photo_replace = '[url=' . zrl('$1' . '/photos/' . '$2' . '/image/' . '$3' ,true) . '][img' . '$4' . ']h' . '$5' . '[/img][/url]';
} $item['body'] = bb_tag_preg_replace($photo_pattern, $photo_replace, 'url', $item['body']);
} }
// add sparkle links to appropriate permalinks // add sparkle links to appropriate permalinks