From 22acba24609e1db2430755a799d127946e748f06 Mon Sep 17 00:00:00 2001 From: Friendika Date: Tue, 1 Feb 2011 18:20:25 -0800 Subject: [PATCH] trying to solve the double encoding issue --- include/items.php | 63 +++++++++++++++++------------------------------ 1 file changed, 22 insertions(+), 41 deletions(-) diff --git a/include/items.php b/include/items.php index d091e4c038..c29ad9e440 100644 --- a/include/items.php +++ b/include/items.php @@ -390,7 +390,6 @@ function get_atom_elements($feed,$item) { $res['body'] = str_replace(array(' ',"\t","\r","\n"), array('','','',''),$res['body']); // make sure nobody is trying to sneak some html tags by us $res['body'] = notags(base64url_decode($res['body'])); - $res['realbody'] = true; } $maxlen = get_max_import_size(); @@ -408,31 +407,26 @@ function get_atom_elements($feed,$item) { // html. - if(! $have_real_body) { - if((strpos($res['body'],'<')) || (strpos($res['body'],'>'))) { + if((strpos($res['body'],'<') !== false) || (strpos($res['body'],'>') !== false)) { - $res['body'] = preg_replace('#]+>.+?' . 'http://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?#s', - '[youtube]$1[/youtube]', $res['body']); + $res['body'] = preg_replace('#]+>.+?' . 'http://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?#s', + '[youtube]$1[/youtube]', $res['body']); - $res['body'] = oembed_html2bbcode($res['body']); + $res['body'] = oembed_html2bbcode($res['body']); - $config = HTMLPurifier_Config::createDefault(); - $config->set('Cache.DefinitionImpl', null); + $config = HTMLPurifier_Config::createDefault(); + $config->set('Cache.DefinitionImpl', null); - // we shouldn't need a whitelist, because the bbcode converter - // will strip out any unsupported tags. - // $config->set('HTML.Allowed', 'p,b,a[href],i'); + // we shouldn't need a whitelist, because the bbcode converter + // will strip out any unsupported tags. + // $config->set('HTML.Allowed', 'p,b,a[href],i'); - $purifier = new HTMLPurifier($config); - $res['body'] = $purifier->purify($res['body']); + $purifier = new HTMLPurifier($config); + $res['body'] = $purifier->purify($res['body']); - $res['body'] = html2bbcode($res['body']); - } - else - $res['body'] = escape_tags($res['body']); + $res['body'] = html2bbcode($res['body']); } - $allow = $item->get_item_tags(NAMESPACE_DFRN,'comment-allow'); if($allow && $allow[0]['data'] == 1) $res['last-child'] = 1; @@ -531,7 +525,7 @@ function get_atom_elements($feed,$item) { $body = $rawobj[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['summary'][0]['data']; // preserve a copy of the original body content in case we later need to parse out any microformat information, e.g. events $res['object'] .= '' . xmlify($body) . '' . "\n"; - if((strpos($body,'<')) || (strpos($body,'>'))) { + if((strpos($body,'<') !== false) || (strpos($body,'>') !== false)) { $body = preg_replace('#]+>.+?' . 'http://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?#s', '[youtube]$1[/youtube]', $body); @@ -543,8 +537,6 @@ function get_atom_elements($feed,$item) { $body = $purifier->purify($body); $body = html2bbcode($body); } - else - $body = escape_tags($body); $res['object'] .= '' . $body . '' . "\n"; } @@ -572,7 +564,7 @@ function get_atom_elements($feed,$item) { $body = $rawobj[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['summary'][0]['data']; // preserve a copy of the original body content in case we later need to parse out any microformat information, e.g. events $res['object'] .= '' . xmlify($body) . '' . "\n"; - if((strpos($body,'<')) || (strpos($body,'>'))) { + if((strpos($body,'<') !== false) || (strpos($body,'>') !== false)) { $body = preg_replace('#]+>.+?' . 'http://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?#s', '[youtube]$1[/youtube]', $body); @@ -584,8 +576,6 @@ function get_atom_elements($feed,$item) { $body = $purifier->purify($body); $body = html2bbcode($body); } - else - $body = escape_tags($body); $res['target'] .= '' . $body . '' . "\n"; } @@ -634,6 +624,13 @@ function item_store($arr) { if(! x($arr,'type')) $arr['type'] = 'remote'; + + // Shouldn't happen but we want to make absolutely sure it doesn't leak from a plugin. + + if((strpos($arr['body'],'<') !== false) || (strpos($arr['body'],'>') !== false)) + $arr['body'] = strip_tags($arr['body']); + + $arr['wall'] = ((x($arr,'wall')) ? intval($arr['wall']) : 0); $arr['uri'] = ((x($arr,'uri')) ? notags(trim($arr['uri'])) : random_string()); $arr['author-name'] = ((x($arr,'author-name')) ? notags(trim($arr['author-name'])) : ''); @@ -662,23 +659,7 @@ function item_store($arr) { $arr['deny_cid'] = ((x($arr,'deny_cid')) ? trim($arr['deny_cid']) : ''); $arr['deny_gid'] = ((x($arr,'deny_gid')) ? trim($arr['deny_gid']) : ''); $arr['private'] = ((x($arr,'private')) ? intval($arr['private']) : 0 ); - $arr['body'] = ((x($arr,'body')) ? escape_tags(trim($arr['body'])) : ''); - - // The content body may have been through a lot of filtering and transport escaping by now. - // We don't want to skip any filters, however a side effect of all this filtering - // is that ampersands and <> may have been double encoded, depending on which filter chain - // they came through. The presence of $res['realbody'] means we have something encoded in a - // transport safe manner at the source and does not require any filter corrections. - - if(x($arr,'realbody')) - unset($arr['realbody']); - else { - $arr['body'] = str_replace( - array('&amp;', '&gt;', '&lt;', '&quot;'), - array('&' , '>' , '<', '"'), - $arr['body'] - ); - } + $arr['body'] = ((x($arr,'body')) ? trim($arr['body']) : ''); if($arr['parent-uri'] === $arr['uri']) { $parent_id = 0;