From 8acc76a3c6fa3fe5401811fcd90a20a69388a850 Mon Sep 17 00:00:00 2001 From: Friendika Date: Tue, 1 Feb 2011 05:10:04 -0800 Subject: [PATCH] provide ability to survive transport mangling when travelling through html networks --- include/items.php | 40 ++++++++++++++++++++++++++++++---------- include/salmon.php | 14 ++++++-------- 2 files changed, 36 insertions(+), 18 deletions(-) diff --git a/include/items.php b/include/items.php index f204745b..5747d301 100644 --- a/include/items.php +++ b/include/items.php @@ -2,6 +2,7 @@ require_once('bbcode.php'); require_once('oembed.php'); +require_once('include/salmon.php'); function get_feed_for(&$a, $dfrn_id, $owner_nick, $last_update, $direction = 0) { @@ -376,6 +377,21 @@ function get_atom_elements($feed,$item) { } + /** + * If there's a copy of the body content which is guaranteed to have survived mangling in transit, use it. + */ + + $have_real_body = false; + + $rawenv = $item->get_item_tags(NAMESPACE_DFRN, 'env'); + if($rawenv) { + $have_real_body = true; + $res['body'] = $rawenv[0]['data']; + $res['body'] = str_replace(array(' ',"\t","\r","\n"), array('','','',''),$res['body']); + $res['body'] = base64url_decode($res['body']); + $res['realbody'] = true; + } + $maxlen = get_max_import_size(); if($maxlen && (strlen($res['body']) > $maxlen)) $res['body'] = substr($res['body'],0, $maxlen); @@ -391,7 +407,7 @@ function get_atom_elements($feed,$item) { // html. - if((strpos($res['body'],'<')) || (strpos($res['body'],'>'))) { + if((! $have_real_body) || (strpos($res['body'],'<')) || (strpos($res['body'],'>'))) { $res['body'] = preg_replace('#]+>.+?' . 'http://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?#s', '[youtube]$1[/youtube]', $res['body']); @@ -643,18 +659,21 @@ function item_store($arr) { $arr['private'] = ((x($arr,'private')) ? intval($arr['private']) : 0 ); $arr['body'] = ((x($arr,'body')) ? escape_tags(trim($arr['body'])) : ''); - // The content body has been through a lot of filtering and transport escaping by now. + // The content body may have been through a lot of filtering and transport escaping by now. // We don't want to skip any filters, however a side effect of all this filtering // is that ampersands and <> may have been double encoded, depending on which filter chain - // they came through. - - $arr['body'] = str_replace( - array('&amp;', '&gt;', '&lt;', '&quot;'), - array('&' , '>' , '<', '"'), - $arr['body'] - ); - + // they came through. The presence of $res['realbody'] means we have something encoded in a + // transport safe manner at the source and does not require any filter corrections. + if(x($arr,'realbody')) + unset($arr['realbody']); + else { + $arr['body'] = str_replace( + array('&amp;', '&gt;', '&lt;', '&quot;'), + array('&' , '>' , '<', '"'), + $arr['body'] + ); + } if($arr['parent-uri'] === $arr['uri']) { $parent_id = 0; @@ -1421,6 +1440,7 @@ function atom_entry($item,$type,$author,$owner,$comment = false) { $o .= '' . xmlify($item['title']) . '' . "\r\n"; $o .= '' . xmlify(datetime_convert('UTC','UTC',$item['created'] . '+00:00',ATOM_TIME)) . '' . "\r\n"; $o .= '' . xmlify(datetime_convert('UTC','UTC',$item['edited'] . '+00:00',ATOM_TIME)) . '' . "\r\n"; + $o .= '' . base64url_encode($item['body'], true) . '' . "\r\n"; $o .= '' . xmlify(($type === 'html') ? bbcode($item['body']) : $item['body']) . '' . "\r\n"; $o .= '' . "\r\n"; if($comment) diff --git a/include/salmon.php b/include/salmon.php index 49384efe..8a56882a 100644 --- a/include/salmon.php +++ b/include/salmon.php @@ -18,15 +18,13 @@ function salmon_key($pubkey) { } -function base64url_encode($s) { +function base64url_encode($s, $strip_padding = false) { + $s = strtr(base64_encode($s),'+/','-_'); -/* - * // placeholder for un-padded base64url_encode - * // per latest salmon rev - * - * $s = str_replace('=','',$s); - * - */ + + if($strip_padding) + $s = str_replace('=','',$s); + return $s; }