From b4f2aac7e76d8d8a8330ade4116a015593f001a6 Mon Sep 17 00:00:00 2001 From: Friendika Date: Tue, 1 Feb 2011 14:09:47 -0800 Subject: [PATCH 1/7] truncate long names --- mod/contacts.php | 2 +- mod/viewcontacts.php | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mod/contacts.php b/mod/contacts.php index 4c627c88f9..61d9ce3986 100644 --- a/mod/contacts.php +++ b/mod/contacts.php @@ -367,7 +367,7 @@ function contacts_content(&$a) { '$alt_text' => $alt_text, '$dir_icon' => $dir_icon, '$thumb' => $rr['thumb'], - '$name' => $rr['name'], + '$name' => substr($rr['name'],0,20), '$sparkle' => $sparkle, '$url' => $url )); diff --git a/mod/viewcontacts.php b/mod/viewcontacts.php index bd73b2ffbe..90ff85b9db 100644 --- a/mod/viewcontacts.php +++ b/mod/viewcontacts.php @@ -43,7 +43,7 @@ function viewcontacts_content(&$a) { '$id' => $rr['id'], '$alt_text' => t('Visit ') . $rr['name'] . t('\'s profile'), '$thumb' => $rr['thumb'], - '$name' => $rr['name'], + '$name' => substr($rr['name'],0,20), '$url' => $rr['url'] )); } From 8c5c07b2654f5ff28cea1a7012198f74c5b1af03 Mon Sep 17 00:00:00 2001 From: Friendika Date: Tue, 1 Feb 2011 14:55:29 -0800 Subject: [PATCH 2/7] don't try to scrape atom/rss feeds. missing param in contact store (mod_follow) --- boot.php | 3 +++ include/Scrape.php | 58 ++++++++++++++++++++++++++++++++++++++++++++++ mod/follow.php | 2 +- 3 files changed, 62 insertions(+), 1 deletion(-) diff --git a/boot.php b/boot.php index ac010c19de..6bd82bb082 100644 --- a/boot.php +++ b/boot.php @@ -1374,6 +1374,9 @@ function lrdd($uri) { $link = $matches[1]; break; } + // don't try and run feeds through the html5 parser + if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml')))) + return array(); } } if(! isset($link)) { diff --git a/include/Scrape.php b/include/Scrape.php index e4f7a0878c..bb42c3bdd3 100644 --- a/include/Scrape.php +++ b/include/Scrape.php @@ -5,12 +5,25 @@ require_once('library/HTML5/Parser.php'); if(! function_exists('scrape_dfrn')) { function scrape_dfrn($url) { + $a = get_app(); + $ret = array(); $s = fetch_url($url); if(! $s) return $ret; + $headers = $a->get_curl_headers(); + $lines = explode("\n",$headers); + if(count($lines)) { + foreach($lines as $line) { + // don't try and run feeds through the html5 parser + if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml')))) + return ret; + } + } + + $dom = HTML5_Parser::parse($s); if(! $dom) @@ -77,12 +90,26 @@ function validate_dfrn($a) { if(! function_exists('scrape_meta')) { function scrape_meta($url) { + $a = get_app(); + $ret = array(); $s = fetch_url($url); if(! $s) return $ret; + $headers = $a->get_curl_headers(); + $lines = explode("\n",$headers); + if(count($lines)) { + foreach($lines as $line) { + // don't try and run feeds through the html5 parser + if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml')))) + return ret; + } + } + + + $dom = HTML5_Parser::parse($s); if(! $dom) @@ -105,12 +132,24 @@ function scrape_meta($url) { if(! function_exists('scrape_vcard')) { function scrape_vcard($url) { + $a = get_app(); + $ret = array(); $s = fetch_url($url); if(! $s) return $ret; + $headers = $a->get_curl_headers(); + $lines = explode("\n",$headers); + if(count($lines)) { + foreach($lines as $line) { + // don't try and run feeds through the html5 parser + if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml')))) + return ret; + } + } + $dom = HTML5_Parser::parse($s); if(! $dom) @@ -142,12 +181,31 @@ function scrape_vcard($url) { if(! function_exists('scrape_feed')) { function scrape_feed($url) { + $a = get_app(); + $ret = array(); $s = fetch_url($url); if(! $s) return $ret; + $headers = $a->get_curl_headers(); + $lines = explode("\n",$headers); + if(count($lines)) { + foreach($lines as $line) { + if(stristr($line,'content-type:')) { + if(stristr($line,'application/atom+xml')) { + $ret['feed_atom'] = $url; + return $ret; + } + if(stristr($line,'application/rss+xml')) { + $ret['feed_rss'] = $url; + return ret; + } + } + } + } + $dom = HTML5_Parser::parse($s); if(! $dom) diff --git a/mod/follow.php b/mod/follow.php index 62ba2585c2..c5f6d9e3fc 100644 --- a/mod/follow.php +++ b/mod/follow.php @@ -161,7 +161,7 @@ function follow_post(&$a) { // create contact record $r = q("INSERT INTO `contact` ( `uid`, `created`, `url`, `notify`, `poll`, `name`, `nick`, `photo`, `network`, `rel`, `priority`, `blocked`, `readonly`, `pending` ) - VALUES ( %d, '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', %d, 0, 0, 0 ) ", + VALUES ( %d, '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', %d, %d, 0, 0, 0 ) ", intval(local_user()), dbesc(datetime_convert()), dbesc($profile), From 6a4ae2e845f6f9897574833ced287a3a316119b0 Mon Sep 17 00:00:00 2001 From: Friendika Date: Tue, 1 Feb 2011 16:50:19 -0800 Subject: [PATCH 3/7] get correct article timestamp for feeds with no 'published' and with non-utc stamps --- include/items.php | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/items.php b/include/items.php index 5747d301df..ebb7b7af0b 100644 --- a/include/items.php +++ b/include/items.php @@ -455,14 +455,16 @@ function get_atom_elements($feed,$item) { $rawedited = $item->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10,'updated'); if($rawedited) - $res['edited'] = unxmlify($rawcreated[0]['data']); + $res['edited'] = unxmlify($rawedited[0]['data']); + if((x($res,'edited')) && (! (x($res,'created')))) + $res['created = $res['edited']; if(! $res['created']) - $res['created'] = $item->get_date(); + $res['created'] = $item->get_date('c'); if(! $res['edited']) - $res['edited'] = $item->get_date(); + $res['edited'] = $item->get_date('c'); $rawowner = $item->get_item_tags(NAMESPACE_DFRN, 'owner'); From 968e8bb9c4a1183483e487a1c1ac557fa9c21ad0 Mon Sep 17 00:00:00 2001 From: Friendika Date: Tue, 1 Feb 2011 17:05:17 -0800 Subject: [PATCH 4/7] typo --- include/items.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/items.php b/include/items.php index ebb7b7af0b..d06cc30cd4 100644 --- a/include/items.php +++ b/include/items.php @@ -458,7 +458,7 @@ function get_atom_elements($feed,$item) { $res['edited'] = unxmlify($rawedited[0]['data']); if((x($res,'edited')) && (! (x($res,'created')))) - $res['created = $res['edited']; + $res['created'] = $res['edited']; if(! $res['created']) $res['created'] = $item->get_date('c'); From bafae56b39d018dd2ef778399fd753302e0ab8c3 Mon Sep 17 00:00:00 2001 From: Friendika Date: Tue, 1 Feb 2011 17:50:49 -0800 Subject: [PATCH 5/7] make sure content is unaltered --- include/items.php | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/include/items.php b/include/items.php index d06cc30cd4..d091e4c038 100644 --- a/include/items.php +++ b/include/items.php @@ -388,7 +388,8 @@ function get_atom_elements($feed,$item) { $have_real_body = true; $res['body'] = $rawenv[0]['data']; $res['body'] = str_replace(array(' ',"\t","\r","\n"), array('','','',''),$res['body']); - $res['body'] = base64url_decode($res['body']); + // make sure nobody is trying to sneak some html tags by us + $res['body'] = notags(base64url_decode($res['body'])); $res['realbody'] = true; } @@ -407,27 +408,29 @@ function get_atom_elements($feed,$item) { // html. - if((! $have_real_body) || (strpos($res['body'],'<')) || (strpos($res['body'],'>'))) { + if(! $have_real_body) { + if((strpos($res['body'],'<')) || (strpos($res['body'],'>'))) { - $res['body'] = preg_replace('#]+>.+?' . 'http://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?#s', - '[youtube]$1[/youtube]', $res['body']); + $res['body'] = preg_replace('#]+>.+?' . 'http://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?#s', + '[youtube]$1[/youtube]', $res['body']); - $res['body'] = oembed_html2bbcode($res['body']); + $res['body'] = oembed_html2bbcode($res['body']); - $config = HTMLPurifier_Config::createDefault(); - $config->set('Cache.DefinitionImpl', null); + $config = HTMLPurifier_Config::createDefault(); + $config->set('Cache.DefinitionImpl', null); - // we shouldn't need a whitelist, because the bbcode converter - // will strip out any unsupported tags. - // $config->set('HTML.Allowed', 'p,b,a[href],i'); + // we shouldn't need a whitelist, because the bbcode converter + // will strip out any unsupported tags. + // $config->set('HTML.Allowed', 'p,b,a[href],i'); - $purifier = new HTMLPurifier($config); - $res['body'] = $purifier->purify($res['body']); + $purifier = new HTMLPurifier($config); + $res['body'] = $purifier->purify($res['body']); - $res['body'] = html2bbcode($res['body']); + $res['body'] = html2bbcode($res['body']); + } + else + $res['body'] = escape_tags($res['body']); } - else - $res['body'] = escape_tags($res['body']); $allow = $item->get_item_tags(NAMESPACE_DFRN,'comment-allow'); From 22acba24609e1db2430755a799d127946e748f06 Mon Sep 17 00:00:00 2001 From: Friendika Date: Tue, 1 Feb 2011 18:20:25 -0800 Subject: [PATCH 6/7] trying to solve the double encoding issue --- include/items.php | 63 +++++++++++++++++------------------------------ 1 file changed, 22 insertions(+), 41 deletions(-) diff --git a/include/items.php b/include/items.php index d091e4c038..c29ad9e440 100644 --- a/include/items.php +++ b/include/items.php @@ -390,7 +390,6 @@ function get_atom_elements($feed,$item) { $res['body'] = str_replace(array(' ',"\t","\r","\n"), array('','','',''),$res['body']); // make sure nobody is trying to sneak some html tags by us $res['body'] = notags(base64url_decode($res['body'])); - $res['realbody'] = true; } $maxlen = get_max_import_size(); @@ -408,31 +407,26 @@ function get_atom_elements($feed,$item) { // html. - if(! $have_real_body) { - if((strpos($res['body'],'<')) || (strpos($res['body'],'>'))) { + if((strpos($res['body'],'<') !== false) || (strpos($res['body'],'>') !== false)) { - $res['body'] = preg_replace('#]+>.+?' . 'http://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?#s', - '[youtube]$1[/youtube]', $res['body']); + $res['body'] = preg_replace('#]+>.+?' . 'http://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?#s', + '[youtube]$1[/youtube]', $res['body']); - $res['body'] = oembed_html2bbcode($res['body']); + $res['body'] = oembed_html2bbcode($res['body']); - $config = HTMLPurifier_Config::createDefault(); - $config->set('Cache.DefinitionImpl', null); + $config = HTMLPurifier_Config::createDefault(); + $config->set('Cache.DefinitionImpl', null); - // we shouldn't need a whitelist, because the bbcode converter - // will strip out any unsupported tags. - // $config->set('HTML.Allowed', 'p,b,a[href],i'); + // we shouldn't need a whitelist, because the bbcode converter + // will strip out any unsupported tags. + // $config->set('HTML.Allowed', 'p,b,a[href],i'); - $purifier = new HTMLPurifier($config); - $res['body'] = $purifier->purify($res['body']); + $purifier = new HTMLPurifier($config); + $res['body'] = $purifier->purify($res['body']); - $res['body'] = html2bbcode($res['body']); - } - else - $res['body'] = escape_tags($res['body']); + $res['body'] = html2bbcode($res['body']); } - $allow = $item->get_item_tags(NAMESPACE_DFRN,'comment-allow'); if($allow && $allow[0]['data'] == 1) $res['last-child'] = 1; @@ -531,7 +525,7 @@ function get_atom_elements($feed,$item) { $body = $rawobj[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['summary'][0]['data']; // preserve a copy of the original body content in case we later need to parse out any microformat information, e.g. events $res['object'] .= '' . xmlify($body) . '' . "\n"; - if((strpos($body,'<')) || (strpos($body,'>'))) { + if((strpos($body,'<') !== false) || (strpos($body,'>') !== false)) { $body = preg_replace('#]+>.+?' . 'http://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?#s', '[youtube]$1[/youtube]', $body); @@ -543,8 +537,6 @@ function get_atom_elements($feed,$item) { $body = $purifier->purify($body); $body = html2bbcode($body); } - else - $body = escape_tags($body); $res['object'] .= '' . $body . '' . "\n"; } @@ -572,7 +564,7 @@ function get_atom_elements($feed,$item) { $body = $rawobj[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['summary'][0]['data']; // preserve a copy of the original body content in case we later need to parse out any microformat information, e.g. events $res['object'] .= '' . xmlify($body) . '' . "\n"; - if((strpos($body,'<')) || (strpos($body,'>'))) { + if((strpos($body,'<') !== false) || (strpos($body,'>') !== false)) { $body = preg_replace('#]+>.+?' . 'http://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?#s', '[youtube]$1[/youtube]', $body); @@ -584,8 +576,6 @@ function get_atom_elements($feed,$item) { $body = $purifier->purify($body); $body = html2bbcode($body); } - else - $body = escape_tags($body); $res['target'] .= '' . $body . '' . "\n"; } @@ -634,6 +624,13 @@ function item_store($arr) { if(! x($arr,'type')) $arr['type'] = 'remote'; + + // Shouldn't happen but we want to make absolutely sure it doesn't leak from a plugin. + + if((strpos($arr['body'],'<') !== false) || (strpos($arr['body'],'>') !== false)) + $arr['body'] = strip_tags($arr['body']); + + $arr['wall'] = ((x($arr,'wall')) ? intval($arr['wall']) : 0); $arr['uri'] = ((x($arr,'uri')) ? notags(trim($arr['uri'])) : random_string()); $arr['author-name'] = ((x($arr,'author-name')) ? notags(trim($arr['author-name'])) : ''); @@ -662,23 +659,7 @@ function item_store($arr) { $arr['deny_cid'] = ((x($arr,'deny_cid')) ? trim($arr['deny_cid']) : ''); $arr['deny_gid'] = ((x($arr,'deny_gid')) ? trim($arr['deny_gid']) : ''); $arr['private'] = ((x($arr,'private')) ? intval($arr['private']) : 0 ); - $arr['body'] = ((x($arr,'body')) ? escape_tags(trim($arr['body'])) : ''); - - // The content body may have been through a lot of filtering and transport escaping by now. - // We don't want to skip any filters, however a side effect of all this filtering - // is that ampersands and <> may have been double encoded, depending on which filter chain - // they came through. The presence of $res['realbody'] means we have something encoded in a - // transport safe manner at the source and does not require any filter corrections. - - if(x($arr,'realbody')) - unset($arr['realbody']); - else { - $arr['body'] = str_replace( - array('&amp;', '&gt;', '&lt;', '&quot;'), - array('&' , '>' , '<', '"'), - $arr['body'] - ); - } + $arr['body'] = ((x($arr,'body')) ? trim($arr['body']) : ''); if($arr['parent-uri'] === $arr['uri']) { $parent_id = 0; From 777c47aaa697ff46cf554fb4f36d0c252ae42e94 Mon Sep 17 00:00:00 2001 From: Friendika Date: Tue, 1 Feb 2011 20:23:58 -0800 Subject: [PATCH 7/7] silence a few notice warnings --- mod/network.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mod/network.php b/mod/network.php index 80edca1d02..2ec06fe614 100644 --- a/mod/network.php +++ b/mod/network.php @@ -38,11 +38,11 @@ function network_content(&$a, $update = 0) { $group = 0; + $nouveau = false; + if(! $update) { $o .= ''; - $nouveau = false; - if(($a->argc > 2) && $a->argv[2] === 'new') $nouveau = true;