From ee45dee9324be48cd87a883405bdd9e11e3f39f5 Mon Sep 17 00:00:00 2001 From: Friendika Date: Wed, 2 Feb 2011 14:48:27 -0800 Subject: [PATCH] suppress some scraping errors when confronted with hybrid/strange feeds that provide insufficient content-type and choke the html parser. --- boot.php | 3 +++ include/Scrape.php | 20 ++++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/boot.php b/boot.php index e47f1834d..0dd507168 100644 --- a/boot.php +++ b/boot.php @@ -1366,6 +1366,7 @@ function lrdd($uri) { else { $html = fetch_url($uri); $headers = $a->get_curl_headers(); + logger('lrdd: headers=' . $headers, LOGGER_DEBUG); $lines = explode("\n",$headers); if(count($lines)) { foreach($lines as $line) { @@ -1377,6 +1378,8 @@ function lrdd($uri) { // don't try and run feeds through the html5 parser if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml')))) return array(); + if(stristr($html,'