From aa284a2ae60cca7f2f80013801999ad8a6596d84 Mon Sep 17 00:00:00 2001 From: Michael Date: Tue, 17 Oct 2017 09:10:19 +0000 Subject: [PATCH 1/4] Enhance feed information --- include/feed.php | 56 ++++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/include/feed.php b/include/feed.php index 1aab26e1df..db6f631d45 100644 --- a/include/feed.php +++ b/include/feed.php @@ -319,6 +319,30 @@ function feed_import($xml,$importer,&$contact, &$hub, $simulate = false) { $item["attach"] .= '[attach]href="'.$href.'" length="'.$length.'" type="'.$type.'"[/attach]'; } + $body = trim($xpath->evaluate('atom:content/text()', $entry)->item(0)->nodeValue); + + if ($body == "") { + $body = trim($xpath->evaluate('content:encoded/text()', $entry)->item(0)->nodeValue); + } + if ($body == "") { + $body = trim($xpath->evaluate('description/text()', $entry)->item(0)->nodeValue); + } + if ($body == "") { + $body = trim($xpath->evaluate('atom:summary/text()', $entry)->item(0)->nodeValue); + } + + // remove the content of the title if it is identically to the body + // This helps with auto generated titles e.g. from tumblr + if (title_is_body($item["title"], $body)) { + $item["title"] = ""; + } + $item["body"] = html2bbcode($body, $basepath); + + if (($item["body"] == '') && ($item["title"] != '')) { + $item["body"] = $item["title"]; + $item["title"] = ''; + } + if ($contact["fetch_further_information"]) { $preview = ""; @@ -329,36 +353,16 @@ function feed_import($xml,$importer,&$contact, &$hub, $simulate = false) { } } - $item["body"] = $item["title"].add_page_info($item["plink"], false, $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"]); + if (strlen($item["title"]) > strlen($item["body"])) { +//echo "*".strlen($item["title"]).">".strlen($item["body"])."\n"; + $item["body"] = $item["title"]; + $item["title"] = ""; + } + $item["body"] = $item["body"].add_page_info($item["plink"], false, $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"]); $item["tag"] = add_page_keywords($item["plink"], false, $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"]); - $item["title"] = ""; $item["object-type"] = ACTIVITY_OBJ_BOOKMARK; unset($item["attach"]); } else { - $body = trim($xpath->evaluate('atom:content/text()', $entry)->item(0)->nodeValue); - - if ($body == "") { - $body = trim($xpath->evaluate('content:encoded/text()', $entry)->item(0)->nodeValue); - } - if ($body == "") { - $body = trim($xpath->evaluate('description/text()', $entry)->item(0)->nodeValue); - } - if ($body == "") { - $body = trim($xpath->evaluate('atom:summary/text()', $entry)->item(0)->nodeValue); - } - - // remove the content of the title if it is identically to the body - // This helps with auto generated titles e.g. from tumblr - if (title_is_body($item["title"], $body)) { - $item["title"] = ""; - } - $item["body"] = html2bbcode($body, $basepath); - - if (($item["body"] == '') && ($item["title"] != '')) { - $item["body"] = $item["title"]; - $item["title"] = ''; - } - if (!strstr($item["body"], '[url') && ($item['plink'] != '')) { $item["body"] .= "[hr][url]".$item['plink']."[/url]"; } From 7c658439da1fffe54859fe0b8814943fdc53c932 Mon Sep 17 00:00:00 2001 From: Michael Date: Tue, 17 Oct 2017 09:14:10 +0000 Subject: [PATCH 2/4] Removed unneeded stuff --- include/feed.php | 1 - 1 file changed, 1 deletion(-) diff --git a/include/feed.php b/include/feed.php index db6f631d45..6c0b3b145a 100644 --- a/include/feed.php +++ b/include/feed.php @@ -354,7 +354,6 @@ function feed_import($xml,$importer,&$contact, &$hub, $simulate = false) { } if (strlen($item["title"]) > strlen($item["body"])) { -//echo "*".strlen($item["title"]).">".strlen($item["body"])."\n"; $item["body"] = $item["title"]; $item["title"] = ""; } From 9c5cd476dd718f3bd4b6dc7b43f53fa61904a193 Mon Sep 17 00:00:00 2001 From: Michael Date: Tue, 17 Oct 2017 09:58:29 +0000 Subject: [PATCH 3/4] Better rule to replace content --- include/feed.php | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/include/feed.php b/include/feed.php index 6c0b3b145a..11ccd6af27 100644 --- a/include/feed.php +++ b/include/feed.php @@ -353,7 +353,20 @@ function feed_import($xml,$importer,&$contact, &$hub, $simulate = false) { } } - if (strlen($item["title"]) > strlen($item["body"])) { + // Replace the content when the title is longer than the body + $replace = (strlen($item["title"]) > strlen($item["body"])); + + // Replace it, when there is an image in the body + if (strstr($item["body"], '[/img]')) { + $replace = true; + } + + // Replace it, when there is a link in the body + if (strstr($item["body"], '[/url]')) { + $replace = true; + } + + if ($replace) { $item["body"] = $item["title"]; $item["title"] = ""; } From e38da9a3a5d9dad56afc877e4f27ad332f9e5f8f Mon Sep 17 00:00:00 2001 From: Michael Date: Tue, 17 Oct 2017 11:39:09 +0000 Subject: [PATCH 4/4] And some further refinements --- include/feed.php | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/feed.php b/include/feed.php index 11ccd6af27..fb97c14ba1 100644 --- a/include/feed.php +++ b/include/feed.php @@ -353,6 +353,10 @@ function feed_import($xml,$importer,&$contact, &$hub, $simulate = false) { } } + // Remove a possible link to the item itself + $item["body"] = str_replace($item["plink"], '', $item["body"]); + $item["body"] = preg_replace('/\[url\=\](\w+.*?)\[\/url\]/i', '', $item["body"]); + // Replace the content when the title is longer than the body $replace = (strlen($item["title"]) > strlen($item["body"])); @@ -368,8 +372,9 @@ function feed_import($xml,$importer,&$contact, &$hub, $simulate = false) { if ($replace) { $item["body"] = $item["title"]; - $item["title"] = ""; } + // We always strip the title since it will be added in the page information + $item["title"] = ""; $item["body"] = $item["body"].add_page_info($item["plink"], false, $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"]); $item["tag"] = add_page_keywords($item["plink"], false, $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"]); $item["object-type"] = ACTIVITY_OBJ_BOOKMARK;