Remove the title if it is identical to the body (or to the beginning of it). This helps against auto-generated titles, e.g. from Tumblr.

This commit is contained in:
Michael Vogel 2012-06-30 09:25:22 +02:00
parent 65268d25e6
commit bcca817b9a

View file

@ -280,6 +280,29 @@ function construct_activity_target($item) {
return ''; return '';
} }
/**
 * Checks whether a title merely repeats the beginning of the body.
 *
 * HTML tags and all whitespace (spaces, tabs, line breaks) are removed from
 * both strings before they are compared. The title is considered part of the
 * body when
 *  - it is a prefix of the body, or
 *  - it ends in "..." and the part before that ellipsis is a prefix of the body.
 * An empty title is a prefix of every body, so it always yields true.
 *
 * @param string $title Title text, may contain markup
 * @param string $body  Body text, may contain markup
 * @return bool true if the title only duplicates the start of the body
 */
function title_is_body($title, $body) {
	$whitespace = array("\n", "\r", "\t", " ");

	$title = str_replace($whitespace, '', trim(strip_tags((string) $title)));
	$body  = str_replace($whitespace, '', trim(strip_tags((string) $body)));

	$title_len = strlen($title);

	// Byte-exact prefix test. A loose "==" must not be used here: PHP compares
	// two numeric-looking strings as numbers, so "100" == "1e2" would be true.
	if (strncmp($title, $body, $title_len) === 0)
		return true;

	// Truncated title ("Some text..."): compare only the part before the
	// trailing ellipsis. A title consisting of nothing but "..." never matches.
	$stem_len = $title_len - 3;
	if (($stem_len > 0) && (substr($title, -3) === "..."))
		return (strncmp($title, $body, $stem_len) === 0);

	return false;
}
@ -306,8 +329,10 @@ function get_atom_elements($feed,$item) {
$res['body'] = unxmlify($item->get_content()); $res['body'] = unxmlify($item->get_content());
$res['plink'] = unxmlify($item->get_link(0)); $res['plink'] = unxmlify($item->get_link(0));
//$debugfile = tempnam("/home/ike/log", "item-res-"); // removing the content of the title if its identically to the body
//file_put_contents($debugfile, serialize($res)); // This helps with auto generated titles e.g. from tumblr
if (title_is_body($res["title"], $res["body"]))
$res['title'] = "";
if($res['plink']) if($res['plink'])
$base_url = implode('/', array_slice(explode('/',$res['plink']),0,3)); $base_url = implode('/', array_slice(explode('/',$res['plink']),0,3));