From 7f8284eb1b2e77b0fd0b1ca77d9eba4c2ebda067 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Sun, 27 Sep 2015 14:02:05 +0200 Subject: [PATCH] New functionality to import feeds. Now with support of every used feed format (RSS, RDF, Atom) --- include/feed.php | 275 ++++++++++++++++++++++++++++++++++++++++++++++ include/items.php | 13 ++- 2 files changed, 284 insertions(+), 4 deletions(-) create mode 100644 include/feed.php diff --git a/include/feed.php b/include/feed.php new file mode 100644 index 0000000000..18d96e6abb --- /dev/null +++ b/include/feed.php @@ -0,0 +1,275 @@ +loadXML($xml); + $xpath = new DomXPath($doc); + $xpath->registerNamespace('atom', "http://www.w3.org/2005/Atom"); + $xpath->registerNamespace('dc', "http://purl.org/dc/elements/1.1/"); + $xpath->registerNamespace('content', "http://purl.org/rss/1.0/modules/content/"); + $xpath->registerNamespace('rdf', "http://www.w3.org/1999/02/22-rdf-syntax-ns#"); + $xpath->registerNamespace('rss', "http://purl.org/rss/1.0/"); + $xpath->registerNamespace('media', "http://search.yahoo.com/mrss/"); + + $author = array(); + + // Is it RDF? + if ($xpath->query('/rdf:RDF/rss:channel')->length > 0) { + //$author["author-link"] = $xpath->evaluate('/rdf:RDF/rss:channel/rss:link/text()')->item(0)->nodeValue; + $author["author-name"] = $xpath->evaluate('/rdf:RDF/rss:channel/rss:title/text()')->item(0)->nodeValue; + + if ($author["author-name"] == "") + $author["author-name"] = $xpath->evaluate('/rdf:RDF/rss:channel/rss:description/text()')->item(0)->nodeValue; + + $entries = $xpath->query('/rdf:RDF/rss:item'); + } + + // Is it Atom? + if ($xpath->query('/atom:feed/atom:entry')->length > 0) { + //$self = $xpath->query("/atom:feed/atom:link[@rel='self']")->item(0)->attributes; + //if (is_object($self)) + // foreach($self AS $attributes) + // if ($attributes->name == "href") + // $author["author-link"] = $attributes->textContent; + + //if ($author["author-link"] == "") { + // $alternate = $xpath->query("/atom:feed/atom:link[@rel='alternate']")->item(0)->attributes; + // if (is_object($alternate)) + // foreach($alternate AS $attributes) + // if ($attributes->name == "href") + // $author["author-link"] = $attributes->textContent; + //} + + $author["author-name"] = $xpath->evaluate('/atom:feed/atom:title/text()')->item(0)->nodeValue; + + if ($author["author-name"] == "") + $author["author-name"] = $xpath->evaluate('/atom:feed/atom:subtitle/text()')->item(0)->nodeValue; + + if ($author["author-name"] == "") + $author["author-name"] = $xpath->evaluate('/atom:feed/atom:author/atom:name/text()')->item(0)->nodeValue; + + //$author["author-avatar"] = $xpath->evaluate('/atom:feed/atom:logo/text()')->item(0)->nodeValue; + + $author["edited"] = $author["created"] = $xpath->query('/atom:feed/atom:updated/text()')->item(0)->nodeValue; + + $author["app"] = $xpath->evaluate('/atom:feed/atom:generator/text()')->item(0)->nodeValue; + + $entries = $xpath->query('/atom:feed/atom:entry'); + } + + // Is it RSS? + if ($xpath->query('/rss/channel')->length > 0) { + //$author["author-link"] = $xpath->evaluate('/rss/channel/link/text()')->item(0)->nodeValue; + $author["author-name"] = $xpath->evaluate('/rss/channel/title/text()')->item(0)->nodeValue; + //$author["author-avatar"] = $xpath->evaluate('/rss/channel/image/url/text()')->item(0)->nodeValue; + + if ($author["author-name"] == "") + $author["author-name"] = $xpath->evaluate('/rss/channel/copyright/text()')->item(0)->nodeValue; + + if ($author["author-name"] == "") + $author["author-name"] = $xpath->evaluate('/rss/channel/description/text()')->item(0)->nodeValue; + + $author["edited"] = $author["created"] = $xpath->query('/rss/channel/pubDate/text()')->item(0)->nodeValue; + + $author["app"] = $xpath->evaluate('/rss/channel/generator/text()')->item(0)->nodeValue; + + $entries = $xpath->query('/rss/channel/item'); + } + + //if ($author["author-link"] == "") + $author["author-link"] = $contact["url"]; + + if ($author["author-name"] == "") + $author["author-name"] = $contact["name"]; + + //if ($author["author-avatar"] == "") + $author["author-avatar"] = $contact["thumb"]; + + $author["owner-link"] = $contact["url"]; + $author["owner-name"] = $contact["name"]; + $author["owner-avatar"] = $contact["thumb"]; + + $header = array(); + $header["uid"] = $importer["uid"]; + $header["network"] = NETWORK_FEED; + $header["type"] = "remote"; + $header["wall"] = 0; + $header["origin"] = 0; + $header["gravity"] = GRAVITY_PARENT; + + $header["contact-id"] = $contact["id"]; + + if (!is_object($entries)) + return; + + foreach ($entries AS $entry) + $entrylist[] = $entry; + + foreach (array_reverse($entrylist) AS $entry) { + $item = array_merge($header, $author); + + $item["title"] = $xpath->evaluate('atom:title/text()', $entry)->item(0)->nodeValue; + + if ($item["title"] == "") + $item["title"] = $xpath->evaluate('title/text()', $entry)->item(0)->nodeValue; + + if ($item["title"] == "") + $item["title"] = $xpath->evaluate('rss:title/text()', $entry)->item(0)->nodeValue; + + $alternate = $xpath->query("atom:link[@rel='alternate']", $entry)->item(0)->attributes; + if (!is_object($alternate)) + $alternate = $xpath->query("atom:link", $entry)->item(0)->attributes; + + if (is_object($alternate)) + foreach($alternate AS $attributes) + if ($attributes->name == "href") + $item["plink"] = $attributes->textContent; + + if ($item["plink"] == "") + $item["plink"] = $xpath->evaluate('link/text()', $entry)->item(0)->nodeValue; + + if ($item["plink"] == "") + $item["plink"] = $xpath->evaluate('rss:link/text()', $entry)->item(0)->nodeValue; + + $item["plink"] = original_url($item["plink"]); + + $item["uri"] = $xpath->evaluate('atom:id/text()', $entry)->item(0)->nodeValue; + + if ($item["uri"] == "") + $item["uri"] = $xpath->evaluate('guid/text()', $entry)->item(0)->nodeValue; + + if ($item["uri"] == "") + $item["uri"] = $item["plink"]; + + $item["parent-uri"] = $item["uri"]; + + $published = $xpath->query('atom:published/text()', $entry)->item(0)->nodeValue; + + if ($published == "") + $published = $xpath->query('pubDate/text()', $entry)->item(0)->nodeValue; + + if ($published == "") + $published = $xpath->query('dc:date/text()', $entry)->item(0)->nodeValue; + + $updated = $xpath->query('atom:updated/text()', $entry)->item(0)->nodeValue; + + if ($updated == "") + $updated = $published; + + if ($published != "") + $item["created"] = $published; + + if ($updated != "") + $item["edited"] = $updated; + + $creator = $xpath->query('author/text()', $entry)->item(0)->nodeValue; + + if ($creator == "") + $creator = $xpath->query('atom:author/atom:name/text()', $entry)->item(0)->nodeValue; + + if ($creator == "") + $creator = $xpath->query('dc:creator/text()', $entry)->item(0)->nodeValue; + + if ($creator != "") + $item["author-name"] = $creator; + + if ($pubDate != "") + $item["edited"] = $item["created"] = $pubDate; + + $creator = $xpath->query('dc:creator/text()', $entry)->item(0)->nodeValue; + + if ($creator != "") + $item["author-name"] = $creator; + + //$item["object"] = $xml; + + $r = q("SELECT `id` FROM `item` WHERE `uid` = %d AND `uri` = '%s'", + intval($importer["uid"]), dbesc($item["uri"])); + if ($r) { + logger("Item with uri ".$item["uri"]." for user ".$importer["uid"]." already existed under id ".$r[0]["id"], LOGGER_DEBUG); + continue; + } + + // To-Do? + // Ausland + // + + $attachments = array(); + + $enclosures = $xpath->query("enclosure", $entry); + foreach ($enclosures AS $enclosure) { + $href = ""; + $length = ""; + $type = ""; + $title = ""; + + foreach($enclosure->attributes AS $attributes) { + if ($attributes->name == "url") + $href = $attributes->textContent; + elseif ($attributes->name == "length") + $length = $attributes->textContent; + elseif ($attributes->name == "type") + $type = $attributes->textContent; + } + if(strlen($item["attach"])) + $item["attach"] .= ','; + + $attachments[] = array("link" => $href, "type" => $type, "length" => $length); + + $item["attach"] .= '[attach]href="'.$href.'" length="'.$length.'" type="'.$type.'"[/attach]'; + } + + if ($contact["fetch_further_information"]) { + $preview = ""; + + // Handle enclosures and treat them as preview picture + foreach ($attachments AS $attachment) + if ($attachment["type"] == "image/jpeg") + $preview = $attachment["link"]; + + $item["body"] = $item["title"].add_page_info($item["plink"], false, $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"]); + $item["tag"] = add_page_keywords($item["plink"], false, $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_blacklist"]); + $item["title"] = ""; + $item["object-type"] = ACTIVITY_OBJ_BOOKMARK; + unset($item["attach"]); + } else { + $body = trim($xpath->evaluate('atom:content/text()', $entry)->item(0)->nodeValue); + + if ($body == "") + $body = trim($xpath->evaluate('content:encoded/text()', $entry)->item(0)->nodeValue); + + if ($body == "") + $body = trim($xpath->evaluate('description/text()', $entry)->item(0)->nodeValue); + + if ($body == "") + $body = trim($xpath->evaluate('atom:summary/text()', $entry)->item(0)->nodeValue); + + // remove the content of the title if it is identically to the body + // This helps with auto generated titles e.g. from tumblr + if (title_is_body($item["title"], $body)) + $item["title"] = ""; + + $item["body"] = html2bbcode($body); + } + + logger("Stored feed: ".print_r($item, true), LOGGER_DEBUG); + + $notify = item_is_remote_self($contact, $item); + $id = item_store($item, false, $notify); + + //print_r($item); + + logger("Feed for contact ".$contact["url"]." stored under id ".$id); + } +} +?> diff --git a/include/items.php b/include/items.php index 5915e2ecee..6d5c35d5e3 100644 --- a/include/items.php +++ b/include/items.php @@ -13,6 +13,7 @@ require_once('include/threads.php'); require_once('include/socgraph.php'); require_once('include/plaintext.php'); require_once('include/ostatus.php'); +require_once('include/feed.php'); require_once('mod/share.php'); require_once('library/defuse/php-encryption-1.2.1/Crypto.php'); @@ -2251,16 +2252,20 @@ function edited_timestamp_is_newer($existing, $update) { function consume_feed($xml,$importer,&$contact, &$hub, $datedir = 0, $pass = 0) { if ($contact['network'] === NETWORK_OSTATUS) { if ($pass < 2) { - // Test - remove before flight - //$tempfile = tempnam(get_temppath(), "ostatus"); - //file_put_contents($tempfile, $xml); - logger("Consume OStatus messages ", LOGGER_DEBUG); ostatus_import($xml,$importer,$contact, $hub); } return; } + if ($contact['network'] === NETWORK_FEED) { + if ($pass < 2) { + logger("Consume feeds", LOGGER_DEBUG); + feed_import($xml,$importer,$contact, $hub); + } + return; + } + require_once('library/simplepie/simplepie.inc'); require_once('include/contact_selectors.php');