Only process feed and dfrn items when they aren't already stored

This commit is contained in:
Michael 2017-01-31 19:39:09 +00:00
parent 44bd890645
commit 2424cb78d1
3 changed files with 65 additions and 47 deletions

View file

@ -391,7 +391,7 @@ class dfrn {
* *
* @return object XML root object * @return object XML root object
*/ */
private function add_header($doc, $owner, $authorelement, $alternatelink = "", $public = false) { private static function add_header($doc, $owner, $authorelement, $alternatelink = "", $public = false) {
if ($alternatelink == "") if ($alternatelink == "")
$alternatelink = $owner['url']; $alternatelink = $owner['url'];
@ -462,7 +462,7 @@ class dfrn {
* *
* @return object XML author object * @return object XML author object
*/ */
private function add_author($doc, $owner, $authorelement, $public) { private static function add_author($doc, $owner, $authorelement, $public) {
// Is the profile hidden or shouldn't be published in the net? Then add the "hide" element // Is the profile hidden or shouldn't be published in the net? Then add the "hide" element
$r = q("SELECT `id` FROM `profile` INNER JOIN `user` ON `user`.`uid` = `profile`.`uid` $r = q("SELECT `id` FROM `profile` INNER JOIN `user` ON `user`.`uid` = `profile`.`uid`
@ -591,7 +591,7 @@ class dfrn {
* *
* @return object XML author object * @return object XML author object
*/ */
private function add_entry_author($doc, $element, $contact_url, $item) { private static function add_entry_author($doc, $element, $contact_url, $item) {
$contact = get_contact_details_by_url($contact_url, $item["uid"]); $contact = get_contact_details_by_url($contact_url, $item["uid"]);
@ -631,7 +631,7 @@ class dfrn {
* *
* @return object XML activity object * @return object XML activity object
*/ */
private function create_activity($doc, $element, $activity) { private static function create_activity($doc, $element, $activity) {
if($activity) { if($activity) {
$entry = $doc->createElement($element); $entry = $doc->createElement($element);
@ -685,7 +685,7 @@ class dfrn {
* *
* @return object XML attachment object * @return object XML attachment object
*/ */
private function get_attachment($doc, $root, $item) { private static function get_attachment($doc, $root, $item) {
$arr = explode('[/attach],',$item['attach']); $arr = explode('[/attach],',$item['attach']);
if(count($arr)) { if(count($arr)) {
foreach($arr as $r) { foreach($arr as $r) {
@ -720,7 +720,7 @@ class dfrn {
* *
* @return object XML entry object * @return object XML entry object
*/ */
private function entry($doc, $type, $item, $owner, $comment = false, $cid = 0) { private static function entry($doc, $type, $item, $owner, $comment = false, $cid = 0) {
$mentioned = array(); $mentioned = array();
@ -1103,7 +1103,7 @@ class dfrn {
* @param string $birthday Birthday of the contact * @param string $birthday Birthday of the contact
* *
*/ */
private function birthday_event($contact, $birthday) { private static function birthday_event($contact, $birthday) {
// Check for duplicates // Check for duplicates
$r = q("SELECT `id` FROM `event` WHERE `uid` = %d AND `cid` = %d AND `start` = '%s' AND `type` = '%s' LIMIT 1", $r = q("SELECT `id` FROM `event` WHERE `uid` = %d AND `cid` = %d AND `start` = '%s' AND `type` = '%s' LIMIT 1",
@ -1146,7 +1146,7 @@ class dfrn {
* *
* @return Returns an array with relevant data of the author * @return Returns an array with relevant data of the author
*/ */
private function fetchauthor($xpath, $context, $importer, $element, $onlyfetch, $xml = "") { private static function fetchauthor($xpath, $context, $importer, $element, $onlyfetch, $xml = "") {
$author = array(); $author = array();
$author["name"] = $xpath->evaluate($element."/atom:name/text()", $context)->item(0)->nodeValue; $author["name"] = $xpath->evaluate($element."/atom:name/text()", $context)->item(0)->nodeValue;
@ -1358,7 +1358,7 @@ class dfrn {
* *
* @return string XML string * @return string XML string
*/ */
private function transform_activity($xpath, $activity, $element) { private static function transform_activity($xpath, $activity, $element) {
if (!is_object($activity)) if (!is_object($activity))
return ""; return "";
@ -1403,7 +1403,7 @@ class dfrn {
* @param object $mail mail elements * @param object $mail mail elements
* @param array $importer Record of the importer user mixed with contact of the content * @param array $importer Record of the importer user mixed with contact of the content
*/ */
private function process_mail($xpath, $mail, $importer) { private static function process_mail($xpath, $mail, $importer) {
logger("Processing mails"); logger("Processing mails");
@ -1454,7 +1454,7 @@ class dfrn {
* @param object $suggestion suggestion elements * @param object $suggestion suggestion elements
* @param array $importer Record of the importer user mixed with contact of the content * @param array $importer Record of the importer user mixed with contact of the content
*/ */
private function process_suggestion($xpath, $suggestion, $importer) { private static function process_suggestion($xpath, $suggestion, $importer) {
$a = get_app(); $a = get_app();
logger("Processing suggestions"); logger("Processing suggestions");
@ -1556,7 +1556,7 @@ class dfrn {
* @param object $relocation relocation elements * @param object $relocation relocation elements
* @param array $importer Record of the importer user mixed with contact of the content * @param array $importer Record of the importer user mixed with contact of the content
*/ */
private function process_relocation($xpath, $relocation, $importer) { private static function process_relocation($xpath, $relocation, $importer) {
logger("Processing relocations"); logger("Processing relocations");
@ -1685,7 +1685,7 @@ class dfrn {
* @param array $importer Record of the importer user mixed with contact of the content * @param array $importer Record of the importer user mixed with contact of the content
* @param int $entrytype Is it a toplevel entry, a comment or a relayed comment? * @param int $entrytype Is it a toplevel entry, a comment or a relayed comment?
*/ */
private function update_content($current, $item, $importer, $entrytype) { private static function update_content($current, $item, $importer, $entrytype) {
$changed = false; $changed = false;
if (edited_timestamp_is_newer($current, $item)) { if (edited_timestamp_is_newer($current, $item)) {
@ -1737,7 +1737,7 @@ class dfrn {
* *
* @return int Is it a toplevel entry, a comment or a relayed comment? * @return int Is it a toplevel entry, a comment or a relayed comment?
*/ */
private function get_entry_type($importer, $item) { private static function get_entry_type($importer, $item) {
if ($item["parent-uri"] != $item["uri"]) { if ($item["parent-uri"] != $item["uri"]) {
$community = false; $community = false;
@ -1803,7 +1803,7 @@ class dfrn {
* @param array $importer Record of the importer user mixed with contact of the content * @param array $importer Record of the importer user mixed with contact of the content
* @param int $posted_id The record number of item record that was just posted * @param int $posted_id The record number of item record that was just posted
*/ */
private function do_poke($item, $importer, $posted_id) { private static function do_poke($item, $importer, $posted_id) {
$verb = urldecode(substr($item["verb"],strpos($item["verb"], "#")+1)); $verb = urldecode(substr($item["verb"],strpos($item["verb"], "#")+1));
if(!$verb) if(!$verb)
return; return;
@ -1858,7 +1858,7 @@ class dfrn {
* *
* @return bool Should the processing of the entries be continued? * @return bool Should the processing of the entries be continued?
*/ */
private function process_verbs($entrytype, $importer, &$item, &$is_like) { private static function process_verbs($entrytype, $importer, &$item, &$is_like) {
logger("Process verb ".$item["verb"]." and object-type ".$item["object-type"]." for entrytype ".$entrytype, LOGGER_DEBUG); logger("Process verb ".$item["verb"]." and object-type ".$item["object-type"]." for entrytype ".$entrytype, LOGGER_DEBUG);
@ -1958,7 +1958,7 @@ class dfrn {
* @param object $links link elements * @param object $links link elements
* @param array $item the item record * @param array $item the item record
*/ */
private function parse_links($links, &$item) { private static function parse_links($links, &$item) {
$rel = ""; $rel = "";
$href = ""; $href = "";
$type = ""; $type = "";
@ -2001,7 +2001,7 @@ class dfrn {
* @param object $entry entry elements * @param object $entry entry elements
* @param array $importer Record of the importer user mixed with contact of the content * @param array $importer Record of the importer user mixed with contact of the content
*/ */
private function process_entry($header, $xpath, $entry, $importer) { private static function process_entry($header, $xpath, $entry, $importer) {
logger("Processing entries"); logger("Processing entries");
@ -2010,6 +2010,20 @@ class dfrn {
// Get the uri // Get the uri
$item["uri"] = $xpath->query("atom:id/text()", $entry)->item(0)->nodeValue; $item["uri"] = $xpath->query("atom:id/text()", $entry)->item(0)->nodeValue;
$item["edited"] = $xpath->query("atom:updated/text()", $entry)->item(0)->nodeValue;
$current = q("SELECT `id`, `uid`, `last-child`, `edited`, `body` FROM `item` WHERE `uri` = '%s' AND `uid` = %d LIMIT 1",
dbesc($item["uri"]),
intval($importer["importer_uid"])
);
// Is there an existing item?
if (dbm::is_result($current) AND edited_timestamp_is_newer($current[0], $item) AND
(datetime_convert("UTC","UTC",$item["edited"]) < $current[0]["edited"])) {
logger("Item ".$item["uri"]." already existed.", LOGGER_DEBUG);
return;
}
// Fetch the owner // Fetch the owner
$owner = self::fetchauthor($xpath, $entry, $importer, "dfrn:owner", true); $owner = self::fetchauthor($xpath, $entry, $importer, "dfrn:owner", true);
@ -2027,7 +2041,6 @@ class dfrn {
$item["title"] = $xpath->query("atom:title/text()", $entry)->item(0)->nodeValue; $item["title"] = $xpath->query("atom:title/text()", $entry)->item(0)->nodeValue;
$item["created"] = $xpath->query("atom:published/text()", $entry)->item(0)->nodeValue; $item["created"] = $xpath->query("atom:published/text()", $entry)->item(0)->nodeValue;
$item["edited"] = $xpath->query("atom:updated/text()", $entry)->item(0)->nodeValue;
$item["body"] = $xpath->query("dfrn:env/text()", $entry)->item(0)->nodeValue; $item["body"] = $xpath->query("dfrn:env/text()", $entry)->item(0)->nodeValue;
$item["body"] = str_replace(array(' ',"\t","\r","\n"), array('','','',''),$item["body"]); $item["body"] = str_replace(array(' ',"\t","\r","\n"), array('','','',''),$item["body"]);
@ -2215,18 +2228,13 @@ class dfrn {
} }
} }
$r = q("SELECT `id`, `uid`, `last-child`, `edited`, `body` FROM `item` WHERE `uri` = '%s' AND `uid` = %d LIMIT 1",
dbesc($item["uri"]),
intval($importer["importer_uid"])
);
if (!self::process_verbs($entrytype, $importer, $item, $is_like)) { if (!self::process_verbs($entrytype, $importer, $item, $is_like)) {
logger("Exiting because 'process_verbs' told us so", LOGGER_DEBUG); logger("Exiting because 'process_verbs' told us so", LOGGER_DEBUG);
return; return;
} }
// Update content if 'updated' changes // Update content if 'updated' changes
if (dbm::is_result($r)) { if (dbm::is_result($current)) {
if (self::update_content($r[0], $item, $importer, $entrytype)) if (self::update_content($r[0], $item, $importer, $entrytype))
logger("Item ".$item["uri"]." was updated.", LOGGER_DEBUG); logger("Item ".$item["uri"]." was updated.", LOGGER_DEBUG);
else else
@ -2311,7 +2319,7 @@ class dfrn {
* @param object $deletion deletion elements * @param object $deletion deletion elements
* @param array $importer Record of the importer user mixed with contact of the content * @param array $importer Record of the importer user mixed with contact of the content
*/ */
private function process_deletion($xpath, $deletion, $importer) { private static function process_deletion($xpath, $deletion, $importer) {
logger("Processing deletions"); logger("Processing deletions");

View file

@ -177,18 +177,6 @@ function feed_import($xml,$importer,&$contact, &$hub, $simulate = false) {
foreach (array_reverse($entrylist) AS $entry) { foreach (array_reverse($entrylist) AS $entry) {
$item = array_merge($header, $author); $item = array_merge($header, $author);
$item["title"] = $xpath->evaluate('atom:title/text()', $entry)->item(0)->nodeValue;
if ($item["title"] == "")
$item["title"] = $xpath->evaluate('title/text()', $entry)->item(0)->nodeValue;
if ($item["title"] == "")
$item["title"] = $xpath->evaluate('rss:title/text()', $entry)->item(0)->nodeValue;
$alternate = $xpath->query("atom:link[@rel='alternate']", $entry)->item(0)->attributes;
if (!is_object($alternate))
$alternate = $xpath->query("atom:link", $entry)->item(0)->attributes;
if (is_object($alternate)) if (is_object($alternate))
foreach($alternate AS $attributes) foreach($alternate AS $attributes)
if ($attributes->name == "href") if ($attributes->name == "href")
@ -212,6 +200,27 @@ function feed_import($xml,$importer,&$contact, &$hub, $simulate = false) {
$item["parent-uri"] = $item["uri"]; $item["parent-uri"] = $item["uri"];
if (!$simulate) {
$r = q("SELECT `id` FROM `item` WHERE `uid` = %d AND `uri` = '%s' AND `network` IN ('%s', '%s')",
intval($importer["uid"]), dbesc($item["uri"]), dbesc(NETWORK_FEED), dbesc(NETWORK_DFRN));
if ($r) {
logger("Item with uri ".$item["uri"]." for user ".$importer["uid"]." already existed under id ".$r[0]["id"], LOGGER_DEBUG);
continue;
}
}
$item["title"] = $xpath->evaluate('atom:title/text()', $entry)->item(0)->nodeValue;
if ($item["title"] == "")
$item["title"] = $xpath->evaluate('title/text()', $entry)->item(0)->nodeValue;
if ($item["title"] == "")
$item["title"] = $xpath->evaluate('rss:title/text()', $entry)->item(0)->nodeValue;
$alternate = $xpath->query("atom:link[@rel='alternate']", $entry)->item(0)->attributes;
if (!is_object($alternate))
$alternate = $xpath->query("atom:link", $entry)->item(0)->attributes;
$published = $xpath->query('atom:published/text()', $entry)->item(0)->nodeValue; $published = $xpath->query('atom:published/text()', $entry)->item(0)->nodeValue;
if ($published == "") if ($published == "")
@ -250,15 +259,6 @@ function feed_import($xml,$importer,&$contact, &$hub, $simulate = false) {
if ($creator != "") if ($creator != "")
$item["author-name"] = $creator; $item["author-name"] = $creator;
if (!$simulate) {
$r = q("SELECT `id` FROM `item` WHERE `uid` = %d AND `uri` = '%s' AND `network` IN ('%s', '%s')",
intval($importer["uid"]), dbesc($item["uri"]), dbesc(NETWORK_FEED), dbesc(NETWORK_DFRN));
if ($r) {
logger("Item with uri ".$item["uri"]." for user ".$importer["uid"]." already existed under id ".$r[0]["id"], LOGGER_DEBUG);
continue;
}
}
/// @TODO ? /// @TODO ?
// <category>Ausland</category> // <category>Ausland</category>
// <media:thumbnail width="152" height="76" url="http://www.taz.de/picture/667875/192/14388767.jpg"/> // <media:thumbnail width="152" height="76" url="http://www.taz.de/picture/667875/192/14388767.jpg"/>

View file

@ -206,6 +206,16 @@ function poller_exec_function($queue, $funcname, $argv) {
$duration = number_format(microtime(true) - $stamp, 3); $duration = number_format(microtime(true) - $stamp, 3);
if ($duration > 3600) {
logger("Prio ".$queue["priority"].": ".$queue["parameter"]." - longer than 1 hour (".round($duration/60, 3).")", LOGGER_DEBUG);
} elseif ($duration > 600) {
logger("Prio ".$queue["priority"].": ".$queue["parameter"]." - longer than 10 minutes (".round($duration/60, 3).")", LOGGER_DEBUG);
} elseif ($duration > 300) {
logger("Prio ".$queue["priority"].": ".$queue["parameter"]." - longer than 5 minutes (".round($duration/60, 3).")", LOGGER_DEBUG);
} elseif ($duration > 120) {
logger("Prio ".$queue["priority"].": ".$queue["parameter"]." - longer than 2 minutes (".round($duration/60, 3).")", LOGGER_DEBUG);
}
logger("Process ".$mypid." - Prio ".$queue["priority"]." - ID ".$queue["id"].": ".$funcname." - done in ".$duration." seconds."); logger("Process ".$mypid." - Prio ".$queue["priority"]." - ID ".$queue["id"].": ".$funcname." - done in ".$duration." seconds.");
// Write down the performance values into the log // Write down the performance values into the log