From b994de33082d23fe40a0a894b86e58bac3a4acb6 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sun, 7 Jul 2019 14:45:23 +0100
Subject: [PATCH 01/94] Latest version of retriever
---
retriever/database.sql | 40 ++
retriever/retriever.php | 832 ++++++++++++++++++++++++++++
retriever/templates/extract.tpl | 32 ++
retriever/templates/fix-urls.tpl | 26 +
retriever/templates/help.tpl | 148 +++++
retriever/templates/rule-config.tpl | 112 ++++
retriever/templates/settings.tpl | 9 +
7 files changed, 1199 insertions(+)
create mode 100644 retriever/database.sql
create mode 100644 retriever/retriever.php
create mode 100644 retriever/templates/extract.tpl
create mode 100644 retriever/templates/fix-urls.tpl
create mode 100644 retriever/templates/help.tpl
create mode 100644 retriever/templates/rule-config.tpl
create mode 100644 retriever/templates/settings.tpl
diff --git a/retriever/database.sql b/retriever/database.sql
new file mode 100644
index 00000000..340e33eb
--- /dev/null
+++ b/retriever/database.sql
@@ -0,0 +1,40 @@
+/* Per-contact retrieval rules.  retriever.php stores the settings of a rule
+   in `data` as JSON text and looks rules up by `contact-id` and `uid`. */
+CREATE TABLE IF NOT EXISTS `retriever_rule` (
+ `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
+ `uid` int(11) NOT NULL,
+ `contact-id` int(11) NOT NULL,
+ `data` mediumtext NULL DEFAULT NULL,
+ PRIMARY KEY (`id`),
+ KEY `uid` (`uid`),
+ KEY `contact-id` (`contact-id`)
+) DEFAULT CHARSET=utf8 COLLATE=utf8_bin;
+
+/* Links one stream item (identified by `item-uri`, `item-uid` and
+   `contact-id`) to a row of `retriever_resource` via `resource`.
+   retriever.php sets `finished` to 1 once the resource has been applied. */
+CREATE TABLE IF NOT EXISTS `retriever_item` (
+ `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
+ `item-uri` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL,
+ `item-uid` int(10) unsigned NOT NULL DEFAULT '0',
+ `contact-id` int(10) unsigned NOT NULL DEFAULT '0',
+ `resource` int(11) NOT NULL,
+ `finished` tinyint(1) unsigned NOT NULL DEFAULT '0',
+ KEY `resource` (`resource`),
+ KEY `finished` (`finished`),
+ KEY `item-uid` (`item-uid`),
+ KEY `all` (`item-uri`, `item-uid`, `contact-id`),
+ PRIMARY KEY (`id`)
+) DEFAULT CHARSET=utf8 COLLATE=utf8_bin;
+
+/* Downloaded resources (pages and images), looked up by `url`.
+   `completed` stays NULL until the content has been stored in `data`.
+   `last-try` and `num-tries` drive the retry schedule in retriever.php. */
+CREATE TABLE IF NOT EXISTS `retriever_resource` (
+    `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
+    `type` char(255) NULL DEFAULT NULL,
+    `binary` int(1) NOT NULL DEFAULT 0,
+    `url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL,
+    `created` timestamp NOT NULL DEFAULT now(),
+    `completed` timestamp NULL DEFAULT NULL,
+    `last-try` timestamp NULL DEFAULT NULL,
+    `num-tries` int(11) NOT NULL DEFAULT 0,
+    `data` mediumblob NULL DEFAULT NULL,
+    `http-code` smallint(1) unsigned NULL DEFAULT NULL,
+    `redirect-url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NULL DEFAULT NULL,
+    KEY `url` (`url`),
+    KEY `completed` (`completed`),
+    PRIMARY KEY (`id`)
+) DEFAULT CHARSET=utf8 COLLATE=utf8_bin
diff --git a/retriever/retriever.php b/retriever/retriever.php
new file mode 100644
index 00000000..78fe575f
--- /dev/null
+++ b/retriever/retriever.php
@@ -0,0 +1,832 @@
+
+ * Status: Unsupported
+ */
+
+use Friendica\Core\Addon;
+use Friendica\Core\Config;
+use Friendica\Core\PConfig;
+use Friendica\Content\Text\HTML;
+use Friendica\Content\Text\BBCode;
+use Friendica\Object\Image;
+use Friendica\Util\Network;
+use Friendica\Core\L10n;
+use Friendica\Database\DBA;
+
+/**
+ * Converts a legacy '|'-separated XPath rule string into a list of
+ * element/attribute/value clauses.  Components that match neither of the
+ * two recognised forms are dropped.
+ *
+ * @param string $xpath Old-style `match` or `remove` setting
+ * @return array List of arrays with keys `element`, `attribute`, `value`
+ */
+function retriever_install_convert_clauses($xpath) {
+	$clauses = array();
+	foreach (explode('|', $xpath) as $component) {
+		$matches = array();
+		// Form 1: element[@attribute='value']
+		if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[@([A-Za-z][a-z0-9]*)='([^']*)'\]/", $component, $matches)) {
+			$clauses[] = array(
+				'element' => $matches[1],
+				'attribute' => $matches[2],
+				'value' => $matches[3]);
+		}
+		// Form 2: element[contains(concat(' ',normalize-space(@class),' '),' value ')]
+		// The parentheses are literal text and must be escaped, otherwise
+		// they act as regex groups and the pattern can never match.
+		if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[contains\(concat\(' ',\s*normalize-space\(@class\),\s*' '\),\s*' ([^ ']+) '\)\]/", $component, $matches)) {
+			$clauses[] = array(
+				'element' => $matches[1],
+				'attribute' => 'class',
+				'value' => $matches[2]);
+		}
+	}
+	return $clauses;
+}
+
+/**
+ * Addon install hook: registers the retriever hooks and brings the
+ * database up to the current schema version.
+ *
+ * The schema version is kept in the `retriever` / `dbversion` config
+ * value.  Each block below upgrades exactly one version step, so an old
+ * installation passes through every later step in order.  If the version
+ * is still not '0.12' at the end (for example on a fresh install), the
+ * statements in database.sql are executed.
+ */
+function retriever_install() {
+	Addon::registerHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings');
+	Addon::registerHook('plugin_settings_post', 'addon/retriever/retriever.php', 'retriever_plugin_settings_post');
+	Addon::registerHook('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook');
+	Addon::registerHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu');
+	Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron');
+
+	// Oldest format: one pconfig category per contact.  Move those settings
+	// into retriever_rule as JSON.
+	$r = q("SELECT `id` FROM `pconfig` WHERE `cat` LIKE 'retriever_%%'");
+	// q() does not always return a row list, so test the type before counting
+	if ((is_array($r) && count($r)) || (Config::get('retriever', 'dbversion') == '0.1')) {
+		$retrievers = array();
+		$r = q("SELECT SUBSTRING(`cat`, 10) AS `contact`, `k`, `v` FROM `pconfig` WHERE `cat` LIKE 'retriever%%'");
+		foreach ((is_array($r) ? $r : array()) as $rr) {
+			$retrievers[$rr['contact']][$rr['k']] = $rr['v'];
+		}
+		foreach ($retrievers as $k => $v) {
+			$rr = q("SELECT `uid` FROM `contact` WHERE `id` = %d", intval($k));
+			$uid = (is_array($rr) && count($rr)) ? $rr[0]['uid'] : 0;
+			$v['images'] = 'on';
+			q("INSERT INTO `retriever_rule` (`uid`, `contact-id`, `data`) VALUES (%d, %d, '%s')",
+				intval($uid), intval($k), DBA::escape(json_encode($v)));
+		}
+		q("DELETE FROM `pconfig` WHERE `cat` LIKE 'retriever_%%'");
+		Config::set('retriever', 'dbversion', '0.2');
+	}
+	if (Config::get('retriever', 'dbversion') == '0.2') {
+		q("ALTER TABLE `retriever_resource` DROP COLUMN `retriever`");
+		Config::set('retriever', 'dbversion', '0.3');
+	}
+	if (Config::get('retriever', 'dbversion') == '0.3') {
+		q("ALTER TABLE `retriever_item` MODIFY COLUMN `item-uri` varchar(800) CHARACTER SET ascii NOT NULL");
+		q("ALTER TABLE `retriever_resource` MODIFY COLUMN `url` varchar(800) CHARACTER SET ascii NOT NULL");
+		Config::set('retriever', 'dbversion', '0.4');
+	}
+	if (Config::get('retriever', 'dbversion') == '0.4') {
+		q("ALTER TABLE `retriever_item` ADD COLUMN `finished` tinyint(1) unsigned NOT NULL DEFAULT '0'");
+		Config::set('retriever', 'dbversion', '0.5');
+	}
+	if (Config::get('retriever', 'dbversion') == '0.5') {
+		q('ALTER TABLE `retriever_resource` CHANGE `created` `created` timestamp NOT NULL DEFAULT now()');
+		q('ALTER TABLE `retriever_resource` CHANGE `completed` `completed` timestamp NULL DEFAULT NULL');
+		q('ALTER TABLE `retriever_resource` CHANGE `last-try` `last-try` timestamp NULL DEFAULT NULL');
+		q('ALTER TABLE `retriever_item` DROP KEY `all`');
+		q('ALTER TABLE `retriever_item` ADD KEY `all` (`item-uri`, `item-uid`, `contact-id`)');
+		Config::set('retriever', 'dbversion', '0.6');
+	}
+	if (Config::get('retriever', 'dbversion') == '0.6') {
+		q('ALTER TABLE `retriever_item` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin');
+		q('ALTER TABLE `retriever_item` CHANGE `item-uri` `item-uri` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL');
+		q('ALTER TABLE `retriever_resource` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin');
+		q('ALTER TABLE `retriever_resource` CHANGE `url` `url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL');
+		q('ALTER TABLE `retriever_rule` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin');
+		Config::set('retriever', 'dbversion', '0.7');
+	}
+	if (Config::get('retriever', 'dbversion') == '0.7') {
+		// Rewrite every rule from the XPath-string format to clause lists
+		$rules = q("SELECT `id`, `data` FROM `retriever_rule`");
+		foreach ((is_array($rules) ? $rules : array()) as $rule) {
+			logger('retriever_install: retriever ' . $rule['id'] . ' old config ' . $rule['data'], LOGGER_DATA);
+			$data = json_decode($rule['data'], true);
+			if (!empty($data['pattern'])) {
+				// Keep only the text between the surrounding slashes
+				$matches = array();
+				if (preg_match("/\/(.*)\//", $data['pattern'], $matches)) {
+					$data['pattern'] = $matches[1];
+				}
+			}
+			if (!empty($data['match'])) {
+				$data['include'] = retriever_install_convert_clauses($data['match']);
+				unset($data['match']);
+			}
+			if (!empty($data['remove'])) {
+				$data['exclude'] = retriever_install_convert_clauses($data['remove']);
+				unset($data['remove']);
+			}
+			q('UPDATE `retriever_rule` SET `data` = "%s" WHERE `id` = %d', DBA::escape(json_encode($data)), intval($rule['id']));
+			logger('retriever_install: retriever ' . $rule['id'] . ' new config ' . json_encode($data), LOGGER_DATA);
+		}
+		Config::set('retriever', 'dbversion', '0.8');
+	}
+	if (Config::get('retriever', 'dbversion') == '0.8') {
+		q("ALTER TABLE `retriever_resource` ADD COLUMN `http-code` smallint(1) unsigned NULL DEFAULT NULL");
+		Config::set('retriever', 'dbversion', '0.9');
+	}
+	if (Config::get('retriever', 'dbversion') == '0.9') {
+		q("ALTER TABLE `retriever_item` DROP COLUMN `parent`");
+		q("ALTER TABLE `retriever_resource` ADD COLUMN `redirect-url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NULL DEFAULT NULL");
+		Config::set('retriever', 'dbversion', '0.10');
+	}
+	if (Config::get('retriever', 'dbversion') == '0.10') {
+		q("ALTER TABLE `retriever_resource` MODIFY COLUMN `type` char(255) NULL DEFAULT NULL");
+		q("ALTER TABLE `retriever_resource` MODIFY COLUMN `data` mediumblob NULL DEFAULT NULL");
+		q("ALTER TABLE `retriever_rule` MODIFY COLUMN `data` mediumtext NULL DEFAULT NULL");
+		Config::set('retriever', 'dbversion', '0.11');
+	}
+	if (Config::get('retriever', 'dbversion') == '0.11') {
+		q("ALTER TABLE `retriever_resource` ADD INDEX `url` (`url`)");
+		q("ALTER TABLE `retriever_resource` ADD INDEX `completed` (`completed`)");
+		q("ALTER TABLE `retriever_item` ADD INDEX `finished` (`finished`)");
+		q("ALTER TABLE `retriever_item` ADD INDEX `item-uid` (`item-uid`)");
+		Config::set('retriever', 'dbversion', '0.12');
+	}
+	if (Config::get('retriever', 'dbversion') != '0.12') {
+		$schema = file_get_contents(dirname(__file__).'/database.sql');
+		foreach (explode(';', $schema) as $statement) {
+			// Splitting on ';' leaves whitespace-only fragments; don't run those
+			if (trim($statement) === '') {
+				continue;
+			}
+			q($statement);
+		}
+		Config::set('retriever', 'dbversion', '0.12');
+	}
+}
+
+/**
+ * Addon uninstall hook: removes every hook that retriever_install()
+ * registered.  Database tables and configuration are left in place.
+ */
+function retriever_uninstall() {
+	Addon::unregisterHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings');
+	Addon::unregisterHook('plugin_settings_post', 'addon/retriever/retriever.php', 'retriever_plugin_settings_post');
+	Addon::unregisterHook('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook');
+	Addon::unregisterHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu');
+	Addon::unregisterHook('cron', 'addon/retriever/retriever.php', 'retriever_cron');
+}
+
+/**
+ * Deliberately empty.
+ * NOTE(review): presumably only its existence matters to the addon loader
+ * (page output comes from retriever_content()) - confirm.
+ */
+function retriever_module()
+{
+}
+
+/**
+ * Cron hook: works through pending resources, then prunes old rows.
+ *
+ * @param object $a Application object handed in by the hook
+ * @param mixed  $b Second hook argument; not used here
+ */
+function retriever_cron($a, $b) {
+	// Per-run ceiling on retrieved resources.  Hard-coded for now; could
+	// become a setting.
+	$max_items = 100;
+
+	retriever_retrieve_items($max_items, $a);
+	retriever_tidy();
+}
+
+// Running count of resources fetched so far; read and incremented by
+// retriever_retrieve_items() through a `global` declaration.
+$retriever_item_count = 0;
+
+/**
+ * Fetches resources that are due for a download attempt, then applies
+ * already-completed resources to items that are still waiting for them.
+ *
+ * A resource is due when it has never been tried, or when the delay that
+ * the retry schedule assigns to its current `num-tries` has passed since
+ * `last-try`.  The global $retriever_item_count is the running total of
+ * fetch attempts and is counted against $max_items.
+ *
+ * @param int    $max_items Maximum number of resources to fetch
+ * @param object $a         Application object, passed on to the completion code
+ */
+function retriever_retrieve_items($max_items, $a) {
+	global $retriever_item_count;
+
+	// Entry N is the delay used for resources whose `num-tries` equals N
+	$retriever_schedule = array(array(1,'minute'),
+		array(10,'minute'),
+		array(1,'hour'),
+		array(1,'day'),
+		array(2,'day'),
+		array(1,'week'),
+		array(1,'month'));
+
+	$schedule_clauses = array();
+	foreach ($retriever_schedule as $tries => $delay) {
+		list($num, $unit) = $delay;
+		$schedule_clauses[] =
+			'(`num-tries` = ' . intval($tries) . ' AND TIMESTAMPADD(' . DBA::escape($unit) .
+			', ' . intval($num) . ', `last-try`) < now())';
+	}
+	// Separator first: the reversed argument order is rejected by PHP 8
+	$due_clause = implode(' OR ', $schedule_clauses);
+
+	$retrieve_items = $max_items - $retriever_item_count;
+	logger('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items, LOGGER_DEBUG);
+	// Only query while there is budget left; LIMIT must be a positive number
+	while ($retrieve_items > 0) {
+		$r = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d",
+			$due_clause,
+			intval($retrieve_items));
+		if (!is_array($r) || (count($r) == 0)) {
+			break;
+		}
+		logger('retriever_retrieve_items: found ' . count($r) . ' waiting resources in database', LOGGER_DEBUG);
+		foreach ($r as $rr) {
+			retrieve_resource($rr);
+			$retriever_item_count++;
+		}
+		$retrieve_items = $max_items - $retriever_item_count;
+	}
+
+	/* Look for items that are waiting even though the resource has
+	 * completed.  This usually happens because we've been asked to
+	 * retrospectively apply a config change.  It could also happen
+	 * due to a cron job dying or something. */
+	$r = array();
+	if ($retrieve_items > 0) {
+		$r = q("SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d",
+			intval($retrieve_items));
+		if (!is_array($r)) {
+			$r = array();
+		}
+	}
+	logger('retriever_retrieve_items: items waiting even though resource has completed: ' . count($r), LOGGER_DEBUG);
+	foreach ($r as $rr) {
+		$resource = q("SELECT * FROM retriever_resource WHERE `id` = %d", intval($rr['resource']));
+		if (!is_array($resource) || !count($resource)) {
+			logger('retriever_retrieve_items: no resource with id ' . $rr['resource'], LOGGER_INFO);
+			continue;
+		}
+		$retriever_item = retriever_get_retriever_item($rr['item']);
+		if (!$retriever_item) {
+			logger('retriever_retrieve_items: no retriever item with id ' . $rr['item'], LOGGER_INFO);
+			continue;
+		}
+		$item = retriever_get_item($retriever_item);
+		if (!$item) {
+			logger('retriever_retrieve_items: no item ' . $retriever_item['item-uri'], LOGGER_INFO);
+			continue;
+		}
+		$retriever = get_retriever($item['contact-id'], $item['uid']);
+		if (!$retriever) {
+			// retriever_item rows carry the user id in `item-uid`
+			logger('retriever_retrieve_items: no retriever for item ' .
+				$retriever_item['item-uri'] . ' ' . $retriever_item['item-uid'] . ' ' . $item['contact-id'],
+				LOGGER_INFO);
+			continue;
+		}
+		retriever_apply_completed_resource_to_item($retriever, $item, $resource[0], $a);
+		q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d",
+			intval($retriever_item['id']));
+		retriever_check_item_completed($item);
+	}
+}
+
+/**
+ * Housekeeping: deletes resources completed more than a week ago,
+ * resources still incomplete three months after creation, and
+ * retriever_item rows whose resource no longer exists.
+ */
+function retriever_tidy() {
+	q("DELETE FROM retriever_resource WHERE completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)");
+	q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)");
+
+	$r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null");
+	// Treat a non-array result like "nothing found" so count/foreach are safe
+	if (!is_array($r)) {
+		$r = array();
+	}
+	logger('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource');
+	foreach ($r as $rr) {
+		q('DELETE FROM retriever_item WHERE id = %d', intval($rr['id']));
+	}
+}
+
+/**
+ * Completes a resource whose URL is a base64 "data:" URL by decoding the
+ * payload carried in the URL itself.
+ *
+ * The resource is marked completed whether or not the URL could be parsed,
+ * because retrying would give the same result.
+ *
+ * @param array $resource Row from `retriever_resource`
+ */
+function retrieve_dataurl_resource($resource) {
+	// The completion code expects the application object
+	$a = get_app();
+
+	$matches = array();
+	if (!preg_match("/data:(.*);base64,(.*)/", $resource['url'], $matches)) {
+		logger('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern');
+	} else {
+		$resource['type'] = $matches[1];
+		$resource['data'] = base64url_decode($matches[2]);
+	}
+
+	// Succeed or fail, there's no point retrying
+	q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `completed` = now(), `data` = '%s', `type` = '%s' WHERE id = %d",
+		DBA::escape($resource['data']),
+		DBA::escape($resource['type']),
+		intval($resource['id']));
+	retriever_resource_completed($resource, $a);
+}
+
+/**
+ * Makes one download attempt for a resource and records the outcome.
+ *
+ * `last-try`, `num-tries`, `http-code` and `redirect-url` are updated on
+ * every attempt.  When a body was received, the resource is also marked
+ * completed and the waiting items are processed.  "data:" URLs are handed
+ * to retrieve_dataurl_resource() instead.
+ *
+ * @param array $resource Row from `retriever_resource`
+ */
+function retrieve_resource($resource) {
+	if (substr($resource['url'], 0, 5) == "data:") {
+		return retrieve_dataurl_resource($resource);
+	}
+
+	$a = get_app();
+
+	$cookiejar = false;
+	try {
+		logger('retrieve_resource: ' . ($resource['num-tries'] + 1) .
+			' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], LOGGER_DEBUG);
+		// Redirect counter argument for fetchUrlFull()
+		$redirects = 0;
+		$cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-');
+		$fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, array('cookiejar' => $cookiejar));
+		$resource['data'] = $fetch_result['body'];
+		$resource['http-code'] = $a->get_curl_code();
+		$resource['type'] = $a->get_curl_content_type();
+		$resource['redirect-url'] = $fetch_result['redirect_url'];
+		logger('retrieve_resource: got code ' . $resource['http-code'] .
+			' retrieving resource ' . $resource['id'] .
+			' final url ' . $resource['redirect-url'], LOGGER_DEBUG);
+	} catch (Exception $e) {
+		logger('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage());
+	}
+	// Remove the temporary cookie jar on both the success and the error path
+	if ($cookiejar && file_exists($cookiejar)) {
+		unlink($cookiejar);
+	}
+
+	q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `http-code` = %d, `redirect-url` = '%s' WHERE id = %d",
+		intval($resource['http-code']),
+		DBA::escape($resource['redirect-url']),
+		intval($resource['id']));
+	if ($resource['data']) {
+		q("UPDATE `retriever_resource` SET `completed` = now(), `data` = '%s', `type` = '%s' WHERE id = %d",
+			DBA::escape($resource['data']),
+			DBA::escape($resource['type']),
+			intval($resource['id']));
+		retriever_resource_completed($resource, $a);
+	}
+}
+
+/**
+ * Loads the retrieval rule for a contact, optionally creating an empty one.
+ *
+ * @param int  $contact_id Contact the rule belongs to
+ * @param int  $uid        Owning user
+ * @param bool $create     Insert a new rule row when none exists
+ * @return array|null Rule row; for an existing rule `data` is decoded from
+ *                    JSON into an array.  Null when there is no rule and
+ *                    $create is false, or when the new row cannot be read back.
+ */
+function get_retriever($contact_id, $uid, $create = false) {
+	$r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d",
+		intval($contact_id), intval($uid));
+	// Check the type first: a non-array result has no rows to count
+	if (is_array($r) && count($r)) {
+		$r[0]['data'] = json_decode($r[0]['data'], true);
+		return $r[0];
+	}
+	if (!$create) {
+		return null;
+	}
+
+	q("INSERT INTO `retriever_rule` (`uid`, `contact-id`) VALUES (%d, %d)",
+		intval($uid), intval($contact_id));
+	$r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d",
+		intval($contact_id), intval($uid));
+	return (is_array($r) && count($r)) ? $r[0] : null;
+}
+
+/**
+ * Looks up a single `retriever_item` row by its id.
+ *
+ * @param int $id Primary key of the retriever_item
+ * @return array|null The row, or nothing when the lookup does not yield exactly one row
+ */
+function retriever_get_retriever_item($id) {
+	$rows = q("SELECT * FROM `retriever_item` WHERE id = %d", intval($id));
+	if (count($rows) == 1) {
+		return $rows[0];
+	}
+
+	logger('retriever_get_retriever_item: unable to find retriever_item ' . $id, LOGGER_INFO);
+	return;
+}
+
+/**
+ * Finds the stream item that a retriever_item row refers to.
+ *
+ * @param array $retriever_item Row from `retriever_item`
+ * @return array|null The matching `item` row, or nothing unless exactly one row matches
+ */
+function retriever_get_item($retriever_item) {
+	$items = q("SELECT * FROM `item` WHERE `uri` = '%s' AND `uid` = %d AND `contact-id` = %d",
+		DBA::escape($retriever_item['item-uri']),
+		intval($retriever_item['item-uid']),
+		intval($retriever_item['contact-id']));
+	if (!is_array($items) || (count($items) != 1)) {
+		// Report the values that were actually searched for
+		logger('retriever_get_item: unexpected number of results ' .
+			(is_array($items) ? count($items) : 0) . ' when searching for item ' .
+			$retriever_item['item-uri'] . ' ' . $retriever_item['item-uid'] . ' ' .
+			$retriever_item['contact-id'], LOGGER_INFO);
+		return;
+	}
+	return $items[0];
+}
+
+/**
+ * Applies a completed resource to the item behind one retriever_item row,
+ * marks that row finished and re-evaluates the item's visibility.
+ *
+ * @param int    $retriever_item_id Id of the retriever_item row
+ * @param array  $resource          Completed `retriever_resource` row
+ * @param object $a                 Application object
+ */
+function retriever_item_completed($retriever_item_id, $resource, $a) {
+	logger('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url'], LOGGER_DEBUG);
+
+	$link = retriever_get_retriever_item($retriever_item_id);
+	if (!$link) {
+		return;
+	}
+
+	// A missing rule is fine here; it is passed on as-is.
+	$rule = get_retriever($link['contact-id'], $link['item-uid']);
+
+	$item = retriever_get_item($link);
+	if (!$item) {
+		return;
+	}
+
+	retriever_apply_completed_resource_to_item($rule, $item, $resource, $a);
+
+	q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", intval($link['id']));
+	retriever_check_item_completed($item);
+}
+
+/**
+ * Runs retriever_item_completed() for every retriever_item row that
+ * references the given resource.
+ *
+ * @param array  $resource Completed `retriever_resource` row
+ * @param object $a        Application object
+ */
+function retriever_resource_completed($resource, $a) {
+	logger('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url'], LOGGER_DEBUG);
+
+	$waiting = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource['id']);
+	foreach ($waiting as $row) {
+		$retriever_item_id = $row['id'];
+		retriever_item_completed($retriever_item_id, $resource, $a);
+	}
+}
+
+/**
+ * Re-applies a rule to the most recently received items of its contact.
+ * Each item is hidden (in `item` and `thread`) and then queued again via
+ * retriever_on_item_insert().
+ *
+ * @param object $a         Application object
+ * @param array  $retriever Rule row
+ * @param int    $num       Number of items to reprocess
+ */
+function apply_retrospective($a, $retriever, $num) {
+	$hide_statements = array(
+		'UPDATE `item` SET `visible` = 0 WHERE `id` = %d',
+		'UPDATE `thread` SET `visible` = 0 WHERE `iid` = %d');
+
+	$recent = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d",
+		intval($retriever['contact-id']), intval($num));
+	foreach ($recent as $item) {
+		foreach ($hide_statements as $sql) {
+			q($sql, $item['id']);
+		}
+		retriever_on_item_insert($a, $retriever, $item);
+	}
+}
+
+/**
+ * Queues the download of an item's full page according to a rule.
+ *
+ * Nothing happens unless the rule is enabled.  If the rule has a URL
+ * pattern, the item's plink is rewritten with it before the resource is
+ * requested.
+ *
+ * @param object $a         Application object
+ * @param array  $retriever Rule row with decoded `data`
+ * @param array  $item      Stream item (by reference)
+ */
+function retriever_on_item_insert($a, $retriever, &$item) {
+	if (!$retriever || !$retriever['id']) {
+		logger('retriever_on_item_insert: No retriever supplied', LOGGER_INFO);
+		return;
+	}
+	// `data` is null for a rule that has never been saved
+	$rule = is_array($retriever['data']) ? $retriever['data'] : array();
+
+	// Any non-empty value counts as enabled.  This is what the former
+	// `!$enable == "on"` test actually evaluated, since `!` binds first.
+	if (empty($rule['enable'])) {
+		return;
+	}
+
+	$url = $item['plink'];
+	if (!empty($rule['pattern'])) {
+		$replace = isset($rule['replace']) ? $rule['replace'] : '';
+		$changed = preg_replace('/' . $rule['pattern'] . '/', $replace, $item['plink']);
+		if ($changed === null) {
+			// preg_replace() returns null on error, e.g. an invalid
+			// pattern.  Keep the original link instead of requesting an
+			// empty URL.
+			logger('retriever_on_item_insert: Unable to apply pattern ' . $rule['pattern'] . ' to ' . $item['plink'], LOGGER_INFO);
+		}
+		else {
+			$url = $changed;
+			logger('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url, LOGGER_DATA);
+		}
+	}
+
+	$resource = add_retriever_resource($a, $url);
+	add_retriever_item($item, $resource);
+}
+
+/**
+ * Returns the resource row for a URL, creating it when necessary.
+ *
+ * "data:" URLs are decoded on the spot and stored as an already completed
+ * resource under an `md5://<hash of the URL>` key.  Every other URL is
+ * stored as a pending resource for a later download attempt.
+ *
+ * @param object $a      Application object
+ * @param string $url    URL of the resource
+ * @param bool   $binary Stored in the `binary` column of a new row
+ * @return array|null The `retriever_resource` row, or null when the URL
+ *                    cannot be stored or read
+ */
+function add_retriever_resource($a, $url, $binary = false) {
+	logger('add_retriever_resource: ' . $url, LOGGER_DEBUG);
+
+	$scheme = parse_url($url, PHP_URL_SCHEME);
+	if ($scheme == 'data') {
+		$fp = fopen($url, 'r');
+		if (!$fp) {
+			logger('add_retriever_resource: unable to read data URL', LOGGER_INFO);
+			return null;
+		}
+		$meta = stream_get_meta_data($fp);
+		$type = $meta['mediatype'];
+		$data = stream_get_contents($fp);
+		fclose($fp);
+
+		// The URL itself may be far too long for the `url` column
+		$url = 'md5://' . hash('md5', $url);
+		$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url));
+		if (is_array($r) && count($r)) {
+			logger('add_retriever_resource: Resource ' . $url . ' already requested', LOGGER_DEBUG);
+			return $r[0];
+		}
+
+		logger('retrieve_resource: got data URL type ' . $type, LOGGER_DEBUG);
+		q("INSERT INTO `retriever_resource` (`type`, `binary`, `url`, `completed`, `data`) " .
+			"VALUES ('%s', %d, '%s', now(), '%s')",
+			DBA::escape($type),
+			intval($binary ? 1 : 0),
+			DBA::escape($url),
+			DBA::escape($data));
+		$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url));
+		if (!is_array($r) || !count($r)) {
+			return null;
+		}
+		retriever_resource_completed($r[0], $a);
+		return $r[0];
+	}
+
+	if (strlen($url) > 800) {
+		// The `url` column holds 800 characters, so a longer URL could
+		// never be found again by the lookups below.
+		logger('add_retriever_resource: URL is longer than 800 characters', LOGGER_INFO);
+		return null;
+	}
+
+	$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url));
+	if (is_array($r) && count($r)) {
+		logger('add_retriever_resource: Resource ' . $url . ' already requested', LOGGER_DEBUG);
+		return $r[0];
+	}
+
+	q("INSERT INTO `retriever_resource` (`binary`, `url`) " .
+		"VALUES (%d, '%s')", intval($binary ? 1 : 0), DBA::escape($url));
+	$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url));
+	return (is_array($r) && count($r)) ? $r[0] : null;
+}
+
+/**
+ * Records that an item is waiting for a resource.
+ *
+ * @param array $item     Stream item (by reference; not modified here)
+ * @param array $resource `retriever_resource` row
+ * @return int|null Id of the newest matching retriever_item row, or nothing
+ *                  when the row cannot be found after the insert
+ */
+function add_retriever_item(&$item, $resource) {
+	$item_ident = $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'];
+	logger('add_retriever_item: ' . $resource['url'] . ' for ' . $item_ident, LOGGER_DEBUG);
+
+	q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " .
+		"VALUES ('%s', %d, %d, %d)",
+		DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource["id"]));
+
+	// Read the row back, newest first, to learn its id
+	$created = q("SELECT id FROM `retriever_item` WHERE " .
+		"`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d ORDER BY id DESC",
+		DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id']));
+	if (count($created)) {
+		$new_id = $created[0]['id'];
+		logger('add_retriever_item: created retriever_item ' . $new_id . ' for item ' . $item_ident, LOGGER_DEBUG);
+		return $new_id;
+	}
+
+	logger("add_retriever_item: couldn't create retriever item for " . $item_ident, LOGGER_INFO);
+	return;
+}
+
+/**
+ * Extracts the character set from a resource's content type.
+ *
+ * Only the charset token itself is returned: surrounding double quotes and
+ * any parameters that follow it are ignored, and the parameter name is
+ * matched case-insensitively.
+ *
+ * @param array $resource Resource row; `type` holds the content type
+ * @return string Charset name, or 'utf-8' when none is given
+ */
+function retriever_get_encoding($resource) {
+	$type = isset($resource['type']) ? (string) $resource['type'] : '';
+	$matches = array();
+	if (preg_match('/charset\s*=\s*"?([^\s;"]+)/i', $type, $matches)) {
+		return $matches[1];
+	}
+	return 'utf-8';
+}
+
+/**
+ * Transforms a DOM document with an XSLT stylesheet given as text.
+ *
+ * @param string      $xslt_text Stylesheet source
+ * @param DOMDocument $doc       Document to transform
+ * @return mixed Result of transformToDoc(); the unchanged $doc when the
+ *               stylesheet text is empty or cannot be loaded as XML
+ */
+function retriever_apply_xslt_text($xslt_text, $doc) {
+	if ($xslt_text) {
+		$stylesheet = new DOMDocument();
+		if ($stylesheet->loadXML($xslt_text)) {
+			$processor = new XsltProcessor();
+			$processor->importStylesheet($stylesheet);
+			return $processor->transformToDoc($doc);
+		}
+		logger('retriever_apply_xslt_text: could not load XML', LOGGER_INFO);
+		return $doc;
+	}
+
+	logger('retriever_apply_xslt_text: empty XSLT text', LOGGER_INFO);
+	return $doc;
+}
+
+/**
+ * Replaces an item's body with content extracted from a downloaded page.
+ *
+ * The page is parsed into a DOM document, reduced by the rule's include /
+ * exclude clauses (extract.tpl) and/or the rule's custom XSLT, has its URLs
+ * rewritten by fix-urls.tpl, and is converted to BBCode.  The new body gets
+ * a "Retrieved" footer linking to the item's plink and is written to the
+ * `item` table.
+ *
+ * @param array $retriever Rule row with decoded `data`
+ * @param array $item      Stream item (by reference); `body` is replaced
+ * @param array $resource  Completed `retriever_resource` row
+ */
+function retriever_apply_dom_filter($retriever, &$item, $resource) {
+	logger('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'], LOGGER_DEBUG);
+
+	// `data` is null for a rule that has never been saved
+	$rule = is_array($retriever['data']) ? $retriever['data'] : array();
+	$has_include = !empty($rule['include']);
+	$has_custom = !empty($rule['customxslt']);
+	if (!$has_include && !$has_custom) {
+		return;
+	}
+	if (!$resource['data']) {
+		logger('retriever_apply_dom_filter: no text to work with', LOGGER_INFO);
+		return;
+	}
+
+	$encoding = retriever_get_encoding($resource);
+	$content = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $encoding);
+	$doc = new DOMDocument('1.0', 'UTF-8');
+	if (strpos($resource['type'], 'html') !== false) {
+		// Parser warnings are suppressed for HTML input
+		@$doc->loadHTML($content);
+	}
+	else {
+		$doc->loadXML($content);
+	}
+
+	if ($has_include) {
+		$extract_template = get_markup_template('extract.tpl', 'addon/retriever/');
+		$extract_xslt = replace_macros($extract_template, array('$spec' => $rule));
+		$doc = retriever_apply_xslt_text($extract_xslt, $doc);
+	}
+	// Skip the second pass when the first transformation already failed
+	if ($doc && $has_custom) {
+		$doc = retriever_apply_xslt_text($rule['customxslt'], $doc);
+	}
+	if (!$doc) {
+		logger('retriever_apply_dom_filter: failed to apply extract XSLT template', LOGGER_INFO);
+		return;
+	}
+
+	// `redirect-url` is a nullable column; fall back to the requested URL
+	$base_url = !empty($resource['redirect-url']) ? $resource['redirect-url'] : $resource['url'];
+	$components = parse_url($base_url);
+	if (!is_array($components)) {
+		$components = array();
+	}
+	$rooturl = (isset($components['scheme']) ? $components['scheme'] : '') . "://" .
+		(isset($components['host']) ? $components['host'] : '');
+	if (isset($components['port'])) {
+		$rooturl .= ':' . $components['port'];
+	}
+	$path = isset($components['path']) ? $components['path'] : '/';
+	if (substr($path, -1) == '/') {
+		// The path already names a directory; dirname() would drop its last part
+		$dirpath = $path;
+	}
+	else {
+		// For a top-level path dirname() gives the separator itself; trimming
+		// it avoids a doubled slash
+		$dirpath = rtrim(dirname($path), '/\\') . '/';
+	}
+	$dirurl = $rooturl . $dirpath;
+	$params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl);
+	$fix_urls_template = get_markup_template('fix-urls.tpl', 'addon/retriever/');
+	$fix_urls_xslt = replace_macros($fix_urls_template, $params);
+	$doc = retriever_apply_xslt_text($fix_urls_xslt, $doc);
+	if (!$doc) {
+		logger('retriever_apply_dom_filter: failed to apply fix urls XSLT template', LOGGER_INFO);
+		return;
+	}
+
+	$item['body'] = HTML::toBBCode($doc->saveHTML());
+	if (!strlen($item['body'])) {
+		logger('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', LOGGER_INFO);
+		return;
+	}
+	$item['body'] .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . ': [url=';
+	$item['body'] .= $item['plink'];
+	$item['body'] .= ']' . $item['plink'] . '[/url]';
+	q("UPDATE `item` SET `body` = '%s' WHERE `id` = %d",
+		DBA::escape($item['body']), intval($item['id']));
+}
+
+/**
+ * Requests every image referenced by [img] tags in the item body, except
+ * those whose URL contains this site's base URL.  Images that are already
+ * completed are applied immediately; the others are queued for the item.
+ *
+ * @param array  $item Stream item (by reference)
+ * @param object $a    Application object
+ */
+function retrieve_images(&$item, $a) {
+	// [img=WxH]url[/img]
+	$sized = array();
+	preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $item["body"], $sized);
+	// [img]url[/img]
+	$plain = array();
+	preg_match_all("/\[img\](.*?)\[\/img\]/ism", $item["body"], $plain);
+
+	$urls = array_merge($sized[3], $plain[1]);
+	logger('retrieve_images: found ' . count($urls) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG);
+
+	foreach ($urls as $url) {
+		if (strpos($url, get_app()->get_baseurl()) !== FALSE) {
+			continue;
+		}
+		$resource = add_retriever_resource($a, $url, true);
+		if ($resource['completed']) {
+			retriever_transform_images($a, $item, $resource);
+		}
+		else {
+			add_retriever_item($item, $resource);
+		}
+	}
+}
+
+/**
+ * Sets an item's visibility from the number of unfinished retriever_item
+ * rows it has: hidden while any remain, visible otherwise.  The database
+ * is only updated for a stored item (positive `id`) whose flag changed.
+ *
+ * @param array $item Stream item (by reference); `visible` is overwritten
+ */
+function retriever_check_item_completed(&$item)
+{
+	$pending = q('SELECT count(*) FROM retriever_item WHERE `item-uri` = "%s" ' .
+		'AND `item-uid` = %d AND `contact-id` = %d AND `finished` = 0',
+		DBA::escape($item['uri']), intval($item['uid']),
+		intval($item['contact-id']));
+	$waiting = $pending[0]['count(*)'];
+	logger('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid']
+		. ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources', LOGGER_DEBUG);
+
+	$was_visible = $item['visible'];
+	if ($waiting) {
+		$item['visible'] = 0;
+	}
+	else {
+		$item['visible'] = 1;
+	}
+
+	$is_stored = array_key_exists('id', $item) && ($item['id'] > 0);
+	if (!$is_stored || ($was_visible == $item['visible'])) {
+		return;
+	}
+
+	logger('retriever_check_item_completed: changing visible flag to ' . $item['visible'] . ' and invoking notifier ("edit_post", ' . $item['id'] . ')', LOGGER_DEBUG);
+	q("UPDATE `item` SET `visible` = %d WHERE `id` = %d",
+		intval($item['visible']), intval($item['id']));
+	q("UPDATE `thread` SET `visible` = %d WHERE `iid` = %d",
+		intval($item['visible']), intval($item['id']));
+}
+
+/**
+ * Dispatches a completed resource by content type: images are swapped
+ * into the item body; HTML/XML pages are run through the rule's DOM
+ * filter, followed by image retrieval when the rule asks for it.
+ *
+ * @param array|null $retriever Rule row; page processing is skipped without one
+ * @param array      $item      Stream item (by reference)
+ * @param array      $resource  Completed `retriever_resource` row
+ * @param object     $a         Application object
+ */
+function retriever_apply_completed_resource_to_item($retriever, &$item, $resource, $a) {
+	logger('retriever_apply_completed_resource_to_item: retriever ' .
+		($retriever ? $retriever['id'] : 'none') .
+		' resource ' . $resource['url'] . ' plink ' . $item['plink'], LOGGER_DEBUG);
+
+	$type = $resource['type'];
+	if (strpos($type, 'image') !== false) {
+		retriever_transform_images($a, $item, $resource);
+	}
+	if (!$retriever) {
+		return;
+	}
+
+	$is_markup = (strpos($type, 'html') !== false) || (strpos($type, 'xml') !== false);
+	if (!$is_markup) {
+		return;
+	}
+	retriever_apply_dom_filter($retriever, $item, $resource);
+	if ($retriever["data"]['images'] ) {
+		retrieve_images($item, $a);
+	}
+}
+
+/**
+ * Stores a downloaded image as a photo and points the item body at the
+ * stored copy instead of the original URL.
+ *
+ * @param object $a        Application object
+ * @param array  $item     Stream item (by reference); `body` may be rewritten
+ * @param array  $resource Completed `retriever_resource` row holding the image
+ */
+function retriever_transform_images($a, &$item, $resource) {
+	if (!$resource["data"]) {
+		logger('retriever_transform_images: no data available for '
+			. $resource['id'] . ' ' . $resource['url'], LOGGER_INFO);
+		return;
+	}
+
+	try {
+		$photo = Image::storePhoto($a, $item['uid'], $resource['data'], $resource['url']);
+	} catch (Exception $e) {
+		logger('retriever_transform_images caught exception ' . $e->getMessage());
+		return;
+	}
+	// Without a stored copy, rewriting would replace the image URL with
+	// an empty string.
+	if (!is_array($photo) || empty($photo['full'])) {
+		logger('retriever_transform_images: no stored photo for '
+			. $resource['id'] . ' ' . $resource['url'], LOGGER_INFO);
+		return;
+	}
+
+	$new_url = $photo['full'];
+	logger('retriever_transform_images: replacing ' . $resource['url'] . ' with ' .
+		$new_url . ' in item ' . $item['plink'], LOGGER_DEBUG);
+	$transformed = str_replace($resource["url"], $new_url, $item['body']);
+	if ($transformed === $item['body']) {
+		return;
+	}
+
+	$item['body'] = $transformed;
+	q("UPDATE `item` SET `body` = '%s' WHERE `plink` = '%s' AND `uid` = %d AND `contact-id` = %d",
+		DBA::escape($item['body']),
+		DBA::escape($item['plink']),
+		intval($item['uid']),
+		intval($item['contact-id']));
+}
+
+/**
+ * Renders the addon's pages into $a->page['content'].
+ *
+ * - `help` as the first path argument shows the help page with a link to
+ *   the rule page of each feed contact.
+ * - Any other non-empty first path argument is taken as a contact id: a
+ *   posted form is saved as that contact's rule (optionally re-applied to
+ *   recent posts), and the rule editing form is shown.
+ *
+ * @param object $a Application object
+ */
+function retriever_content($a) {
+	if (!local_user()) {
+		$a->page['content'] .= "Please log in\n";
+		return;
+	}
+
+	$arg = isset($a->argv[1]) ? $a->argv[1] : '';
+	if ($arg === 'help') {
+		$feeds = q("SELECT `id`, `name`, `thumb` FROM contact WHERE `uid` = %d AND `network` = 'feed'",
+			local_user());
+		if (!is_array($feeds)) {
+			$feeds = array();
+		}
+		foreach ($feeds as $k=>$v) {
+			$feeds[$k]['url'] = $a->get_baseurl() . '/retriever/' . $v['id'];
+		}
+		$template = get_markup_template('/help.tpl', 'addon/retriever/');
+		$a->page['content'] .= replace_macros($template, array(
+			'$config' => $a->get_baseurl() . '/settings/addon',
+			'$feeds' => $feeds));
+		return;
+	}
+	if (!$arg) {
+		return;
+	}
+
+	$retriever = get_retriever($arg, local_user(), false);
+
+	if (!empty($_POST["id"])) {
+		$retriever = get_retriever($arg, local_user(), true);
+		$retriever["data"] = array();
+		foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt') as $setting) {
+			if (!empty($_POST['retriever_' . $setting])) {
+				$retriever["data"][$setting] = $_POST['retriever_' . $setting];
+			}
+		}
+		// Clause fields arrive as retriever-<list>-<index>-<field>
+		foreach ($_POST as $k=>$v) {
+			$matches = array();
+			if (preg_match("/retriever-(include|exclude)-(\d+)-(element|attribute|value)/", $k, $matches)) {
+				$retriever['data'][$matches[1]][intval($matches[2])][$matches[3]] = $v;
+			}
+		}
+		// You've gotta have an element, even if it's just "*"
+		foreach (array('include', 'exclude') as $list) {
+			if (empty($retriever['data'][$list])) {
+				// No clause of this kind was posted
+				continue;
+			}
+			foreach ($retriever['data'][$list] as $k=>$clause) {
+				if (empty($clause['element'])) {
+					unset($retriever['data'][$list][$k]);
+				}
+			}
+		}
+		q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d",
+			DBA::escape(json_encode($retriever["data"])), intval($retriever["id"]));
+		$a->page['content'] .= "Settings Updated";
+		if (!empty($_POST["retriever_retrospective"])) {
+			// Report the number that was applied; going through intval()
+			// also keeps raw POST data out of the page.
+			$num_posts = intval($_POST["retriever_retrospective"]);
+			apply_retrospective($a, $retriever, $num_posts);
+			$a->page['content'] .= " and retrospectively applied to " . $num_posts . " posts";
+		}
+		$a->page['content'] .= ".\n";
+	}
+
+	// Settings that are not stored are passed to the form as null
+	$data = (is_array($retriever) && isset($retriever['data']) && is_array($retriever['data'])) ? $retriever['data'] : array();
+	$data += array(
+		'enable' => null,
+		'pattern' => null,
+		'replace' => null,
+		'images' => null,
+		'customxslt' => null,
+		'include' => null,
+		'exclude' => null);
+
+	$template = get_markup_template('/rule-config.tpl', 'addon/retriever/');
+	$a->page['content'] .= replace_macros($template, array(
+		'$enable' => array(
+			'retriever_enable',
+			L10n::t('Enabled'),
+			$data['enable']),
+		'$pattern' => array(
+			'retriever_pattern',
+			L10n::t('URL Pattern'),
+			$data['pattern'],
+			L10n::t('Regular expression matching part of the URL to replace')),
+		'$replace' => array(
+			'retriever_replace',
+			L10n::t('URL Replace'),
+			$data['replace'],
+			L10n::t('Text to replace matching part of above regular expression')),
+		'$images' => array(
+			'retriever_images',
+			L10n::t('Download Images'),
+			$data['images']),
+		'$retrospective' => array(
+			'retriever_retrospective',
+			L10n::t('Retrospectively Apply'),
+			'0',
+			L10n::t('Reapply the rules to this number of posts')),
+		'$customxslt' => array(
+			'retriever_customxslt',
+			L10n::t('Custom XSLT'),
+			$data['customxslt'],
+			L10n::t("When standard rules aren't enough, apply custom XSLT to the article")),
+		'$title' => L10n::t('Retrieve Feed Content'),
+		'$help' => $a->get_baseurl() . '/retriever/help',
+		'$help_t' => L10n::t('Get Help'),
+		'$submit_t' => L10n::t('Submit'),
+		'$submit' => L10n::t('Save Settings'),
+		'$id' => (!empty($retriever["id"]) ? $retriever["id"] : "create"),
+		'$tag_t' => L10n::t('Tag'),
+		'$attribute_t' => L10n::t('Attribute'),
+		'$value_t' => L10n::t('Value'),
+		'$add_t' => L10n::t('Add'),
+		'$remove_t' => L10n::t('Remove'),
+		'$include_t' => L10n::t('Include'),
+		'$include' => $data['include'],
+		'$exclude_t' => L10n::t('Exclude'),
+		'$exclude' => $data['exclude']));
+}
+
+/**
+ * contact_photo_menu hook: adds a "Retriever" entry linking to the rule
+ * page for contacts whose network is "feed".
+ *
+ * @param object $a    Application object
+ * @param array  $args Hook data (by reference); reads `contact`, extends `menu`
+ */
+function retriever_contact_photo_menu($a, &$args) {
+	if ($args && ($args["contact"]["network"] == "feed")) {
+		$rule_page = $a->get_baseurl() . '/retriever/' . $args["contact"]['id'];
+		$args["menu"][ 'retriever' ] = array(L10n::t('Retriever'), $rule_page);
+	}
+}
+
+/**
+ * post_remote hook: decides what to retrieve for an incoming item.
+ *
+ * With a rule for the item's contact, the rule is applied.  Without one,
+ * the user's `oembed` and `all_photos` settings are honoured.  In both
+ * cases the item's visibility is re-evaluated afterwards.
+ *
+ * @param object $a    Application object
+ * @param array  $item Incoming item (by reference)
+ */
+function retriever_post_remote_hook(&$a, &$item) {
+	logger('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG);
+
+	$rule = get_retriever($item['contact-id'], $item["uid"], false);
+	if (!$rule) {
+		if (PConfig::get($item["uid"], 'retriever', 'oembed')) {
+			// Convert to HTML and back to take advantage of bbcode's resolution of oembeds.
+			$resolved = HTML::toBBCode(BBCode::convert($item['body']));
+			if ($resolved) {
+				$item['body'] = $resolved;
+			}
+		}
+		if (PConfig::get($item["uid"], 'retriever', 'all_photos')) {
+			retrieve_images($item, $a);
+		}
+	}
+	else {
+		retriever_on_item_insert($a, $rule, $item);
+	}
+
+	retriever_check_item_completed($item);
+}
+
+/**
+ * plugin_settings hook: appends the addon's per-user settings form.
+ *
+ * @param object $a Application object
+ * @param string $s Settings page HTML (by reference); the form is appended
+ */
+function retriever_plugin_settings(&$a,&$s) {
+	$all_photos = PConfig::get(local_user(), 'retriever', 'all_photos');
+	$oembed = PConfig::get(local_user(), 'retriever', 'oembed');
+
+	$allphotos_field = array(
+		'retriever_all_photos',
+		L10n::t('All Photos'),
+		$all_photos,
+		L10n::t('Check this to retrieve photos for all posts'));
+	$oembed_field = array(
+		'retriever_oembed',
+		L10n::t('Resolve OEmbed'),
+		$oembed,
+		L10n::t('Check this to attempt to retrieve embedded content for all posts - useful e.g. for Facebook posts'));
+
+	$macros = array(
+		'$allphotos' => $allphotos_field,
+		'$oembed' => $oembed_field,
+		'$submit' => L10n::t('Save Settings'),
+		'$title' => L10n::t('Retriever Settings'),
+		'$help' => $a->get_baseurl() . '/retriever/help');
+
+	$template = get_markup_template('/settings.tpl', 'addon/retriever/');
+	$s .= replace_macros($template, $macros);
+}
+
+/**
+ * plugin_settings_post hook: stores the `all_photos` and `oembed` user
+ * settings.  A setting that is missing or empty in the posted form is
+ * deleted.
+ *
+ * @param object $a    Application object
+ * @param mixed  $post Hook data; not used, the values are read from $_POST
+ */
+function retriever_plugin_settings_post($a,$post) {
+	foreach (array('all_photos', 'oembed') as $key) {
+		$field = 'retriever_' . $key;
+		// empty() also covers a field that was not posted at all
+		if (!empty($_POST[$field])) {
+			PConfig::set(local_user(), 'retriever', $key, $_POST[$field]);
+		}
+		else {
+			PConfig::del(local_user(), 'retriever', $key);
+		}
+	}
+}
diff --git a/retriever/templates/extract.tpl b/retriever/templates/extract.tpl
new file mode 100644
index 00000000..f24a860d
--- /dev/null
+++ b/retriever/templates/extract.tpl
@@ -0,0 +1,32 @@
+
+
+
+
+
+
+{{function clause_xpath}}{{* XPath node test for one include/exclude clause; a "class" value must match whole space-separated class token(s), not a substring *}}
+{{if !$clause.attribute}}
+{{$clause.element}}{{elseif $clause.attribute == 'class'}}
+{{$clause.element}}[contains(concat(' ', normalize-space(@class), ' '), ' {{$clause.value}} ')]{{else}}
+{{$clause.element}}[@{{$clause.attribute}}='{{$clause.value}}']{{/if}}
+{{/function}}
+
+{{foreach $spec.include as $clause}}
+
+
+
+
+
+{{/foreach}}
+
+{{foreach $spec.exclude as $clause}}
+
+{{/foreach}}
+
+
+
+
+
+
+
+
diff --git a/retriever/templates/fix-urls.tpl b/retriever/templates/fix-urls.tpl
new file mode 100644
index 00000000..248d4770
--- /dev/null
+++ b/retriever/templates/fix-urls.tpl
@@ -0,0 +1,26 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/retriever/templates/help.tpl b/retriever/templates/help.tpl
new file mode 100644
index 00000000..10b421d0
--- /dev/null
+++ b/retriever/templates/help.tpl
@@ -0,0 +1,148 @@
+Retriever Plugin Help
+
+This plugin replaces the short excerpts you normally get in RSS feeds
+with the full content of the article from the source website. You
+specify which part of the page you're interested in with a set of
+rules. When each item arrives, the plugin downloads the full page
+from the website, extracts content using the rules, and replaces the
+original article.
+
+
+There are a few reasons you may want to do this. The source website
+might be slow or overloaded. The source website might be
+untrustworthy, in which case using Friendica to scrub the HTML is a
+good idea. You might be on a LAN that blacklists certain websites.
+It also works neatly with the mailstream plugin, allowing you to read
+a news stream comfortably without needing continuous Internet
+connectivity.
+
+
+However, setting up retriever can be quite tricky since it depends on
+the internal design of the website. That design is meant to make life
+easy for the website's developers, not for you. You'll need to have
+some familiarity with HTML, and be willing to adapt when the website
+suddenly changes everything without notice.
+
+Configuring Retriever for a feed
+
+To set up retriever for an RSS feed, go to the "Contacts" page and
+find your feed. Then click on the drop-down menu on the contact.
+Select "Retriever" to get to the retriever configuration.
+
+
+The "Include" configuration section specifies parts of the page to
+include in the article. Each row has three components:
+
+
+An HTML tag (e.g. "div", "span", "p")
+An attribute (usually "class" or "id")
+A value for the attribute
+
+
+A simple case is when the article is wrapped in a "div" element:
+
+
+ ...
+ <div class="ArticleWrapper">
+ <h2>Man Bites Dog</h2>
+ <img src="mbd.jpg">
+ <p>
+ Residents of the sleepy community of Nowheresville were
+ shocked yesterday by the sight of creepy local weirdo Jim
+ McOddman assaulting innocent local dog Snufflekins with his
+ false teeth.
+ </p>
+ ...
+ </div>
+ ...
+
+
+You then specify the tag "div", attribute "class", and value
+"ArticleWrapper". Everything else in the page, such as navigation
+panels and menus and footers and so on, will be discarded. If there
+is more than one section of the page you want to include, specify each
+one on a separate row. If the matching section contains some sections
+you want to remove, specify those in the "Exclude" section in the same
+way.
+
+
+Once you've got a configuration that you think will work, you can try
+it out on some existing articles. Type the number of articles to
+reprocess into the "Retrospectively Apply" box and click "Submit".
+After a while (exactly how long depends on your system's cron
+configuration) the updated articles should be available.
+
+Techniques
+
+You can leave the attribute and value blank to include all the
+corresponding elements with the specified tag name. You can also use
+a tag name of just an asterisk ("*"), which will match any element type with the
+specified attribute regardless of the tag.
+
+
+Note that the "class" attribute is a special case. Many web page
+templates will put multiple different classes in the same element,
+separated by spaces. If you specify an attribute of "class" it will
+match an element if any of its classes matches the specified value.
+For example:
+
+
+ <div class="article breaking-news">
+
+
+In this case you can specify a value of "article", or "breaking-news".
+You can also specify "article breaking-news", but that won't match if
+the website suddenly changes to "breaking-news article", so that's not
+recommended.
+
+
+One useful trick you can try is using the website's "print" pages.
+Many news sites have print versions of all their articles. These are
+usually drastically simplified compared to the live website page.
+Sometimes this is a good way to get the whole article when it's
+normally split across multiple pages.
+
+
+Hopefully the URL for the print page is a predictable variant of the
+normal article URL. For example, an article URL like:
+
+
+ http://www.newssite.com/article-8636.html
+
+
+...might have a print version at:
+
+
+ http://www.newssite.com/print/article-8636.html
+
+
+To change the URL used to retrieve the page, use the "URL Pattern" and
+"URL Replace" fields. The pattern is a regular expression matching the
+part of the URL to replace; write any "/" in the pattern as "\/". In
+this case, you might use a pattern of "\/article" and a replace string
+of "/print/article". A common pattern is simply a dollar sign ("$"), used to add the replace string to the end of the URL.
+
+Background Processing
+
+Note that retrieving and processing the articles can take some time,
+so it's done in the background. Incoming articles will be marked as
+invisible while they're in the process of being downloaded. If a URL
+fails, the plugin will keep trying at progressively longer intervals
+for up to a month, in case the website is temporarily overloaded or
+the network is down.
+
+Retrieving Images
+
+Retriever can also optionally download images and store them in the
+local Friendica instance. Just check the "Download Images" box. You
+can also download images in every item from your network, whether it's
+an RSS feed or not. Go to the "Settings" page and
+click "Plugin settings". Then check the "All
+Photos" box in the "Retriever Settings" section and click "Save Settings".
+
+Configure Feeds:
+
+{{foreach $feeds as $feed}}
+{{include file='contact_template.tpl' contact=$feed}}
+{{/foreach}}
+
diff --git a/retriever/templates/rule-config.tpl b/retriever/templates/rule-config.tpl
new file mode 100644
index 00000000..228d0326
--- /dev/null
+++ b/retriever/templates/rule-config.tpl
@@ -0,0 +1,112 @@
+
diff --git a/retriever/templates/settings.tpl b/retriever/templates/settings.tpl
new file mode 100644
index 00000000..8bfe8db0
--- /dev/null
+++ b/retriever/templates/settings.tpl
@@ -0,0 +1,9 @@
+
+
{{$title}}
+
+ Get Help
+
+{{include file="field_checkbox.tpl" field=$allphotos}}
+{{include file="field_checkbox.tpl" field=$oembed}}
+
+
From f453c15259e5fecf151a987ac84c58126653e793 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sat, 20 Jul 2019 10:44:38 +0100
Subject: [PATCH 02/94] Fixes for retriever
---
retriever/retriever.php | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 78fe575f..5f2b855a 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -512,7 +512,8 @@ function retriever_apply_xslt_text($xslt_text, $doc) {
function retriever_apply_dom_filter($retriever, &$item, $resource) {
logger('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'], LOGGER_DEBUG);
- if (!$retriever['data']['include'] && !$retriever['data']['customxslt']) {
+ if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) {
+ logger('retriever_apply_dom_filter: no include and no customxslt', LOGGER_INFO);
return;
}
if (!$resource['data']) {
@@ -564,8 +565,8 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
$item['body'] .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . ': [url=';
$item['body'] .= $item['plink'];
$item['body'] .= ']' . $item['plink'] . '[/url]';
- q("UPDATE `item` SET `body` = '%s' WHERE `id` = %d",
- DBA::escape($item['body']), intval($item['id']));
+ DBA::update('item', ['body' => $item['body']], ['id' => $item['id']]);
+ DBA::update('item-content', ['body' => $item['body']], ['uri' => $item['uri']]);
}
function retrieve_images(&$item, $a) {
@@ -642,9 +643,9 @@ function retriever_transform_images($a, &$item, $resource) {
logger('retriever_transform_images caught exception ' . $e->getMessage());
return;
}
- foreach ($photo as $k => $v)
- {
- logger('@@@ photo key ' . $k);
+ if (!array_key_exists('full', $photo)) {
+ logger('retriever_transform_images: no replacement URL for image ' . $resource['url']);
+ return;
}
$new_url = $photo['full'];
logger('retriever_transform_images: replacing ' . $resource['url'] . ' with ' .
From ae3fa6cea2d8e2a480958b68deaf2323d45d24ac Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sat, 20 Jul 2019 14:37:57 +0100
Subject: [PATCH 03/94] more fixes
---
retriever/retriever.php | 119 ++++++++++++++++++++--------------------
1 file changed, 60 insertions(+), 59 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 5f2b855a..18351f1e 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -10,6 +10,7 @@
use Friendica\Core\Addon;
use Friendica\Core\Config;
use Friendica\Core\PConfig;
+use Friendica\Core\Logger;
use Friendica\Content\Text\HTML;
use Friendica\Content\Text\BBCode;
use Friendica\Object\Image;
@@ -73,7 +74,7 @@ function retriever_install() {
if (Config::get('retriever', 'dbversion') == '0.7') {
$r = q("SELECT `id`, `data` FROM `retriever_rule`");
foreach ($r as $rr) {
- logger('retriever_install: retriever ' . $rr['id'] . ' old config ' . $rr['data'], LOGGER_DATA);
+ Logger::log('retriever_install: retriever ' . $rr['id'] . ' old config ' . $rr['data'], Logger::DATA);
$data = json_decode($rr['data'], true);
if ($data['pattern']) {
$matches = array();
@@ -122,7 +123,7 @@ function retriever_install() {
unset($data['remove']);
}
$r = q('UPDATE `retriever_rule` SET `data` = "%s" WHERE `id` = %d', DBA::escape(json_encode($data)), $rr['id']);
- logger('retriever_install: retriever ' . $rr['id'] . ' new config ' . json_encode($data), LOGGER_DATA);
+ Logger::log('retriever_install: retriever ' . $rr['id'] . ' new config ' . json_encode($data), Logger::DATA);
}
Config::set('retriever', 'dbversion', '0.8');
}
@@ -199,7 +200,7 @@ function retriever_retrieve_items($max_items, $a) {
}
$retrieve_items = $max_items - $retriever_item_count;
- logger('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items, LOGGER_DEBUG);
+ Logger::log('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items, Logger::DEBUG);
do {
$r = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d",
DBA::escape(implode($schedule_clauses, ' OR ')),
@@ -210,7 +211,7 @@ function retriever_retrieve_items($max_items, $a) {
if (count($r) == 0) {
break;
}
- logger('retriever_retrieve_items: found ' . count($r) . ' waiting resources in database', LOGGER_DEBUG);
+ Logger::log('retriever_retrieve_items: found ' . count($r) . ' waiting resources in database', Logger::DEBUG);
foreach ($r as $rr) {
retrieve_resource($rr);
$retriever_item_count++;
@@ -228,24 +229,24 @@ function retriever_retrieve_items($max_items, $a) {
if (!$r) {
$r = array();
}
- logger('retriever_retrieve_items: items waiting even though resource has completed: ' . count($r), LOGGER_DEBUG);
+ Logger::log('retriever_retrieve_items: items waiting even though resource has completed: ' . count($r), Logger::DEBUG);
foreach ($r as $rr) {
$resource = q("SELECT * FROM retriever_resource WHERE `id` = %d", $rr['resource']);
$retriever_item = retriever_get_retriever_item($rr['item']);
if (!$retriever_item) {
- logger('retriever_retrieve_items: no retriever item with id ' . $rr['item'], LOGGER_INFO);
+ Logger::log('retriever_retrieve_items: no retriever item with id ' . $rr['item'], Logger::INFO);
continue;
}
$item = retriever_get_item($retriever_item);
if (!$item) {
- logger('retriever_retrieve_items: no item ' . $retriever_item['item-uri'], LOGGER_INFO);
+ Logger::log('retriever_retrieve_items: no item ' . $retriever_item['item-uri'], Logger::INFO);
continue;
}
$retriever = get_retriever($item['contact-id'], $item['uid']);
if (!$retriever) {
- logger('retriever_retrieve_items: no retriever for item ' .
+ Logger::log('retriever_retrieve_items: no retriever for item ' .
$retriever_item['item-uri'] . ' ' . $retriever_item['uid'] . ' ' . $item['contact-id'],
- LOGGER_INFO);
+ Logger::INFO);
continue;
}
retriever_apply_completed_resource_to_item($retriever, $item, $resource[0], $a);
@@ -260,7 +261,7 @@ function retriever_tidy() {
q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)");
$r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null");
- logger('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource');
+ Logger::log('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource');
foreach ($r as $rr) {
q('DELETE FROM retriever_item WHERE id = %d', intval($rr['id']));
}
@@ -268,7 +269,7 @@ function retriever_tidy() {
function retrieve_dataurl_resource($resource) {
if (!preg_match("/date:(.*);base64,(.*)/", $resource['url'], $matches)) {
- logger('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern');
+ Logger::log('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern');
} else {
$resource['type'] = $matches[1];
$resource['data'] = base64url_decode($matches[2]);
@@ -290,21 +291,21 @@ function retrieve_resource($resource) {
$a = get_app();
try {
- logger('retrieve_resource: ' . ($resource['num-tries'] + 1) .
- ' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], LOGGER_DEBUG);
+ Logger::log('retrieve_resource: ' . ($resource['num-tries'] + 1) .
+ ' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], Logger::DEBUG);
$redirects;
$cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-');
$fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, array('cookiejar' => $cookiejar));
unlink($cookiejar);
- $resource['data'] = $fetch_result['body'];
- $resource['http-code'] = $a->get_curl_code();
- $resource['type'] = $a->get_curl_content_type();
- $resource['redirect-url'] = $fetch_result['redirect_url'];
- logger('retrieve_resource: got code ' . $resource['http-code'] .
+ $resource['data'] = $fetch_result->getBody();
+ $resource['http-code'] = $fetch_result->getReturnCode();
+ $resource['type'] = $fetch_result->getContentType();
+ $resource['redirect-url'] = $fetch_result->getRedirectUrl();
+ Logger::log('retrieve_resource: got code ' . $resource['http-code'] .
' retrieving resource ' . $resource['id'] .
- ' final url ' . $resource['redirect-url'], LOGGER_DEBUG);
+ ' final url ' . $resource['redirect-url'], Logger::DEBUG);
} catch (Exception $e) {
- logger('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage());
+ Logger::log('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage());
}
q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `http-code` = %d, `redirect-url` = '%s' WHERE id = %d",
intval($resource['http-code']),
@@ -338,7 +339,7 @@ function get_retriever($contact_id, $uid, $create = false) {
function retriever_get_retriever_item($id) {
$retriever_items = q("SELECT * FROM `retriever_item` WHERE id = %d", intval($id));
if (count($retriever_items) != 1) {
- logger('retriever_get_retriever_item: unable to find retriever_item ' . $id, LOGGER_INFO);
+ Logger::log('retriever_get_retriever_item: unable to find retriever_item ' . $id, Logger::INFO);
return;
}
return $retriever_items[0];
@@ -350,15 +351,15 @@ function retriever_get_item($retriever_item) {
intval($retriever_item['item-uid']),
intval($retriever_item['contact-id']));
if (count($items) != 1) {
- logger('retriever_get_item: unexpected number of results ' .
- count($items) . " when searching for item $uri $uid $cid", LOGGER_INFO);
+ Logger::log('retriever_get_item: unexpected number of results ' .
+ count($items) . " when searching for item $uri $uid $cid", Logger::INFO);
return;
}
return $items[0];
}
function retriever_item_completed($retriever_item_id, $resource, $a) {
- logger('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url'], LOGGER_DEBUG);
+ Logger::log('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url'], Logger::DEBUG);
$retriever_item = retriever_get_retriever_item($retriever_item_id);
if (!$retriever_item) {
@@ -379,7 +380,7 @@ function retriever_item_completed($retriever_item_id, $resource, $a) {
}
function retriever_resource_completed($resource, $a) {
- logger('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url'], LOGGER_DEBUG);
+ Logger::log('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url'], Logger::DEBUG);
$r = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource['id']);
foreach ($r as $rr) {
retriever_item_completed($rr['id'], $resource, $a);
@@ -398,7 +399,7 @@ function apply_retrospective($a, $retriever, $num) {
function retriever_on_item_insert($a, $retriever, &$item) {
if (!$retriever || !$retriever['id']) {
- logger('retriever_on_item_insert: No retriever supplied', LOGGER_INFO);
+ Logger::log('retriever_on_item_insert: No retriever supplied', Logger::INFO);
return;
}
if (!$retriever["data"]['enable'] == "on") {
@@ -406,7 +407,7 @@ function retriever_on_item_insert($a, $retriever, &$item) {
}
if (array_key_exists('pattern', $retriever["data"]) && $retriever["data"]['pattern']) {
$url = preg_replace('/' . $retriever["data"]['pattern'] . '/', $retriever["data"]['replace'], $item['plink']);
- logger('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url, LOGGER_DATA);
+ Logger::log('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url, Logger::DATA);
}
else {
$url = $item['plink'];
@@ -417,7 +418,7 @@ function retriever_on_item_insert($a, $retriever, &$item) {
}
function add_retriever_resource($a, $url, $binary = false) {
- logger('add_retriever_resource: ' . $url, LOGGER_DEBUG);
+ Logger::log('add_retriever_resource: ' . $url, Logger::DEBUG);
$scheme = parse_url($url, PHP_URL_SCHEME);
if ($scheme == 'data') {
@@ -431,11 +432,11 @@ function add_retriever_resource($a, $url, $binary = false) {
$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url));
$resource = $r[0];
if (count($r)) {
- logger('add_retriever_resource: Resource ' . $url . ' already requested', LOGGER_DEBUG);
+ Logger::log('add_retriever_resource: Resource ' . $url . ' already requested', Logger::DEBUG);
return $resource;
}
- logger('retrieve_resource: got data URL type ' . $resource['type'], LOGGER_DEBUG);
+ Logger::log('retrieve_resource: got data URL type ' . $resource['type'], Logger::DEBUG);
q("INSERT INTO `retriever_resource` (`type`, `binary`, `url`, `completed`, `data`) " .
"VALUES ('%s', %d, '%s', now(), '%s')",
DBA::escape($type),
@@ -451,12 +452,12 @@ function add_retriever_resource($a, $url, $binary = false) {
}
if (strlen($url) > 800) {
- logger('add_retriever_resource: URL is longer than 800 characters', LOGGER_INFO);
+ Logger::log('add_retriever_resource: URL is longer than 800 characters', Logger::INFO);
}
$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url));
if (count($r)) {
- logger('add_retriever_resource: Resource ' . $url . ' already requested', LOGGER_DEBUG);
+ Logger::log('add_retriever_resource: Resource ' . $url . ' already requested', Logger::DEBUG);
return $r[0];
}
@@ -467,7 +468,7 @@ function add_retriever_resource($a, $url, $binary = false) {
}
function add_retriever_item(&$item, $resource) {
- logger('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG);
+ Logger::log('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG);
q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " .
"VALUES ('%s', %d, %d, %d)",
@@ -476,12 +477,12 @@ function add_retriever_item(&$item, $resource) {
"`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d ORDER BY id DESC",
DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id']));
if (!count($r)) {
- logger("add_retriever_item: couldn't create retriever item for " .
+ Logger::log("add_retriever_item: couldn't create retriever item for " .
$item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'],
- LOGGER_INFO);
+ Logger::INFO);
return;
}
- logger('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG);
+ Logger::log('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG);
return $r[0]['id'];
}
@@ -495,12 +496,12 @@ function retriever_get_encoding($resource) {
function retriever_apply_xslt_text($xslt_text, $doc) {
if (!$xslt_text) {
- logger('retriever_apply_xslt_text: empty XSLT text', LOGGER_INFO);
+ Logger::log('retriever_apply_xslt_text: empty XSLT text', Logger::INFO);
return $doc;
}
$xslt_doc = new DOMDocument();
if (!$xslt_doc->loadXML($xslt_text)) {
- logger('retriever_apply_xslt_text: could not load XML', LOGGER_INFO);
+ Logger::log('retriever_apply_xslt_text: could not load XML', Logger::INFO);
return $doc;
}
$xp = new XsltProcessor();
@@ -510,14 +511,14 @@ function retriever_apply_xslt_text($xslt_text, $doc) {
}
function retriever_apply_dom_filter($retriever, &$item, $resource) {
- logger('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'], LOGGER_DEBUG);
+ Logger::log('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'], Logger::DEBUG);
if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) {
- logger('retriever_apply_dom_filter: no include and no customxslt', LOGGER_INFO);
+ Logger::log('retriever_apply_dom_filter: no include and no customxslt', Logger::INFO);
return;
}
if (!$resource['data']) {
- logger('retriever_apply_dom_filter: no text to work with', LOGGER_INFO);
+ Logger::log('retriever_apply_dom_filter: no text to work with', Logger::INFO);
return;
}
@@ -541,7 +542,7 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
$doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc);
}
if (!$doc) {
- logger('retriever_apply_dom_filter: failed to apply extract XSLT template', LOGGER_INFO);
+ Logger::log('retriever_apply_dom_filter: failed to apply extract XSLT template', Logger::INFO);
return;
}
@@ -553,13 +554,13 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
$fix_urls_xslt = replace_macros($fix_urls_template, $params);
$doc = retriever_apply_xslt_text($fix_urls_xslt, $doc);
if (!$doc) {
- logger('retriever_apply_dom_filter: failed to apply fix urls XSLT template', LOGGER_INFO);
+ Logger::log('retriever_apply_dom_filter: failed to apply fix urls XSLT template', Logger::INFO);
return;
}
$item['body'] = HTML::toBBCode($doc->saveHTML());
if (!strlen($item['body'])) {
- logger('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', LOGGER_INFO);
+ Logger::log('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', Logger::INFO);
return;
}
$item['body'] .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . ': [url=';
@@ -575,9 +576,9 @@ function retrieve_images(&$item, $a) {
$matches2 = array();
preg_match_all("/\[img\](.*?)\[\/img\]/ism", $item["body"], $matches2);
$matches = array_merge($matches1[3], $matches2[1]);
- logger('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG);
+ Logger::log('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG);
foreach ($matches as $url) {
- if (strpos($url, get_app()->get_baseurl()) === FALSE) {
+ if (strpos($url, get_app()->getBaseUrl()) === FALSE) {
$resource = add_retriever_resource($a, $url, true);
if (!$resource['completed']) {
add_retriever_item($item, $resource);
@@ -596,12 +597,12 @@ function retriever_check_item_completed(&$item)
DBA::escape($item['uri']), intval($item['uid']),
intval($item['contact-id']));
$waiting = $r[0]['count(*)'];
- logger('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid']
- . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources', LOGGER_DEBUG);
+ Logger::log('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid']
+ . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources', Logger::DEBUG);
$old_visible = $item['visible'];
$item['visible'] = $waiting ? 0 : 1;
if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) {
- logger('retriever_check_item_completed: changing visible flag to ' . $item['visible'] . ' and invoking notifier ("edit_post", ' . $item['id'] . ')', LOGGER_DEBUG);
+ Logger::log('retriever_check_item_completed: changing visible flag to ' . $item['visible'] . ' and invoking notifier ("edit_post", ' . $item['id'] . ')', Logger::DEBUG);
q("UPDATE `item` SET `visible` = %d WHERE `id` = %d",
intval($item['visible']),
intval($item['id']));
@@ -612,9 +613,9 @@ function retriever_check_item_completed(&$item)
}
function retriever_apply_completed_resource_to_item($retriever, &$item, $resource, $a) {
- logger('retriever_apply_completed_resource_to_item: retriever ' .
+ Logger::log('retriever_apply_completed_resource_to_item: retriever ' .
($retriever ? $retriever['id'] : 'none') .
- ' resource ' . $resource['url'] . ' plink ' . $item['plink'], LOGGER_DEBUG);
+ ' resource ' . $resource['url'] . ' plink ' . $item['plink'], Logger::DEBUG);
if (strpos($resource['type'], 'image') !== false) {
retriever_transform_images($a, $item, $resource);
}
@@ -632,24 +633,24 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc
function retriever_transform_images($a, &$item, $resource) {
if (!$resource["data"]) {
- logger('retriever_transform_images: no data available for '
- . $resource['id'] . ' ' . $resource['url'], LOGGER_INFO);
+ Logger::log('retriever_transform_images: no data available for '
+ . $resource['id'] . ' ' . $resource['url'], Logger::INFO);
return;
}
try {
$photo = Image::storePhoto($a, $item['uid'], $resource['data'], $resource['url']);
} catch (Exception $e) {
- logger('retriever_transform_images caught exception ' . $e->getMessage());
+ Logger::log('retriever_transform_images caught exception ' . $e->getMessage());
return;
}
if (!array_key_exists('full', $photo)) {
- logger('retriever_transform_images: no replacement URL for image ' . $resource['url']);
+ Logger::log('retriever_transform_images: no replacement URL for image ' . $resource['url']);
return;
}
$new_url = $photo['full'];
- logger('retriever_transform_images: replacing ' . $resource['url'] . ' with ' .
- $new_url . ' in item ' . $item['plink'], LOGGER_DEBUG);
+ Logger::log('retriever_transform_images: replacing ' . $resource['url'] . ' with ' .
+ $new_url . ' in item ' . $item['plink'], Logger::DEBUG);
$transformed = str_replace($resource["url"], $new_url, $item['body']);
if ($transformed === $item['body']) {
return;
@@ -672,7 +673,7 @@ function retriever_content($a) {
$feeds = q("SELECT `id`, `name`, `thumb` FROM contact WHERE `uid` = %d AND `network` = 'feed'",
local_user());
foreach ($feeds as $k=>$v) {
- $feeds[$k]['url'] = $a->get_baseurl() . '/retriever/' . $v['id'];
+ $feeds[$k]['url'] = $a->getBaseUrl() . '/retriever/' . $v['id'];
}
$template = get_markup_template('/help.tpl', 'addon/retriever/');
$a->page['content'] .= replace_macros($template, array(
@@ -776,7 +777,7 @@ function retriever_contact_photo_menu($a, &$args) {
}
function retriever_post_remote_hook(&$a, &$item) {
- logger('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG);
+ Logger::log('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG);
$retriever = get_retriever($item['contact-id'], $item["uid"], false);
if ($retriever) {
From 738d1ab5880698a82d0d330ffb785d12b74e6541 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sat, 20 Jul 2019 14:45:10 +0100
Subject: [PATCH 04/94] more fixes
---
retriever/retriever.php | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 18351f1e..3072a743 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -11,6 +11,7 @@ use Friendica\Core\Addon;
use Friendica\Core\Config;
use Friendica\Core\PConfig;
use Friendica\Core\Logger;
+use Friendica\Core\Renderer;
use Friendica\Content\Text\HTML;
use Friendica\Content\Text\BBCode;
use Friendica\Object\Image;
@@ -533,8 +534,8 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
}
$params = array('$spec' => $retriever['data']);
- $extract_template = get_markup_template('extract.tpl', 'addon/retriever/');
- $extract_xslt = replace_macros($extract_template, $params);
+ $extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/');
+ $extract_xslt = Renderer::replaceMacros($extract_template, $params);
if ($retriever['data']['include']) {
$doc = retriever_apply_xslt_text($extract_xslt, $doc);
}
@@ -550,8 +551,8 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
$rooturl = $components['scheme'] . "://" . $components['host'];
$dirurl = $rooturl . dirname($components['path']) . "/";
$params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl);
- $fix_urls_template = get_markup_template('fix-urls.tpl', 'addon/retriever/');
- $fix_urls_xslt = replace_macros($fix_urls_template, $params);
+ $fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/');
+ $fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params);
$doc = retriever_apply_xslt_text($fix_urls_xslt, $doc);
if (!$doc) {
Logger::log('retriever_apply_dom_filter: failed to apply fix urls XSLT template', Logger::INFO);
@@ -675,8 +676,8 @@ function retriever_content($a) {
foreach ($feeds as $k=>$v) {
$feeds[$k]['url'] = $a->getBaseUrl() . '/retriever/' . $v['id'];
}
- $template = get_markup_template('/help.tpl', 'addon/retriever/');
- $a->page['content'] .= replace_macros($template, array(
+ $template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/');
+ $a->page['content'] .= Renderer::replaceMacros($template, array(
'$config' => $a->get_baseurl() . '/settings/addon',
'$feeds' => $feeds));
return;
@@ -718,8 +719,8 @@ function retriever_content($a) {
$a->page['content'] .= ".
";
}
- $template = get_markup_template('/rule-config.tpl', 'addon/retriever/');
- $a->page['content'] .= replace_macros($template, array(
+ $template = Renderer::getMarkupTemplate('/rule-config.tpl', 'addon/retriever/');
+ $a->page['content'] .= Renderer::replaceMacros($template, array(
'$enable' => array(
'retriever_enable',
L10n::t('Enabled'),
@@ -801,8 +802,8 @@ function retriever_post_remote_hook(&$a, &$item) {
function retriever_plugin_settings(&$a,&$s) {
$all_photos = PConfig::get(local_user(), 'retriever', 'all_photos');
$oembed = PConfig::get(local_user(), 'retriever', 'oembed');
- $template = get_markup_template('/settings.tpl', 'addon/retriever/');
- $s .= replace_macros($template, array(
+ $template = Renderer::getMarkupTemplate('/settings.tpl', 'addon/retriever/');
+ $s .= Renderer::replaceMacros($template, array(
'$allphotos' => array(
'retriever_all_photos',
L10n::t('All Photos'),
From c9f4ad7405370a7655f77dc1c0dac3392cca9b77 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sun, 21 Jul 2019 18:27:14 +0100
Subject: [PATCH 05/94] now working retriever
---
retriever/retriever.php | 147 +++++++---------------------------------
1 file changed, 23 insertions(+), 124 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 3072a743..97f29694 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -18,6 +18,7 @@ use Friendica\Object\Image;
use Friendica\Util\Network;
use Friendica\Core\L10n;
use Friendica\Database\DBA;
+use Friendica\Model\ItemURI;
function retriever_install() {
Addon::registerHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings');
@@ -27,116 +28,6 @@ function retriever_install() {
Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron');
$r = q("SELECT `id` FROM `pconfig` WHERE `cat` LIKE 'retriever_%%'");
- if (count($r) || (Config::get('retriever', 'dbversion') == '0.1')) {
- $retrievers = array();
- $r = q("SELECT SUBSTRING(`cat`, 10) AS `contact`, `k`, `v` FROM `pconfig` WHERE `cat` LIKE 'retriever%%'");
- foreach ($r as $rr) {
- $retrievers[$rr['contact']][$rr['k']] = $rr['v'];
- }
- foreach ($retrievers as $k => $v) {
- $rr = q("SELECT `uid` FROM `contact` WHERE `id` = %d", intval($k));
- $uid = $rr[0]['uid'];
- $v['images'] = 'on';
- q("INSERT INTO `retriever_rule` (`uid`, `contact-id`, `data`) VALUES (%d, %d, '%s')",
- intval($uid), intval($k), DBA::escape(json_encode($v)));
- }
- q("DELETE FROM `pconfig` WHERE `cat` LIKE 'retriever_%%'");
- Config::set('retriever', 'dbversion', '0.2');
- }
- if (Config::get('retriever', 'dbversion') == '0.2') {
- q("ALTER TABLE `retriever_resource` DROP COLUMN `retriever`");
- Config::set('retriever', 'dbversion', '0.3');
- }
- if (Config::get('retriever', 'dbversion') == '0.3') {
- q("ALTER TABLE `retriever_item` MODIFY COLUMN `item-uri` varchar(800) CHARACTER SET ascii NOT NULL");
- q("ALTER TABLE `retriever_resource` MODIFY COLUMN `url` varchar(800) CHARACTER SET ascii NOT NULL");
- Config::set('retriever', 'dbversion', '0.4');
- }
- if (Config::get('retriever', 'dbversion') == '0.4') {
- q("ALTER TABLE `retriever_item` ADD COLUMN `finished` tinyint(1) unsigned NOT NULL DEFAULT '0'");
- Config::set('retriever', 'dbversion', '0.5');
- }
- if (Config::get('retriever', 'dbversion') == '0.5') {
- q('ALTER TABLE `retriever_resource` CHANGE `created` `created` timestamp NOT NULL DEFAULT now()');
- q('ALTER TABLE `retriever_resource` CHANGE `completed` `completed` timestamp NULL DEFAULT NULL');
- q('ALTER TABLE `retriever_resource` CHANGE `last-try` `last-try` timestamp NULL DEFAULT NULL');
- q('ALTER TABLE `retriever_item` DROP KEY `all`');
- q('ALTER TABLE `retriever_item` ADD KEY `all` (`item-uri`, `item-uid`, `contact-id`)');
- Config::set('retriever', 'dbversion', '0.6');
- }
- if (Config::get('retriever', 'dbversion') == '0.6') {
- q('ALTER TABLE `retriever_item` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin');
- q('ALTER TABLE `retriever_item` CHANGE `item-uri` `item-uri` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL');
- q('ALTER TABLE `retriever_resource` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin');
- q('ALTER TABLE `retriever_resource` CHANGE `url` `url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL');
- q('ALTER TABLE `retriever_rule` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin');
- Config::set('retriever', 'dbversion', '0.7');
- }
- if (Config::get('retriever', 'dbversion') == '0.7') {
- $r = q("SELECT `id`, `data` FROM `retriever_rule`");
- foreach ($r as $rr) {
- Logger::log('retriever_install: retriever ' . $rr['id'] . ' old config ' . $rr['data'], Logger::DATA);
- $data = json_decode($rr['data'], true);
- if ($data['pattern']) {
- $matches = array();
- if (preg_match("/\/(.*)\//", $data['pattern'], $matches)) {
- $data['pattern'] = $matches[1];
- }
- }
- if ($data['match']) {
- $include = array();
- foreach (explode('|', $data['match']) as $component) {
- $matches = array();
- if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[@([A-Za-z][a-z0-9]*)='([^']*)'\]/", $component, $matches)) {
- $include[] = array(
- 'element' => $matches[1],
- 'attribute' => $matches[2],
- 'value' => $matches[3]);
- }
- if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[contains(concat(' ',normalize-space(@class),' '),' ([^ ']+) ')]/", $component, $matches)) {
- $include[] = array(
- 'element' => $matches[1],
- 'attribute' => $matches[2],
- 'value' => $matches[3]);
- }
- }
- $data['include'] = $include;
- unset($data['match']);
- }
- if ($data['remove']) {
- $exclude = array();
- foreach (explode('|', $data['remove']) as $component) {
- $matches = array();
- if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[@([A-Za-z][a-z0-9]*)='([^']*)'\]/", $component, $matches)) {
- $exclude[] = array(
- 'element' => $matches[1],
- 'attribute' => $matches[2],
- 'value' => $matches[3]);
- }
- if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[contains(concat(' ',normalize-space(@class),' '),' ([^ ']+) ')]/", $component, $matches)) {
- $exclude[] = array(
- 'element' => $matches[1],
- 'attribute' => $matches[2],
- 'value' => $matches[3]);
- }
- }
- $data['exclude'] = $exclude;
- unset($data['remove']);
- }
- $r = q('UPDATE `retriever_rule` SET `data` = "%s" WHERE `id` = %d', DBA::escape(json_encode($data)), $rr['id']);
- Logger::log('retriever_install: retriever ' . $rr['id'] . ' new config ' . json_encode($data), Logger::DATA);
- }
- Config::set('retriever', 'dbversion', '0.8');
- }
- if (Config::get('retriever', 'dbversion') == '0.8') {
- q("ALTER TABLE `retriever_resource` ADD COLUMN `http-code` smallint(1) unsigned NULL DEFAULT NULL");
- Config::set('retriever', 'dbversion', '0.9');
- }
- if (Config::get('retriever', 'dbversion') == '0.9') {
- q("ALTER TABLE `retriever_item` DROP COLUMN `parent`");
- q("ALTER TABLE `retriever_resource` ADD COLUMN `redirect-url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NULL DEFAULT NULL");
- Config::set('retriever', 'dbversion', '0.10');
- }
if (Config::get('retriever', 'dbversion') == '0.10') {
q("ALTER TABLE `retriever_resource` MODIFY COLUMN `type` char(255) NULL DEFAULT NULL");
q("ALTER TABLE `retriever_resource` MODIFY COLUMN `data` mediumblob NULL DEFAULT NULL");
@@ -347,6 +238,7 @@ function retriever_get_retriever_item($id) {
}
function retriever_get_item($retriever_item) {
+ // @@@ Need to replace this with Item::selectFirst
$items = q("SELECT * FROM `item` WHERE `uri` = '%s' AND `uid` = %d AND `contact-id` = %d",
DBA::escape($retriever_item['item-uri']),
intval($retriever_item['item-uid']),
@@ -537,9 +429,11 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
$extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/');
$extract_xslt = Renderer::replaceMacros($extract_template, $params);
if ($retriever['data']['include']) {
+ Logger::log('retriever_apply_dom_filter: applying include/exclude template \"' . $extract_xslt . '\"', Logger::DEBUG);
$doc = retriever_apply_xslt_text($extract_xslt, $doc);
}
if (array_key_exists('customxslt', $retriever['data']) && $retriever['data']['customxslt']) {
+ Logger::log('retriever_apply_dom_filter: applying custom XSLT \"' . $retriever['data']['customxslt'] . '\"', Logger::DEBUG);
$doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc);
}
if (!$doc) {
@@ -559,16 +453,21 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
return;
}
- $item['body'] = HTML::toBBCode($doc->saveHTML());
- if (!strlen($item['body'])) {
+ $body = HTML::toBBCode($doc->saveHTML());
+ if (!strlen($body)) {
Logger::log('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', Logger::INFO);
return;
}
- $item['body'] .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . ': [url=';
- $item['body'] .= $item['plink'];
- $item['body'] .= ']' . $item['plink'] . '[/url]';
- DBA::update('item', ['body' => $item['body']], ['id' => $item['id']]);
- DBA::update('item-content', ['body' => $item['body']], ['uri' => $item['uri']]);
+ $body .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . ': [url=';
+ $body .= $item['plink'];
+ $body .= ']' . $item['plink'] . '[/url]';
+
+ $uri_id = ItemURI::getIdByURI($item['uri']);
+ //@@@ remove this
+ $item['body'] = $body;
+ Logger::log('retriever_apply_dom_filter: XSLT result \"' . $body . '\"', Logger::DATA);
+ DBA::update('item', ['body' => $body], ['id' => $item['id']]);
+ DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]);
}
function retrieve_images(&$item, $a) {
@@ -678,18 +577,18 @@ function retriever_content($a) {
}
$template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/');
$a->page['content'] .= Renderer::replaceMacros($template, array(
- '$config' => $a->get_baseurl() . '/settings/addon',
+ '$config' => $a->getBaseUrl() . '/settings/addon',
'$feeds' => $feeds));
return;
}
if ($a->argv[1]) {
$retriever = get_retriever($a->argv[1], local_user(), false);
- if (x($_POST["id"])) {
+ if (!empty($_POST["id"])) {
$retriever = get_retriever($a->argv[1], local_user(), true);
$retriever["data"] = array();
foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt') as $setting) {
- if (x($_POST['retriever_' . $setting])) {
+ if (!empty($_POST['retriever_' . $setting])) {
$retriever["data"][$setting] = $_POST['retriever_' . $setting];
}
}
@@ -712,7 +611,7 @@ function retriever_content($a) {
q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d",
DBA::escape(json_encode($retriever["data"])), intval($retriever["id"]));
$a->page['content'] .= "Settings Updated";
- if (x($_POST["retriever_retrospective"])) {
+ if (!empty($_POST["retriever_retrospective"])) {
apply_retrospective($a, $retriever, $_POST["retriever_retrospective"]);
$a->page['content'] .= " and retrospectively applied to " . $_POST["apply"] . " posts";
}
@@ -750,7 +649,7 @@ function retriever_content($a) {
$retriever['data']['customxslt'],
L10n::t("When standard rules aren't enough, apply custom XSLT to the article")),
'$title' => L10n::t('Retrieve Feed Content'),
- '$help' => $a->get_baseurl() . '/retriever/help',
+ '$help' => $a->getBaseUrl() . '/retriever/help',
'$help_t' => L10n::t('Get Help'),
'$submit_t' => L10n::t('Submit'),
'$submit' => L10n::t('Save Settings'),
@@ -773,7 +672,7 @@ function retriever_contact_photo_menu($a, &$args) {
return;
}
if ($args["contact"]["network"] == "feed") {
- $args["menu"][ 'retriever' ] = array(L10n::t('Retriever'), $a->get_baseurl() . '/retriever/' . $args["contact"]['id']);
+ $args["menu"][ 'retriever' ] = array(L10n::t('Retriever'), $a->getBaseUrl() . '/retriever/' . $args["contact"]['id']);
}
}
@@ -816,7 +715,7 @@ function retriever_plugin_settings(&$a,&$s) {
L10n::t('Check this to attempt to retrieve embedded content for all posts - useful e.g. for Facebook posts')),
'$submit' => L10n::t('Save Settings'),
'$title' => L10n::t('Retriever Settings'),
- '$help' => $a->get_baseurl() . '/retriever/help'));
+ '$help' => $a->getBaseUrl() . '/retriever/help'));
}
function retriever_plugin_settings_post($a,$post) {
From 8b6a9c017ad13496a4adcf926fff0bc3cc9907ba Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sun, 21 Jul 2019 18:27:55 +0100
Subject: [PATCH 06/94] beginnings of persistent cookiejar support
---
retriever/retriever.php | 5 +++++
retriever/templates/rule-config.tpl | 1 +
2 files changed, 6 insertions(+)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 97f29694..78a79a0e 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -643,6 +643,11 @@ function retriever_content($a) {
L10n::t('Retrospectively Apply'),
'0',
L10n::t('Reapply the rules to this number of posts')),
+ '$cookies' => array(
+ 'retriever_cookies',
+ L10n::t('Cookies'),
+ $retriever['data']['cookies'],
+ L10n::t("Persistent cookies for this feed. Netscape cookie file format.")),
'$customxslt' => array(
'retriever_customxslt',
L10n::t('Custom XSLT'),
diff --git a/retriever/templates/rule-config.tpl b/retriever/templates/rule-config.tpl
index 228d0326..847d9c3f 100644
--- a/retriever/templates/rule-config.tpl
+++ b/retriever/templates/rule-config.tpl
@@ -106,6 +106,7 @@ function retriever_remove_row(id, number)
+{{include file="field_textarea.tpl" field=$cookies}}
{{include file="field_textarea.tpl" field=$customxslt}}
From eb61f8f09acb85f84d00669e92516c824303d309 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sun, 21 Jul 2019 19:32:30 +0100
Subject: [PATCH 07/94] More preparation for persistent cookies
---
retriever/retriever.php | 17 +++++++++++------
retriever/templates/rule-config.tpl | 19 ++++++++++++++++++-
2 files changed, 29 insertions(+), 7 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 78a79a0e..bb3460a1 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -587,7 +587,7 @@ function retriever_content($a) {
if (!empty($_POST["id"])) {
$retriever = get_retriever($a->argv[1], local_user(), true);
$retriever["data"] = array();
- foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt') as $setting) {
+ foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) {
if (!empty($_POST['retriever_' . $setting])) {
$retriever["data"][$setting] = $_POST['retriever_' . $setting];
}
@@ -643,11 +643,16 @@ function retriever_content($a) {
L10n::t('Retrospectively Apply'),
'0',
L10n::t('Reapply the rules to this number of posts')),
- '$cookies' => array(
- 'retriever_cookies',
- L10n::t('Cookies'),
- $retriever['data']['cookies'],
- L10n::t("Persistent cookies for this feed. Netscape cookie file format.")),
+ 'storecookies' => array(
+ 'retriever_storecookies',
+ L10n::t('Store cookies'),
+ $retriever['data']['storecookies'],
+ L10n::t("Preserve cookie data across fetches.")),
+ '$cookiedata' => array(
+ 'retriever_cookiedata',
+ L10n::t('Cookie Data'),
+ $retriever['data']['cookiedata'],
+ L10n::t("Latest cookie data for this feed. Netscape cookie file format.")),
'$customxslt' => array(
'retriever_customxslt',
L10n::t('Custom XSLT'),
diff --git a/retriever/templates/rule-config.tpl b/retriever/templates/rule-config.tpl
index 847d9c3f..9061d1ff 100644
--- a/retriever/templates/rule-config.tpl
+++ b/retriever/templates/rule-config.tpl
@@ -40,6 +40,22 @@ function retriever_remove_row(id, number)
var row = document.getElementById(id + '-' + number);
tbody.removeChild(row);
}
+
+function retriever_toggle_cookiedata_block()
+{
+ var div = document.querySelector("#id_retriever_cookiedata").parentNode;
+ if (document.querySelector("#id_retriever_storecookies").checked) {
+ div.style.display = "block";
+ }
+ else {
+ div.style.display = "none";
+ }
+}
+
+document.addEventListener('DOMContentLoaded', function() {
+ retriever_toggle_cookiedata_block();
+ document.querySelector("#id_retriever_storecookies").addEventListener('change', retriever_toggle_cookiedata_block, false);
+}, false);
{{$title}}
{{$help_t}}
@@ -106,8 +122,9 @@ function retriever_remove_row(id, number)
-{{include file="field_textarea.tpl" field=$cookies}}
{{include file="field_textarea.tpl" field=$customxslt}}
+{{include file="field_checkbox.tpl" field=$storecookies}}
+{{include file="field_textarea.tpl" field=$cookiedata}}
From df7ea6c3755fca9d1db13151f7608f116c50e6fa Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Wed, 24 Jul 2019 06:48:23 +0100
Subject: [PATCH 08/94] tentative database work
---
retriever/database.sql | 1 +
retriever/retriever.php | 4 ++++
2 files changed, 5 insertions(+)
diff --git a/retriever/database.sql b/retriever/database.sql
index 340e33eb..2a0db966 100644
--- a/retriever/database.sql
+++ b/retriever/database.sql
@@ -24,6 +24,7 @@ CREATE TABLE IF NOT EXISTS `retriever_item` (
CREATE TABLE IF NOT EXISTS `retriever_resource` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
+ `contact-id` int(10) unsigned NOT NULL DEFAULT '0',
`type` char(255) NULL DEFAULT NULL,
`binary` int(1) NOT NULL DEFAULT 0,
`url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL,
diff --git a/retriever/retriever.php b/retriever/retriever.php
index bb3460a1..adf9681e 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -41,6 +41,10 @@ function retriever_install() {
q("ALTER TABLE `retriever_item` ADD INDEX `item-uid` (`item-uid`)");
Config::set('retriever', 'dbversion', '0.12');
}
+ /* if (Config::get('retriever', 'dbversion') == '0.12') { */
+ /* q("ALTER TABLE `retriever_resource` ADD COLUMN `contact-id` int(10) unsigned NULL AFTER `id`"); */
+ /* Config::set('retriever', 'dbversion', '0.13'); */
+ /* } */
if (Config::get('retriever', 'dbversion') != '0.12') {
$schema = file_get_contents(dirname(__file__).'/database.sql');
$arr = explode(';', $schema);
From 034ed5fcd665bb3c784a247136cbef56c08e4955 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Fri, 26 Jul 2019 05:49:53 +0100
Subject: [PATCH 09/94] fix
---
retriever/retriever.php | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index adf9681e..65471be9 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -189,9 +189,9 @@ function retrieve_resource($resource) {
try {
Logger::log('retrieve_resource: ' . ($resource['num-tries'] + 1) .
' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], Logger::DEBUG);
- $redirects;
+ $redirects = 0;
$cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-');
- $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, array('cookiejar' => $cookiejar));
+ $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar);
unlink($cookiejar);
$resource['data'] = $fetch_result->getBody();
$resource['http-code'] = $fetch_result->getReturnCode();
From 10f7be958b59ce1a958adc69de4ce1f186384779 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sun, 15 Sep 2019 09:26:25 +0100
Subject: [PATCH 10/94] fixed a bug and commented on another
---
retriever/retriever.php | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 65471be9..c70f906e 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -367,6 +367,15 @@ function add_retriever_resource($a, $url, $binary = false) {
function add_retriever_item(&$item, $resource) {
Logger::log('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG);
+ $r = q("SELECT COUNT(*) FROM `retriever_item` WHERE " .
+ "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d",
+ DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id']));
+ if ($r[0]['COUNT(*)'] > 0) {
+ Logger::log("add_retriever_item: retriever item already present for " .
+ $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'],
+ Logger::INFO);
+ return;
+ }
q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " .
"VALUES ('%s', %d, %d, %d)",
DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource["id"]));
@@ -536,6 +545,7 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc
}
function retriever_transform_images($a, &$item, $resource) {
+ return; //@@@ not working
if (!$resource["data"]) {
Logger::log('retriever_transform_images: no data available for '
. $resource['id'] . ' ' . $resource['url'], Logger::INFO);
From 61e925630dce4eee5e6616f05a01a3df24535122 Mon Sep 17 00:00:00 2001
From: Administrator
Date: Sun, 22 Sep 2019 11:47:30 +0200
Subject: [PATCH 11/94] this is working OK
---
retriever/database.sql | 1 +
retriever/retriever.php | 387 +++++++++++++++++++++++++---------------
2 files changed, 247 insertions(+), 141 deletions(-)
diff --git a/retriever/database.sql b/retriever/database.sql
index 2a0db966..a29135e7 100644
--- a/retriever/database.sql
+++ b/retriever/database.sql
@@ -24,6 +24,7 @@ CREATE TABLE IF NOT EXISTS `retriever_item` (
CREATE TABLE IF NOT EXISTS `retriever_resource` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
+ `item-uid` int(10) unsigned NOT NULL DEFAULT '0',
`contact-id` int(10) unsigned NOT NULL DEFAULT '0',
`type` char(255) NULL DEFAULT NULL,
`binary` int(1) NOT NULL DEFAULT 0,
diff --git a/retriever/retriever.php b/retriever/retriever.php
index c70f906e..5644952a 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -12,13 +12,16 @@ use Friendica\Core\Config;
use Friendica\Core\PConfig;
use Friendica\Core\Logger;
use Friendica\Core\Renderer;
+use Friendica\Core\System;
use Friendica\Content\Text\HTML;
use Friendica\Content\Text\BBCode;
+use Friendica\Model\Photo;
use Friendica\Object\Image;
use Friendica\Util\Network;
use Friendica\Core\L10n;
use Friendica\Database\DBA;
use Friendica\Model\ItemURI;
+use Friendica\Model\Item;
function retriever_install() {
Addon::registerHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings');
@@ -41,17 +44,18 @@ function retriever_install() {
q("ALTER TABLE `retriever_item` ADD INDEX `item-uid` (`item-uid`)");
Config::set('retriever', 'dbversion', '0.12');
}
- /* if (Config::get('retriever', 'dbversion') == '0.12') { */
- /* q("ALTER TABLE `retriever_resource` ADD COLUMN `contact-id` int(10) unsigned NULL AFTER `id`"); */
- /* Config::set('retriever', 'dbversion', '0.13'); */
- /* } */
- if (Config::get('retriever', 'dbversion') != '0.12') {
+ if (Config::get('retriever', 'dbversion') == '0.12') {
+ q("ALTER TABLE `retriever_resource` ADD COLUMN `contact-id` int(10) unsigned NOT NULL DEFAULT '0' AFTER `id`");
+ q("ALTER TABLE `retriever_resource` ADD COLUMN `item-uid` int(10) unsigned NOT NULL DEFAULT '0' AFTER `id`");
+ Config::set('retriever', 'dbversion', '0.13');
+ }
+ if (Config::get('retriever', 'dbversion') != '0.13') {
$schema = file_get_contents(dirname(__file__).'/database.sql');
$arr = explode(';', $schema);
foreach ($arr as $a) {
$r = q($a);
}
- Config::set('retriever', 'dbversion', '0.12');
+ Config::set('retriever', 'dbversion', '0.13');
}
}
@@ -68,7 +72,11 @@ function retriever_uninstall() {
function retriever_module() {}
function retriever_cron($a, $b) {
- // 100 is a nice sane number. Maybe this should be configurable.
+ // 100 is a nice sane number. Maybe this should be configurable. @@@
+
+ // Do this first, otherwise it can interfere with retreiver_retrieve_items
+ retriever_clean_up_completed_resources(100, $a);
+
retriever_retrieve_items(100, $a);
retriever_tidy();
}
@@ -76,6 +84,7 @@ function retriever_cron($a, $b) {
$retriever_item_count = 0;
function retriever_retrieve_items($max_items, $a) {
+ Logger::log('@@@ retriever_retrieve_items', Logger::INFO);
global $retriever_item_count;
$retriever_schedule = array(array(1,'minute'),
@@ -98,56 +107,61 @@ function retriever_retrieve_items($max_items, $a) {
$retrieve_items = $max_items - $retriever_item_count;
Logger::log('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items, Logger::DEBUG);
do {
- $r = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d",
+ Logger::log('@@@ retriever_retrieve_items loop max ' . $max_items . ' count ' . $retriever_item_count, Logger::INFO);
+ Logger::log("@@@ SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR " . implode($schedule_clauses, ' OR ') . ") ORDER BY `last-try` ASC LIMIT " . $retrieve_items, Logger::INFO);
+ $retriever_resources = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d",
DBA::escape(implode($schedule_clauses, ' OR ')),
intval($retrieve_items));
- if (!is_array($r)) {
+ if (!is_array($retriever_resources)) {
break;
}
- if (count($r) == 0) {
+ if (count($retriever_resources) == 0) {
break;
}
- Logger::log('retriever_retrieve_items: found ' . count($r) . ' waiting resources in database', Logger::DEBUG);
- foreach ($r as $rr) {
- retrieve_resource($rr);
+ Logger::log('retriever_retrieve_items: found ' . count($retriever_resources) . ' waiting resources in database', Logger::DEBUG);
+ foreach ($retriever_resources as $retriever_resource) {
+ Logger::log('@@@ need to get the retriever config here cid ' . $retriever_resource['contact-id'] . ' uid ' . $retriever_resource['item-uid'], Logger::INFO);
+ retrieve_resource($retriever_resource);
$retriever_item_count++;
}
$retrieve_items = $max_items - $retriever_item_count;
}
while ($retrieve_items > 0);
+ // @@@ todo: when items add further items (i.e. images), do the new images go round this loop again?
+ Logger::log('@@@ retriever_retrieve_items: finished retrieving items', Logger::INFO);
+}
- /* Look for items that are waiting even though the resource has
- * completed. This usually happens because we've been asked to
- * retrospectively apply a config change. It could also happen
- * due to a cron job dying or something. */
+/* Look for items that are waiting even though the resource has
+ * completed. This usually happens because we've been asked to
+ * retrospectively apply a config change. It could also happen due to
+ * a cron job dying or something. */
+function retriever_clean_up_completed_resources($max_items, $a) {
$r = q("SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d",
- intval($retrieve_items));
+ intval($max_items));
if (!$r) {
$r = array();
}
- Logger::log('retriever_retrieve_items: items waiting even though resource has completed: ' . count($r), Logger::DEBUG);
+ Logger::log('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . count($r), Logger::DEBUG);
foreach ($r as $rr) {
$resource = q("SELECT * FROM retriever_resource WHERE `id` = %d", $rr['resource']);
$retriever_item = retriever_get_retriever_item($rr['item']);
- if (!$retriever_item) {
- Logger::log('retriever_retrieve_items: no retriever item with id ' . $rr['item'], Logger::INFO);
+ if (!DBA::isResult($retriever_item)) {
+ Logger::log('retriever_clean_up_completed_resources: no retriever item with id ' . $rr['item'], Logger::WARNING);
continue;
}
$item = retriever_get_item($retriever_item);
if (!$item) {
- Logger::log('retriever_retrieve_items: no item ' . $retriever_item['item-uri'], Logger::INFO);
+ Logger::log('retriever_clean_up_completed_resources: no item ' . $retriever_item['item-uri'], Logger::WARNING);
continue;
}
- $retriever = get_retriever($item['contact-id'], $item['uid']);
- if (!$retriever) {
- Logger::log('retriever_retrieve_items: no retriever for item ' .
- $retriever_item['item-uri'] . ' ' . $retriever_item['uid'] . ' ' . $item['contact-id'],
- Logger::INFO);
+ $retriever_rule = get_retriever_rule($retriever_item['contact-id'], $item['uid']);
+ if (!$retriever_rule) {
+ Logger::log('retriever_clean_up_completed_resources: no retriever for uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['uid'] . ' ' . $retriever_item['contact-id'], Logger::WARNING);
continue;
}
- retriever_apply_completed_resource_to_item($retriever, $item, $resource[0], $a);
- q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d",
- intval($retriever_item['id']));
+ Logger::log('@@@ retriever_clean_up_completed_resources: about to retriever_apply_completed_resource_to_item', Logger::INFO);
+ retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource[0], $a);
+ q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", intval($retriever_item['id']));
retriever_check_item_completed($item);
}
}
@@ -157,7 +171,7 @@ function retriever_tidy() {
q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)");
$r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null");
- Logger::log('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource');
+ Logger::log('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource', Logger::INFO);
foreach ($r as $rr) {
q('DELETE FROM retriever_item WHERE id = %d', intval($rr['id']));
}
@@ -165,7 +179,7 @@ function retriever_tidy() {
function retrieve_dataurl_resource($resource) {
if (!preg_match("/date:(.*);base64,(.*)/", $resource['url'], $matches)) {
- Logger::log('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern');
+ Logger::log('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern', Logger::INFO);
} else {
$resource['type'] = $matches[1];
$resource['data'] = base64url_decode($matches[2]);
@@ -180,28 +194,36 @@ function retrieve_dataurl_resource($resource) {
}
function retrieve_resource($resource) {
+ Logger::log('@@@ retrieve_resource: url ' . $resource['url'] . ' uid ' . $resource['item-uid'] . ' cid ' . $resource['contact-id'], Logger::INFO);
+
if (substr($resource['url'], 0, 5) == "data:") {
return retrieve_dataurl_resource($resource);
}
$a = get_app();
+ $retriever_rule = get_retriever_rule($resource['contact-id'], $resource['item-uid']);
+
try {
- Logger::log('retrieve_resource: ' . ($resource['num-tries'] + 1) .
- ' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], Logger::DEBUG);
+ Logger::log('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], Logger::DEBUG);
$redirects = 0;
$cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-');
+ if ($retriever_rule['storecookies']) {
+ file_put_contents($cookiejar, $retriever_rule['cookiedata']);
+ }
$fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar);
+ if ($retriever_rule['storecookies']) {
+ $retriever_rule['cookiedata'] = file_get_contents($cookiejar);
+ //@@@ do the store here
+ }
unlink($cookiejar);
$resource['data'] = $fetch_result->getBody();
$resource['http-code'] = $fetch_result->getReturnCode();
$resource['type'] = $fetch_result->getContentType();
$resource['redirect-url'] = $fetch_result->getRedirectUrl();
- Logger::log('retrieve_resource: got code ' . $resource['http-code'] .
- ' retrieving resource ' . $resource['id'] .
- ' final url ' . $resource['redirect-url'], Logger::DEBUG);
+ Logger::log('retrieve_resource: got code ' . $resource['http-code'] . ' retrieving resource ' . $resource['id'] . ' final url ' . $resource['redirect-url'], Logger::DEBUG);
} catch (Exception $e) {
- Logger::log('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage());
+ Logger::log('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage(), Logger::INFO);
}
q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `http-code` = %d, `redirect-url` = '%s' WHERE id = %d",
intval($resource['http-code']),
@@ -214,13 +236,17 @@ function retrieve_resource($resource) {
intval($resource['id']));
retriever_resource_completed($resource, $a);
}
+ Logger::log('@@@ retrieve_resource finished: ' . $resource['url'], Logger::INFO);
}
-function get_retriever($contact_id, $uid, $create = false) {
+function get_retriever_rule($contact_id, $uid, $create = false) {
+ Logger::log('@@@ get_retriever_rule ' . "SELECT * FROM `retriever_rule` WHERE `contact-id` = " . intval($contact_id) . " AND `uid` = " . intval($uid), Logger::INFO);
$r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d",
intval($contact_id), intval($uid));
+ Logger::log('@@@ get_retriever_rule count is ' . count($r), Logger::INFO);
if (count($r)) {
$r[0]['data'] = json_decode($r[0]['data'], true);
+ Logger::log('@@@ get_retriever_rule returning an actual thing', Logger::INFO);
return $r[0];
}
if ($create) {
@@ -233,43 +259,62 @@ function get_retriever($contact_id, $uid, $create = false) {
}
function retriever_get_retriever_item($id) {
- $retriever_items = q("SELECT * FROM `retriever_item` WHERE id = %d", intval($id));
- if (count($retriever_items) != 1) {
- Logger::log('retriever_get_retriever_item: unable to find retriever_item ' . $id, Logger::INFO);
- return;
+ return DBA::selectFirst('retriever_item', [], ['id' => intval($id)]);
+}
+
+function retriever_class_of_item($item) { //@@@
+ if (!$item) {
+ return 'false';
}
- return $retriever_items[0];
+ if (array_key_exists('finished', $item)) {
+ Logger::log('@@@ oh no this is a bad thing', Logger::INFO);
+ return 'retriever_item';
+ }
+ if (array_key_exists('moderated', $item)) {
+ return 'friendica_item';
+ }
+ return 'unknown';
+}
+
+function mat_test($item) { //@@@
+ return 'mat_test';
}
function retriever_get_item($retriever_item) {
- // @@@ Need to replace this with Item::selectFirst
- $items = q("SELECT * FROM `item` WHERE `uri` = '%s' AND `uid` = %d AND `contact-id` = %d",
- DBA::escape($retriever_item['item-uri']),
- intval($retriever_item['item-uid']),
- intval($retriever_item['contact-id']));
- if (count($items) != 1) {
- Logger::log('retriever_get_item: unexpected number of results ' .
- count($items) . " when searching for item $uri $uid $cid", Logger::INFO);
- return;
+ // @@@ add contact id as a search term
+ Logger::log('@@@ retriever_get_item uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['item-uid'] . ' cid ' . $retriever_item['contact-id'], Logger::INFO);
+ try {//@@@ not necessary
+ $item = Item::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid'])]);
+ Logger::log('@@@ 1 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
+ if (!DBA::isResult($item)) {
+ Logger::log('retriever_get_item: no item found for uri ' . $retriever_item['item-uri'], Logger::INFO);
+ return;
+ }
+ Logger::log('@@@ retriever_get_item: yay item found for uri ' . $retriever_item['item-uri'] . ' guid ' . $item['guid'] . ' plink ' . $item['plink'], Logger::INFO);
+ return $item;
+ } catch (Exception $e) {
+ Logger::log('retriever_get_item: exception ' . $e->getMessage(), Logger::INFO);
}
- return $items[0];
}
function retriever_item_completed($retriever_item_id, $resource, $a) {
Logger::log('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url'], Logger::DEBUG);
$retriever_item = retriever_get_retriever_item($retriever_item_id);
- if (!$retriever_item) {
+ if (!DBA::isResult($retriever_item)) {
+ Logger::log('retriever_item_completed: no retriever item with id ' . $retriever_item_id, Logger::INFO);
+ return;
+ }
+ $item = retriever_get_item($retriever_item);
+ Logger::log('@@@ 2 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
+ if (!$item) {
+ Logger::log('retriever_item_completed: no item ' . $retriever_item['item-uri'], Logger::INFO);
return;
}
// Note: the retriever might be null. Doesn't matter.
- $retriever = get_retriever($retriever_item['contact-id'], $retriever_item['item-uid']);
- $item = retriever_get_item($retriever_item);
- if (!$item) {
- return;
- }
+ $retriever_rule = get_retriever_rule($retriever_item['contact-id'], $retriever_item['item-uid']);
- retriever_apply_completed_resource_to_item($retriever, $item, $resource, $a);
+ retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a);
q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d",
intval($retriever_item['id']));
@@ -288,18 +333,24 @@ function apply_retrospective($a, $retriever, $num) {
$r = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d",
intval($retriever['contact-id']), intval($num));
foreach ($r as $item) {
+ Logger::log('@@@ 3 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); //@@@ already know this is wrong
q('UPDATE `item` SET `visible` = 0 WHERE `id` = %d', $item['id']);
q('UPDATE `thread` SET `visible` = 0 WHERE `iid` = %d', $item['id']);
retriever_on_item_insert($a, $retriever, $item);
}
}
+//@@@ make this trigger a retriever immediately somehow
+//@@@ need a lock to say something is doing something
function retriever_on_item_insert($a, $retriever, &$item) {
+ Logger::log('@@@ 4 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
+ Logger::log('@@@ retriever_on_item_insert start ' . $item['plink'], Logger::INFO);
if (!$retriever || !$retriever['id']) {
Logger::log('retriever_on_item_insert: No retriever supplied', Logger::INFO);
return;
}
if (!$retriever["data"]['enable'] == "on") {
+ Logger::log('@@@ retriever_on_item_insert: Disabled', Logger::INFO);
return;
}
if (array_key_exists('pattern', $retriever["data"]) && $retriever["data"]['pattern']) {
@@ -310,12 +361,13 @@ function retriever_on_item_insert($a, $retriever, &$item) {
$url = $item['plink'];
}
- $resource = add_retriever_resource($a, $url);
+ Logger::log('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id'], Logger::DEBUG);
+ $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id']);
$retriever_item_id = add_retriever_item($item, $resource);
}
-function add_retriever_resource($a, $url, $binary = false) {
- Logger::log('add_retriever_resource: ' . $url, Logger::DEBUG);
+function add_retriever_resource($a, $url, $uid, $cid, $binary = false) {
+ Logger::log('add_retriever_resource: url ' . $url . ' uid ' . $uid . ' contact-id ' . $cid, Logger::DEBUG);
$scheme = parse_url($url, PHP_URL_SCHEME);
if ($scheme == 'data') {
@@ -326,7 +378,7 @@ function add_retriever_resource($a, $url, $binary = false) {
fclose($fp);
$url = 'md5://' . hash('md5', $url);
- $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url));
+ $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid));
$resource = $r[0];
if (count($r)) {
Logger::log('add_retriever_resource: Resource ' . $url . ' already requested', Logger::DEBUG);
@@ -334,8 +386,10 @@ function add_retriever_resource($a, $url, $binary = false) {
}
Logger::log('retrieve_resource: got data URL type ' . $resource['type'], Logger::DEBUG);
- q("INSERT INTO `retriever_resource` (`type`, `binary`, `url`, `completed`, `data`) " .
- "VALUES ('%s', %d, '%s', now(), '%s')",
+ q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `type`, `binary`, `url`, `completed`, `data`) " .
+ "VALUES (%d, %d, '%s', %d, '%s', now(), '%s')",
+ intval($uid),
+ intval($cid),
DBA::escape($type),
intval($binary ? 1 : 0),
DBA::escape($url),
@@ -349,31 +403,30 @@ function add_retriever_resource($a, $url, $binary = false) {
}
if (strlen($url) > 800) {
- Logger::log('add_retriever_resource: URL is longer than 800 characters', Logger::INFO);
+ Logger::log('add_retriever_resource: URL is longer than 800 characters', Logger::WARNING);
}
- $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url));
+ $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid));
if (count($r)) {
- Logger::log('add_retriever_resource: Resource ' . $url . ' already requested', Logger::DEBUG);
+ Logger::log('add_retriever_resource: Resource ' . $url . ' uid ' . $uid . ' cid ' . $cid . ' already requested', Logger::DEBUG);
return $r[0];
}
- q("INSERT INTO `retriever_resource` (`binary`, `url`) " .
- "VALUES (%d, '%s')", intval($binary ? 1 : 0), DBA::escape($url));
+ q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `binary`, `url`) " .
+ "VALUES (%d, %d, %d, '%s')", intval($uid), intval($cid), intval($binary ? 1 : 0), DBA::escape($url));
$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url));
return $r[0];
}
function add_retriever_item(&$item, $resource) {
+ Logger::log('@@@ 5 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG);
Logger::log('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG);
$r = q("SELECT COUNT(*) FROM `retriever_item` WHERE " .
"`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d",
DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id']));
if ($r[0]['COUNT(*)'] > 0) {
- Logger::log("add_retriever_item: retriever item already present for " .
- $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'],
- Logger::INFO);
+ Logger::log("add_retriever_item: retriever item already present for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::INFO);
return;
}
q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " .
@@ -383,9 +436,7 @@ function add_retriever_item(&$item, $resource) {
"`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d ORDER BY id DESC",
DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id']));
if (!count($r)) {
- Logger::log("add_retriever_item: couldn't create retriever item for " .
- $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'],
- Logger::INFO);
+ Logger::log("add_retriever_item: couldn't create retriever item for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::INFO);
return;
}
Logger::log('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG);
@@ -416,7 +467,9 @@ function retriever_apply_xslt_text($xslt_text, $doc) {
return $result;
}
+//@@@ is that an item or a resource_item? I really want an item here so I can update it
function retriever_apply_dom_filter($retriever, &$item, $resource) {
+ Logger::log('@@@ 6 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG);
Logger::log('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'], Logger::DEBUG);
if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) {
@@ -454,18 +507,23 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
return;
}
+ Logger::log('@@@ retriever_apply_dom_filter: 1', Logger::INFO);
$components = parse_url($resource['redirect-url']);
$rooturl = $components['scheme'] . "://" . $components['host'];
$dirurl = $rooturl . dirname($components['path']) . "/";
+ Logger::log('@@@ retriever_apply_dom_filter: 2', Logger::INFO);
$params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl);
$fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/');
$fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params);
+ Logger::log('@@@ retriever_apply_dom_filter: 3', Logger::INFO);
$doc = retriever_apply_xslt_text($fix_urls_xslt, $doc);
+ Logger::log('@@@ retriever_apply_dom_filter: 4', Logger::INFO);
if (!$doc) {
Logger::log('retriever_apply_dom_filter: failed to apply fix urls XSLT template', Logger::INFO);
return;
}
+ Logger::log('@@@ retriever_apply_dom_filter: 5', Logger::INFO);
$body = HTML::toBBCode($doc->saveHTML());
if (!strlen($body)) {
Logger::log('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', Logger::INFO);
@@ -475,47 +533,66 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
$body .= $item['plink'];
$body .= ']' . $item['plink'] . '[/url]';
- $uri_id = ItemURI::getIdByURI($item['uri']);
- //@@@ remove this
- $item['body'] = $body;
+ Logger::log('@@@ retriever_apply_dom_filter: 6', Logger::INFO);
+ $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
+ Logger::log('@@@ retriever_apply_dom_filter: item id is ' . $item['id'] . ' uri id is ' . $uri_id, Logger::INFO);
Logger::log('retriever_apply_dom_filter: XSLT result \"' . $body . '\"', Logger::DATA);
- DBA::update('item', ['body' => $body], ['id' => $item['id']]);
- DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]);
+ DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that?
+ //@@@ probably Item::updateContent
}
function retrieve_images(&$item, $a) {
+ $blah_item_class = retriever_class_of_item($item) . ' ' . mat_test($item);
+ Logger::log('@@@ 7 item class is ' . $blah_item_class, Logger::DEBUG);
+
+ $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
+
+ $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]);
+ $body = $content['body'];
+ if (!strlen($body)) {
+ Logger::log('retrieve_images: no body for uri-id ' . $uri_id, Logger::WARNING);
+ return;
+ }
+
+ Logger::log('@@@ retrieve_images start looking in body "' . $body . '"', Logger::INFO);
$matches1 = array();
- preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $item["body"], $matches1);
+ preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $body, $matches1);
$matches2 = array();
- preg_match_all("/\[img\](.*?)\[\/img\]/ism", $item["body"], $matches2);
+ preg_match_all("/\[img\](.*?)\[\/img\]/ism", $body, $matches2);
$matches = array_merge($matches1[3], $matches2[1]);
Logger::log('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG);
foreach ($matches as $url) {
+ Logger::log('@@@ retrieve_images: url ' . $url, Logger::DEBUG);
if (strpos($url, get_app()->getBaseUrl()) === FALSE) {
- $resource = add_retriever_resource($a, $url, true);
+ Logger::log('@@@ retrieve_images: it is from somewhere else', Logger::DEBUG);
+ Logger::log('@@@ retrieve_images: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id'], Logger::DEBUG);
+ $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id'], true);
if (!$resource['completed']) {
+ Logger::log('@@@ retrieve_images: do not have it yet, get it later', Logger::DEBUG);
add_retriever_item($item, $resource);
}
else {
+ Logger::log('@@@ retrieve_images: got it already, transform', Logger::DEBUG);
retriever_transform_images($a, $item, $resource);
}
}
}
+ Logger::log('@@@ retrieve_images end', Logger::INFO);
}
function retriever_check_item_completed(&$item)
{
+ Logger::log('@@@ 9 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG);
$r = q('SELECT count(*) FROM retriever_item WHERE `item-uri` = "%s" ' .
'AND `item-uid` = %d AND `contact-id` = %d AND `finished` = 0',
DBA::escape($item['uri']), intval($item['uid']),
intval($item['contact-id']));
$waiting = $r[0]['count(*)'];
- Logger::log('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid']
- . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources', Logger::DEBUG);
+ Logger::log('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources', Logger::DEBUG);
$old_visible = $item['visible'];
$item['visible'] = $waiting ? 0 : 1;
if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) {
- Logger::log('retriever_check_item_completed: changing visible flag to ' . $item['visible'] . ' and invoking notifier ("edit_post", ' . $item['id'] . ')', Logger::DEBUG);
+ Logger::log('retriever_check_item_completed: changing visible flag to ' . $item['visible'], Logger::DEBUG);
q("UPDATE `item` SET `visible` = %d WHERE `id` = %d",
intval($item['visible']),
intval($item['id']));
@@ -526,10 +603,10 @@ function retriever_check_item_completed(&$item)
}
function retriever_apply_completed_resource_to_item($retriever, &$item, $resource, $a) {
- Logger::log('retriever_apply_completed_resource_to_item: retriever ' .
- ($retriever ? $retriever['id'] : 'none') .
- ' resource ' . $resource['url'] . ' plink ' . $item['plink'], Logger::DEBUG);
+ Logger::log('@@@ 10 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG);
+ Logger::log('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . $item['plink'], Logger::DEBUG);
if (strpos($resource['type'], 'image') !== false) {
+ Logger::log('@@@ retriever_apply_completed_resource_to_item this is an image must transform', Logger::INFO);
retriever_transform_images($a, $item, $resource);
}
if (!$retriever) {
@@ -544,38 +621,61 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc
}
}
+//@@@ todo: change all Logger::log to Logger::info etc
+//@@@ todo: what is this reference for? document if needed delete if not
function retriever_transform_images($a, &$item, $resource) {
- return; //@@@ not working
+ Logger::log('@@@ 11 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG);
+ Logger::log('@@@ retriever_transform_images', Logger::INFO);
if (!$resource["data"]) {
- Logger::log('retriever_transform_images: no data available for '
- . $resource['id'] . ' ' . $resource['url'], Logger::INFO);
+ Logger::log('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url'], Logger::INFO);
return;
}
- try {
- $photo = Image::storePhoto($a, $item['uid'], $resource['data'], $resource['url']);
+ $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
+
+ try { //@@@ probably can get rid of this try/catch
+ $data = $resource['data'];
+ $type = $resource['type'];
+ $uid = $item['uid'];
+ $cid = $item['contact-id'];
+ $rid = Photo::newResource();
+ $path = parse_url($resource['url'], PHP_URL_PATH);
+ $parts = pathinfo($path);
+ $filename = $parts['filename'] . (array_key_exists('extension', $parts) ? '.' . $parts['extension'] : '');
+ Logger::log('@@@ retriever_transform_images url ' . $resource['url'] . ' path ' . $path . ' filename ' . $parts['filename'], Logger::INFO);
+ $album = 'Wall Photos';
+ $scale = 0;
+ $desc = ''; // TODO: store alt text with resource when it's requested so we can fill this in
+ Logger::log('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . $uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . $album . ' scale ' . $scale . ' desc ' . $desc, Logger::DEBUG);
+ Logger::log('@@@ retriever_transform_images before new Image', Logger::INFO);
+ $image = new Image($data, $type);
+ Logger::log('@@@ retriever_transform_images after new Image', Logger::INFO);
+ Logger::log('@@@ retriever_transform_images before Photo::store', Logger::INFO);
+ $photo = Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc);
+ Logger::log('@@@ retriever_transform_images after Photo::store', Logger::INFO);
+ $new_url = System::baseUrl() . '/photo/' . $rid . '-0.' . $image->getExt();
+ Logger::log('@@@ retriever_transform_images new url ' . $new_url . ' rid ' . $rid . ' ext ' . $image->getExt(), Logger::INFO);
+ if (!strlen($new_url)) {
+ Logger::log('retriever_transform_images: no replacement URL for image ' . $resource['url'], Logger::WARNING);
+ return;
+ }
+
+ $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]);
+ $body = $content['body'];
+ Logger::log('@@@ retriever_transform_images: found body for uri id ' . $uri_id . ': ' . $body, Logger::INFO);
+
+ Logger::log('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . $item['uri'], Logger::DEBUG);
+ Logger::log('@@@ retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in body ' . $body, Logger::DEBUG);
+ $body = str_replace($resource["url"], $new_url, $body);
+
+ Logger::log('@@@ retriever_transform_images: result \"' . $body . '\"', Logger::INFO);
+ DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that?
+ //@@@ probably Item::updateContent
+ //@@ actually no, Item::update
} catch (Exception $e) {
- Logger::log('retriever_transform_images caught exception ' . $e->getMessage());
+ Logger::log('retriever_transform_images caught exception ' . $e->getMessage(), Logger::INFO);
return;
}
- if (!array_key_exists('full', $photo)) {
- Logger::log('retriever_transform_images: no replacement URL for image ' . $resource['url']);
- return;
- }
- $new_url = $photo['full'];
- Logger::log('retriever_transform_images: replacing ' . $resource['url'] . ' with ' .
- $new_url . ' in item ' . $item['plink'], Logger::DEBUG);
- $transformed = str_replace($resource["url"], $new_url, $item['body']);
- if ($transformed === $item['body']) {
- return;
- }
-
- $item['body'] = $transformed;
- q("UPDATE `item` SET `body` = '%s' WHERE `plink` = '%s' AND `uid` = %d AND `contact-id` = %d",
- DBA::escape($item['body']),
- DBA::escape($item['plink']),
- intval($item['uid']),
- intval($item['contact-id']));
}
function retriever_content($a) {
@@ -596,37 +696,37 @@ function retriever_content($a) {
return;
}
if ($a->argv[1]) {
- $retriever = get_retriever($a->argv[1], local_user(), false);
+ $retriever_rule = get_retriever_rule($a->argv[1], local_user(), false);
if (!empty($_POST["id"])) {
- $retriever = get_retriever($a->argv[1], local_user(), true);
- $retriever["data"] = array();
+ $retriever_rule = get_retriever_rule($a->argv[1], local_user(), true);
+ $retriever_rule["data"] = array();
foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) {
if (!empty($_POST['retriever_' . $setting])) {
- $retriever["data"][$setting] = $_POST['retriever_' . $setting];
+ $retriever_rule["data"][$setting] = $_POST['retriever_' . $setting];
}
}
foreach ($_POST as $k=>$v) {
if (preg_match("/retriever-(include|exclude)-(\d+)-(element|attribute|value)/", $k, $matches)) {
- $retriever['data'][$matches[1]][intval($matches[2])][$matches[3]] = $v;
+ $retriever_rule['data'][$matches[1]][intval($matches[2])][$matches[3]] = $v;
}
}
// You've gotta have an element, even if it's just "*"
- foreach ($retriever['data']['include'] as $k=>$clause) {
+ foreach ($retriever_rule['data']['include'] as $k=>$clause) {
if (!$clause['element']) {
- unset($retriever['data']['include'][$k]);
+ unset($retriever_rule['data']['include'][$k]);
}
}
- foreach ($retriever['data']['exclude'] as $k=>$clause) {
+ foreach ($retriever_rule['data']['exclude'] as $k=>$clause) {
if (!$clause['element']) {
- unset($retriever['data']['exclude'][$k]);
+ unset($retriever_rule['data']['exclude'][$k]);
}
}
q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d",
- DBA::escape(json_encode($retriever["data"])), intval($retriever["id"]));
+ DBA::escape(json_encode($retriever_rule["data"])), intval($retriever_rule["id"]));
$a->page['content'] .= "Settings Updated";
if (!empty($_POST["retriever_retrospective"])) {
- apply_retrospective($a, $retriever, $_POST["retriever_retrospective"]);
+ apply_retrospective($a, $retriever_rule, $_POST["retriever_retrospective"]);
$a->page['content'] .= " and retrospectively applied to " . $_POST["apply"] . " posts";
}
$a->page['content'] .= ".
";
@@ -637,21 +737,21 @@ function retriever_content($a) {
'$enable' => array(
'retriever_enable',
L10n::t('Enabled'),
- $retriever['data']['enable']),
+ $retriever_rule['data']['enable']),
'$pattern' => array(
'retriever_pattern',
L10n::t('URL Pattern'),
- $retriever["data"]['pattern'],
+ $retriever_rule["data"]['pattern'],
L10n::t('Regular expression matching part of the URL to replace')),
'$replace' => array(
'retriever_replace',
L10n::t('URL Replace'),
- $retriever["data"]['replace'],
+ $retriever_rule["data"]['replace'],
L10n::t('Text to replace matching part of above regular expression')),
'$images' => array(
'retriever_images',
L10n::t('Download Images'),
- $retriever['data']['images']),
+ $retriever_rule['data']['images']),
'$retrospective' => array(
'retriever_retrospective',
L10n::t('Retrospectively Apply'),
@@ -660,33 +760,33 @@ function retriever_content($a) {
'storecookies' => array(
'retriever_storecookies',
L10n::t('Store cookies'),
- $retriever['data']['storecookies'],
+ $retriever_rule['data']['storecookies'],
L10n::t("Preserve cookie data across fetches.")),
'$cookiedata' => array(
'retriever_cookiedata',
L10n::t('Cookie Data'),
- $retriever['data']['cookiedata'],
+ $retriever_rule['data']['cookiedata'],
L10n::t("Latest cookie data for this feed. Netscape cookie file format.")),
'$customxslt' => array(
'retriever_customxslt',
L10n::t('Custom XSLT'),
- $retriever['data']['customxslt'],
+ $retriever_rule['data']['customxslt'],
L10n::t("When standard rules aren't enough, apply custom XSLT to the article")),
'$title' => L10n::t('Retrieve Feed Content'),
'$help' => $a->getBaseUrl() . '/retriever/help',
'$help_t' => L10n::t('Get Help'),
'$submit_t' => L10n::t('Submit'),
'$submit' => L10n::t('Save Settings'),
- '$id' => ($retriever["id"] ? $retriever["id"] : "create"),
+ '$id' => ($retriever_rule["id"] ? $retriever_rule["id"] : "create"),
'$tag_t' => L10n::t('Tag'),
'$attribute_t' => L10n::t('Attribute'),
'$value_t' => L10n::t('Value'),
'$add_t' => L10n::t('Add'),
'$remove_t' => L10n::t('Remove'),
'$include_t' => L10n::t('Include'),
- '$include' => $retriever['data']['include'],
+ '$include' => $retriever_rule['data']['include'],
'$exclude_t' => L10n::t('Exclude'),
- '$exclude' => $retriever["data"]['exclude']));
+ '$exclude' => $retriever_rule["data"]['exclude']));
return;
}
}
@@ -701,18 +801,23 @@ function retriever_contact_photo_menu($a, &$args) {
}
function retriever_post_remote_hook(&$a, &$item) {
+ Logger::log('@@@ 12 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
Logger::log('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG);
- $retriever = get_retriever($item['contact-id'], $item["uid"], false);
- if ($retriever) {
- retriever_on_item_insert($a, $retriever, $item);
+ $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
+ $retriever_rule = get_retriever_rule($item['contact-id'], $item["uid"], false);
+ if ($retriever_rule) {
+ retriever_on_item_insert($a, $retriever_rule, $item);
}
else {
if (PConfig::get($item["uid"], 'retriever', 'oembed')) {
// Convert to HTML and back to take advantage of bbcode's resolution of oembeds.
- $body = HTML::toBBCode(BBCode::convert($item['body']));
+ $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]);
+ $body = HTML::toBBCode(BBCode::convert($content['body']));
+ Logger::log('@@@ retriever_post_remote_hook item uri-id ' . $uri_id . ' body "' . $item['body'] . '" item content body "' . $body . '"', Logger::DEBUG);
if ($body) {
$item['body'] = $body;
+ DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that?
}
}
if (PConfig::get($item["uid"], 'retriever', 'all_photos')) {
From ef6709d861bc9ba710bef507d7d298d39a58fd6c Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sun, 22 Sep 2019 17:05:23 +0200
Subject: [PATCH 12/94] Improvement
---
retriever/retriever.php | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 5644952a..704bff34 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -650,6 +650,10 @@ function retriever_transform_images($a, &$item, $resource) {
Logger::log('@@@ retriever_transform_images before new Image', Logger::INFO);
$image = new Image($data, $type);
Logger::log('@@@ retriever_transform_images after new Image', Logger::INFO);
+ if (!$image->isValid()) { // bail out before Photo::store when the image is not valid
+ Logger::log('retriever_transform_images: invalid image found at URL ' . $resource['url'] . ' for item ' . $item['id'], Logger::WARNING);
+ return;
+ }
Logger::log('@@@ retriever_transform_images before Photo::store', Logger::INFO);
$photo = Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc);
Logger::log('@@@ retriever_transform_images after Photo::store', Logger::INFO);
From 615992810a8cfff2e7213f8bd7d2d9dcc259831a Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sun, 22 Sep 2019 19:55:07 +0200
Subject: [PATCH 13/94] Change logging functions
---
retriever/retriever.php | 210 ++++++++++++++++++++--------------------
1 file changed, 105 insertions(+), 105 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 704bff34..ac6b321a 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -84,7 +84,7 @@ function retriever_cron($a, $b) {
$retriever_item_count = 0;
function retriever_retrieve_items($max_items, $a) {
- Logger::log('@@@ retriever_retrieve_items', Logger::INFO);
+ Logger::info('@@@ retriever_retrieve_items');
global $retriever_item_count;
$retriever_schedule = array(array(1,'minute'),
@@ -105,10 +105,10 @@ function retriever_retrieve_items($max_items, $a) {
}
$retrieve_items = $max_items - $retriever_item_count;
- Logger::log('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items, Logger::DEBUG);
+ Logger::debug('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items);
do {
- Logger::log('@@@ retriever_retrieve_items loop max ' . $max_items . ' count ' . $retriever_item_count, Logger::INFO);
- Logger::log("@@@ SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR " . implode($schedule_clauses, ' OR ') . ") ORDER BY `last-try` ASC LIMIT " . $retrieve_items, Logger::INFO);
+ Logger::info('@@@ retriever_retrieve_items loop max ' . $max_items . ' count ' . $retriever_item_count);
+ Logger::info("@@@ SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR " . implode($schedule_clauses, ' OR ') . ") ORDER BY `last-try` ASC LIMIT " . $retrieve_items);
$retriever_resources = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d",
DBA::escape(implode($schedule_clauses, ' OR ')),
intval($retrieve_items));
@@ -118,9 +118,9 @@ function retriever_retrieve_items($max_items, $a) {
if (count($retriever_resources) == 0) {
break;
}
- Logger::log('retriever_retrieve_items: found ' . count($retriever_resources) . ' waiting resources in database', Logger::DEBUG);
+ Logger::debug('retriever_retrieve_items: found ' . count($retriever_resources) . ' waiting resources in database');
foreach ($retriever_resources as $retriever_resource) {
- Logger::log('@@@ need to get the retriever config here cid ' . $retriever_resource['contact-id'] . ' uid ' . $retriever_resource['item-uid'], Logger::INFO);
+ Logger::info('@@@ need to get the retriever config here cid ' . $retriever_resource['contact-id'] . ' uid ' . $retriever_resource['item-uid']);
retrieve_resource($retriever_resource);
$retriever_item_count++;
}
@@ -128,7 +128,7 @@ function retriever_retrieve_items($max_items, $a) {
}
while ($retrieve_items > 0);
// @@@ todo: when items add further items (i.e. images), do the new images go round this loop again?
- Logger::log('@@@ retriever_retrieve_items: finished retrieving items', Logger::INFO);
+ Logger::info('@@@ retriever_retrieve_items: finished retrieving items');
}
/* Look for items that are waiting even though the resource has
@@ -141,25 +141,25 @@ function retriever_clean_up_completed_resources($max_items, $a) {
if (!$r) {
$r = array();
}
- Logger::log('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . count($r), Logger::DEBUG);
+ Logger::debug('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . count($r));
foreach ($r as $rr) {
$resource = q("SELECT * FROM retriever_resource WHERE `id` = %d", $rr['resource']);
$retriever_item = retriever_get_retriever_item($rr['item']);
if (!DBA::isResult($retriever_item)) {
- Logger::log('retriever_clean_up_completed_resources: no retriever item with id ' . $rr['item'], Logger::WARNING);
+ Logger::warning('retriever_clean_up_completed_resources: no retriever item with id ' . $rr['item']);
continue;
}
$item = retriever_get_item($retriever_item);
if (!$item) {
- Logger::log('retriever_clean_up_completed_resources: no item ' . $retriever_item['item-uri'], Logger::WARNING);
+ Logger::warning('retriever_clean_up_completed_resources: no item ' . $retriever_item['item-uri']);
continue;
}
$retriever_rule = get_retriever_rule($retriever_item['contact-id'], $item['uid']);
if (!$retriever_rule) {
- Logger::log('retriever_clean_up_completed_resources: no retriever for uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['uid'] . ' ' . $retriever_item['contact-id'], Logger::WARNING);
+ Logger::warning('retriever_clean_up_completed_resources: no retriever for uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['uid'] . ' ' . $retriever_item['contact-id']);
continue;
}
- Logger::log('@@@ retriever_clean_up_completed_resources: about to retriever_apply_completed_resource_to_item', Logger::INFO);
+ Logger::info('@@@ retriever_clean_up_completed_resources: about to retriever_apply_completed_resource_to_item');
retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource[0], $a);
q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", intval($retriever_item['id']));
retriever_check_item_completed($item);
@@ -171,7 +171,7 @@ function retriever_tidy() {
q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)");
$r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null");
- Logger::log('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource', Logger::INFO);
+ Logger::info('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource');
foreach ($r as $rr) {
q('DELETE FROM retriever_item WHERE id = %d', intval($rr['id']));
}
@@ -179,7 +179,7 @@ function retriever_tidy() {
function retrieve_dataurl_resource($resource) {
if (!preg_match("/date:(.*);base64,(.*)/", $resource['url'], $matches)) {
- Logger::log('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern', Logger::INFO);
+ Logger::info('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern');
} else {
$resource['type'] = $matches[1];
$resource['data'] = base64url_decode($matches[2]);
@@ -194,7 +194,7 @@ function retrieve_dataurl_resource($resource) {
}
function retrieve_resource($resource) {
- Logger::log('@@@ retrieve_resource: url ' . $resource['url'] . ' uid ' . $resource['item-uid'] . ' cid ' . $resource['contact-id'], Logger::INFO);
+ Logger::info('@@@ retrieve_resource: url ' . $resource['url'] . ' uid ' . $resource['item-uid'] . ' cid ' . $resource['contact-id']);
if (substr($resource['url'], 0, 5) == "data:") {
return retrieve_dataurl_resource($resource);
@@ -205,14 +205,14 @@ function retrieve_resource($resource) {
$retriever_rule = get_retriever_rule($resource['contact-id'], $resource['item-uid']);
try {
- Logger::log('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], Logger::DEBUG);
+ Logger::debug('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url']);
$redirects = 0;
$cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-');
- if ($retriever_rule['storecookies']) {
+ if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) {
file_put_contents($cookiejar, $retriever_rule['cookiedata']);
}
$fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar);
- if ($retriever_rule['storecookies']) {
+ if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) {
$retriever_rule['cookiedata'] = file_get_contents($cookiejar);
//@@@ do the store here
}
@@ -221,9 +221,9 @@ function retrieve_resource($resource) {
$resource['http-code'] = $fetch_result->getReturnCode();
$resource['type'] = $fetch_result->getContentType();
$resource['redirect-url'] = $fetch_result->getRedirectUrl();
- Logger::log('retrieve_resource: got code ' . $resource['http-code'] . ' retrieving resource ' . $resource['id'] . ' final url ' . $resource['redirect-url'], Logger::DEBUG);
+ Logger::debug('retrieve_resource: got code ' . $resource['http-code'] . ' retrieving resource ' . $resource['id'] . ' final url ' . $resource['redirect-url']);
} catch (Exception $e) {
- Logger::log('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage(), Logger::INFO);
+ Logger::info('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage());
}
q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `http-code` = %d, `redirect-url` = '%s' WHERE id = %d",
intval($resource['http-code']),
@@ -236,17 +236,17 @@ function retrieve_resource($resource) {
intval($resource['id']));
retriever_resource_completed($resource, $a);
}
- Logger::log('@@@ retrieve_resource finished: ' . $resource['url'], Logger::INFO);
+ Logger::info('@@@ retrieve_resource finished: ' . $resource['url']);
}
function get_retriever_rule($contact_id, $uid, $create = false) {
- Logger::log('@@@ get_retriever_rule ' . "SELECT * FROM `retriever_rule` WHERE `contact-id` = " . intval($contact_id) . " AND `uid` = " . intval($uid), Logger::INFO);
+ Logger::info('@@@ get_retriever_rule ' . "SELECT * FROM `retriever_rule` WHERE `contact-id` = " . intval($contact_id) . " AND `uid` = " . intval($uid));
$r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d",
intval($contact_id), intval($uid));
- Logger::log('@@@ get_retriever_rule count is ' . count($r), Logger::INFO);
+ Logger::info('@@@ get_retriever_rule count is ' . count($r));
if (count($r)) {
$r[0]['data'] = json_decode($r[0]['data'], true);
- Logger::log('@@@ get_retriever_rule returning an actual thing', Logger::INFO);
+ Logger::info('@@@ get_retriever_rule returning an actual thing');
return $r[0];
}
if ($create) {
@@ -267,7 +267,7 @@ function retriever_class_of_item($item) { //@@@
return 'false';
}
if (array_key_exists('finished', $item)) {
- Logger::log('@@@ oh no this is a bad thing', Logger::INFO);
+ Logger::info('@@@ oh no this is a bad thing');
return 'retriever_item';
}
if (array_key_exists('moderated', $item)) {
@@ -282,33 +282,33 @@ function mat_test($item) { //@@@
function retriever_get_item($retriever_item) {
// @@@ add contact id as a search term
- Logger::log('@@@ retriever_get_item uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['item-uid'] . ' cid ' . $retriever_item['contact-id'], Logger::INFO);
+ Logger::info('@@@ retriever_get_item uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['item-uid'] . ' cid ' . $retriever_item['contact-id']);
try {//@@@ not necessary
$item = Item::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid'])]);
Logger::log('@@@ 1 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
if (!DBA::isResult($item)) {
- Logger::log('retriever_get_item: no item found for uri ' . $retriever_item['item-uri'], Logger::INFO);
+ Logger::log('retriever_get_item: no item found for uri ' . $retriever_item['item-uri']);
return;
}
- Logger::log('@@@ retriever_get_item: yay item found for uri ' . $retriever_item['item-uri'] . ' guid ' . $item['guid'] . ' plink ' . $item['plink'], Logger::INFO);
+ Logger::info('@@@ retriever_get_item: yay item found for uri ' . $retriever_item['item-uri'] . ' guid ' . $item['guid'] . ' plink ' . $item['plink']);
return $item;
} catch (Exception $e) {
- Logger::log('retriever_get_item: exception ' . $e->getMessage(), Logger::INFO);
+ Logger::info('retriever_get_item: exception ' . $e->getMessage());
}
}
function retriever_item_completed($retriever_item_id, $resource, $a) {
- Logger::log('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url'], Logger::DEBUG);
+ Logger::debug('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url']);
$retriever_item = retriever_get_retriever_item($retriever_item_id);
if (!DBA::isResult($retriever_item)) {
- Logger::log('retriever_item_completed: no retriever item with id ' . $retriever_item_id, Logger::INFO);
+ Logger::info('retriever_item_completed: no retriever item with id ' . $retriever_item_id);
return;
}
$item = retriever_get_item($retriever_item);
Logger::log('@@@ 2 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
if (!$item) {
- Logger::log('retriever_item_completed: no item ' . $retriever_item['item-uri'], Logger::INFO);
+ Logger::log('retriever_item_completed: no item ' . $retriever_item['item-uri']);
return;
}
// Note: the retriever might be null. Doesn't matter.
@@ -322,7 +322,7 @@ function retriever_item_completed($retriever_item_id, $resource, $a) {
}
function retriever_resource_completed($resource, $a) {
- Logger::log('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url'], Logger::DEBUG);
+ Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url']);
$r = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource['id']);
foreach ($r as $rr) {
retriever_item_completed($rr['id'], $resource, $a);
@@ -343,31 +343,31 @@ function apply_retrospective($a, $retriever, $num) {
//@@@ make this trigger a retriever immediately somehow
//@@@ need a lock to say something is doing something
function retriever_on_item_insert($a, $retriever, &$item) {
- Logger::log('@@@ 4 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
- Logger::log('@@@ retriever_on_item_insert start ' . $item['plink'], Logger::INFO);
+ Logger::info('@@@ 4 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
+ Logger::info('@@@ retriever_on_item_insert start ' . $item['plink']);
if (!$retriever || !$retriever['id']) {
- Logger::log('retriever_on_item_insert: No retriever supplied', Logger::INFO);
+ Logger::info('retriever_on_item_insert: No retriever supplied');
return;
}
if (!$retriever["data"]['enable'] == "on") {
- Logger::log('@@@ retriever_on_item_insert: Disabled', Logger::INFO);
+ Logger::info('@@@ retriever_on_item_insert: Disabled');
return;
}
if (array_key_exists('pattern', $retriever["data"]) && $retriever["data"]['pattern']) {
$url = preg_replace('/' . $retriever["data"]['pattern'] . '/', $retriever["data"]['replace'], $item['plink']);
- Logger::log('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url, Logger::DATA);
+ Logger::debug('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url);
}
else {
$url = $item['plink'];
}
- Logger::log('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id'], Logger::DEBUG);
+ Logger::debug('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id']);
$resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id']);
$retriever_item_id = add_retriever_item($item, $resource);
}
function add_retriever_resource($a, $url, $uid, $cid, $binary = false) {
- Logger::log('add_retriever_resource: url ' . $url . ' uid ' . $uid . ' contact-id ' . $cid, Logger::DEBUG);
+ Logger::debug('add_retriever_resource: url ' . $url . ' uid ' . $uid . ' contact-id ' . $cid);
$scheme = parse_url($url, PHP_URL_SCHEME);
if ($scheme == 'data') {
@@ -381,11 +381,11 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) {
$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid));
$resource = $r[0];
if (count($r)) {
- Logger::log('add_retriever_resource: Resource ' . $url . ' already requested', Logger::DEBUG);
+ Logger::debug('add_retriever_resource: Resource ' . $url . ' already requested');
return $resource;
}
- Logger::log('retrieve_resource: got data URL type ' . $resource['type'], Logger::DEBUG);
+ Logger::debug('retrieve_resource: got data URL type ' . $resource['type']);
q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `type`, `binary`, `url`, `completed`, `data`) " .
"VALUES (%d, %d, '%s', %d, '%s', now(), '%s')",
intval($uid),
@@ -403,12 +403,12 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) {
}
if (strlen($url) > 800) {
- Logger::log('add_retriever_resource: URL is longer than 800 characters', Logger::WARNING);
+ Logger::warning('add_retriever_resource: URL is longer than 800 characters');
}
$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid));
if (count($r)) {
- Logger::log('add_retriever_resource: Resource ' . $url . ' uid ' . $uid . ' cid ' . $cid . ' already requested', Logger::DEBUG);
+ Logger::debug('add_retriever_resource: Resource ' . $url . ' uid ' . $uid . ' cid ' . $cid . ' already requested');
return $r[0];
}
@@ -419,14 +419,14 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) {
}
function add_retriever_item(&$item, $resource) {
- Logger::log('@@@ 5 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG);
- Logger::log('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG);
+ Logger::debug('@@@ 5 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
+ Logger::debug('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
$r = q("SELECT COUNT(*) FROM `retriever_item` WHERE " .
"`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d",
DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id']));
if ($r[0]['COUNT(*)'] > 0) {
- Logger::log("add_retriever_item: retriever item already present for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::INFO);
+ Logger::info("add_retriever_item: retriever item already present for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
return;
}
q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " .
@@ -436,10 +436,10 @@ function add_retriever_item(&$item, $resource) {
"`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d ORDER BY id DESC",
DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id']));
if (!count($r)) {
- Logger::log("add_retriever_item: couldn't create retriever item for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::INFO);
+ Logger::info("add_retriever_item: couldn't create retriever item for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
return;
}
- Logger::log('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG);
+ Logger::debug('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
return $r[0]['id'];
}
@@ -453,12 +453,12 @@ function retriever_get_encoding($resource) {
function retriever_apply_xslt_text($xslt_text, $doc) {
if (!$xslt_text) {
- Logger::log('retriever_apply_xslt_text: empty XSLT text', Logger::INFO);
+ Logger::info('retriever_apply_xslt_text: empty XSLT text');
return $doc;
}
$xslt_doc = new DOMDocument();
if (!$xslt_doc->loadXML($xslt_text)) {
- Logger::log('retriever_apply_xslt_text: could not load XML', Logger::INFO);
+ Logger::info('retriever_apply_xslt_text: could not load XML');
return $doc;
}
$xp = new XsltProcessor();
@@ -469,15 +469,15 @@ function retriever_apply_xslt_text($xslt_text, $doc) {
//@@@ is that an item or a resource_item? I really want an item here so I can update it
function retriever_apply_dom_filter($retriever, &$item, $resource) {
- Logger::log('@@@ 6 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG);
- Logger::log('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'], Logger::DEBUG);
+ Logger::debug('@@@ 6 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
+ Logger::debug('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id']);
if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) {
- Logger::log('retriever_apply_dom_filter: no include and no customxslt', Logger::INFO);
+ Logger::info('retriever_apply_dom_filter: no include and no customxslt');
return;
}
if (!$resource['data']) {
- Logger::log('retriever_apply_dom_filter: no text to work with', Logger::INFO);
+ Logger::info('retriever_apply_dom_filter: no text to work with');
return;
}
@@ -495,104 +495,104 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
$extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/');
$extract_xslt = Renderer::replaceMacros($extract_template, $params);
if ($retriever['data']['include']) {
- Logger::log('retriever_apply_dom_filter: applying include/exclude template \"' . $extract_xslt . '\"', Logger::DEBUG);
+ Logger::debug('retriever_apply_dom_filter: applying include/exclude template \"' . $extract_xslt . '\"');
$doc = retriever_apply_xslt_text($extract_xslt, $doc);
}
if (array_key_exists('customxslt', $retriever['data']) && $retriever['data']['customxslt']) {
- Logger::log('retriever_apply_dom_filter: applying custom XSLT \"' . $retriever['data']['customxslt'] . '\"', Logger::DEBUG);
+ Logger::debug('retriever_apply_dom_filter: applying custom XSLT \"' . $retriever['data']['customxslt'] . '\"');
$doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc);
}
if (!$doc) {
- Logger::log('retriever_apply_dom_filter: failed to apply extract XSLT template', Logger::INFO);
+ Logger::info('retriever_apply_dom_filter: failed to apply extract XSLT template');
return;
}
- Logger::log('@@@ retriever_apply_dom_filter: 1', Logger::INFO);
+ Logger::info('@@@ retriever_apply_dom_filter: 1');
$components = parse_url($resource['redirect-url']);
$rooturl = $components['scheme'] . "://" . $components['host'];
$dirurl = $rooturl . dirname($components['path']) . "/";
- Logger::log('@@@ retriever_apply_dom_filter: 2', Logger::INFO);
+ Logger::info('@@@ retriever_apply_dom_filter: 2');
$params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl);
$fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/');
$fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params);
- Logger::log('@@@ retriever_apply_dom_filter: 3', Logger::INFO);
+ Logger::info('@@@ retriever_apply_dom_filter: 3');
$doc = retriever_apply_xslt_text($fix_urls_xslt, $doc);
- Logger::log('@@@ retriever_apply_dom_filter: 4', Logger::INFO);
+ Logger::info('@@@ retriever_apply_dom_filter: 4');
if (!$doc) {
- Logger::log('retriever_apply_dom_filter: failed to apply fix urls XSLT template', Logger::INFO);
+ Logger::info('retriever_apply_dom_filter: failed to apply fix urls XSLT template');
return;
}
- Logger::log('@@@ retriever_apply_dom_filter: 5', Logger::INFO);
+ Logger::info('@@@ retriever_apply_dom_filter: 5');
$body = HTML::toBBCode($doc->saveHTML());
if (!strlen($body)) {
- Logger::log('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', Logger::INFO);
+ Logger::info('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty');
return;
}
$body .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . ': [url=';
$body .= $item['plink'];
$body .= ']' . $item['plink'] . '[/url]';
- Logger::log('@@@ retriever_apply_dom_filter: 6', Logger::INFO);
+ Logger::info('@@@ retriever_apply_dom_filter: 6');
$uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
- Logger::log('@@@ retriever_apply_dom_filter: item id is ' . $item['id'] . ' uri id is ' . $uri_id, Logger::INFO);
- Logger::log('retriever_apply_dom_filter: XSLT result \"' . $body . '\"', Logger::DATA);
+ Logger::info('@@@ retriever_apply_dom_filter: item id is ' . $item['id'] . ' uri id is ' . $uri_id);
+ Logger::debug('retriever_apply_dom_filter: XSLT result \"' . $body . '\"');
DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that?
//@@@ probably Item::updateContent
}
function retrieve_images(&$item, $a) {
$blah_item_class = retriever_class_of_item($item) . ' ' . mat_test($item);
- Logger::log('@@@ 7 item class is ' . $blah_item_class, Logger::DEBUG);
+ Logger::debug('@@@ 7 item class is ' . $blah_item_class);
$uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
$content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]);
$body = $content['body'];
if (!strlen($body)) {
- Logger::log('retrieve_images: no body for uri-id ' . $uri_id, Logger::WARNING);
+ Logger::warning('retrieve_images: no body for uri-id ' . $uri_id);
return;
}
- Logger::log('@@@ retrieve_images start looking in body "' . $body . '"', Logger::INFO);
+ Logger::info('@@@ retrieve_images start looking in body "' . $body . '"');
$matches1 = array();
preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $body, $matches1);
$matches2 = array();
preg_match_all("/\[img\](.*?)\[\/img\]/ism", $body, $matches2);
$matches = array_merge($matches1[3], $matches2[1]);
- Logger::log('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG);
+ Logger::debug('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
foreach ($matches as $url) {
- Logger::log('@@@ retrieve_images: url ' . $url, Logger::DEBUG);
+ Logger::debug('@@@ retrieve_images: url ' . $url);
if (strpos($url, get_app()->getBaseUrl()) === FALSE) {
- Logger::log('@@@ retrieve_images: it is from somewhere else', Logger::DEBUG);
- Logger::log('@@@ retrieve_images: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id'], Logger::DEBUG);
+ Logger::debug('@@@ retrieve_images: it is from somewhere else');
+ Logger::debug('@@@ retrieve_images: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id']);
$resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id'], true);
if (!$resource['completed']) {
- Logger::log('@@@ retrieve_images: do not have it yet, get it later', Logger::DEBUG);
+ Logger::debug('@@@ retrieve_images: do not have it yet, get it later');
add_retriever_item($item, $resource);
}
else {
- Logger::log('@@@ retrieve_images: got it already, transform', Logger::DEBUG);
+ Logger::debug('@@@ retrieve_images: got it already, transform');
retriever_transform_images($a, $item, $resource);
}
}
}
- Logger::log('@@@ retrieve_images end', Logger::INFO);
+ Logger::info('@@@ retrieve_images end');
}
function retriever_check_item_completed(&$item)
{
- Logger::log('@@@ 9 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG);
+ Logger::debug('@@@ 9 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
$r = q('SELECT count(*) FROM retriever_item WHERE `item-uri` = "%s" ' .
'AND `item-uid` = %d AND `contact-id` = %d AND `finished` = 0',
DBA::escape($item['uri']), intval($item['uid']),
intval($item['contact-id']));
$waiting = $r[0]['count(*)'];
- Logger::log('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources', Logger::DEBUG);
+ Logger::debug('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources');
$old_visible = $item['visible'];
$item['visible'] = $waiting ? 0 : 1;
if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) {
- Logger::log('retriever_check_item_completed: changing visible flag to ' . $item['visible'], Logger::DEBUG);
+ Logger::debug('retriever_check_item_completed: changing visible flag to ' . $item['visible']);
q("UPDATE `item` SET `visible` = %d WHERE `id` = %d",
intval($item['visible']),
intval($item['id']));
@@ -603,10 +603,10 @@ function retriever_check_item_completed(&$item)
}
function retriever_apply_completed_resource_to_item($retriever, &$item, $resource, $a) {
- Logger::log('@@@ 10 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG);
- Logger::log('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . $item['plink'], Logger::DEBUG);
+ Logger::debug('@@@ 10 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
+ Logger::debug('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . $item['plink']);
if (strpos($resource['type'], 'image') !== false) {
- Logger::log('@@@ retriever_apply_completed_resource_to_item this is an image must transform', Logger::INFO);
+ Logger::info('@@@ retriever_apply_completed_resource_to_item this is an image must transform');
retriever_transform_images($a, $item, $resource);
}
if (!$retriever) {
@@ -621,13 +621,13 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc
}
}
-//@@@ todo: change all Logger::log to Logger::info etc
+//@@@ todo: change all Logger::info t etc
//@@@ todo: what is this reference for? document if needed delete if not
function retriever_transform_images($a, &$item, $resource) {
- Logger::log('@@@ 11 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG);
- Logger::log('@@@ retriever_transform_images', Logger::INFO);
+ Logger::debug('@@@ 11 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
+ Logger::info('@@@ retriever_transform_images');
if (!$resource["data"]) {
- Logger::log('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url'], Logger::INFO);
+ Logger::info('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url']);
return;
}
@@ -642,42 +642,42 @@ function retriever_transform_images($a, &$item, $resource) {
$path = parse_url($resource['url'], PHP_URL_PATH);
$parts = pathinfo($path);
$filename = $parts['filename'] . (array_key_exists('extension', $parts) ? '.' . $parts['extension'] : '');
- Logger::log('@@@ retriever_transform_images url ' . $resource['url'] . ' path ' . $path . ' filename ' . $parts['filename'], Logger::INFO);
+ Logger::info('@@@ retriever_transform_images url ' . $resource['url'] . ' path ' . $path . ' filename ' . $parts['filename']);
$album = 'Wall Photos';
$scale = 0;
$desc = ''; // TODO: store alt text with resource when it's requested so we can fill this in
- Logger::log('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . $uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . $album . ' scale ' . $scale . ' desc ' . $desc, Logger::DEBUG);
- Logger::log('@@@ retriever_transform_images before new Image', Logger::INFO);
+ Logger::debug('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . $uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . $album . ' scale ' . $scale . ' desc ' . $desc);
+ Logger::info('@@@ retriever_transform_images before new Image');
$image = new Image($data, $type);
- Logger::log('@@@ retriever_transform_images after new Image', Logger::INFO);
+ Logger::info('@@@ retriever_transform_images after new Image');
if (!$image->isValid()) {
- Logger::log('retriever_transform_images: invalid image found at URL ' . $resource['url'] ' for item ' . $item['id'], Logger::WARNING);
+ Logger::warning('retriever_transform_images: invalid image found at URL ' . $resource['url'] . ' for item ' . $item['id']);
return;
}
- Logger::log('@@@ retriever_transform_images before Photo::store', Logger::INFO);
+ Logger::info('@@@ retriever_transform_images before Photo::store');
$photo = Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc);
- Logger::log('@@@ retriever_transform_images after Photo::store', Logger::INFO);
+ Logger::info('@@@ retriever_transform_images after Photo::store');
$new_url = System::baseUrl() . '/photo/' . $rid . '-0.' . $image->getExt();
- Logger::log('@@@ retriever_transform_images new url ' . $new_url . ' rid ' . $rid . ' ext ' . $image->getExt(), Logger::INFO);
+ Logger::info('@@@ retriever_transform_images new url ' . $new_url . ' rid ' . $rid . ' ext ' . $image->getExt());
if (!strlen($new_url)) {
- Logger::log('retriever_transform_images: no replacement URL for image ' . $resource['url'], Logger::WARNING);
+ Logger::warning('retriever_transform_images: no replacement URL for image ' . $resource['url']);
return;
}
$content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]);
$body = $content['body'];
- Logger::log('@@@ retriever_transform_images: found body for uri id ' . $uri_id . ': ' . $body, Logger::INFO);
+ Logger::info('@@@ retriever_transform_images: found body for uri id ' . $uri_id . ': ' . $body);
- Logger::log('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . $item['uri'], Logger::DEBUG);
- Logger::log('@@@ retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in body ' . $body, Logger::DEBUG);
+ Logger::debug('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . $item['uri']);
+ Logger::debug('@@@ retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in body ' . $body);
$body = str_replace($resource["url"], $new_url, $body);
- Logger::log('@@@ retriever_transform_images: result \"' . $body . '\"', Logger::INFO);
+ Logger::info('@@@ retriever_transform_images: result \"' . $body . '\"');
DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that?
//@@@ probably Item::updateContent
//@@ actually no, Item::update
} catch (Exception $e) {
- Logger::log('retriever_transform_images caught exception ' . $e->getMessage(), Logger::INFO);
+ Logger::info('retriever_transform_images caught exception ' . $e->getMessage());
return;
}
}
@@ -805,8 +805,8 @@ function retriever_contact_photo_menu($a, &$args) {
}
function retriever_post_remote_hook(&$a, &$item) {
- Logger::log('@@@ 12 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
- Logger::log('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG);
+ Logger::info('@@@ 12 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
+ Logger::info('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
$uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
$retriever_rule = get_retriever_rule($item['contact-id'], $item["uid"], false);
@@ -818,7 +818,7 @@ function retriever_post_remote_hook(&$a, &$item) {
// Convert to HTML and back to take advantage of bbcode's resolution of oembeds.
$content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]);
$body = HTML::toBBCode(BBCode::convert($content['body']));
- Logger::log('@@@ retriever_post_remote_hook item uri-id ' . $uri_id . ' body "' . $item['body'] . '" item content body "' . $body . '"', Logger::DEBUG);
+ Logger::debug('@@@ retriever_post_remote_hook item uri-id ' . $uri_id . ' body "' . $item['body'] . '" item content body "' . $body . '"');
if ($body) {
$item['body'] = $body;
DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that?
From c5fb49455226f776ca16a99e9655d1fce0683b69 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Fri, 27 Sep 2019 22:05:00 +0200
Subject: [PATCH 14/94] retriever stuff
---
retriever/retriever.php | 10 +++-------
1 file changed, 3 insertions(+), 7 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index ac6b321a..56852e45 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -30,7 +30,6 @@ function retriever_install() {
Addon::registerHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu');
Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron');
- $r = q("SELECT `id` FROM `pconfig` WHERE `cat` LIKE 'retriever_%%'");
if (Config::get('retriever', 'dbversion') == '0.10') {
q("ALTER TABLE `retriever_resource` MODIFY COLUMN `type` char(255) NULL DEFAULT NULL");
q("ALTER TABLE `retriever_resource` MODIFY COLUMN `data` mediumblob NULL DEFAULT NULL");
@@ -537,8 +536,7 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
$uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
Logger::info('@@@ retriever_apply_dom_filter: item id is ' . $item['id'] . ' uri id is ' . $uri_id);
Logger::debug('retriever_apply_dom_filter: XSLT result \"' . $body . '\"');
- DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that?
- //@@@ probably Item::updateContent
+ Item::update(['body' => $body], ['uri-id' => $uri_id]);
}
function retrieve_images(&$item, $a) {
@@ -673,9 +671,7 @@ function retriever_transform_images($a, &$item, $resource) {
$body = str_replace($resource["url"], $new_url, $body);
Logger::info('@@@ retriever_transform_images: result \"' . $body . '\"');
- DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that?
- //@@@ probably Item::updateContent
- //@@ actually no, Item::update
+ Item::update(['body' => $body], ['uri-id' => $uri_id]);
} catch (Exception $e) {
Logger::info('retriever_transform_images caught exception ' . $e->getMessage());
return;
@@ -821,7 +817,7 @@ function retriever_post_remote_hook(&$a, &$item) {
Logger::debug('@@@ retriever_post_remote_hook item uri-id ' . $uri_id . ' body "' . $item['body'] . '" item content body "' . $body . '"');
if ($body) {
$item['body'] = $body;
- DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that?
+ Item::update(['body' => $body], ['uri-id' => $uri_id]);
}
}
if (PConfig::get($item["uid"], 'retriever', 'all_photos')) {
From 4dc51d8f0557fbc2bdd94b0c2985036fa03a6ed3 Mon Sep 17 00:00:00 2001
From: Administrator
Date: Fri, 27 Sep 2019 21:02:52 +0200
Subject: [PATCH 15/94] Fix retriever database problems
---
retriever/database.sql | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/retriever/database.sql b/retriever/database.sql
index a29135e7..68480cfd 100644
--- a/retriever/database.sql
+++ b/retriever/database.sql
@@ -36,7 +36,7 @@ CREATE TABLE IF NOT EXISTS `retriever_resource` (
`data` mediumblob NULL DEFAULT NULL,
`http-code` smallint(1) unsigned NULL DEFAULT NULL,
`redirect-url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NULL DEFAULT NULL,
- KEY `retriever_resource` ADD INDEX `url` (`url`),
- KEY `retriever_resource` ADD INDEX `completed` (`completed`),
+ KEY `url` (`url`),
+ KEY `completed` (`completed`),
PRIMARY KEY (`id`)
) DEFAULT CHARSET=utf8 COLLATE=utf8_bin
From 0963f0da4a2975d8731093cdcd384fca81ae7880 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Fri, 27 Sep 2019 22:05:22 +0200
Subject: [PATCH 16/94] more retriever stuff
---
retriever/retriever.php | 34 +++++++++++++++++++++++++---------
1 file changed, 25 insertions(+), 9 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 56852e45..3b557d80 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -4,7 +4,6 @@
* Description: Follow the permalink of RSS/Atom feed items and replace the summary with the full content.
* Version: 1.1
* Author: Matthew Exon
- * Status: Unsupported
*/
use Friendica\Core\Addon;
@@ -53,6 +52,7 @@ function retriever_install() {
$arr = explode(';', $schema);
foreach ($arr as $a) {
$r = q($a);
+ //@@@ check for errors
}
Config::set('retriever', 'dbversion', '0.13');
}
@@ -332,7 +332,6 @@ function apply_retrospective($a, $retriever, $num) {
$r = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d",
intval($retriever['contact-id']), intval($num));
foreach ($r as $item) {
- Logger::log('@@@ 3 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); //@@@ already know this is wrong
q('UPDATE `item` SET `visible` = 0 WHERE `id` = %d', $item['id']);
q('UPDATE `thread` SET `visible` = 0 WHERE `iid` = %d', $item['id']);
retriever_on_item_insert($a, $retriever, $item);
@@ -343,7 +342,10 @@ function apply_retrospective($a, $retriever, $num) {
//@@@ need a lock to say something is doing something
function retriever_on_item_insert($a, $retriever, &$item) {
Logger::info('@@@ 4 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
- Logger::info('@@@ retriever_on_item_insert start ' . $item['plink']);
+ foreach ($item as $key => $value) {
+ Logger::info("@@@ $key => $value");
+ }
+ Logger::info('@@@ retriever_on_item_insert start ' . ' plink ' . $item['plink']);
if (!$retriever || !$retriever['id']) {
Logger::info('retriever_on_item_insert: No retriever supplied');
return;
@@ -352,15 +354,29 @@ function retriever_on_item_insert($a, $retriever, &$item) {
Logger::info('@@@ retriever_on_item_insert: Disabled');
return;
}
- if (array_key_exists('pattern', $retriever["data"]) && $retriever["data"]['pattern']) {
- $url = preg_replace('/' . $retriever["data"]['pattern'] . '/', $retriever["data"]['replace'], $item['plink']);
- Logger::debug('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url);
- }
- else {
+ if (array_key_exists('plink', $item)) {
$url = $item['plink'];
}
+ else {
+ if (!array_key_exists('uri_id', $item)) {
+ Logger::warning('retriever_on_item_insert: item ' . ' has no plink and no uri-id');
+ // @@@ find an identifier and put it in warning
+ Logger::warning('@@@ retriever_on_item_insert: item has: ' . print_r($item, true));
+ foreach ($item as $key => $value) {
+ Logger::warning("@@@ $key => $value");
+ }
+ return;
+ }
+ $content = DBA::selectFirst('item-content', [], ['uri-id' => $item['uri_id']]);
+ $url = $content['plink'];
+ }
- Logger::debug('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id']);
+ if (array_key_exists('pattern', $retriever["data"]) && $retriever["data"]['pattern']) {
+ $url = preg_replace('/' . $retriever["data"]['pattern'] . '/', $retriever["data"]['replace'], $url);
+ Logger::debug('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url);
+ }
+
+ Logger::debug('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id'] . ' url ' . $url);
$resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id']);
$retriever_item_id = add_retriever_item($item, $resource);
}
From 04e57e4334d97764b67393ecfe75c48fc75afb8f Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sun, 29 Sep 2019 17:01:46 +0200
Subject: [PATCH 17/94] Fix bugs in retriever retrospective stuff
---
retriever/retriever.php | 100 ++++++++++++++++----------------
retriever/templates/extract.tpl | 18 ++----
2 files changed, 55 insertions(+), 63 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 3b557d80..4d701276 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -30,16 +30,16 @@ function retriever_install() {
Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron');
if (Config::get('retriever', 'dbversion') == '0.10') {
- q("ALTER TABLE `retriever_resource` MODIFY COLUMN `type` char(255) NULL DEFAULT NULL");
- q("ALTER TABLE `retriever_resource` MODIFY COLUMN `data` mediumblob NULL DEFAULT NULL");
- q("ALTER TABLE `retriever_rule` MODIFY COLUMN `data` mediumtext NULL DEFAULT NULL");
+ q('ALTER TABLE `retriever_resource` MODIFY COLUMN `type` char(255) NULL DEFAULT NULL');
+ q('ALTER TABLE `retriever_resource` MODIFY COLUMN `data` mediumblob NULL DEFAULT NULL');
+ q('ALTER TABLE `retriever_rule` MODIFY COLUMN `data` mediumtext NULL DEFAULT NULL');
Config::set('retriever', 'dbversion', '0.11');
}
if (Config::get('retriever', 'dbversion') == '0.11') {
- q("ALTER TABLE `retriever_resource` ADD INDEX `url` (`url`)");
- q("ALTER TABLE `retriever_resource` ADD INDEX `completed` (`completed`)");
- q("ALTER TABLE `retriever_item` ADD INDEX `finished` (`finished`)");
- q("ALTER TABLE `retriever_item` ADD INDEX `item-uid` (`item-uid`)");
+ q('ALTER TABLE `retriever_resource` ADD INDEX `url` (`url`)');
+ q('ALTER TABLE `retriever_resource` ADD INDEX `completed` (`completed`)');
+ q('ALTER TABLE `retriever_item` ADD INDEX `finished` (`finished`)');
+ q('ALTER TABLE `retriever_item` ADD INDEX `item-uid` (`item-uid`)');
Config::set('retriever', 'dbversion', '0.12');
}
if (Config::get('retriever', 'dbversion') == '0.12') {
@@ -206,16 +206,19 @@ function retrieve_resource($resource) {
try {
Logger::debug('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url']);
$redirects = 0;
- $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-');
+ $cookiejar = '';
if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) {
+ $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-');
file_put_contents($cookiejar, $retriever_rule['cookiedata']);
}
$fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar);
if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) {
$retriever_rule['cookiedata'] = file_get_contents($cookiejar);
- //@@@ do the store here
+ Logger::debug('@@@ retriever_resource update cookie ' . json_encode($retriever_rule['data'] . ' id ' . $retriever_rule['id']));
+ q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d",
+ DBA::escape(json_encode($retriever_rule['data'])), intval($retriever_rule["id"]));
+ unlink($cookiejar);
}
- unlink($cookiejar);
$resource['data'] = $fetch_result->getBody();
$resource['http-code'] = $fetch_result->getReturnCode();
$resource['type'] = $fetch_result->getContentType();
@@ -323,17 +326,25 @@ function retriever_item_completed($retriever_item_id, $resource, $a) {
function retriever_resource_completed($resource, $a) {
Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url']);
$r = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource['id']);
- foreach ($r as $rr) {
- retriever_item_completed($rr['id'], $resource, $a);
+ foreach (DBA::select('retriever_item', ['id'], ['resource' => intval($resource['id'])]) as $retriever_item) {
+ Logger::debug('@@@ retriever_resource_completed got item id ' . $retriever_item['id']);
+ retriever_item_completed($retriever_item['id'], $resource, $a);
}
}
function apply_retrospective($a, $retriever, $num) {
+ Logger::info('@@@ apply_retrospective');
$r = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d",
intval($retriever['contact-id']), intval($num));
foreach ($r as $item) {
+ Logger::info('@@@ apply_retrospective item ' . $item['id']);
q('UPDATE `item` SET `visible` = 0 WHERE `id` = %d', $item['id']);
q('UPDATE `thread` SET `visible` = 0 WHERE `iid` = %d', $item['id']);
+ foreach (DBA::select('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]) as $retriever_item) {
+ Logger::info('@@@ about to delete retriever_item id ' . $retriever_item['id'] . ' uri ' . $item['uri'] . ' uid ' . $item['uid'] . ' contact ' . $item['contact-id']);
+ DBA::delete('retriever_resource', ['id' => $retriever_item['resource']]);
+ DBA::delete('retriever_item', ['id' => $retriever_item['id']]);
+ }
retriever_on_item_insert($a, $retriever, $item);
}
}
@@ -341,39 +352,31 @@ function apply_retrospective($a, $retriever, $num) {
//@@@ make this trigger a retriever immediately somehow
//@@@ need a lock to say something is doing something
function retriever_on_item_insert($a, $retriever, &$item) {
- Logger::info('@@@ 4 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
- foreach ($item as $key => $value) {
- Logger::info("@@@ $key => $value");
- }
- Logger::info('@@@ retriever_on_item_insert start ' . ' plink ' . $item['plink']);
+ Logger::info('@@@ retriever_on_item_insert start plink ' . $item['plink'] . ' id ' . $item['id']);
if (!$retriever || !$retriever['id']) {
Logger::info('retriever_on_item_insert: No retriever supplied');
return;
}
- if (!$retriever["data"]['enable'] == "on") {
+ if (!$retriever['data']['enable'] == "on") {
Logger::info('@@@ retriever_on_item_insert: Disabled');
return;
}
- if (array_key_exists('plink', $item)) {
+ if (array_key_exists('plink', $item) && strlen($item['plink'])) {
$url = $item['plink'];
}
else {
- if (!array_key_exists('uri_id', $item)) {
- Logger::warning('retriever_on_item_insert: item ' . ' has no plink and no uri-id');
- // @@@ find an identifier and put it in warning
- Logger::warning('@@@ retriever_on_item_insert: item has: ' . print_r($item, true));
- foreach ($item as $key => $value) {
- Logger::warning("@@@ $key => $value");
- }
+ if (!array_key_exists('uri-id', $item)) {
+ Logger::warning('retriever_on_item_insert: item ' . $item['id'] . ' has no plink and no uri-id');
return;
}
- $content = DBA::selectFirst('item-content', [], ['uri-id' => $item['uri_id']]);
+ $content = DBA::selectFirst('item-content', [], ['uri-id' => $item['uri-id']]);
$url = $content['plink'];
}
- if (array_key_exists('pattern', $retriever["data"]) && $retriever["data"]['pattern']) {
- $url = preg_replace('/' . $retriever["data"]['pattern'] . '/', $retriever["data"]['replace'], $url);
- Logger::debug('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url);
+ if (array_key_exists('pattern', $retriever['data']) && $retriever['data']['pattern']) {
+ $orig_url = $url;
+ $url = preg_replace('/' . $retriever['data']['pattern'] . '/', $retriever['data']['replace'], $orig_url);
+ Logger::debug('retriever_on_item_insert: Changed ' . $orig_url . ' to ' . $url);
}
Logger::debug('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id'] . ' url ' . $url);
@@ -476,15 +479,14 @@ function retriever_apply_xslt_text($xslt_text, $doc) {
Logger::info('retriever_apply_xslt_text: could not load XML');
return $doc;
}
+ Logger::debug('@@@ retriever_apply_xslt_text: ' . $xslt_text);
$xp = new XsltProcessor();
$xp->importStylesheet($xslt_doc);
$result = $xp->transformToDoc($doc);
return $result;
}
-//@@@ is that an item or a resource_item? I really want an item here so I can update it
function retriever_apply_dom_filter($retriever, &$item, $resource) {
- Logger::debug('@@@ 6 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
Logger::debug('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id']);
if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) {
@@ -496,6 +498,7 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
return;
}
+ //@@@ break this bit into separate function
$encoding = retriever_get_encoding($resource);
$content = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $encoding);
$doc = new DOMDocument('1.0', 'UTF-8');
@@ -522,23 +525,19 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
return;
}
- Logger::info('@@@ retriever_apply_dom_filter: 1');
+ //@@@ break this bit into separate function
$components = parse_url($resource['redirect-url']);
$rooturl = $components['scheme'] . "://" . $components['host'];
$dirurl = $rooturl . dirname($components['path']) . "/";
- Logger::info('@@@ retriever_apply_dom_filter: 2');
$params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl);
$fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/');
$fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params);
- Logger::info('@@@ retriever_apply_dom_filter: 3');
$doc = retriever_apply_xslt_text($fix_urls_xslt, $doc);
- Logger::info('@@@ retriever_apply_dom_filter: 4');
if (!$doc) {
Logger::info('retriever_apply_dom_filter: failed to apply fix urls XSLT template');
return;
}
- Logger::info('@@@ retriever_apply_dom_filter: 5');
$body = HTML::toBBCode($doc->saveHTML());
if (!strlen($body)) {
Logger::info('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty');
@@ -548,9 +547,7 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
$body .= $item['plink'];
$body .= ']' . $item['plink'] . '[/url]';
- Logger::info('@@@ retriever_apply_dom_filter: 6');
- $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
- Logger::info('@@@ retriever_apply_dom_filter: item id is ' . $item['id'] . ' uri id is ' . $uri_id);
+ $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? Consider using item['id'] instead
Logger::debug('retriever_apply_dom_filter: XSLT result \"' . $body . '\"');
Item::update(['body' => $body], ['uri-id' => $uri_id]);
}
@@ -629,7 +626,7 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc
if ((strpos($resource['type'], 'html') !== false) ||
(strpos($resource['type'], 'xml') !== false)) {
retriever_apply_dom_filter($retriever, $item, $resource);
- if ($retriever["data"]['images'] ) {
+ if ($retriever['data']['images'] ) {
retrieve_images($item, $a);
}
}
@@ -640,7 +637,7 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc
function retriever_transform_images($a, &$item, $resource) {
Logger::debug('@@@ 11 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
Logger::info('@@@ retriever_transform_images');
- if (!$resource["data"]) {
+ if (!$resource['data']) {
Logger::info('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url']);
return;
}
@@ -716,10 +713,13 @@ function retriever_content($a) {
if (!empty($_POST["id"])) {
$retriever_rule = get_retriever_rule($a->argv[1], local_user(), true);
- $retriever_rule["data"] = array();
+ $retriever_rule['data'] = array();
foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) {
- if (!empty($_POST['retriever_' . $setting])) {
- $retriever_rule["data"][$setting] = $_POST['retriever_' . $setting];
+ if (empty($_POST['retriever_' . $setting])) {
+ $retriever_rule['data'][$setting] = NULL;
+ }
+ else {
+ $retriever_rule['data'][$setting] = $_POST['retriever_' . $setting];
}
}
foreach ($_POST as $k=>$v) {
@@ -739,11 +739,11 @@ function retriever_content($a) {
}
}
q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d",
- DBA::escape(json_encode($retriever_rule["data"])), intval($retriever_rule["id"]));
+ DBA::escape(json_encode($retriever_rule['data'])), intval($retriever_rule["id"]));
$a->page['content'] .= "Settings Updated";
if (!empty($_POST["retriever_retrospective"])) {
apply_retrospective($a, $retriever_rule, $_POST["retriever_retrospective"]);
- $a->page['content'] .= " and retrospectively applied to " . $_POST["apply"] . " posts";
+ $a->page['content'] .= " and retrospectively applied to " . $_POST["retriever_retrospective"] . " posts";
}
$a->page['content'] .= ".
";
}
@@ -757,12 +757,12 @@ function retriever_content($a) {
'$pattern' => array(
'retriever_pattern',
L10n::t('URL Pattern'),
- $retriever_rule["data"]['pattern'],
+ $retriever_rule['data']['pattern'],
L10n::t('Regular expression matching part of the URL to replace')),
'$replace' => array(
'retriever_replace',
L10n::t('URL Replace'),
- $retriever_rule["data"]['replace'],
+ $retriever_rule['data']['replace'],
L10n::t('Text to replace matching part of above regular expression')),
'$images' => array(
'retriever_images',
@@ -802,7 +802,7 @@ function retriever_content($a) {
'$include_t' => L10n::t('Include'),
'$include' => $retriever_rule['data']['include'],
'$exclude_t' => L10n::t('Exclude'),
- '$exclude' => $retriever_rule["data"]['exclude']));
+ '$exclude' => $retriever_rule['data']['exclude']));
return;
}
}
diff --git a/retriever/templates/extract.tpl b/retriever/templates/extract.tpl
index f24a860d..ca67f683 100644
--- a/retriever/templates/extract.tpl
+++ b/retriever/templates/extract.tpl
@@ -3,25 +3,17 @@
-
-{{function clause_xpath}}
-{{if !$clause.attribute}}
-{{$clause.element}}{{elseif $clause.attribute == 'class'}}
-{{$clause.element}}[contains(concat(' ', normalize-space(@class), ' '), '{{$clause.value}}')]{{else}}
-{{$clause.element}}[@{{$clause.attribute}}='{{$clause.value}}']{{/if}}
-{{/function}}
-
+{{function clause_xpath}}{{if !$clause.attribute}}{{$clause.element}}{{elseif $clause.attribute == 'class'}}{{$clause.element}}[contains(concat(' ', normalize-space(@class), ' '), '{{$clause.value}}')]{{else}}{{$clause.element}}[@{{$clause.attribute}}='{{$clause.value}}']{{/if}}{{/function}}
{{foreach $spec.include as $clause}}
+
-
-{{/foreach}}
-
+ {{/foreach}}
{{foreach $spec.exclude as $clause}}
-
-{{/foreach}}
+
+ {{/foreach}}
From 0c9db8383af59932929bd4cafabd429fe0bff635 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sun, 29 Sep 2019 17:04:34 +0200
Subject: [PATCH 18/94] fakerei2
---
retriever/retriever.php | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 4d701276..294fba67 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -51,8 +51,10 @@ function retriever_install() {
$schema = file_get_contents(dirname(__file__).'/database.sql');
$arr = explode(';', $schema);
foreach ($arr as $a) {
- $r = q($a);
- //@@@ check for errors
+ if (!DBA::e($a)) {
+ Logger::warning('Unable to create database table: ' . DBA::errorMessage());
+ return;
+ }
}
Config::set('retriever', 'dbversion', '0.13');
}
@@ -142,7 +144,8 @@ function retriever_clean_up_completed_resources($max_items, $a) {
}
Logger::debug('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . count($r));
foreach ($r as $rr) {
- $resource = q("SELECT * FROM retriever_resource WHERE `id` = %d", $rr['resource']);
+ $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]);
+ Logger::info('@@@ retriever_clean_up_completed_resources did alternate thing resource type ' . $resource['type']);
$retriever_item = retriever_get_retriever_item($rr['item']);
if (!DBA::isResult($retriever_item)) {
Logger::warning('retriever_clean_up_completed_resources: no retriever item with id ' . $rr['item']);
@@ -159,7 +162,7 @@ function retriever_clean_up_completed_resources($max_items, $a) {
continue;
}
Logger::info('@@@ retriever_clean_up_completed_resources: about to retriever_apply_completed_resource_to_item');
- retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource[0], $a);
+ retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a);
q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", intval($retriever_item['id']));
retriever_check_item_completed($item);
}
From 043c5157073333127b3a9966c507cfdcb09161da Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sun, 29 Sep 2019 17:09:11 +0200
Subject: [PATCH 19/94] more dba stuff
---
retriever/retriever.php | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 294fba67..bbe138b9 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -144,8 +144,6 @@ function retriever_clean_up_completed_resources($max_items, $a) {
}
Logger::debug('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . count($r));
foreach ($r as $rr) {
- $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]);
- Logger::info('@@@ retriever_clean_up_completed_resources did alternate thing resource type ' . $resource['type']);
$retriever_item = retriever_get_retriever_item($rr['item']);
if (!DBA::isResult($retriever_item)) {
Logger::warning('retriever_clean_up_completed_resources: no retriever item with id ' . $rr['item']);
@@ -161,7 +159,7 @@ function retriever_clean_up_completed_resources($max_items, $a) {
Logger::warning('retriever_clean_up_completed_resources: no retriever for uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['uid'] . ' ' . $retriever_item['contact-id']);
continue;
}
- Logger::info('@@@ retriever_clean_up_completed_resources: about to retriever_apply_completed_resource_to_item');
+ $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]);
retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a);
q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", intval($retriever_item['id']));
retriever_check_item_completed($item);
From 3906813dcfc9682f3f73d82a04fca2b1b9c3e4f9 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sun, 29 Sep 2019 20:59:14 +0200
Subject: [PATCH 20/94] fixed image regex
---
retriever/retriever.php | 70 +++++++++++++++++++++++++++++------------
1 file changed, 50 insertions(+), 20 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index bbe138b9..20ab1ee8 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -47,7 +47,10 @@ function retriever_install() {
q("ALTER TABLE `retriever_resource` ADD COLUMN `item-uid` int(10) unsigned NOT NULL DEFAULT '0' AFTER `id`");
Config::set('retriever', 'dbversion', '0.13');
}
- if (Config::get('retriever', 'dbversion') != '0.13') {
+ if (Config::get('retriever', 'dbversion') == '0.13') {
+ Config::set('retriever', 'downloads_per_cron', '100');
+ }
+ if (Config::get('retriever', 'dbversion') != '0.14') {
$schema = file_get_contents(dirname(__file__).'/database.sql');
$arr = explode(';', $schema);
foreach ($arr as $a) {
@@ -56,7 +59,8 @@ function retriever_install() {
return;
}
}
- Config::set('retriever', 'dbversion', '0.13');
+ Config::set('retriever', 'downloads_per_cron', '100');
+ Config::set('retriever', 'dbversion', '0.14');
}
}
@@ -72,20 +76,37 @@ function retriever_uninstall() {
function retriever_module() {}
+function retriever_addon_admin(&$a, &$o) {
+ $downloads_per_cron = Config::get('retriever', 'downloads_per_cron');
+ $template = Renderer::getMarkupTemplate('admin.tpl', 'addon/retriever/');
+ $config = ['downloads_per_cron',
+ L10n::t('Downloads per Cron'),
+ $downloads_per_cron,
+ L10n::t('Maximum number of downloads to attempt during each run of the cron job.')];
+ $o .= Renderer::replaceMacros($template, [
+ '$downloads_per_cron' => $config,
+ '$submit' => L10n::t('Save Settings')]);
+}
+
+function retriever_addon_admin_post ($a) {
+ if (!empty($_POST['downloads_per_cron'])) {
+ Config::set('retriever', 'downloads_per_cron', $_POST['downloads_per_cron']);
+ }
+}
+
function retriever_cron($a, $b) {
- // 100 is a nice sane number. Maybe this should be configurable. @@@
+ $downloads_per_cron = Config::get('retriever', 'downloads_per_cron');
- // Do this first, otherwise it can interfere with retreiver_retrieve_items
- retriever_clean_up_completed_resources(100, $a);
+ // Do this first, otherwise it can interfere with retriever_retrieve_items
+ retriever_clean_up_completed_resources($downloads_per_cron, $a);
- retriever_retrieve_items(100, $a);
+ retriever_retrieve_items($downloads_per_cron, $a);
retriever_tidy();
}
$retriever_item_count = 0;
function retriever_retrieve_items($max_items, $a) {
- Logger::info('@@@ retriever_retrieve_items');
global $retriever_item_count;
$retriever_schedule = array(array(1,'minute'),
@@ -108,8 +129,7 @@ function retriever_retrieve_items($max_items, $a) {
$retrieve_items = $max_items - $retriever_item_count;
Logger::debug('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items);
do {
- Logger::info('@@@ retriever_retrieve_items loop max ' . $max_items . ' count ' . $retriever_item_count);
- Logger::info("@@@ SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR " . implode($schedule_clauses, ' OR ') . ") ORDER BY `last-try` ASC LIMIT " . $retrieve_items);
+ // TODO: figure out how to do this with DBA module
$retriever_resources = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d",
DBA::escape(implode($schedule_clauses, ' OR ')),
intval($retrieve_items));
@@ -121,7 +141,6 @@ function retriever_retrieve_items($max_items, $a) {
}
Logger::debug('retriever_retrieve_items: found ' . count($retriever_resources) . ' waiting resources in database');
foreach ($retriever_resources as $retriever_resource) {
- Logger::info('@@@ need to get the retriever config here cid ' . $retriever_resource['contact-id'] . ' uid ' . $retriever_resource['item-uid']);
retrieve_resource($retriever_resource);
$retriever_item_count++;
}
@@ -129,7 +148,7 @@ function retriever_retrieve_items($max_items, $a) {
}
while ($retrieve_items > 0);
// @@@ todo: when items add further items (i.e. images), do the new images go round this loop again?
- Logger::info('@@@ retriever_retrieve_items: finished retrieving items');
+ Logger::debug('retriever_retrieve_items: finished retrieving items');
}
/* Look for items that are waiting even though the resource has
@@ -137,7 +156,8 @@ function retriever_retrieve_items($max_items, $a) {
* retrospectively apply a config change. It could also happen due to
* a cron job dying or something. */
function retriever_clean_up_completed_resources($max_items, $a) {
- $r = q("SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d",
+ // TODO: figure out how to do this with DBA module
+ $r = q('SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d',
intval($max_items));
if (!$r) {
$r = array();
@@ -161,6 +181,7 @@ function retriever_clean_up_completed_resources($max_items, $a) {
}
$resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]);
retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a);
+ //@@@ next one to do
q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", intval($retriever_item['id']));
retriever_check_item_completed($item);
}
@@ -208,8 +229,10 @@ function retrieve_resource($resource) {
Logger::debug('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url']);
$redirects = 0;
$cookiejar = '';
+ Logger::debug('@@@ retrieve_resource storecookies ' . $retriever_rule['storecookies']);
if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) {
$cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-');
+ Logger::debug('@@@ retrieve_resource cookie file ' . $cookiejar . ' content ' . $retriever_rule['cookiedata']);
file_put_contents($cookiejar, $retriever_rule['cookiedata']);
}
$fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar);
@@ -218,7 +241,7 @@ function retrieve_resource($resource) {
Logger::debug('@@@ retriever_resource update cookie ' . json_encode($retriever_rule['data'] . ' id ' . $retriever_rule['id']));
q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d",
DBA::escape(json_encode($retriever_rule['data'])), intval($retriever_rule["id"]));
- unlink($cookiejar);
+ /* unlink($cookiejar); */ //@@@
}
$resource['data'] = $fetch_result->getBody();
$resource['http-code'] = $fetch_result->getReturnCode();
@@ -350,8 +373,8 @@ function apply_retrospective($a, $retriever, $num) {
}
}
-//@@@ make this trigger a retriever immediately somehow
-//@@@ need a lock to say something is doing something
+// TODO: Currently this waits until the next cron before actually downloading. Should do it immediately.
+// TODO: This queries then inserts. It should use some kind of lock to avoid requesting the same resource twice.
function retriever_on_item_insert($a, $retriever, &$item) {
Logger::info('@@@ retriever_on_item_insert start plink ' . $item['plink'] . ' id ' . $item['id']);
if (!$retriever || !$retriever['id']) {
@@ -397,6 +420,7 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) {
fclose($fp);
$url = 'md5://' . hash('md5', $url);
+ //@@@ fix this
$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid));
$resource = $r[0];
if (count($r)) {
@@ -405,6 +429,7 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) {
}
Logger::debug('retrieve_resource: got data URL type ' . $resource['type']);
+ //@@@ fix this
q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `type`, `binary`, `url`, `completed`, `data`) " .
"VALUES (%d, %d, '%s', %d, '%s', now(), '%s')",
intval($uid),
@@ -425,6 +450,7 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) {
Logger::warning('add_retriever_resource: URL is longer than 800 characters');
}
+ //@@@ fix this
$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid));
if (count($r)) {
Logger::debug('add_retriever_resource: Resource ' . $url . ' uid ' . $uid . ' cid ' . $cid . ' already requested');
@@ -554,24 +580,29 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
}
function retrieve_images(&$item, $a) {
+ // Note that $item doesn't necessarily contain all the fields you would expect, in particular 'id'
$blah_item_class = retriever_class_of_item($item) . ' ' . mat_test($item);
Logger::debug('@@@ 7 item class is ' . $blah_item_class);
+ Logger::debug('@@@ retrieve_images start item '. $item['id'] . ' uri ' . $item['uri'] . ' uri id ' . $item['uri-id'] . ' plink ' . $item['plink'] . ' guid ' . $item['guid']);
$uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
- $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]);
+ $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $uri_id]);
$body = $content['body'];
if (!strlen($body)) {
Logger::warning('retrieve_images: no body for uri-id ' . $uri_id);
return;
}
- Logger::info('@@@ retrieve_images start looking in body "' . $body . '"');
+ Logger::info('@@@ retrieve_images looking in body "' . $body . '"');
+ // I suspect that matches1 and matches2 are not used any more?
$matches1 = array();
preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $body, $matches1);
$matches2 = array();
preg_match_all("/\[img\](.*?)\[\/img\]/ism", $body, $matches2);
- $matches = array_merge($matches1[3], $matches2[1]);
+ $matches3 = array();
+ preg_match_all("/\[img\=([^\]]*)\]([^[]*)\[\/img\]/ism", $body, $matches3);
+ $matches = array_merge($matches1[3], $matches2[1], $matches3[1]);
Logger::debug('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
foreach ($matches as $url) {
Logger::debug('@@@ retrieve_images: url ' . $url);
@@ -615,7 +646,6 @@ function retriever_check_item_completed(&$item)
}
function retriever_apply_completed_resource_to_item($retriever, &$item, $resource, $a) {
- Logger::debug('@@@ 10 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
Logger::debug('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . $item['plink']);
if (strpos($resource['type'], 'image') !== false) {
Logger::info('@@@ retriever_apply_completed_resource_to_item this is an image must transform');
@@ -676,7 +706,7 @@ function retriever_transform_images($a, &$item, $resource) {
return;
}
- $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]);
+ $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $uri_id]);
$body = $content['body'];
Logger::info('@@@ retriever_transform_images: found body for uri id ' . $uri_id . ': ' . $body);
From dbd00503aaaaef8d4c3100ae1d6af51601c54857 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sun, 29 Sep 2019 22:05:49 +0200
Subject: [PATCH 21/94] Stuff in retriever
---
retriever/retriever.php | 27 ++++++++-------------------
1 file changed, 8 insertions(+), 19 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 20ab1ee8..f43504e6 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -147,14 +147,10 @@ function retriever_retrieve_items($max_items, $a) {
$retrieve_items = $max_items - $retriever_item_count;
}
while ($retrieve_items > 0);
- // @@@ todo: when items add further items (i.e. images), do the new images go round this loop again?
Logger::debug('retriever_retrieve_items: finished retrieving items');
}
-/* Look for items that are waiting even though the resource has
- * completed. This usually happens because we've been asked to
- * retrospectively apply a config change. It could also happen due to
- * a cron job dying or something. */
+// Look for items that are waiting even though the resource has completed. This shouldn't happen, but is worth cleaning up if it does.
function retriever_clean_up_completed_resources($max_items, $a) {
// TODO: figure out how to do this with DBA module
$r = q('SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d',
@@ -181,13 +177,14 @@ function retriever_clean_up_completed_resources($max_items, $a) {
}
$resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]);
retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a);
- //@@@ next one to do
- q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", intval($retriever_item['id']));
+ Logger::info('@@@ retriever_clean_up_completed_resources tried to update id ' . $retriever_item['id'] . ' to finished, better check that it really worked!');
+ DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished']);
retriever_check_item_completed($item);
}
}
function retriever_tidy() {
+ // TODO: figure out how to do this with DBA module
q("DELETE FROM retriever_resource WHERE completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)");
q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)");
@@ -581,8 +578,6 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
function retrieve_images(&$item, $a) {
// Note that $item doesn't necessarily contain all the fields you would expect, in particular 'id'
- $blah_item_class = retriever_class_of_item($item) . ' ' . mat_test($item);
- Logger::debug('@@@ 7 item class is ' . $blah_item_class);
Logger::debug('@@@ retrieve_images start item '. $item['id'] . ' uri ' . $item['uri'] . ' uri id ' . $item['uri-id'] . ' plink ' . $item['plink'] . ' guid ' . $item['guid']);
$uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
@@ -595,27 +590,21 @@ function retrieve_images(&$item, $a) {
}
Logger::info('@@@ retrieve_images looking in body "' . $body . '"');
- // I suspect that matches1 and matches2 are not used any more?
- $matches1 = array();
- preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $body, $matches1);
- $matches2 = array();
- preg_match_all("/\[img\](.*?)\[\/img\]/ism", $body, $matches2);
- $matches3 = array();
- preg_match_all("/\[img\=([^\]]*)\]([^[]*)\[\/img\]/ism", $body, $matches3);
+ // I suspect that the first two are not used any more?
+ preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $item["body"], $matches1);
+ preg_match_all("/\[img\](.*?)\[\/img\]/ism", $item["body"], $matches2);
+ preg_match_all("/\[img\=([^\]]*)\]([^[]*)\[\/img\]/ism", $item["body"], $matches3);
$matches = array_merge($matches1[3], $matches2[1], $matches3[1]);
Logger::debug('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
foreach ($matches as $url) {
Logger::debug('@@@ retrieve_images: url ' . $url);
if (strpos($url, get_app()->getBaseUrl()) === FALSE) {
- Logger::debug('@@@ retrieve_images: it is from somewhere else');
Logger::debug('@@@ retrieve_images: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id']);
$resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id'], true);
if (!$resource['completed']) {
- Logger::debug('@@@ retrieve_images: do not have it yet, get it later');
add_retriever_item($item, $resource);
}
else {
- Logger::debug('@@@ retrieve_images: got it already, transform');
retriever_transform_images($a, $item, $resource);
}
}
From be68a4aa3c53b08731b92d66b64979b322be2a34 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Mon, 30 Sep 2019 08:25:00 +0200
Subject: [PATCH 22/94] update version number
---
retriever/retriever.php | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index f43504e6..fcd45b46 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -1,8 +1,8 @@
*/
From 2ba05cc80cb81f8d625ec44b4e0167f62afaa2cb Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Mon, 30 Sep 2019 08:25:16 +0200
Subject: [PATCH 23/94] configurable number of requests
---
retriever/templates/admin.tpl | 8 ++++++++
1 file changed, 8 insertions(+)
create mode 100644 retriever/templates/admin.tpl
diff --git a/retriever/templates/admin.tpl b/retriever/templates/admin.tpl
new file mode 100644
index 00000000..b5a35961
--- /dev/null
+++ b/retriever/templates/admin.tpl
@@ -0,0 +1,8 @@
+{{*
+ * AUTOMATICALLY GENERATED TEMPLATE
+ * DO NOT EDIT THIS FILE, CHANGES WILL BE OVERWRITTEN
+ *
+ *}}
+{{include file="field_input.tpl" field=$downloads_per_cron}}
+
+
From 42314b667063d2dca23376d138ef7cb27874c3ce Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Mon, 30 Sep 2019 20:52:05 +0200
Subject: [PATCH 24/94] Add phototrack and publicise
---
phototrack/database.sql | 23 ++
phototrack/phototrack.php | 258 ++++++++++++++++++++
publicise/publicise.php | 431 ++++++++++++++++++++++++++++++++++
publicise/templates/admin.tpl | 39 +++
4 files changed, 751 insertions(+)
create mode 100644 phototrack/database.sql
create mode 100644 phototrack/phototrack.php
create mode 100644 publicise/publicise.php
create mode 100644 publicise/templates/admin.tpl
diff --git a/phototrack/database.sql b/phototrack/database.sql
new file mode 100644
index 00000000..f1b58f6b
--- /dev/null
+++ b/phototrack/database.sql
@@ -0,0 +1,23 @@
+CREATE TABLE IF NOT EXISTS `phototrack_photo_use` (
+ `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
+ `resource-id` char(64) NOT NULL,
+ `table` char(64) NOT NULL,
+ `field` char(64) NOT NULL,
+ `row-id` int(11) NOT NULL,
+ `checked` timestamp NOT NULL DEFAULT now(),
+ PRIMARY KEY (`id`),
+ INDEX `resource-id` (`resource-id`),
+ INDEX `row` (`table`,`field`,`row-id`)
+) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin;
+
+CREATE TABLE IF NOT EXISTS `phototrack_row_check` (
+ `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
+ `table` char(64) NOT NULL,
+ `row-id` int(11) NOT NULL,
+ `checked` timestamp NOT NULL DEFAULT now(),
+ PRIMARY KEY (`id`),
+ INDEX `row` (`table`,`row-id`),
+ INDEX `checked` (`checked`)
+) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin;
+
+SELECT TRUE
diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php
new file mode 100644
index 00000000..8b909f5d
--- /dev/null
+++ b/phototrack/phototrack.php
@@ -0,0 +1,258 @@
+
+ */
+
+/*
+ * List of tables and the fields that are checked:
+ *
+ * contact: photo thumb micro about
+ * fcontact: photo
+ * fsuggest: photo
+ * gcontact: photo about
+ * item: body
+ * item-content: body
+ * mail: from-photo
+ * notify: photo
+ * profile: photo thumb about
+ */
+
+use Friendica\Core\Addon;
+use Friendica\Core\Config;
+use Friendica\Core\Logger;
+use Friendica\Object\Image;
+use Friendica\Database\DBA;
+
+if (!defined('PHOTOTRACK_DEFAULT_BATCH_SIZE')) {
+ define('PHOTOTRACK_DEFAULT_BATCH_SIZE', 1000);
+}
+// Time in *minutes* between searching for photo uses
+if (!defined('PHOTOTRACK_DEFAULT_SEARCH_INTERVAL')) {
+ define('PHOTOTRACK_DEFAULT_SEARCH_INTERVAL', 10);
+}
+
+function phototrack_install() { // Addon install hook: registers the hooks below and, unless dbversion is already '0.1', runs database.sql.
+ global $db; // NOTE(review): $db is not referenced anywhere in this function.
+
+ Addon::registerHook('post_local_end', 'addon/phototrack/phototrack.php', 'phototrack_post_local_end');
+ Addon::registerHook('post_remote_end', 'addon/phototrack/phototrack.php', 'phototrack_post_remote_end');
+ Addon::registerHook('notifier_end', 'addon/phototrack/phototrack.php', 'phototrack_notifier_end');
+ Addon::registerHook('cron', 'addon/phototrack/phototrack.php', 'phototrack_cron');
+
+ if (Config::get('phototrack', 'dbversion') != '0.1') {
+ $schema = file_get_contents(dirname(__file__).'/database.sql');
+ $arr = explode(';', $schema); // one entry per ';'-separated statement in database.sql
+ foreach ($arr as $a) {
+ if (!DBA::e($a)) {
+ Logger::warning('Unable to create database table: ' . DBA::errorMessage());
+ return; // dbversion is not set on this path
+ }
+ }
+ Config::set('phototrack', 'dbversion', '0.1');
+ }
+}
+
+function phototrack_uninstall() { // Addon uninstall hook: unregisters the four hooks; no tables or config values are removed here.
+ Addon::unregisterHook('post_local_end', 'addon/phototrack/phototrack.php', 'phototrack_post_local_end');
+ Addon::unregisterHook('post_remote_end', 'addon/phototrack/phototrack.php', 'phototrack_post_remote_end');
+ Addon::unregisterHook('notifier_end', 'addon/phototrack/phototrack.php', 'phototrack_notifier_end');
+ Addon::unregisterHook('cron', 'addon/phototrack/phototrack.php', 'phototrack_cron');
+}
+
+function phototrack_module() {} // Intentionally empty. NOTE(review): presumably only declares that the addon provides a module - confirm.
+
+function phototrack_finished_row($table, $id) { // Stamp row $id of $table as checked now: update its phototrack_row_check entry, or insert one if none exists.
+ $existing = DBA::selectFirst('phototrack_row_check', ['id'], ['table' => $table, 'row-id' => $id]);
+ if (!is_bool($existing)) {
+ q("UPDATE phototrack_row_check SET checked = NOW() WHERE `table` = '%s' AND `row-id` = %d", DBA::escape($table), intval($id)); // escaped placeholders rather than raw interpolation
+ }
+ else {
+ q("INSERT INTO phototrack_row_check (`table`, `row-id`, `checked`) VALUES ('%s', %d, NOW())", DBA::escape($table), intval($id));
+ }
+}
+
+function phototrack_photo_use($photo, $table, $field, $id) { // Record that photo $photo is referenced by $table.$field of row $id; does nothing unless it resolves to a known 32-character resource-id.
+ Logger::debug('@@@ phototrack_photo_use ' . $photo);
+ foreach (Image::supportedTypes() as $m => $e) {
+ $photo = str_replace(".$e", '', $photo); // strip any supported image extension
+ }
+ if (substr($photo, -2, 1) == '-') {
+ // Drop the trailing two-character "-N" suffix; the digit itself is not needed here.
+ $photo = substr($photo,0,-2);
+ }
+ if (strlen($photo) != 32) {
+ return;
+ }
+ $r = q("SELECT `resource-id` FROM `photo` WHERE `resource-id` = '%s' LIMIT 1", DBA::escape($photo));
+ if (!DBA::isResult($r)) { // also covers a non-array q() result, which count() cannot handle
+ return;
+ }
+ $rid = $r[0]['resource-id'];
+ $existing = q("SELECT id FROM phototrack_photo_use WHERE `resource-id` = '%s' AND `table` = '%s' AND `field` = '%s' AND `row-id` = %d", DBA::escape($rid), DBA::escape($table), DBA::escape($field), intval($id));
+ if (DBA::isResult($existing)) {
+ q("UPDATE phototrack_photo_use SET checked = NOW() WHERE `resource-id` = '%s' AND `table` = '%s' AND `field` = '%s' AND `row-id` = %d", DBA::escape($rid), DBA::escape($table), DBA::escape($field), intval($id));
+ }
+ else {
+ q("INSERT INTO phototrack_photo_use (`resource-id`, `table`, `field`, `row-id`, `checked`) VALUES ('%s', '%s', '%s', %d, NOW())", DBA::escape($rid), DBA::escape($table), DBA::escape($field), intval($id));
+ }
+}
+
+function phototrack_check_field_url($a, $table, $field, $id, $url) { // If $url contains a /photo/ path, pass the remainder to phototrack_photo_use() for $table.$field of row $id.
+ Logger::info('@@@ phototrack_check_field_url table ' . $table . ' field ' . $field . ' id ' . $id . ' url ' . $url);
+ $baseurl = $a->getBaseURL();
+ if (strpos($url, $baseurl) !== FALSE) { // NOTE(review): tests "contains", but the substr below assumes the base URL is a prefix
+ $url = substr($url, strlen($baseurl));
+ Logger::info('@@@ phototrack_check_field_url funny url stuff ' . $url . ' base ' . $baseurl);
+ }
+ if (strpos($url, '/photo/') !== FALSE) { // NOTE(review): same pattern; the substr below assumes '/photo/' is at offset 0
+ $rid = substr($url, strlen('/photo/'));
+ Logger::info('@@@ phototrack_check_field_url rid ' . $rid);
+ phototrack_photo_use($rid, $table, $field, $id);
+ }
+}
+
+function phototrack_check_field_bbcode($a, $table, $field, $id, $value) { // Run the URL check on every [img] tag found in the BBCode text $value.
+ $baseurl = $a->getBaseURL(); // NOTE(review): $baseurl is never used below
+ $matches = array();
+ preg_match_all("/\[img(\=([0-9]*)x([0-9]*))?\](.*?)\[\/img\]/ism", $value, $matches); // matches [img]URL[/img] and [img=WxH]URL[/img]; group 4 is the URL
+ foreach ($matches[4] as $url) {
+ phototrack_check_field_url($a, $table, $field, $id, $url);
+ }
+}
+
+function phototrack_post_local_end(&$a, &$item) { // Hook handler: checks the same $item once as an 'item' row and once as an 'item-content' row.
+ phototrack_check_row($a, 'item', $item);
+ phototrack_check_row($a, 'item-content', $item); // NOTE(review): uses $item['id'] as the item-content row id - confirm the two ids coincide
+}
+
+function phototrack_post_remote_end(&$a, &$item) { // Hook handler: checks the same $item once as an 'item' row and once as an 'item-content' row.
+ phototrack_check_row($a, 'item', $item);
+ phototrack_check_row($a, 'item-content', $item); // NOTE(review): uses $item['id'] as the item-content row id - confirm the two ids coincide
+}
+
+function phototrack_notifier_end($item) { // Hook handler that currently does nothing with $item.
+ $a = get_app(); // fetched but not used
+}
+
+function phototrack_check_row($a, $table, $row) { // Check every photo-bearing field of $row (per the table-to-fields map below), then mark the row as finished.
+ switch ($table) { // maps each table to its fields and how each field is parsed ('bbcode' text or a plain 'url')
+ case 'item':
+ $fields = array(
+ 'body' => 'bbcode');
+ break;
+ case 'item-content':
+ $fields = array(
+ 'body' => 'bbcode');
+ break;
+ case 'contact':
+ $fields = array(
+ 'photo' => 'url',
+ 'thumb' => 'url',
+ 'micro' => 'url',
+ 'about' => 'bbcode');
+ break;
+ case 'fcontact':
+ $fields = array(
+ 'photo' => 'url');
+ break;
+ case 'fsuggest':
+ $fields = array(
+ 'photo' => 'url');
+ break;
+ case 'gcontact':
+ $fields = array(
+ 'photo' => 'url',
+ 'about' => 'bbcode');
+ break;
+ default: $fields = array(); break; // unknown tables have no fields to check but are still marked finished below
+ }
+ foreach ($fields as $field => $type) {
+ switch ($type) {
+ case 'bbcode': phototrack_check_field_bbcode($a, $table, $field, $row['id'], $row[$field]); break;
+ case 'url': phototrack_check_field_url($a, $table, $field, $row['id'], $row[$field]); break;
+ }
+ }
+ phototrack_finished_row($table, $row['id']);
+}
+
+function phototrack_batch_size() { // Returns the configured batch size as an integer, or PHOTOTRACK_DEFAULT_BATCH_SIZE when it is unset or not positive.
+ $batch_size = intval(Config::get('phototrack', 'batch_size')); // cast: callers splice this value into a LIMIT clause
+ if ($batch_size > 0) {
+ return $batch_size;
+ }
+ return PHOTOTRACK_DEFAULT_BATCH_SIZE;
+}
+
+function phototrack_search_table($a, $table) { // Check one batch of rows of $table that are unchecked or were last checked over a month ago; returns how many such rows remain.
+ $batch_size = intval(phototrack_batch_size()); // integer only: interpolated into LIMIT below
+ $rows = q("SELECT `$table`.* FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) ) ORDER BY phototrack_row_check.checked LIMIT $batch_size") ?: []; // a falsy q() result is treated as no rows
+ foreach ($rows as $row) {
+ phototrack_check_row($a, $table, $row);
+ }
+ $r = q("SELECT COUNT(*) FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) )");
+ $remaining = DBA::isResult($r) ? intval($r[0]['COUNT(*)']) : 0; // a failed count query contributes 0 to the caller's total
+ Logger::info('phototrack: searched ' . count($rows) . ' rows in table ' . $table . ', ' . $remaining . ' still remaining to search');
+ return $remaining;
+}
+
+function phototrack_cron_time() { // Returns true when the cron hook should run a search now, false when the search interval has not yet elapsed.
+ $prev_remaining = Config::get('phototrack', 'remaining_items');
+ if ($prev_remaining > 10 * phototrack_batch_size()) { // large backlog from the previous run: skip the interval check
+ Logger::debug('phototrack: more than ' . (10 * phototrack_batch_size()) . ' items remaining');
+ return true;
+ }
+ $last = Config::get('phototrack', 'last_search');
+ $search_interval = intval(Config::get('phototrack', 'search_interval'));
+ if (!$search_interval) {
+ $search_interval = PHOTOTRACK_DEFAULT_SEARCH_INTERVAL;
+ }
+ if ($last) {
+ $next = $last + ($search_interval * 60); // the interval is in minutes, $last in seconds
+ if ($next > time()) {
+ Logger::debug('phototrack: search interval not reached');
+ return false;
+ }
+ }
+ return true; // no previous search recorded, or the interval has elapsed
+}
+
+function phototrack_cron($a, $b) { // Cron hook: when due, search one batch from each tracked table, store the total backlog, and tidy once nothing remains.
+ if (!phototrack_cron_time()) {
+ return;
+ }
+ Config::set('phototrack', 'last_search', time()); // recorded before the search itself runs
+
+ $remaining = 0;
+ $remaining += phototrack_search_table($a, 'item');
+ $remaining += phototrack_search_table($a, 'item-content');
+ $remaining += phototrack_search_table($a, 'contact');
+ $remaining += phototrack_search_table($a, 'fcontact');
+ $remaining += phototrack_search_table($a, 'fsuggest');
+ $remaining += phototrack_search_table($a, 'gcontact');
+
+ Config::set('phototrack', 'remaining_items', $remaining);
+ if ($remaining === 0) { // photos are only removed once every table has been fully searched
+ phototrack_tidy();
+ }
+}
+
+function phototrack_tidy() { // Delete photos created over two months ago that have no phototrack_photo_use row, then delete use rows not re-checked for 14 days.
+ // NOTE(review): the LIMIT below is hard-coded to 1000; the configured batch size is commented out of it.
+ q('CREATE TABLE IF NOT EXISTS `phototrack-temp` (`resource-id` char(255) not null)');
+ q('INSERT INTO `phototrack-temp` SELECT DISTINCT(`resource-id`) FROM photo WHERE photo.`created` < DATE_SUB(NOW(), INTERVAL 2 MONTH)');
+ $rows = q('SELECT `phototrack-temp`.`resource-id` FROM `phototrack-temp` LEFT OUTER JOIN phototrack_photo_use ON (`phototrack-temp`.`resource-id` = phototrack_photo_use.`resource-id`) WHERE phototrack_photo_use.id IS NULL limit ' . /*$batch_size*/1000) ?: []; // a falsy q() result is treated as no rows
+ foreach ($rows as $row) {
+ Logger::debug('phototrack: remove photo ' . $row['resource-id']);
+ q("DELETE FROM photo WHERE `resource-id` = '%s'", DBA::escape($row['resource-id'])); // escaped placeholder instead of string concatenation
+ }
+ q('DROP TABLE `phototrack-temp`');
+ Logger::info('phototrack_tidy: deleted ' . count($rows) . ' photos');
+ $rows = q('SELECT id FROM phototrack_photo_use WHERE checked < DATE_SUB(NOW(), INTERVAL 14 DAY)') ?: [];
+ foreach ($rows as $row) {
+ q('DELETE FROM phototrack_photo_use WHERE id = %d', intval($row['id']));
+ }
+ Logger::info('phototrack_tidy: deleted ' . count($rows) . ' phototrack_photo_use rows');
+}
diff --git a/publicise/publicise.php b/publicise/publicise.php
new file mode 100644
index 00000000..d27eefd4
--- /dev/null
+++ b/publicise/publicise.php
@@ -0,0 +1,431 @@
+
+ */
+
+use Friendica\Core\Addon;
+use Friendica\Core\Logger;
+use Friendica\Core\Renderer;
+use Friendica\Core\L10n;
+use Friendica\Database\DBA;
+
+function publicise_install() { // Addon install hook: registers the post_remote hook handler.
+ Addon::registerHook('post_remote', 'addon/publicise/publicise.php', 'publicise_post_remote_hook');
+}
+
+function publicise_uninstall() { // Addon uninstall hook: unregisters both hook handlers listed below.
+ Addon::unregisterHook('post_remote', 'addon/publicise/publicise.php', 'publicise_post_remote_hook');
+ Addon::unregisterHook('post_remote_end', 'addon/publicise/publicise.php', 'publicise_post_remote_end_hook'); // NOTE(review): publicise_install() does not register this hook
+}
+
+function publicise_get_contacts() {
+ $query = <<$v) {
+ $enabled = ($v['reason'] === 'publicise') ? 1 : NULL;
+ $expire = 30;
+ $comments = 1;
+ $url = $v['url'];
+ if ($enabled) {
+ $r = q('SELECT * FROM `user` WHERE `uid` = %d', intval($v['uid']));
+ $expire = $r[0]['expire'];
+ $url = $a->get_baseurl() . '/profile/' . $v['nick'];
+ if ($r[0]['page-flags'] == PAGE_SOAPBOX) {
+ $comments = NULL;
+ }
+ if ($r[0]['account_expired']) {
+ $enabled = NULL;
+ }
+ }
+ $contacts[$k]['enabled'] = array('publicise-enabled-' . $v['id'], NULL, $enabled);
+ $contacts[$k]['comments'] = array('publicise-comments-' . $v['id'], NULL, $comments);
+ $contacts[$k]['expire'] = $expire;
+ $contacts[$k]['url'] = $url;
+ }
+ $template = Renderer::getMarkupTemplate('admin.tpl', 'addon/publicise/');
+ $o .= Renderer::replaceMacros($template, array(
+ '$feeds' => $contacts,
+ '$feed_t' => L10n::t('Feed'),
+ '$publicised_t' => L10n::t('Publicised'),
+ '$comments_t' => L10n::t('Allow Comments/Likes'),
+ '$expire_t' => L10n::t('Expire Articles After (Days)'),
+ '$submit_t' => L10n::t('Submit')));
+}
+
+function publicise_make_string($in) { // Returns $in escaped with DBA::escape() and wrapped in single quotes, for splicing into a hand-built SQL statement.
+ return "'" . DBA::escape($in) . "'";
+}
+
+function publicise_make_int($in) { // Returns the integer value of $in (0 when it is not numeric), safe to splice into a hand-built SQL statement.
+ return intval($in); // never hand back the raw input: "1; DROP ..." has a non-zero intval but is not an integer
+}
+
+function publicise_create_user($owner, $contact) { // Insert a new `user` row for the feed $contact, copying account settings from $owner.
+ // Returns the new user row, or nothing when the contact has no nick or the insert/lookup fails.
+ $nick = $contact['nick'];
+ if (!$nick) {
+ notice(sprintf(L10n::t("Can't publicise feed \"%s\" because it doesn't have a nickname"), $contact['name']) . EOL);
+ return;
+ }
+ Logger::info('Publicise: create user, beginning key generation...');
+ $res=openssl_pkey_new(array(
+ 'digest_alg' => 'sha1',
+ 'private_key_bits' => 4096,
+ 'encrypt_key' => false ));
+ $prvkey = '';
+ openssl_pkey_export($res, $prvkey);
+ $pkey = openssl_pkey_get_details($res);
+ $pubkey = $pkey["key"];
+ $sres=openssl_pkey_new(array(
+ 'digest_alg' => 'sha1',
+ 'private_key_bits' => 512,
+ 'encrypt_key' => false ));
+ $sprvkey = '';
+ openssl_pkey_export($sres, $sprvkey);
+ $spkey = openssl_pkey_get_details($sres);
+ $spubkey = $spkey["key"];
+ $guid = generate_user_guid();
+ // Every value below is pre-quoted or cast, because the INSERT is assembled by implode().
+ $newuser = array(
+ 'guid' => publicise_make_string($guid),
+ 'username' => publicise_make_string($contact['name']),
+ 'password' => publicise_make_string($owner['password']),
+ 'nickname' => publicise_make_string($contact['nick']),
+ 'email' => publicise_make_string($owner['email']),
+ 'openid' => publicise_make_string($owner['openid']),
+ 'timezone' => publicise_make_string($owner['timezone']),
+ 'language' => publicise_make_string($owner['language']),
+ 'register_date' => publicise_make_string(datetime_convert()),
+ 'default-location' => publicise_make_string($owner['default-location']),
+ 'allow_location' => publicise_make_string($owner['allow_location']),
+ 'theme' => publicise_make_string($owner['theme']),
+ 'pubkey' => publicise_make_string($pubkey),
+ 'prvkey' => publicise_make_string($prvkey),
+ 'spubkey' => publicise_make_string($spubkey),
+ 'sprvkey' => publicise_make_string($sprvkey),
+ 'verified' => publicise_make_int($owner['verified']),
+ 'blocked' => publicise_make_int(0),
+ 'blockwall' => publicise_make_int(1),
+ 'hidewall' => publicise_make_int(0),
+ 'blocktags' => publicise_make_int(0),
+ 'notify-flags' => publicise_make_int($owner['notify-flags']), // read the owner's value under the same key as the column
+ 'page-flags' => publicise_make_int($comments ? PAGE_COMMUNITY : PAGE_SOAPBOX), // NOTE(review): $comments is not defined in this function, so this is always PAGE_SOAPBOX
+ 'expire' => publicise_make_int($expire), // NOTE(review): $expire is not defined in this function, so this is always 0
+ );
+ Logger::debug('Publicise: creating user ' . print_r(array_diff_key($newuser, array_flip(array('password', 'prvkey', 'sprvkey'))), true)); // keep the password hash and private keys out of the log
+ $r = q("INSERT INTO `user` (`"
+ . implode("`, `", array_keys($newuser))
+ . "`) VALUES ("
+ . implode(", ", array_values($newuser))
+ . ")" );
+ if (!$r) {
+ Logger::warning('Publicise: create user failed');
+ return;
+ }
+ $r = q('SELECT * FROM `user` WHERE `guid` = "%s"', DBA::escape($guid));
+ if (!DBA::isResult($r) || count($r) != 1) { // guard first: count() cannot take a non-array q() result
+ Logger::warning('Publicise: unexpected number of uids returned');
+ return;
+ }
+ Logger::debug('Publicise: created user ID ' . $r[0]['uid']); // $r[0] is the whole row; log only its uid
+ return $r[0];
+}
+
+function publicise_create_self_contact($a, $contact, $uid) { // Return the id of user $uid's self contact, creating it from $contact if missing; on failure deletes the user and returns nothing.
+ $newcontact = array(
+ 'uid' => intval($uid), // cast: spliced directly into the INSERT below
+ 'created' => publicise_make_string(datetime_convert()),
+ 'self' => publicise_make_int(1),
+ 'name' => publicise_make_string($contact['name']),
+ 'nick' => publicise_make_string($contact['nick']),
+ 'photo' => publicise_make_string($contact['photo']),
+ 'thumb' => publicise_make_string($contact['thumb']),
+ 'micro' => publicise_make_string($contact['micro']),
+ 'blocked' => publicise_make_int(0),
+ 'pending' => publicise_make_int(0),
+ 'url' => publicise_make_string($a->getBaseURL() . '/profile/' . $contact['nick']),
+ 'nurl' => publicise_make_string($a->getBaseURL() . '/profile/' . $contact['nick']),
+ 'request' => publicise_make_string($a->getBaseURL() . '/dfrn_request/' . $contact['nick']),
+ 'notify' => publicise_make_string($a->getBaseURL() . '/dfrn_notify/' . $contact['nick']),
+ 'poll' => publicise_make_string($a->getBaseURL() . '/dfrn_poll/' . $contact['nick']),
+ 'confirm' => publicise_make_string($a->getBaseURL() . '/dfrn_confirm/' . $contact['nick']),
+ 'poco' => publicise_make_string($a->getBaseURL() . '/poco/' . $contact['nick']),
+ 'uri-date' => publicise_make_string(datetime_convert()),
+ 'avatar-date' => publicise_make_string(datetime_convert()),
+ 'closeness' => publicise_make_int(0),
+ );
+ $existing = q("SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1", intval($uid));
+ if (DBA::isResult($existing)) { // also false for a non-array q() result
+ $newcontact = $existing[0];
+ Logger::debug('Publicise: self contact already exists for user ' . $uid . ' id ' . $newcontact['id']);
+ } else {
+ Logger::debug('Publicise: create contact ' . print_r($newcontact, true));
+ q("INSERT INTO `contact` (`"
+ . implode("`, `", array_keys($newcontact))
+ . "`) VALUES ("
+ . implode(", ", array_values($newcontact))
+ . ")" );
+ $results = q("SELECT `id` FROM `contact` WHERE `uid` = %d AND `self` = 1", intval($uid));
+ if (!DBA::isResult($results) || count($results) != 1) {
+ Logger::warning('Publicise: create self contact failed, will delete uid ' . $uid);
+ $r = q("DELETE FROM `user` WHERE `uid` = %d", intval($uid));
+ return;
+ }
+ $newcontact = $results[0];
+ Logger::debug('Publicise: created self contact for user ' . $uid . ' id ' . $newcontact['id']);
+ }
+ Logger::debug('Publicise: self contact for ' . $uid . ' nick ' . $contact['nick'] . ' is ' . $newcontact['id']);
+ return $newcontact['id'];
+}
+
+function publicise_create_profile($contact, $uid) { // Insert a default profile for user $uid from $contact; returns the profile id, or nothing unless exactly one default profile is found afterwards.
+ $newprofile = array(
+ 'uid' => intval($uid), // cast: spliced directly into the INSERT below
+ 'profile-name' => publicise_make_string('default'),
+ 'is-default' => publicise_make_int(1),
+ 'name' => publicise_make_string($contact['name']),
+ 'photo' => publicise_make_string($contact['photo']),
+ 'thumb' => publicise_make_string($contact['thumb']),
+ 'homepage' => publicise_make_string($contact['url']),
+ 'publish' => publicise_make_int(1),
+ 'net-publish' => publicise_make_int(1),
+ );
+ Logger::debug('Publicise: create profile ' . print_r($newprofile, true));
+ $r = q("INSERT INTO `profile` (`"
+ . implode("`, `", array_keys($newprofile))
+ . "`) VALUES ("
+ . implode(", ", array_values($newprofile))
+ . ")" );
+ if (!$r) {
+ Logger::warning('Publicise: create profile failed'); // falls through: the lookup below decides the outcome
+ }
+ $newprofile = q('SELECT `id` FROM `profile` WHERE `uid` = %d AND `is-default` = 1', intval($uid));
+ if (!DBA::isResult($newprofile) || count($newprofile) != 1) { // guard first: count() cannot take a non-array q() result
+ Logger::warning('Publicise: create profile produced unexpected number of results');
+ return;
+ }
+ Logger::debug('Publicise: created profile ' . $newprofile[0]['id']);
+ return $newprofile[0]['id'];
+}
+
+function publicise_set_up_user($a, $contact, $owner) { // Creates the dedicated user, its self contact and its default profile for a feed contact; returns the user row, or nothing if any step failed (rows created so far are deleted again).
+    $user = publicise_create_user($owner, $contact);
+    if (!$user) {
+        notice(sprintf(t("Failed to create user for feed \"%s\""), $contact['name']) . EOL);
+        return;
+    }
+    $self_contact = publicise_create_self_contact($a, $contact, $user['uid']); // id of the self contact, falsy on failure
+    if (!$self_contact) {
+        notice(sprintf(t("Failed to create self contact for user \"%s\""), $contact['name']) . EOL);
+        Logger::warning("Publicise: unable to create self contact, deleting user " . $user['uid']);
+        q('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid']));
+        return;
+    }
+    $profile = publicise_create_profile($contact, $user['uid']);
+    if (!$profile) {
+        notice(sprintf(t("Failed to create profile for user \"%s\""), $contact['name']) . EOL);
+        Logger::warning("Publicise: unable to create profile, deleting user " . $user['uid'] . " contact $self_contact"); // log the real uid; there is no $uid variable in this function
+        q('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid']));
+        q('DELETE FROM `contact` WHERE `id` = %d', intval($self_contact));
+        return;
+    }
+    return $user;
+}
+
+function publicise($a, &$contact, &$owner) { // Moves a feed contact and its items to a dedicated (new or recycled) user account. On success returns true, $owner holds that user's row and $contact's uid/reason/hidden are updated; otherwise returns nothing.
+    Logger::info('@@@ Publicise: publicise');
+    if (!is_site_admin()) {
+        notice(t("Only admin users can publicise feeds"));
+        Logger::warning('Publicise: non-admin tried to publicise');
+        return;
+    }
+
+    // Check if we're changing our mind about a feed we earlier depublicised
+    Logger::info('@@@ Publicise: ' . 'SELECT * FROM `user` WHERE `account_expires_on` != "0000-00-00 00:00:00" AND `nickname` = "' . $contact['nick'] . '" AND `email` = "' . $owner['email'] . '" AND `page-flags` in (' . intval(PAGE_COMMUNITY) . ', ' . intval(PAGE_SOAPBOX) . ')');
+    $existing = q('SELECT * FROM `user` WHERE `account_expires_on` != "0000-00-00 00:00:00" AND `nickname` = "%s" AND `email` = "%s" AND `page-flags` in (%d, %d)',
+        DBA::escape($contact['nick']), DBA::escape($owner['email']), intval(PAGE_COMMUNITY), intval(PAGE_SOAPBOX));
+    if (DBA::isResult($existing) && count($existing) == 1) { // isResult() first: q() may return false, and count(false) is 1 on PHP 7 and a TypeError on PHP 8
+        Logger::info('@@@ Publicise: there is existing');
+        $owner = $existing[0];
+        q('UPDATE `user` SET `account_expires_on` = "0000-00-00 00:00:00", `account_removed` = 0, `account_expired` = 0 WHERE `uid` = %d', intval($owner['uid']));
+        q('UPDATE `profile` SET `publish` = 1, `net-publish` = 1 WHERE `uid` = %d AND `is-default` = 1', intval($owner['uid']));
+        Logger::debug('Publicise: recycled previous user ' . $owner['uid']);
+    }
+    else {
+        Logger::info('@@@ Publicise: there is not existing');
+        $owner = publicise_set_up_user($a, $contact, $owner);
+        if (!$owner) {
+            return;
+        }
+        Logger::debug("Publicise: created new user " . $owner['uid']);
+    }
+    Logger::info('Publicise: new contact user is ' . $owner['uid']);
+
+    $r = q("UPDATE `contact` SET `uid` = %d, `reason` = 'publicise', `hidden` = 1 WHERE id = %d", intval($owner['uid']), intval($contact['id']));
+    if (!$r) {
+        Logger::warning('Publicise: update contact failed, user is probably in a bad state ' . $owner['uid']); // the dedicated user lives in $owner; there is no $user in this function
+    }
+    $contact['uid'] = $owner['uid'];
+    $contact['reason'] = 'publicise';
+    $contact['hidden'] = 1;
+    $r = q("UPDATE `item` SET `uid` = %d, type = 'wall', wall = 1, private = 0 WHERE `contact-id` = %d",
+        intval($owner['uid']), intval($contact['id']));
+    Logger::debug('Publicise: moved items from contact ' . $contact['id'] . ' to uid ' . $owner['uid']);
+
+    // Update the retriever config
+    $r = q("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d",
+        intval($owner['uid']), intval($contact['id']));
+
+    info(sprintf(t("Moved feed \"%s\" to dedicated account"), $contact['name']) . EOL);
+    return true;
+}
+
+function publicise_self_contact($uid) { // Returns the contact row flagged `self` = 1 for user $uid, or nothing unless exactly one such row exists.
+ $r = q('SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1', intval($uid));
+ if (count($r) != 1) { // zero rows and duplicate rows are both treated as an error
+ Logger::warning('Publicise: unexpected number of self contacts for user ' . $uid);
+ return;
+ }
+ return $r[0];
+}
+
+function depublicise($a, $contact, $user) { // Undoes publicise(): if the admin follows the feed user, the feed contact, its items, photos and retriever rule are handed to local_user(); the dedicated account is then marked removed/expired. Returns nothing.
+    require_once('include/Contact.php'); // NOTE(review): nothing in this function visibly uses this include - confirm it is still needed
+
+    if (!is_site_admin()) {
+        notice(t("Only admin users can depublicise feeds")); // translated, matching the equivalent notice in publicise()
+        Logger::warning('Publicise: non-admin tried to depublicise');
+        return;
+    }
+
+    Logger::debug('Publicise: about to depublicise contact ' . $contact['id'] . ' user ' . $user['uid']);
+
+    $self_contact = publicise_self_contact($user['uid']); // may be empty if the feed user has no unique self contact
+
+    // If the local_user() is subscribed to the feed, take ownership
+    // of the feed and all its items and photos.  Otherwise they will
+    // be deleted when the account expires.
+    $r = $self_contact ? q('SELECT * FROM `contact` WHERE `uid` = %d AND `url` = "%s"',
+        intval(local_user()), DBA::escape($self_contact['url'])) : false; // without a self contact there is no url to match, so treat as "not subscribed"
+    if (DBA::isResult($r)) {
+        // Delete the contact to the feed user and any
+        // copies of its items.  These will be replaced by the originals,
+        // which will be brought back into the local_user's feed along
+        // with the feed contact itself.
+        foreach ($r as $my_contact) {
+            q('DELETE FROM `item` WHERE `contact-id` = %d', intval($my_contact['id']));
+            q('DELETE FROM `contact` WHERE `id` = %d', intval($my_contact['id']));
+        }
+
+        // Move the feed contact to local_user.  Existing items stay
+        // attached to the original feed contact, but must have their uid
+        // updated.  Also update the fields we scribbled over in
+        // publicise_post_remote_hook.
+        q('UPDATE `contact` SET `uid` = %d, `reason` = "", hidden = 0 WHERE id = %d',
+            intval(local_user()), intval($contact['id']));
+        q('UPDATE `item` SET `uid` = %d, `wall` = 0, `type` = "remote", `private` = 2 WHERE `contact-id` = %d',
+            intval(local_user()), intval($contact['id']));
+
+        // Take ownership of any photos created by the feed user
+        q('UPDATE `photo` SET `uid` = %d WHERE `uid` = %d',
+            intval(local_user()), intval($user['uid']));
+
+        // Update the retriever config so the rule follows the contact to local_user
+        $r = q("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d",
+            intval(local_user()), intval($contact['id'])); // there is no $owner in this function; the contact now belongs to local_user()
+    }
+
+    // Set the account to removed and expired right now.  It will be cleaned up by cron after 3 days, giving a chance to change your mind
+    q('UPDATE `user` SET `account_removed` = 1, `account_expired` = 1, `account_expires_on` = UTC_TIMESTAMP() WHERE `uid` = %d',
+        intval($user['uid']));
+    q('UPDATE `profile` SET `publish` = 0, `net-publish` = 0 WHERE `uid` = %d AND `is-default` = 1', intval($user['uid']));
+
+    info(sprintf(t("Removed dedicated account for feed \"%s\""), $contact['name']) . EOL);
+}
+
+function publicise_addon_admin_post ($a) { // Admin form handler: for every contact from publicise_get_contacts(), publicises or depublicises it according to the submitted checkboxes, then syncs the expire value and comment mode of the dedicated user.
+    Logger::info('@@@ publicise_addon_admin_post');
+    if (!is_site_admin()) {
+        Logger::warning('Publicise: non-admin tried to do admin post');
+        return;
+    }
+
+    foreach (publicise_get_contacts() as $contact) {
+        Logger::info('@@@ publicise_addon_admin_post contact ' . $contact['id'] . ' ' . $contact['name']);
+        $user = publicise_get_user($contact['uid']);
+        if (empty($_POST['publicise-enabled-' . $contact['id']])) { // unchecked checkboxes are not submitted at all, so test with empty() instead of reading a missing key
+            if ($contact['reason'] === 'publicise') {
+                Logger::info('@@@ depublicise');
+                depublicise($a, $contact, $user);
+            }
+        }
+        else {
+            if ($contact['reason'] !== 'publicise') {
+                Logger::info('@@@ publicise');
+                if (!publicise($a, $contact, $user)) { // publicise() takes $user by reference and replaces it with the dedicated user row
+                    Logger::warning('Publicise: failed to publicise contact ' . $contact['id']);
+                    continue;
+                }
+            }
+            if ($_POST['publicise-expire-' . $contact['id']] != $user['expire']) { // NOTE(review): assumes the expire field is always part of the submitted form - confirm against admin.tpl
+                q('UPDATE `user` SET `expire` = %d WHERE `uid` = %d',
+                    intval($_POST['publicise-expire-' . $contact['id']]), intval($user['uid']));
+            }
+            if (!empty($_POST['publicise-comments-' . $contact['id']])) { // same checkbox rule as above
+                if ($user['page-flags'] != PAGE_COMMUNITY) {
+                    q('UPDATE `user` SET `page-flags` = %d WHERE `uid` = %d',
+                        intval(PAGE_COMMUNITY), intval($user['uid']));
+                    q('UPDATE `contact` SET `rel` = %d WHERE `uid` = %d AND `network` = "dfrn"',
+                        intval(CONTACT_IS_SHARING), intval($user['uid']));
+                }
+            }
+            else {
+                if ($user['page-flags'] != PAGE_SOAPBOX) {
+                    q('UPDATE `user` SET `page-flags` = %d WHERE `uid` = %d',
+                        intval(PAGE_SOAPBOX), intval($user['uid']));
+                    q('UPDATE `contact` SET `rel` = %d WHERE `uid` = %d AND `network` = "dfrn"',
+                        intval(CONTACT_IS_FOLLOWER), intval($user['uid']));
+                }
+            }
+        }
+    }
+}
+
+function publicise_post_remote_hook(&$a, &$item) { // Rewrites $item in place as a public wall post when its contact is marked with reason 'publicise'; otherwise leaves it untouched.
+ $r1 = q("SELECT `uid` FROM `contact` WHERE `id` = %d AND `reason` = 'publicise'", intval($item['contact-id']));
+ if (!$r1) { // no matching contact row: nothing to do
+ return;
+ }
+ // The same three fields are set in bulk by publicise() and reset by depublicise().
+ Logger::debug('Publicise: moving to wall: ' . $item['uid'] . ' ' . $item['contact-id'] . ' ' . $item['uri']);
+ $item['type'] = 'wall';
+ $item['wall'] = 1;
+ $item['private'] = 0;
+}
+
diff --git a/publicise/templates/admin.tpl b/publicise/templates/admin.tpl
new file mode 100644
index 00000000..b10c3546
--- /dev/null
+++ b/publicise/templates/admin.tpl
@@ -0,0 +1,39 @@
+{{*
+ * AUTOMATICALLY GENERATED TEMPLATE
+ * DO NOT EDIT THIS FILE, CHANGES WILL BE OVERWRITTEN
+ *
+ *}}
+
From dbfc24d51fc531530b7f4c446d1fee833d0ac998 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Mon, 30 Sep 2019 20:52:51 +0200
Subject: [PATCH 25/94] retriever tweaks
---
retriever/retriever.php | 13 ++++---------
1 file changed, 4 insertions(+), 9 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index fcd45b46..f495578b 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -308,9 +308,8 @@ function retriever_get_item($retriever_item) {
Logger::info('@@@ retriever_get_item uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['item-uid'] . ' cid ' . $retriever_item['contact-id']);
try {//@@@ not necessary
$item = Item::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid'])]);
- Logger::log('@@@ 1 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
if (!DBA::isResult($item)) {
- Logger::log('retriever_get_item: no item found for uri ' . $retriever_item['item-uri']);
+ Logger::warning('retriever_get_item: no item found for uri ' . $retriever_item['item-uri']);
return;
}
Logger::info('@@@ retriever_get_item: yay item found for uri ' . $retriever_item['item-uri'] . ' guid ' . $item['guid'] . ' plink ' . $item['plink']);
@@ -329,9 +328,9 @@ function retriever_item_completed($retriever_item_id, $resource, $a) {
return;
}
$item = retriever_get_item($retriever_item);
- Logger::log('@@@ 2 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
+ Logger::info('@@@ 2 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
if (!$item) {
- Logger::log('retriever_item_completed: no item ' . $retriever_item['item-uri']);
+ Logger::warning('retriever_item_completed: no item ' . $retriever_item['item-uri']);
return;
}
// Note: the retriever might be null. Doesn't matter.
@@ -348,21 +347,17 @@ function retriever_resource_completed($resource, $a) {
Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url']);
$r = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource['id']);
foreach (DBA::select('retriever_item', ['id'], ['resource' => intval($resource['id'])]) as $retriever_item) {
- Logger::debug('@@@ retriever_resource_completed got item id ' . $retriever_item['id']);
retriever_item_completed($retriever_item['id'], $resource, $a);
}
}
function apply_retrospective($a, $retriever, $num) {
- Logger::info('@@@ apply_retrospective');
$r = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d",
intval($retriever['contact-id']), intval($num));
foreach ($r as $item) {
- Logger::info('@@@ apply_retrospective item ' . $item['id']);
q('UPDATE `item` SET `visible` = 0 WHERE `id` = %d', $item['id']);
q('UPDATE `thread` SET `visible` = 0 WHERE `iid` = %d', $item['id']);
foreach (DBA::select('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]) as $retriever_item) {
- Logger::info('@@@ about to delete retriever_item id ' . $retriever_item['id'] . ' uri ' . $item['uri'] . ' uid ' . $item['uid'] . ' contact ' . $item['contact-id']);
DBA::delete('retriever_resource', ['id' => $retriever_item['resource']]);
DBA::delete('retriever_item', ['id' => $retriever_item['id']]);
}
@@ -378,7 +373,7 @@ function retriever_on_item_insert($a, $retriever, &$item) {
Logger::info('retriever_on_item_insert: No retriever supplied');
return;
}
- if (!$retriever['data']['enable'] == "on") {
+ if (!array_key_exists('enable', $retriever['data']) || !$retriever['data']['enable'] == "on") {
Logger::info('@@@ retriever_on_item_insert: Disabled');
return;
}
From 2d8e13d53d388e9f98718e48199d44f8716fe265 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Wed, 2 Oct 2019 07:19:59 +0200
Subject: [PATCH 26/94] extensive refactoring
---
retriever/retriever.php | 305 +++++++++++++---------------
retriever/templates/rule-config.tpl | 32 ++-
2 files changed, 164 insertions(+), 173 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index f495578b..6ace5e98 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -177,14 +177,14 @@ function retriever_clean_up_completed_resources($max_items, $a) {
}
$resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]);
retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a);
- Logger::info('@@@ retriever_clean_up_completed_resources tried to update id ' . $retriver_item['id'] . ' to finished, better check that it really worked!');
+ Logger::info('@@@ retriever_clean_up_completed_resources tried to update id ' . $retriever_item['id'] . ' to finished, better check that it really worked!');
DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished']);
retriever_check_item_completed($item);
}
}
function retriever_tidy() {
- // TODO: figure out how to do this with DBA module
+ // TODO: figure out how to do this with DBA module @@@ it is possible
q("DELETE FROM retriever_resource WHERE completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)");
q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)");
@@ -212,8 +212,6 @@ function retrieve_dataurl_resource($resource) {
}
function retrieve_resource($resource) {
- Logger::info('@@@ retrieve_resource: url ' . $resource['url'] . ' uid ' . $resource['item-uid'] . ' cid ' . $resource['contact-id']);
-
if (substr($resource['url'], 0, 5) == "data:") {
return retrieve_dataurl_resource($resource);
}
@@ -221,24 +219,22 @@ function retrieve_resource($resource) {
$a = get_app();
$retriever_rule = get_retriever_rule($resource['contact-id'], $resource['item-uid']);
+ $rule_data = $retriever_rule['data'];
try {
Logger::debug('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url']);
$redirects = 0;
$cookiejar = '';
- Logger::debug('@@@ retrieve_resource storecookies ' . $retriever_rule['storecookies']);
- if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) {
+ if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) {
$cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-');
- Logger::debug('@@@ retrieve_resource cookie file ' . $cookiejar . ' content ' . $retriever_rule['cookiedata']);
- file_put_contents($cookiejar, $retriever_rule['cookiedata']);
+ file_put_contents($cookiejar, $rule_data['cookiedata']);
}
$fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar);
- if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) {
- $retriever_rule['cookiedata'] = file_get_contents($cookiejar);
- Logger::debug('@@@ retriever_resource update cookie ' . json_encode($retriever_rule['data'] . ' id ' . $retriever_rule['id']));
- q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d",
- DBA::escape(json_encode($retriever_rule['data'])), intval($retriever_rule["id"]));
- /* unlink($cookiejar); */ //@@@
+ if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) {
+ $retriever_rule['data']['cookiedata'] = file_get_contents($cookiejar);
+ DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])]);
+ //@@@ check the update worked
+ unlink($cookiejar);
}
$resource['data'] = $fetch_result->getBody();
$resource['http-code'] = $fetch_result->getReturnCode();
@@ -248,36 +244,33 @@ function retrieve_resource($resource) {
} catch (Exception $e) {
Logger::info('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage());
}
+ // TODO: figure out how to do this with DBA module
q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `http-code` = %d, `redirect-url` = '%s' WHERE id = %d",
intval($resource['http-code']),
DBA::escape($resource['redirect-url']),
intval($resource['id']));
if ($resource['data']) {
+ // TODO: figure out how to do this with DBA module
q("UPDATE `retriever_resource` SET `completed` = now(), `data` = '%s', `type` = '%s' WHERE id = %d",
DBA::escape($resource['data']),
DBA::escape($resource['type']),
intval($resource['id']));
retriever_resource_completed($resource, $a);
}
- Logger::info('@@@ retrieve_resource finished: ' . $resource['url']);
}
function get_retriever_rule($contact_id, $uid, $create = false) {
- Logger::info('@@@ get_retriever_rule ' . "SELECT * FROM `retriever_rule` WHERE `contact-id` = " . intval($contact_id) . " AND `uid` = " . intval($uid));
- $r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d",
- intval($contact_id), intval($uid));
- Logger::info('@@@ get_retriever_rule count is ' . count($r));
- if (count($r)) {
- $r[0]['data'] = json_decode($r[0]['data'], true);
+ $retriever_rule = DBA::selectFirst('retriever_rule', [], ['contact-id' => intval($contact_id), 'uid' => intval($uid)]);
+ //@@@ check that this worked
+ if ($retriever_rule) {
+ $retriever_rule['data'] = json_decode($retriever_rule['data'], true);
Logger::info('@@@ get_retriever_rule returning an actual thing');
- return $r[0];
+ return $retriever_rule;
}
if ($create) {
- q("INSERT INTO `retriever_rule` (`uid`, `contact-id`) VALUES (%d, %d)",
- intval($uid), intval($contact_id));
- $r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d",
- intval($contact_id), intval($uid));
- return $r[0];
+ DBA::insert('retriever_rule', ['uid' => intval($uid), 'contact-id' => intval($contact_id)]);
+ //@@@ check that this worked
+ return DBA::selectFirst('retriever_rule', [], ['contact-id' => intval($contact_id), 'uid' => intval($uid)]);
}
}
@@ -285,38 +278,13 @@ function retriever_get_retriever_item($id) {
return DBA::selectFirst('retriever_item', [], ['id' => intval($id)]);
}
-function retriever_class_of_item($item) { //@@@
- if (!$item) {
- return 'false';
- }
- if (array_key_exists('finished', $item)) {
- Logger::info('@@@ oh no this is a bad thing');
- return 'retriever_item';
- }
- if (array_key_exists('moderated', $item)) {
- return 'friendica_item';
- }
- return 'unknown';
-}
-
-function mat_test($item) { //@@@
- return 'mat_test';
-}
-
function retriever_get_item($retriever_item) {
- // @@@ add contact id as a search term
- Logger::info('@@@ retriever_get_item uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['item-uid'] . ' cid ' . $retriever_item['contact-id']);
- try {//@@@ not necessary
- $item = Item::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid'])]);
- if (!DBA::isResult($item)) {
- Logger::warning('retriever_get_item: no item found for uri ' . $retriever_item['item-uri']);
- return;
- }
- Logger::info('@@@ retriever_get_item: yay item found for uri ' . $retriever_item['item-uri'] . ' guid ' . $item['guid'] . ' plink ' . $item['plink']);
- return $item;
- } catch (Exception $e) {
- Logger::info('retriever_get_item: exception ' . $e->getMessage());
+ $item = Item::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid']), 'contact-id' => intval($retriever_item['contact-id'])]);
+ if (!DBA::isResult($item)) {
+ Logger::warning('retriever_get_item: no item found for uri ' . $retriever_item['item-uri']);
+ return;
}
+ return $item;
}
function retriever_item_completed($retriever_item_id, $resource, $a) {
@@ -328,7 +296,6 @@ function retriever_item_completed($retriever_item_id, $resource, $a) {
return;
}
$item = retriever_get_item($retriever_item);
- Logger::info('@@@ 2 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
if (!$item) {
Logger::warning('retriever_item_completed: no item ' . $retriever_item['item-uri']);
return;
@@ -338,25 +305,23 @@ function retriever_item_completed($retriever_item_id, $resource, $a) {
retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a);
- q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d",
- intval($retriever_item['id']));
+ DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished']);
retriever_check_item_completed($item);
}
function retriever_resource_completed($resource, $a) {
Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url']);
- $r = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource['id']);
foreach (DBA::select('retriever_item', ['id'], ['resource' => intval($resource['id'])]) as $retriever_item) {
retriever_item_completed($retriever_item['id'], $resource, $a);
}
}
function apply_retrospective($a, $retriever, $num) {
- $r = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d",
- intval($retriever['contact-id']), intval($num));
- foreach ($r as $item) {
- q('UPDATE `item` SET `visible` = 0 WHERE `id` = %d', $item['id']);
- q('UPDATE `thread` SET `visible` = 0 WHERE `iid` = %d', $item['id']);
+ Logger::debug('@@@ apply_retrospective');
+ foreach (Item::select([], ['contact-id' => intval($retriever['contact-id'])], ['order' => ['received' => true], 'limit' => $num]) as $item) {
+ Logger::debug('@@@ apply_retrospective got item id ' . $item['id'] . ' uri ' . $item['uri']);
+ Item::update(['visible' => 0], ['id' => intval($item['id'])]);
+ //@@@ check that this works
foreach (DBA::select('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]) as $retriever_item) {
DBA::delete('retriever_resource', ['id' => $retriever_item['resource']]);
DBA::delete('retriever_item', ['id' => $retriever_item['id']]);
@@ -368,13 +333,11 @@ function apply_retrospective($a, $retriever, $num) {
// TODO: Currently this waits until the next cron before actually downloading. Should do it immediately.
// TODO: This queries then inserts. It should use some kind of lock to avoid requesting the same resource twice.
function retriever_on_item_insert($a, $retriever, &$item) {
- Logger::info('@@@ retriever_on_item_insert start plink ' . $item['plink'] . ' id ' . $item['id']);
if (!$retriever || !$retriever['id']) {
Logger::info('retriever_on_item_insert: No retriever supplied');
return;
}
if (!array_key_exists('enable', $retriever['data']) || !$retriever['data']['enable'] == "on") {
- Logger::info('@@@ retriever_on_item_insert: Disabled');
return;
}
if (array_key_exists('plink', $item) && strlen($item['plink'])) {
@@ -389,13 +352,12 @@ function retriever_on_item_insert($a, $retriever, &$item) {
$url = $content['plink'];
}
- if (array_key_exists('pattern', $retriever['data']) && $retriever['data']['pattern']) {
+ if ($retriever['data']['modurl']) {
$orig_url = $url;
$url = preg_replace('/' . $retriever['data']['pattern'] . '/', $retriever['data']['replace'], $orig_url);
Logger::debug('retriever_on_item_insert: Changed ' . $orig_url . ' to ' . $url);
}
- Logger::debug('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id'] . ' url ' . $url);
$resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id']);
$retriever_item_id = add_retriever_item($item, $resource);
}
@@ -412,16 +374,15 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) {
fclose($fp);
$url = 'md5://' . hash('md5', $url);
- //@@@ fix this
- $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid));
- $resource = $r[0];
- if (count($r)) {
+ if (DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)])) {
+ //@@@ test that this really happens - it should sometimes
Logger::debug('add_retriever_resource: Resource ' . $url . ' already requested');
return $resource;
}
Logger::debug('retrieve_resource: got data URL type ' . $resource['type']);
- //@@@ fix this
+ // TODO: figure out how to do this with DBA module
+ // @@@ DBA::update('workerqueue', ['executed' => DateTimeFormat::utcNow()], ['pid' => $mypid, 'done' => false]);
q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `type`, `binary`, `url`, `completed`, `data`) " .
"VALUES (%d, %d, '%s', %d, '%s', now(), '%s')",
intval($uid),
@@ -430,6 +391,7 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) {
intval($binary ? 1 : 0),
DBA::escape($url),
DBA::escape($data));
+ //@@@ fix this
$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url));
$resource = $r[0];
if (count($r)) {
@@ -449,16 +411,18 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) {
return $r[0];
}
+ //@@@ fix this
q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `binary`, `url`) " .
"VALUES (%d, %d, %d, '%s')", intval($uid), intval($cid), intval($binary ? 1 : 0), DBA::escape($url));
+ //@@@ fix this
$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url));
return $r[0];
}
function add_retriever_item(&$item, $resource) {
- Logger::debug('@@@ 5 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
Logger::debug('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
+ //@@@ can use selectFirst
$r = q("SELECT COUNT(*) FROM `retriever_item` WHERE " .
"`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d",
DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id']));
@@ -466,9 +430,11 @@ function add_retriever_item(&$item, $resource) {
Logger::info("add_retriever_item: retriever item already present for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
return;
}
+ //@@@ fix this
q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " .
"VALUES ('%s', %d, %d, %d)",
DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource["id"]));
+ //@@@ fix this
$r = q("SELECT id FROM `retriever_item` WHERE " .
"`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d ORDER BY id DESC",
DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id']));
@@ -505,8 +471,10 @@ function retriever_apply_xslt_text($xslt_text, $doc) {
return $result;
}
+//@@@ I think this is supposed to update the $item, but it doesn't
function retriever_apply_dom_filter($retriever, &$item, $resource) {
- Logger::debug('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id']);
+ //@@@ check if id and uri-id are there //@@@ uri-id definitely is not
+ Logger::debug('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'] . ' uri-id ' . $item['uri-id']);
if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) {
Logger::info('retriever_apply_dom_filter: no include and no customxslt');
@@ -517,41 +485,15 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
return;
}
- //@@@ break this bit into separate function
- $encoding = retriever_get_encoding($resource);
- $content = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $encoding);
- $doc = new DOMDocument('1.0', 'UTF-8');
- if (strpos($resource['type'], 'html') !== false) {
- @$doc->loadHTML($content);
- }
- else {
- $doc->loadXML($content);
- }
+ $doc = retriever_load_into_dom($resource);
- $params = array('$spec' => $retriever['data']);
- $extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/');
- $extract_xslt = Renderer::replaceMacros($extract_template, $params);
- if ($retriever['data']['include']) {
- Logger::debug('retriever_apply_dom_filter: applying include/exclude template \"' . $extract_xslt . '\"');
- $doc = retriever_apply_xslt_text($extract_xslt, $doc);
- }
- if (array_key_exists('customxslt', $retriever['data']) && $retriever['data']['customxslt']) {
- Logger::debug('retriever_apply_dom_filter: applying custom XSLT \"' . $retriever['data']['customxslt'] . '\"');
- $doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc);
- }
+ $doc = retriever_extract($doc, $retriever);
if (!$doc) {
Logger::info('retriever_apply_dom_filter: failed to apply extract XSLT template');
return;
}
- //@@@ break this bit into separate function
- $components = parse_url($resource['redirect-url']);
- $rooturl = $components['scheme'] . "://" . $components['host'];
- $dirurl = $rooturl . dirname($components['path']) . "/";
- $params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl);
- $fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/');
- $fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params);
- $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc);
+ $doc = retriever_globalise_urls($doc, $resource);
if (!$doc) {
Logger::info('retriever_apply_dom_filter: failed to apply fix urls XSLT template');
return;
@@ -571,10 +513,56 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
Item::update(['body' => $body], ['uri-id' => $uri_id]);
}
+function retriever_load_into_dom($resource) {
+ Logger::info('@@@ retriever_load_into_dom start');
+ $encoding = retriever_get_encoding($resource);
+ $content = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $encoding);
+ $doc = new DOMDocument('1.0', 'UTF-8');
+ if (strpos($resource['type'], 'html') !== false) {
+ @$doc->loadHTML($content);
+ }
+ else {
+ $doc->loadXML($content);
+ }
+ Logger::info('@@@ retriever_load_into_dom end');
+ return $doc;
+}
+
+function retriever_extract($doc, $retriever) {
+ Logger::info('@@@ retriever_extract start');
+ $params = array('$spec' => $retriever['data']);
+ $extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/');
+ $extract_xslt = Renderer::replaceMacros($extract_template, $params);
+ if ($retriever['data']['include']) {
+ Logger::debug('retriever_apply_dom_filter: applying include/exclude template \"' . $extract_xslt . '\"');
+ $doc = retriever_apply_xslt_text($extract_xslt, $doc);
+ }
+ if (array_key_exists('customxslt', $retriever['data']) && $retriever['data']['customxslt']) {
+ Logger::debug('retriever_extract: applying custom XSLT \"' . $retriever['data']['customxslt'] . '\"');
+ $doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc);
+ }
+ Logger::info('@@@ retriever_extract end');
+ return $doc;
+}
+
+function retriever_globalise_urls($doc, $resource) {
+ Logger::info('@@@ retriever_globalise_urls start');
+ $components = parse_url($resource['redirect-url']);
+ $rooturl = $components['scheme'] . "://" . $components['host'];
+ $dirurl = $rooturl . dirname($components['path']) . "/";
+ $params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl);
+ $fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/');
+ $fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params);
+ $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc);
+ Logger::info('@@@ retriever_globalise_urls end');
+ return $doc;
+}
+
function retrieve_images(&$item, $a) {
// Note that $item doesn't necessarily contain all the fields you would expect, in particular 'id'
+ //@@@ does it contain uri-id? //@@@ it definitely does not
- Logger::debug('@@@ retrieve_images start item '. $item['id'] . ' uri ' . $item['uri'] . ' uri id ' . $item['uri-id'] . ' plink ' . $item['plink'] . ' guid ' . $item['guid']);
+ Logger::debug('@@@ retrieve_images start item id '. (array_key_exists('id', $item) ? $item['id'] : 'undef') . ' uri ' . $item['uri'] . ' uri id ' . (array_key_exists('uri-id', $item) ? $item['uri-id'] : 'undef') . ' plink ' . $item['plink'] . ' guid ' . $item['guid']); // 'id' and 'uri-id' may be missing from $item, so both are guarded
$uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
$content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $uri_id]);
@@ -584,7 +572,6 @@ function retrieve_images(&$item, $a) {
return;
}
- Logger::info('@@@ retrieve_images looking in body "' . $body . '"');
// I suspect that the first two are not used any more?
preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $item["body"], $matches1);
preg_match_all("/\[img\](.*?)\[\/img\]/ism", $item["body"], $matches2);
@@ -592,9 +579,7 @@ function retrieve_images(&$item, $a) {
$matches = array_merge($matches1[3], $matches2[1], $matches3[1]);
Logger::debug('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
foreach ($matches as $url) {
- Logger::debug('@@@ retrieve_images: url ' . $url);
if (strpos($url, get_app()->getBaseUrl()) === FALSE) {
- Logger::debug('@@@ retrieve_images: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id']);
$resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id'], true);
if (!$resource['completed']) {
add_retriever_item($item, $resource);
@@ -604,12 +589,11 @@ function retrieve_images(&$item, $a) {
}
}
}
- Logger::info('@@@ retrieve_images end');
}
function retriever_check_item_completed(&$item)
{
- Logger::debug('@@@ 9 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
+ // TODO: figure out how to do this with DBA module
$r = q('SELECT count(*) FROM retriever_item WHERE `item-uri` = "%s" ' .
'AND `item-uid` = %d AND `contact-id` = %d AND `finished` = 0',
DBA::escape($item['uri']), intval($item['uid']),
@@ -620,12 +604,7 @@ function retriever_check_item_completed(&$item)
$item['visible'] = $waiting ? 0 : 1;
if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) {
Logger::debug('retriever_check_item_completed: changing visible flag to ' . $item['visible']);
- q("UPDATE `item` SET `visible` = %d WHERE `id` = %d",
- intval($item['visible']),
- intval($item['id']));
- q("UPDATE `thread` SET `visible` = %d WHERE `iid` = %d",
- intval($item['visible']),
- intval($item['id']));
+ Item::update(['visible' => $item['visible']], ['id' => intval($item['id'])]); // persist the flag computed above (1 once nothing is pending), not a constant 0
}
}
@@ -647,11 +626,8 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc
}
}
-//@@@ todo: change all Logger::info t etc
//@@@ todo: what is this reference for? document if needed delete if not
function retriever_transform_images($a, &$item, $resource) {
- Logger::debug('@@@ 11 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
- Logger::info('@@@ retriever_transform_images');
if (!$resource['data']) {
Logger::info('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url']);
return;
@@ -659,51 +635,37 @@ function retriever_transform_images($a, &$item, $resource) {
$uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
- try { //@@@ probably can get rid of this try/catch
- $data = $resource['data'];
- $type = $resource['type'];
- $uid = $item['uid'];
- $cid = $item['contact-id'];
- $rid = Photo::newResource();
- $path = parse_url($resource['url'], PHP_URL_PATH);
- $parts = pathinfo($path);
- $filename = $parts['filename'] . (array_key_exists('extension', $parts) ? '.' . $parts['extension'] : '');
- Logger::info('@@@ retriever_transform_images url ' . $resource['url'] . ' path ' . $path . ' filename ' . $parts['filename']);
- $album = 'Wall Photos';
- $scale = 0;
- $desc = ''; // TODO: store alt text with resource when it's requested so we can fill this in
- Logger::debug('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . $uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . $album . ' scale ' . $scale . ' desc ' . $desc);
- Logger::info('@@@ retriever_transform_images before new Image');
- $image = new Image($data, $type);
- Logger::info('@@@ retriever_transform_images after new Image');
- if (!$image->isValid()) {
- Logger::warning('retriever_transform_images: invalid image found at URL ' . $resource['url'] . ' for item ' . $item['id']);
- return;
- }
- Logger::info('@@@ retriever_transform_images before Photo::store');
- $photo = Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc);
- Logger::info('@@@ retriever_transform_images after Photo::store');
- $new_url = System::baseUrl() . '/photo/' . $rid . '-0.' . $image->getExt();
- Logger::info('@@@ retriever_transform_images new url ' . $new_url . ' rid ' . $rid . ' ext ' . $image->getExt());
- if (!strlen($new_url)) {
- Logger::warning('retriever_transform_images: no replacement URL for image ' . $resource['url']);
- return;
- }
-
- $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $uri_id]);
- $body = $content['body'];
- Logger::info('@@@ retriever_transform_images: found body for uri id ' . $uri_id . ': ' . $body);
-
- Logger::debug('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . $item['uri']);
- Logger::debug('@@@ retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in body ' . $body);
- $body = str_replace($resource["url"], $new_url, $body);
-
- Logger::info('@@@ retriever_transform_images: result \"' . $body . '\"');
- Item::update(['body' => $body], ['uri-id' => $uri_id]);
- } catch (Exception $e) {
- Logger::info('retriever_transform_images caught exception ' . $e->getMessage());
+ // Collect what is needed to store the downloaded image as a photo for the item's user and contact, keeping the remote file name.
+ $data = $resource['data'];
+ $type = $resource['type'];
+ $uid = $item['uid'];
+ $cid = $item['contact-id'];
+ $rid = Photo::newResource();
+ $parts = pathinfo((string)parse_url($resource['url'], PHP_URL_PATH)); // parse_url() returns null/false when the URL has no usable path
+ $filename = $parts['filename'] . (array_key_exists('extension', $parts) ? '.' . $parts['extension'] : '');
+ $album = 'Wall Photos';
+ $desc = ''; // TODO: store alt text with resource when it's requested so we can fill this in
+ Logger::debug('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . $uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . $album . ' scale 0 desc ' . $desc);
+ $image = new Image($data, $type);
+ if (!$image->isValid()) {
+     // $item does not necessarily carry 'id' here, so the item is identified by its URI instead.
+     Logger::warning('retriever_transform_images: invalid image found at URL ' . $resource['url'] . ' for item ' . $item['uri']);
return;
}
+ // Photo::store() is assumed to return a falsy value on failure (TODO confirm); the strlen() test on $new_url it replaces could never fail.
+ if (!Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc)) {
+     Logger::warning('retriever_transform_images: no replacement URL for image ' . $resource['url']);
+     return;
+ }
+ $new_url = System::baseUrl() . '/photo/' . $rid . '-0.' . $image->getExt();
+
+ // Point the stored body at the local copy; with no body row (or an empty body) there is nothing to rewrite.
+ $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $uri_id]);
+ if (empty($content['body'])) {
+     return;
+ }
+ Logger::debug('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . $item['uri']);
+ Item::update(['body' => str_replace($resource["url"], $new_url, $content['body'])], ['uri-id' => $uri_id]);
}
function retriever_content($a) {
@@ -712,6 +674,7 @@ function retriever_content($a) {
return;
}
if ($a->argv[1] === 'help') {
+ //@@@ fix me
$feeds = q("SELECT `id`, `name`, `thumb` FROM contact WHERE `uid` = %d AND `network` = 'feed'",
local_user());
foreach ($feeds as $k=>$v) {
@@ -729,7 +692,7 @@ function retriever_content($a) {
if (!empty($_POST["id"])) {
$retriever_rule = get_retriever_rule($a->argv[1], local_user(), true);
$retriever_rule['data'] = array();
- foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) {
+ foreach (array('modurl', 'pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) {
if (empty($_POST['retriever_' . $setting])) {
$retriever_rule['data'][$setting] = NULL;
}
@@ -753,6 +716,7 @@ function retriever_content($a) {
unset($retriever_rule['data']['exclude'][$k]);
}
}
+ //@@@ fix me
q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d",
DBA::escape(json_encode($retriever_rule['data'])), intval($retriever_rule["id"]));
$a->page['content'] .= "Settings Updated";
@@ -769,6 +733,11 @@ function retriever_content($a) {
'retriever_enable',
L10n::t('Enabled'),
$retriever_rule['data']['enable']),
+ '$modurl' => array(
+ 'retriever_modurl',
+ L10n::t('Modify URL'),
+ $retriever_rule['data']['modurl'],
+ L10n::t("Modify each article's URL with regular expressions before retrieving.")),
'$pattern' => array(
'retriever_pattern',
L10n::t('URL Pattern'),
@@ -832,7 +801,8 @@ function retriever_contact_photo_menu($a, &$args) {
}
function retriever_post_remote_hook(&$a, &$item) {
- Logger::info('@@@ 12 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
+ // Note that $item doesn't necessarily contain all the fields you would expect, in particular 'id'
+
Logger::info('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
$uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
@@ -845,14 +815,13 @@ function retriever_post_remote_hook(&$a, &$item) {
// Convert to HTML and back to take advantage of bbcode's resolution of oembeds.
$content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]);
$body = HTML::toBBCode(BBCode::convert($content['body']));
- Logger::debug('@@@ retriever_post_remote_hook item uri-id ' . $uri_id . ' body "' . $item['body'] . '" item content body "' . $body . '"');
if ($body) {
$item['body'] = $body;
Item::update(['body' => $body], ['uri-id' => $uri_id]);
}
}
if (PConfig::get($item["uid"], 'retriever', 'all_photos')) {
- retrieve_images($item, $a);
+ retrieve_images($item, $a); //@@@ backwards
}
}
retriever_check_item_completed($item);
diff --git a/retriever/templates/rule-config.tpl b/retriever/templates/rule-config.tpl
index 9061d1ff..171054de 100644
--- a/retriever/templates/rule-config.tpl
+++ b/retriever/templates/rule-config.tpl
@@ -41,6 +41,25 @@ function retriever_remove_row(id, number)
tbody.removeChild(row);
}
+// Show or hide the URL-rewriting inputs to match the "Modify URL" checkbox
+// (#id_retriever_modurl).  The elements that directly contain
+// #id_retriever_pattern and #id_retriever_replace get display "block" while
+// the checkbox is ticked and "none" otherwise.  Called once on
+// DOMContentLoaded and again on every change of the checkbox.
+function retriever_toggle_url_block()
+{
+    var selectors = ["#id_retriever_pattern", "#id_retriever_replace"];
+
+    for (var i = 0; i < selectors.length; i++) {
+        var container = document.querySelector(selectors[i]).parentNode;
+        // The checkbox is looked up once per input, after that input's
+        // container has been resolved.
+        var enabled = document.querySelector("#id_retriever_modurl").checked;
+
+        container.style.display = enabled ? "block" : "none";
+    }
+}
+
function retriever_toggle_cookiedata_block()
{
var div = document.querySelector("#id_retriever_cookiedata").parentNode;
@@ -53,6 +72,8 @@ function retriever_toggle_cookiedata_block()
}
document.addEventListener('DOMContentLoaded', function() {
+ retriever_toggle_url_block();
+ document.querySelector("#id_retriever_modurl").addEventListener('change', retriever_toggle_url_block, false);
retriever_toggle_cookiedata_block();
document.querySelector("#id_retriever_storecookies").addEventListener('change', retriever_toggle_cookiedata_block, false);
}, false);
@@ -62,10 +83,6 @@ document.addEventListener('DOMContentLoaded', function() {