From 90eda431399cb9b37c20105d96f6d1ed84f3fa94 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 7 Jul 2019 14:45:23 +0100 Subject: [PATCH 01/96] Latest version of retriever --- retriever/database.sql | 40 ++ retriever/retriever.php | 832 ++++++++++++++++++++++++++++ retriever/templates/extract.tpl | 32 ++ retriever/templates/fix-urls.tpl | 26 + retriever/templates/help.tpl | 148 +++++ retriever/templates/rule-config.tpl | 112 ++++ retriever/templates/settings.tpl | 9 + 7 files changed, 1199 insertions(+) create mode 100644 retriever/database.sql create mode 100644 retriever/retriever.php create mode 100644 retriever/templates/extract.tpl create mode 100644 retriever/templates/fix-urls.tpl create mode 100644 retriever/templates/help.tpl create mode 100644 retriever/templates/rule-config.tpl create mode 100644 retriever/templates/settings.tpl diff --git a/retriever/database.sql b/retriever/database.sql new file mode 100644 index 00000000..340e33eb --- /dev/null +++ b/retriever/database.sql @@ -0,0 +1,40 @@ +CREATE TABLE IF NOT EXISTS `retriever_rule` ( + `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `uid` int(11) NOT NULL, + `contact-id` int(11) NOT NULL, + `data` mediumtext NULL DEFAULT NULL, + PRIMARY KEY (`id`), + KEY `uid` (`uid`), + KEY `contact-id` (`contact-id`) +) DEFAULT CHARSET=utf8 COLLATE=utf8_bin; + +CREATE TABLE IF NOT EXISTS `retriever_item` ( + `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `item-uri` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL, + `item-uid` int(10) unsigned NOT NULL DEFAULT '0', + `contact-id` int(10) unsigned NOT NULL DEFAULT '0', + `resource` int(11) NOT NULL, + `finished` tinyint(1) unsigned NOT NULL DEFAULT '0', + KEY `resource` (`resource`), + KEY `finished` (`finished`), + KEY `item-uid` (`item-uid`), + KEY `all` (`item-uri`, `item-uid`, `contact-id`), + PRIMARY KEY (`id`) +) DEFAULT CHARSET=utf8 COLLATE=utf8_bin; + +CREATE TABLE IF NOT EXISTS `retriever_resource` ( + `id` int(11) unsigned NOT NULL 
AUTO_INCREMENT, + `type` char(255) NULL DEFAULT NULL, + `binary` int(1) NOT NULL DEFAULT 0, + `url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL, + `created` timestamp NOT NULL DEFAULT now(), + `completed` timestamp NULL DEFAULT NULL, + `last-try` timestamp NULL DEFAULT NULL, + `num-tries` int(11) NOT NULL DEFAULT 0, + `data` mediumblob NULL DEFAULT NULL, + `http-code` smallint(1) unsigned NULL DEFAULT NULL, + `redirect-url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NULL DEFAULT NULL, + KEY `retriever_resource` ADD INDEX `url` (`url`), + KEY `retriever_resource` ADD INDEX `completed` (`completed`), + PRIMARY KEY (`id`) +) DEFAULT CHARSET=utf8 COLLATE=utf8_bin diff --git a/retriever/retriever.php b/retriever/retriever.php new file mode 100644 index 00000000..78fe575f --- /dev/null +++ b/retriever/retriever.php @@ -0,0 +1,832 @@ + + * Status: Unsupported + */ + +use Friendica\Core\Addon; +use Friendica\Core\Config; +use Friendica\Core\PConfig; +use Friendica\Content\Text\HTML; +use Friendica\Content\Text\BBCode; +use Friendica\Object\Image; +use Friendica\Util\Network; +use Friendica\Core\L10n; +use Friendica\Database\DBA; + +function retriever_install() { + Addon::registerHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings'); + Addon::registerHook('plugin_settings_post', 'addon/retriever/retriever.php', 'retriever_plugin_settings_post'); + Addon::registerHook('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook'); + Addon::registerHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); + Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); + + $r = q("SELECT `id` FROM `pconfig` WHERE `cat` LIKE 'retriever_%%'"); + if (count($r) || (Config::get('retriever', 'dbversion') == '0.1')) { + $retrievers = array(); + $r = q("SELECT SUBSTRING(`cat`, 10) AS `contact`, `k`, `v` FROM `pconfig` WHERE `cat` LIKE 'retriever%%'"); + foreach 
($r as $rr) { + $retrievers[$rr['contact']][$rr['k']] = $rr['v']; + } + foreach ($retrievers as $k => $v) { + $rr = q("SELECT `uid` FROM `contact` WHERE `id` = %d", intval($k)); + $uid = $rr[0]['uid']; + $v['images'] = 'on'; + q("INSERT INTO `retriever_rule` (`uid`, `contact-id`, `data`) VALUES (%d, %d, '%s')", + intval($uid), intval($k), DBA::escape(json_encode($v))); + } + q("DELETE FROM `pconfig` WHERE `cat` LIKE 'retriever_%%'"); + Config::set('retriever', 'dbversion', '0.2'); + } + if (Config::get('retriever', 'dbversion') == '0.2') { + q("ALTER TABLE `retriever_resource` DROP COLUMN `retriever`"); + Config::set('retriever', 'dbversion', '0.3'); + } + if (Config::get('retriever', 'dbversion') == '0.3') { + q("ALTER TABLE `retriever_item` MODIFY COLUMN `item-uri` varchar(800) CHARACTER SET ascii NOT NULL"); + q("ALTER TABLE `retriever_resource` MODIFY COLUMN `url` varchar(800) CHARACTER SET ascii NOT NULL"); + Config::set('retriever', 'dbversion', '0.4'); + } + if (Config::get('retriever', 'dbversion') == '0.4') { + q("ALTER TABLE `retriever_item` ADD COLUMN `finished` tinyint(1) unsigned NOT NULL DEFAULT '0'"); + Config::set('retriever', 'dbversion', '0.5'); + } + if (Config::get('retriever', 'dbversion') == '0.5') { + q('ALTER TABLE `retriever_resource` CHANGE `created` `created` timestamp NOT NULL DEFAULT now()'); + q('ALTER TABLE `retriever_resource` CHANGE `completed` `completed` timestamp NULL DEFAULT NULL'); + q('ALTER TABLE `retriever_resource` CHANGE `last-try` `last-try` timestamp NULL DEFAULT NULL'); + q('ALTER TABLE `retriever_item` DROP KEY `all`'); + q('ALTER TABLE `retriever_item` ADD KEY `all` (`item-uri`, `item-uid`, `contact-id`)'); + Config::set('retriever', 'dbversion', '0.6'); + } + if (Config::get('retriever', 'dbversion') == '0.6') { + q('ALTER TABLE `retriever_item` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin'); + q('ALTER TABLE `retriever_item` CHANGE `item-uri` `item-uri` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT 
NULL'); + q('ALTER TABLE `retriever_resource` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin'); + q('ALTER TABLE `retriever_resource` CHANGE `url` `url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL'); + q('ALTER TABLE `retriever_rule` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin'); + Config::set('retriever', 'dbversion', '0.7'); + } + if (Config::get('retriever', 'dbversion') == '0.7') { + $r = q("SELECT `id`, `data` FROM `retriever_rule`"); + foreach ($r as $rr) { + logger('retriever_install: retriever ' . $rr['id'] . ' old config ' . $rr['data'], LOGGER_DATA); + $data = json_decode($rr['data'], true); + if ($data['pattern']) { + $matches = array(); + if (preg_match("/\/(.*)\//", $data['pattern'], $matches)) { + $data['pattern'] = $matches[1]; + } + } + if ($data['match']) { + $include = array(); + foreach (explode('|', $data['match']) as $component) { + $matches = array(); + if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[@([A-Za-z][a-z0-9]*)='([^']*)'\]/", $component, $matches)) { + $include[] = array( + 'element' => $matches[1], + 'attribute' => $matches[2], + 'value' => $matches[3]); + } + if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[contains(concat(' ',normalize-space(@class),' '),' ([^ ']+) ')]/", $component, $matches)) { + $include[] = array( + 'element' => $matches[1], + 'attribute' => $matches[2], + 'value' => $matches[3]); + } + } + $data['include'] = $include; + unset($data['match']); + } + if ($data['remove']) { + $exclude = array(); + foreach (explode('|', $data['remove']) as $component) { + $matches = array(); + if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[@([A-Za-z][a-z0-9]*)='([^']*)'\]/", $component, $matches)) { + $exclude[] = array( + 'element' => $matches[1], + 'attribute' => $matches[2], + 'value' => $matches[3]); + } + if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[contains(concat(' ',normalize-space(@class),' '),' ([^ ']+) ')]/", $component, $matches)) { + $exclude[] = array( + 'element' => $matches[1], + 'attribute' => $matches[2], + 'value' => 
$matches[3]); + } + } + $data['exclude'] = $exclude; + unset($data['remove']); + } + $r = q('UPDATE `retriever_rule` SET `data` = "%s" WHERE `id` = %d', DBA::escape(json_encode($data)), $rr['id']); + logger('retriever_install: retriever ' . $rr['id'] . ' new config ' . json_encode($data), LOGGER_DATA); + } + Config::set('retriever', 'dbversion', '0.8'); + } + if (Config::get('retriever', 'dbversion') == '0.8') { + q("ALTER TABLE `retriever_resource` ADD COLUMN `http-code` smallint(1) unsigned NULL DEFAULT NULL"); + Config::set('retriever', 'dbversion', '0.9'); + } + if (Config::get('retriever', 'dbversion') == '0.9') { + q("ALTER TABLE `retriever_item` DROP COLUMN `parent`"); + q("ALTER TABLE `retriever_resource` ADD COLUMN `redirect-url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NULL DEFAULT NULL"); + Config::set('retriever', 'dbversion', '0.10'); + } + if (Config::get('retriever', 'dbversion') == '0.10') { + q("ALTER TABLE `retriever_resource` MODIFY COLUMN `type` char(255) NULL DEFAULT NULL"); + q("ALTER TABLE `retriever_resource` MODIFY COLUMN `data` mediumblob NULL DEFAULT NULL"); + q("ALTER TABLE `retriever_rule` MODIFY COLUMN `data` mediumtext NULL DEFAULT NULL"); + Config::set('retriever', 'dbversion', '0.11'); + } + if (Config::get('retriever', 'dbversion') == '0.11') { + q("ALTER TABLE `retriever_resource` ADD INDEX `url` (`url`)"); + q("ALTER TABLE `retriever_resource` ADD INDEX `completed` (`completed`)"); + q("ALTER TABLE `retriever_item` ADD INDEX `finished` (`finished`)"); + q("ALTER TABLE `retriever_item` ADD INDEX `item-uid` (`item-uid`)"); + Config::set('retriever', 'dbversion', '0.12'); + } + if (Config::get('retriever', 'dbversion') != '0.12') { + $schema = file_get_contents(dirname(__file__).'/database.sql'); + $arr = explode(';', $schema); + foreach ($arr as $a) { + $r = q($a); + } + Config::set('retriever', 'dbversion', '0.12'); + } +} + +function retriever_uninstall() { + Addon::unregisterHook('plugin_settings', 
'addon/retriever/retriever.php', 'retriever_plugin_settings'); + Addon::unregisterHook('plugin_settings_post', 'addon/retriever/retriever.php', 'retriever_plugin_settings_post'); + Addon::unregisterHook('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook'); + Addon::unregisterHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings'); + Addon::unregisterHook('plugin_settings_post', 'addon/retriever/retriever.php', 'retriever_plugin_settings_post'); + Addon::unregisterHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); + Addon::unregisterHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); +} + +function retriever_module() {} + +function retriever_cron($a, $b) { + // 100 is a nice sane number. Maybe this should be configurable. + retriever_retrieve_items(100, $a); + retriever_tidy(); +} + +$retriever_item_count = 0; + +function retriever_retrieve_items($max_items, $a) { + global $retriever_item_count; + + $retriever_schedule = array(array(1,'minute'), + array(10,'minute'), + array(1,'hour'), + array(1,'day'), + array(2,'day'), + array(1,'week'), + array(1,'month')); + + $schedule_clauses = array(); + for ($i = 0; $i < count($retriever_schedule); $i++) { + $num = $retriever_schedule[$i][0]; + $unit = $retriever_schedule[$i][1]; + array_push($schedule_clauses, + '(`num-tries` = ' . $i . ' AND TIMESTAMPADD(' . DBA::escape($unit) . + ', ' . intval($num) . ', `last-try`) < now())'); + } + + $retrieve_items = $max_items - $retriever_item_count; + logger('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . 
$retrieve_items, LOGGER_DEBUG); + do { + $r = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d", + DBA::escape(implode($schedule_clauses, ' OR ')), + intval($retrieve_items)); + if (!is_array($r)) { + break; + } + if (count($r) == 0) { + break; + } + logger('retriever_retrieve_items: found ' . count($r) . ' waiting resources in database', LOGGER_DEBUG); + foreach ($r as $rr) { + retrieve_resource($rr); + $retriever_item_count++; + } + $retrieve_items = $max_items - $retriever_item_count; + } + while ($retrieve_items > 0); + + /* Look for items that are waiting even though the resource has + * completed. This usually happens because we've been asked to + * retrospectively apply a config change. It could also happen + * due to a cron job dying or something. */ + $r = q("SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d", + intval($retrieve_items)); + if (!$r) { + $r = array(); + } + logger('retriever_retrieve_items: items waiting even though resource has completed: ' . count($r), LOGGER_DEBUG); + foreach ($r as $rr) { + $resource = q("SELECT * FROM retriever_resource WHERE `id` = %d", $rr['resource']); + $retriever_item = retriever_get_retriever_item($rr['item']); + if (!$retriever_item) { + logger('retriever_retrieve_items: no retriever item with id ' . $rr['item'], LOGGER_INFO); + continue; + } + $item = retriever_get_item($retriever_item); + if (!$item) { + logger('retriever_retrieve_items: no item ' . 
$retriever_item['item-uri'], LOGGER_INFO); + continue; + } + $retriever = get_retriever($item['contact-id'], $item['uid']); + if (!$retriever) { + logger('retriever_retrieve_items: no retriever for item ' . + $retriever_item['item-uri'] . ' ' . $retriever_item['uid'] . ' ' . $item['contact-id'], + LOGGER_INFO); + continue; + } + retriever_apply_completed_resource_to_item($retriever, $item, $resource[0], $a); + q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", + intval($retriever_item['id'])); + retriever_check_item_completed($item); + } +} + +function retriever_tidy() { + q("DELETE FROM retriever_resource WHERE completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)"); + q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)"); + + $r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); + logger('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource'); + foreach ($r as $rr) { + q('DELETE FROM retriever_item WHERE id = %d', intval($rr['id'])); + } +} + +function retrieve_dataurl_resource($resource) { + if (!preg_match("/date:(.*);base64,(.*)/", $resource['url'], $matches)) { + logger('retrieve_dataurl_resource: ' . $resource['id'] . 
' does not match pattern'); + } else { + $resource['type'] = $matches[1]; + $resource['data'] = base64url_decode($matches[2]); + } + + // Succeed or fail, there's no point retrying + q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `completed` = now(), `data` = '%s', `type` = '%s' WHERE id = %d", + DBA::escape($resource['data']), + DBA::escape($resource['type']), + intval($resource['id'])); + retriever_resource_completed($resource, $a); +} + +function retrieve_resource($resource) { + if (substr($resource['url'], 0, 5) == "data:") { + return retrieve_dataurl_resource($resource); + } + + $a = get_app(); + + try { + logger('retrieve_resource: ' . ($resource['num-tries'] + 1) . + ' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], LOGGER_DEBUG); + $redirects; + $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); + $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, array('cookiejar' => $cookiejar)); + unlink($cookiejar); + $resource['data'] = $fetch_result['body']; + $resource['http-code'] = $a->get_curl_code(); + $resource['type'] = $a->get_curl_content_type(); + $resource['redirect-url'] = $fetch_result['redirect_url']; + logger('retrieve_resource: got code ' . $resource['http-code'] . + ' retrieving resource ' . $resource['id'] . + ' final url ' . $resource['redirect-url'], LOGGER_DEBUG); + } catch (Exception $e) { + logger('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . 
$e->getMessage()); + } + q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `http-code` = %d, `redirect-url` = '%s' WHERE id = %d", + intval($resource['http-code']), + DBA::escape($resource['redirect-url']), + intval($resource['id'])); + if ($resource['data']) { + q("UPDATE `retriever_resource` SET `completed` = now(), `data` = '%s', `type` = '%s' WHERE id = %d", + DBA::escape($resource['data']), + DBA::escape($resource['type']), + intval($resource['id'])); + retriever_resource_completed($resource, $a); + } +} + +function get_retriever($contact_id, $uid, $create = false) { + $r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d", + intval($contact_id), intval($uid)); + if (count($r)) { + $r[0]['data'] = json_decode($r[0]['data'], true); + return $r[0]; + } + if ($create) { + q("INSERT INTO `retriever_rule` (`uid`, `contact-id`) VALUES (%d, %d)", + intval($uid), intval($contact_id)); + $r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d", + intval($contact_id), intval($uid)); + return $r[0]; + } +} + +function retriever_get_retriever_item($id) { + $retriever_items = q("SELECT * FROM `retriever_item` WHERE id = %d", intval($id)); + if (count($retriever_items) != 1) { + logger('retriever_get_retriever_item: unable to find retriever_item ' . $id, LOGGER_INFO); + return; + } + return $retriever_items[0]; +} + +function retriever_get_item($retriever_item) { + $items = q("SELECT * FROM `item` WHERE `uri` = '%s' AND `uid` = %d AND `contact-id` = %d", + DBA::escape($retriever_item['item-uri']), + intval($retriever_item['item-uid']), + intval($retriever_item['contact-id'])); + if (count($items) != 1) { + logger('retriever_get_item: unexpected number of results ' . + count($items) . " when searching for item $uri $uid $cid", LOGGER_INFO); + return; + } + return $items[0]; +} + +function retriever_item_completed($retriever_item_id, $resource, $a) { + logger('retriever_item_completed: id ' . 
$retriever_item_id . ' url ' . $resource['url'], LOGGER_DEBUG); + + $retriever_item = retriever_get_retriever_item($retriever_item_id); + if (!$retriever_item) { + return; + } + // Note: the retriever might be null. Doesn't matter. + $retriever = get_retriever($retriever_item['contact-id'], $retriever_item['item-uid']); + $item = retriever_get_item($retriever_item); + if (!$item) { + return; + } + + retriever_apply_completed_resource_to_item($retriever, $item, $resource, $a); + + q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", + intval($retriever_item['id'])); + retriever_check_item_completed($item); +} + +function retriever_resource_completed($resource, $a) { + logger('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url'], LOGGER_DEBUG); + $r = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource['id']); + foreach ($r as $rr) { + retriever_item_completed($rr['id'], $resource, $a); + } +} + +function apply_retrospective($a, $retriever, $num) { + $r = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d", + intval($retriever['contact-id']), intval($num)); + foreach ($r as $item) { + q('UPDATE `item` SET `visible` = 0 WHERE `id` = %d', $item['id']); + q('UPDATE `thread` SET `visible` = 0 WHERE `iid` = %d', $item['id']); + retriever_on_item_insert($a, $retriever, $item); + } +} + +function retriever_on_item_insert($a, $retriever, &$item) { + if (!$retriever || !$retriever['id']) { + logger('retriever_on_item_insert: No retriever supplied', LOGGER_INFO); + return; + } + if (!$retriever["data"]['enable'] == "on") { + return; + } + if (array_key_exists('pattern', $retriever["data"]) && $retriever["data"]['pattern']) { + $url = preg_replace('/' . $retriever["data"]['pattern'] . '/', $retriever["data"]['replace'], $item['plink']); + logger('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . 
$url, LOGGER_DATA); + } + else { + $url = $item['plink']; + } + + $resource = add_retriever_resource($a, $url); + $retriever_item_id = add_retriever_item($item, $resource); +} + +function add_retriever_resource($a, $url, $binary = false) { + logger('add_retriever_resource: ' . $url, LOGGER_DEBUG); + + $scheme = parse_url($url, PHP_URL_SCHEME); + if ($scheme == 'data') { + $fp = fopen($url, 'r'); + $meta = stream_get_meta_data($fp); + $type = $meta['mediatype']; + $data = stream_get_contents($fp); + fclose($fp); + + $url = 'md5://' . hash('md5', $url); + $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url)); + $resource = $r[0]; + if (count($r)) { + logger('add_retriever_resource: Resource ' . $url . ' already requested', LOGGER_DEBUG); + return $resource; + } + + logger('retrieve_resource: got data URL type ' . $resource['type'], LOGGER_DEBUG); + q("INSERT INTO `retriever_resource` (`type`, `binary`, `url`, `completed`, `data`) " . + "VALUES ('%s', %d, '%s', now(), '%s')", + DBA::escape($type), + intval($binary ? 1 : 0), + DBA::escape($url), + DBA::escape($data)); + $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url)); + $resource = $r[0]; + if (count($r)) { + retriever_resource_completed($resource, $a); + } + return $resource; + } + + if (strlen($url) > 800) { + logger('add_retriever_resource: URL is longer than 800 characters', LOGGER_INFO); + } + + $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url)); + if (count($r)) { + logger('add_retriever_resource: Resource ' . $url . ' already requested', LOGGER_DEBUG); + return $r[0]; + } + + q("INSERT INTO `retriever_resource` (`binary`, `url`) " . + "VALUES (%d, '%s')", intval($binary ? 1 : 0), DBA::escape($url)); + $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url)); + return $r[0]; +} + +function add_retriever_item(&$item, $resource) { + logger('add_retriever_item: ' . $resource['url'] . ' for ' . 
$item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG); + + q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " . + "VALUES ('%s', %d, %d, %d)", + DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource["id"])); + $r = q("SELECT id FROM `retriever_item` WHERE " . + "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d ORDER BY id DESC", + DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id'])); + if (!count($r)) { + logger("add_retriever_item: couldn't create retriever item for " . + $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], + LOGGER_INFO); + return; + } + logger('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG); + return $r[0]['id']; +} + +function retriever_get_encoding($resource) { + $matches = array(); + if (preg_match('/charset=(.*)/', $resource['type'], $matches)) { + return trim(array_pop($matches)); + } + return 'utf-8'; +} + +function retriever_apply_xslt_text($xslt_text, $doc) { + if (!$xslt_text) { + logger('retriever_apply_xslt_text: empty XSLT text', LOGGER_INFO); + return $doc; + } + $xslt_doc = new DOMDocument(); + if (!$xslt_doc->loadXML($xslt_text)) { + logger('retriever_apply_xslt_text: could not load XML', LOGGER_INFO); + return $doc; + } + $xp = new XsltProcessor(); + $xp->importStylesheet($xslt_doc); + $result = $xp->transformToDoc($doc); + return $result; +} + +function retriever_apply_dom_filter($retriever, &$item, $resource) { + logger('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . 
$item['contact-id'], LOGGER_DEBUG); + + if (!$retriever['data']['include'] && !$retriever['data']['customxslt']) { + return; + } + if (!$resource['data']) { + logger('retriever_apply_dom_filter: no text to work with', LOGGER_INFO); + return; + } + + $encoding = retriever_get_encoding($resource); + $content = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $encoding); + $doc = new DOMDocument('1.0', 'UTF-8'); + if (strpos($resource['type'], 'html') !== false) { + @$doc->loadHTML($content); + } + else { + $doc->loadXML($content); + } + + $params = array('$spec' => $retriever['data']); + $extract_template = get_markup_template('extract.tpl', 'addon/retriever/'); + $extract_xslt = replace_macros($extract_template, $params); + if ($retriever['data']['include']) { + $doc = retriever_apply_xslt_text($extract_xslt, $doc); + } + if (array_key_exists('customxslt', $retriever['data']) && $retriever['data']['customxslt']) { + $doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc); + } + if (!$doc) { + logger('retriever_apply_dom_filter: failed to apply extract XSLT template', LOGGER_INFO); + return; + } + + $components = parse_url($resource['redirect-url']); + $rooturl = $components['scheme'] . "://" . $components['host']; + $dirurl = $rooturl . dirname($components['path']) . "/"; + $params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl); + $fix_urls_template = get_markup_template('fix-urls.tpl', 'addon/retriever/'); + $fix_urls_xslt = replace_macros($fix_urls_template, $params); + $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc); + if (!$doc) { + logger('retriever_apply_dom_filter: failed to apply fix urls XSLT template', LOGGER_INFO); + return; + } + + $item['body'] = HTML::toBBCode($doc->saveHTML()); + if (!strlen($item['body'])) { + logger('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', LOGGER_INFO); + return; + } + $item['body'] .= "\n\n" . L10n::t('Retrieved') . ' ' . 
date("Y-m-d") . ': [url='; + $item['body'] .= $item['plink']; + $item['body'] .= ']' . $item['plink'] . '[/url]'; + q("UPDATE `item` SET `body` = '%s' WHERE `id` = %d", + DBA::escape($item['body']), intval($item['id'])); +} + +function retrieve_images(&$item, $a) { + $matches1 = array(); + preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $item["body"], $matches1); + $matches2 = array(); + preg_match_all("/\[img\](.*?)\[\/img\]/ism", $item["body"], $matches2); + $matches = array_merge($matches1[3], $matches2[1]); + logger('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG); + foreach ($matches as $url) { + if (strpos($url, get_app()->get_baseurl()) === FALSE) { + $resource = add_retriever_resource($a, $url, true); + if (!$resource['completed']) { + add_retriever_item($item, $resource); + } + else { + retriever_transform_images($a, $item, $resource); + } + } + } +} + +function retriever_check_item_completed(&$item) +{ + $r = q('SELECT count(*) FROM retriever_item WHERE `item-uri` = "%s" ' . + 'AND `item-uid` = %d AND `contact-id` = %d AND `finished` = 0', + DBA::escape($item['uri']), intval($item['uid']), + intval($item['contact-id'])); + $waiting = $r[0]['count(*)']; + logger('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] + . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources', LOGGER_DEBUG); + $old_visible = $item['visible']; + $item['visible'] = $waiting ? 0 : 1; + if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) { + logger('retriever_check_item_completed: changing visible flag to ' . $item['visible'] . ' and invoking notifier ("edit_post", ' . $item['id'] . 
')', LOGGER_DEBUG); + q("UPDATE `item` SET `visible` = %d WHERE `id` = %d", + intval($item['visible']), + intval($item['id'])); + q("UPDATE `thread` SET `visible` = %d WHERE `iid` = %d", + intval($item['visible']), + intval($item['id'])); + } +} + +function retriever_apply_completed_resource_to_item($retriever, &$item, $resource, $a) { + logger('retriever_apply_completed_resource_to_item: retriever ' . + ($retriever ? $retriever['id'] : 'none') . + ' resource ' . $resource['url'] . ' plink ' . $item['plink'], LOGGER_DEBUG); + if (strpos($resource['type'], 'image') !== false) { + retriever_transform_images($a, $item, $resource); + } + if (!$retriever) { + return; + } + if ((strpos($resource['type'], 'html') !== false) || + (strpos($resource['type'], 'xml') !== false)) { + retriever_apply_dom_filter($retriever, $item, $resource); + if ($retriever["data"]['images'] ) { + retrieve_images($item, $a); + } + } +} + +function retriever_transform_images($a, &$item, $resource) { + if (!$resource["data"]) { + logger('retriever_transform_images: no data available for ' + . $resource['id'] . ' ' . $resource['url'], LOGGER_INFO); + return; + } + + try { + $photo = Image::storePhoto($a, $item['uid'], $resource['data'], $resource['url']); + } catch (Exception $e) { + logger('retriever_transform_images caught exception ' . $e->getMessage()); + return; + } + foreach ($photo as $k => $v) + { + logger('@@@ photo key ' . $k); + } + $new_url = $photo['full']; + logger('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . + $new_url . ' in item ' . 
$item['plink'], LOGGER_DEBUG); + $transformed = str_replace($resource["url"], $new_url, $item['body']); + if ($transformed === $item['body']) { + return; + } + + $item['body'] = $transformed; + q("UPDATE `item` SET `body` = '%s' WHERE `plink` = '%s' AND `uid` = %d AND `contact-id` = %d", + DBA::escape($item['body']), + DBA::escape($item['plink']), + intval($item['uid']), + intval($item['contact-id'])); +} + +function retriever_content($a) { + if (!local_user()) { + $a->page['content'] .= "

Please log in

"; + return; + } + if ($a->argv[1] === 'help') { + $feeds = q("SELECT `id`, `name`, `thumb` FROM contact WHERE `uid` = %d AND `network` = 'feed'", + local_user()); + foreach ($feeds as $k=>$v) { + $feeds[$k]['url'] = $a->get_baseurl() . '/retriever/' . $v['id']; + } + $template = get_markup_template('/help.tpl', 'addon/retriever/'); + $a->page['content'] .= replace_macros($template, array( + '$config' => $a->get_baseurl() . '/settings/addon', + '$feeds' => $feeds)); + return; + } + if ($a->argv[1]) { + $retriever = get_retriever($a->argv[1], local_user(), false); + + if (x($_POST["id"])) { + $retriever = get_retriever($a->argv[1], local_user(), true); + $retriever["data"] = array(); + foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt') as $setting) { + if (x($_POST['retriever_' . $setting])) { + $retriever["data"][$setting] = $_POST['retriever_' . $setting]; + } + } + foreach ($_POST as $k=>$v) { + if (preg_match("/retriever-(include|exclude)-(\d+)-(element|attribute|value)/", $k, $matches)) { + $retriever['data'][$matches[1]][intval($matches[2])][$matches[3]] = $v; + } + } + // You've gotta have an element, even if it's just "*" + foreach ($retriever['data']['include'] as $k=>$clause) { + if (!$clause['element']) { + unset($retriever['data']['include'][$k]); + } + } + foreach ($retriever['data']['exclude'] as $k=>$clause) { + if (!$clause['element']) { + unset($retriever['data']['exclude'][$k]); + } + } + q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d", + DBA::escape(json_encode($retriever["data"])), intval($retriever["id"])); + $a->page['content'] .= "

Settings Updated"; + if (x($_POST["retriever_retrospective"])) { + apply_retrospective($a, $retriever, $_POST["retriever_retrospective"]); + $a->page['content'] .= " and retrospectively applied to " . $_POST["apply"] . " posts"; + } + $a->page['content'] .= ".

"; + } + + $template = get_markup_template('/rule-config.tpl', 'addon/retriever/'); + $a->page['content'] .= replace_macros($template, array( + '$enable' => array( + 'retriever_enable', + L10n::t('Enabled'), + $retriever['data']['enable']), + '$pattern' => array( + 'retriever_pattern', + L10n::t('URL Pattern'), + $retriever["data"]['pattern'], + L10n::t('Regular expression matching part of the URL to replace')), + '$replace' => array( + 'retriever_replace', + L10n::t('URL Replace'), + $retriever["data"]['replace'], + L10n::t('Text to replace matching part of above regular expression')), + '$images' => array( + 'retriever_images', + L10n::t('Download Images'), + $retriever['data']['images']), + '$retrospective' => array( + 'retriever_retrospective', + L10n::t('Retrospectively Apply'), + '0', + L10n::t('Reapply the rules to this number of posts')), + '$customxslt' => array( + 'retriever_customxslt', + L10n::t('Custom XSLT'), + $retriever['data']['customxslt'], + L10n::t("When standard rules aren't enough, apply custom XSLT to the article")), + '$title' => L10n::t('Retrieve Feed Content'), + '$help' => $a->get_baseurl() . '/retriever/help', + '$help_t' => L10n::t('Get Help'), + '$submit_t' => L10n::t('Submit'), + '$submit' => L10n::t('Save Settings'), + '$id' => ($retriever["id"] ? $retriever["id"] : "create"), + '$tag_t' => L10n::t('Tag'), + '$attribute_t' => L10n::t('Attribute'), + '$value_t' => L10n::t('Value'), + '$add_t' => L10n::t('Add'), + '$remove_t' => L10n::t('Remove'), + '$include_t' => L10n::t('Include'), + '$include' => $retriever['data']['include'], + '$exclude_t' => L10n::t('Exclude'), + '$exclude' => $retriever["data"]['exclude'])); + return; + } +} + +function retriever_contact_photo_menu($a, &$args) { + if (!$args) { + return; + } + if ($args["contact"]["network"] == "feed") { + $args["menu"][ 'retriever' ] = array(L10n::t('Retriever'), $a->get_baseurl() . '/retriever/' . 
$args["contact"]['id']); + } +} + +function retriever_post_remote_hook(&$a, &$item) { + logger('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG); + + $retriever = get_retriever($item['contact-id'], $item["uid"], false); + if ($retriever) { + retriever_on_item_insert($a, $retriever, $item); + } + else { + if (PConfig::get($item["uid"], 'retriever', 'oembed')) { + // Convert to HTML and back to take advantage of bbcode's resolution of oembeds. + $body = HTML::toBBCode(BBCode::convert($item['body'])); + if ($body) { + $item['body'] = $body; + } + } + if (PConfig::get($item["uid"], 'retriever', 'all_photos')) { + retrieve_images($item, $a); + } + } + retriever_check_item_completed($item); +} + +function retriever_plugin_settings(&$a,&$s) { + $all_photos = PConfig::get(local_user(), 'retriever', 'all_photos'); + $oembed = PConfig::get(local_user(), 'retriever', 'oembed'); + $template = get_markup_template('/settings.tpl', 'addon/retriever/'); + $s .= replace_macros($template, array( + '$allphotos' => array( + 'retriever_all_photos', + L10n::t('All Photos'), + $all_photos, + L10n::t('Check this to retrieve photos for all posts')), + '$oembed' => array( + 'retriever_oembed', + L10n::t('Resolve OEmbed'), + $oembed, + L10n::t('Check this to attempt to retrieve embedded content for all posts - useful e.g. for Facebook posts')), + '$submit' => L10n::t('Save Settings'), + '$title' => L10n::t('Retriever Settings'), + '$help' => $a->get_baseurl() . 
'/retriever/help')); +} + +function retriever_plugin_settings_post($a,$post) { + if ($_POST['retriever_all_photos']) { + PConfig::set(local_user(), 'retriever', 'all_photos', $_POST['retriever_all_photos']); + } + else { + PConfig::del(local_user(), 'retriever', 'all_photos'); + } + if ($_POST['retriever_oembed']) { + PConfig::set(local_user(), 'retriever', 'oembed', $_POST['retriever_oembed']); + } + else { + PConfig::del(local_user(), 'retriever', 'oembed'); + } +} diff --git a/retriever/templates/extract.tpl b/retriever/templates/extract.tpl new file mode 100644 index 00000000..f24a860d --- /dev/null +++ b/retriever/templates/extract.tpl @@ -0,0 +1,32 @@ + + + + + + +{{function clause_xpath}} +{{if !$clause.attribute}} +{{$clause.element}}{{elseif $clause.attribute == 'class'}} +{{$clause.element}}[contains(concat(' ', normalize-space(@class), ' '), '{{$clause.value}}')]{{else}} +{{$clause.element}}[@{{$clause.attribute}}='{{$clause.value}}']{{/if}} +{{/function}} + +{{foreach $spec.include as $clause}} + + + + + +{{/foreach}} + +{{foreach $spec.exclude as $clause}} + +{{/foreach}} + + + + + + + + diff --git a/retriever/templates/fix-urls.tpl b/retriever/templates/fix-urls.tpl new file mode 100644 index 00000000..248d4770 --- /dev/null +++ b/retriever/templates/fix-urls.tpl @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/retriever/templates/help.tpl b/retriever/templates/help.tpl new file mode 100644 index 00000000..10b421d0 --- /dev/null +++ b/retriever/templates/help.tpl @@ -0,0 +1,148 @@ +

Retriever Plugin Help

+

+This plugin replaces the short excerpts you normally get in RSS feeds +with the full content of the article from the source website. You +specify which part of the page you're interested in with a set of +rules. When each item arrives, the plugin downloads the full page +from the website, extracts content using the rules, and replaces the +original article. +

+

+There are a few reasons you may want to do this. The source website +might be slow or overloaded. The source website might be +untrustworthy, in which case using Friendica to scrub the HTML is a +good idea. You might be on a LAN that blacklists certain websites. +It also works neatly with the mailstream plugin, allowing you to read +a news stream comfortably without needing continuous Internet +connectivity. +

+

+However, setting up retriever can be quite tricky since it depends on +the internal design of the website. That design exists to make life +easy for the website's developers, not for you. You'll need to have +some familiarity with HTML, and be willing to adapt when the website +suddenly changes everything without notice. +

+

Configuring Retriever for a feed

+

+To set up retriever for an RSS feed, go to the "Contacts" page and +find your feed. Then click on the drop-down menu on the contact. +Select "Retriever" to get to the retriever configuration. +

+

+The "Include" configuration section specifies parts of the page to +include in the article. Each row has three components: +

+ +

+A simple case is when the article is wrapped in a "div" element: +

+
+    ...
+    <div class="ArticleWrapper">
+      <h2>Man Bites Dog</h2>
+      <img src="mbd.jpg">
+      <p>
+        Residents of the sleepy community of Nowheresville were
+        shocked yesterday by the sight of creepy local weirdo Jim
+        McOddman assaulting innocent local dog Snufflekins with his
+        false teeth.
+      </p>
+      ...
+    </div>
+    ...
+
+

+You then specify the tag "div", attribute "class", and value +"ArticleWrapper". Everything else in the page, such as navigation +panels and menus and footers and so on, will be discarded. If there +is more than one section of the page you want to include, specify each +one on a separate row. If the matching section contains some sections +you want to remove, specify those in the "Exclude" section in the same +way. +

+

+Once you've got a configuration that you think will work, you can try +it out on some existing articles. Type a number into the +"Retrospectively Apply" box and click "Submit". After a while +(exactly how long depends on your system's cron configuration) the new +articles should be available. +

+

Techniques

+

+You can leave the attribute and value blank to include all +elements with the specified tag name. You can also use +a tag name of just an asterisk ("*"), which will match any element with the +specified attribute and value, regardless of its tag. +

+

+Note that the "class" attribute is a special case. Many web page +templates will put multiple different classes in the same element, +separated by spaces. If you specify an attribute of "class" it will +match an element if any of its classes matches the specified value. +For example: +

+
+    <div class="article breaking-news">
+
+

+In this case you can specify a value of "article", or "breaking-news". +You can also specify "article breaking-news", but that won't match if +the website suddenly changes to "breaking-news article", so that's not +recommended. +

+

+One useful trick you can try is using the website's "print" pages. +Many news sites have print versions of all their articles. These are +usually drastically simplified compared to the live website page. +Sometimes this is a good way to get the whole article when it's +normally split across multiple pages. +

+

+Hopefully the URL for the print page is a predictable variant of the +normal article URL. For example, an article URL like: +

+
+    http://www.newssite.com/article-8636.html
+
+

+...might have a print version at: +

+
+    http://www.newssite.com/print/article-8636.html
+
+

+To change the URL used to retrieve the page, use the "URL Pattern" and +"URL Replace" fields. The pattern is a regular expression matching +part of the URL to replace. In this case, you might use a pattern of +"/article" and a replace string of "/print/article". A common pattern +is simply a dollar sign ("$"), used to add the replace string to the end of the URL. +

+

Background Processing

+

+Note that retrieving and processing the articles can take some time, +so it's done in the background. Incoming articles will be marked as +invisible while they're in the process of being downloaded. If a URL +fails, the plugin will keep trying at progressively longer intervals +for up to a month, in case the website is temporarily overloaded or +the network is down. +

+

Retrieving Images

+

+Retriever can also optionally download images and store them in the +local Friendica instance. Just check the "Download Images" box. You +can also download images in every item from your network, whether it comes from +an RSS feed or not. Go to the "Settings" page and +click "Plugin settings". Then check the "All +Photos" box in the "Retriever Settings" section and click "Submit". +

+

Configure Feeds:

+
+{{foreach $feeds as $feed}} +{{include file='contact_template.tpl' contact=$feed}} +{{/foreach}} +
diff --git a/retriever/templates/rule-config.tpl b/retriever/templates/rule-config.tpl new file mode 100644 index 00000000..228d0326 --- /dev/null +++ b/retriever/templates/rule-config.tpl @@ -0,0 +1,112 @@ +
+ +

{{$title}}

+

{{$help_t}}

+
+ +{{include file="field_checkbox.tpl" field=$enable}} +{{include file="field_input.tpl" field=$pattern}} +{{include file="field_input.tpl" field=$replace}} +{{include file="field_checkbox.tpl" field=$images}} +{{include file="field_input.tpl" field=$retrospective}} +

{{$include_t}}:

+
+ + + + + +{{if $include}} + {{foreach $include as $k=>$m}} + + + + + + + {{/foreach}} +{{else}} + + + + + + +{{/if}} + +
{{$tag_t}}{{$attribute_t}}{{$value_t}}
+ +
+

{{$exclude_t}}:

+
+ + + + + +{{if $exclude}} + {{foreach $exclude as $k=>$r}} + + + + + + + {{/foreach}} +{{else}} + + + + + + +{{/if}} + +
TagAttributeValue
+ +
+{{include file="field_textarea.tpl" field=$customxslt}} + +
+
diff --git a/retriever/templates/settings.tpl b/retriever/templates/settings.tpl new file mode 100644 index 00000000..8bfe8db0 --- /dev/null +++ b/retriever/templates/settings.tpl @@ -0,0 +1,9 @@ +
+

{{$title}}

+

+ Get Help +

+{{include file="field_checkbox.tpl" field=$allphotos}} +{{include file="field_checkbox.tpl" field=$oembed}} + +
From 65138c22ede49668fc956bbf74c8d89a87d12bab Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sat, 20 Jul 2019 10:44:38 +0100 Subject: [PATCH 02/96] Fixes for retriever --- retriever/retriever.php | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 78fe575f..5f2b855a 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -512,7 +512,8 @@ function retriever_apply_xslt_text($xslt_text, $doc) { function retriever_apply_dom_filter($retriever, &$item, $resource) { logger('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'], LOGGER_DEBUG); - if (!$retriever['data']['include'] && !$retriever['data']['customxslt']) { + if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) { + logger('retriever_apply_dom_filter: no include and no customxslt', LOGGER_INFO); return; } if (!$resource['data']) { @@ -564,8 +565,8 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { $item['body'] .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . ': [url='; $item['body'] .= $item['plink']; $item['body'] .= ']' . $item['plink'] . '[/url]'; - q("UPDATE `item` SET `body` = '%s' WHERE `id` = %d", - DBA::escape($item['body']), intval($item['id'])); + DBA::update('item', ['body' => $item['body']], ['id' => $item['id']]); + DBA::update('item-content', ['body' => $item['body']], ['uri' => $item['uri']]); } function retrieve_images(&$item, $a) { @@ -642,9 +643,9 @@ function retriever_transform_images($a, &$item, $resource) { logger('retriever_transform_images caught exception ' . $e->getMessage()); return; } - foreach ($photo as $k => $v) - { - logger('@@@ photo key ' . $k); + if (!array_key_exists('full', $photo)) { + logger('retriever_transform_images: no replacement URL for image ' . 
$resource['url']); + return; } $new_url = $photo['full']; logger('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . From ea8dca34db574d4a592e47b77bf18c699531c006 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sat, 20 Jul 2019 14:37:57 +0100 Subject: [PATCH 03/96] more fixes --- retriever/retriever.php | 119 ++++++++++++++++++++-------------------- 1 file changed, 60 insertions(+), 59 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 5f2b855a..18351f1e 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -10,6 +10,7 @@ use Friendica\Core\Addon; use Friendica\Core\Config; use Friendica\Core\PConfig; +use Friendica\Core\Logger; use Friendica\Content\Text\HTML; use Friendica\Content\Text\BBCode; use Friendica\Object\Image; @@ -73,7 +74,7 @@ function retriever_install() { if (Config::get('retriever', 'dbversion') == '0.7') { $r = q("SELECT `id`, `data` FROM `retriever_rule`"); foreach ($r as $rr) { - logger('retriever_install: retriever ' . $rr['id'] . ' old config ' . $rr['data'], LOGGER_DATA); + Logger::log('retriever_install: retriever ' . $rr['id'] . ' old config ' . $rr['data'], Logger::DATA); $data = json_decode($rr['data'], true); if ($data['pattern']) { $matches = array(); @@ -122,7 +123,7 @@ function retriever_install() { unset($data['remove']); } $r = q('UPDATE `retriever_rule` SET `data` = "%s" WHERE `id` = %d', DBA::escape(json_encode($data)), $rr['id']); - logger('retriever_install: retriever ' . $rr['id'] . ' new config ' . json_encode($data), LOGGER_DATA); + Logger::log('retriever_install: retriever ' . $rr['id'] . ' new config ' . json_encode($data), Logger::DATA); } Config::set('retriever', 'dbversion', '0.8'); } @@ -199,7 +200,7 @@ function retriever_retrieve_items($max_items, $a) { } $retrieve_items = $max_items - $retriever_item_count; - logger('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . 
$retrieve_items, LOGGER_DEBUG); + Logger::log('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items, Logger::DEBUG); do { $r = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d", DBA::escape(implode($schedule_clauses, ' OR ')), @@ -210,7 +211,7 @@ function retriever_retrieve_items($max_items, $a) { if (count($r) == 0) { break; } - logger('retriever_retrieve_items: found ' . count($r) . ' waiting resources in database', LOGGER_DEBUG); + Logger::log('retriever_retrieve_items: found ' . count($r) . ' waiting resources in database', Logger::DEBUG); foreach ($r as $rr) { retrieve_resource($rr); $retriever_item_count++; @@ -228,24 +229,24 @@ function retriever_retrieve_items($max_items, $a) { if (!$r) { $r = array(); } - logger('retriever_retrieve_items: items waiting even though resource has completed: ' . count($r), LOGGER_DEBUG); + Logger::log('retriever_retrieve_items: items waiting even though resource has completed: ' . count($r), Logger::DEBUG); foreach ($r as $rr) { $resource = q("SELECT * FROM retriever_resource WHERE `id` = %d", $rr['resource']); $retriever_item = retriever_get_retriever_item($rr['item']); if (!$retriever_item) { - logger('retriever_retrieve_items: no retriever item with id ' . $rr['item'], LOGGER_INFO); + Logger::log('retriever_retrieve_items: no retriever item with id ' . $rr['item'], Logger::INFO); continue; } $item = retriever_get_item($retriever_item); if (!$item) { - logger('retriever_retrieve_items: no item ' . $retriever_item['item-uri'], LOGGER_INFO); + Logger::log('retriever_retrieve_items: no item ' . $retriever_item['item-uri'], Logger::INFO); continue; } $retriever = get_retriever($item['contact-id'], $item['uid']); if (!$retriever) { - logger('retriever_retrieve_items: no retriever for item ' . + Logger::log('retriever_retrieve_items: no retriever for item ' . 
$retriever_item['item-uri'] . ' ' . $retriever_item['uid'] . ' ' . $item['contact-id'], - LOGGER_INFO); + Logger::INFO); continue; } retriever_apply_completed_resource_to_item($retriever, $item, $resource[0], $a); @@ -260,7 +261,7 @@ function retriever_tidy() { q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)"); $r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); - logger('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource'); + Logger::log('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource'); foreach ($r as $rr) { q('DELETE FROM retriever_item WHERE id = %d', intval($rr['id'])); } @@ -268,7 +269,7 @@ function retriever_tidy() { function retrieve_dataurl_resource($resource) { if (!preg_match("/date:(.*);base64,(.*)/", $resource['url'], $matches)) { - logger('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern'); + Logger::log('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern'); } else { $resource['type'] = $matches[1]; $resource['data'] = base64url_decode($matches[2]); @@ -290,21 +291,21 @@ function retrieve_resource($resource) { $a = get_app(); try { - logger('retrieve_resource: ' . ($resource['num-tries'] + 1) . - ' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], LOGGER_DEBUG); + Logger::log('retrieve_resource: ' . ($resource['num-tries'] + 1) . + ' attempt at resource ' . $resource['id'] . ' ' . 
$resource['url'], Logger::DEBUG); $redirects; $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, array('cookiejar' => $cookiejar)); unlink($cookiejar); - $resource['data'] = $fetch_result['body']; - $resource['http-code'] = $a->get_curl_code(); - $resource['type'] = $a->get_curl_content_type(); - $resource['redirect-url'] = $fetch_result['redirect_url']; - logger('retrieve_resource: got code ' . $resource['http-code'] . + $resource['data'] = $fetch_result->getBody(); + $resource['http-code'] = $fetch_result->getReturnCode(); + $resource['type'] = $fetch_result->getContentType(); + $resource['redirect-url'] = $fetch_result->getRedirectUrl(); + Logger::log('retrieve_resource: got code ' . $resource['http-code'] . ' retrieving resource ' . $resource['id'] . - ' final url ' . $resource['redirect-url'], LOGGER_DEBUG); + ' final url ' . $resource['redirect-url'], Logger::DEBUG); } catch (Exception $e) { - logger('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage()); + Logger::log('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage()); } q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `http-code` = %d, `redirect-url` = '%s' WHERE id = %d", intval($resource['http-code']), @@ -338,7 +339,7 @@ function get_retriever($contact_id, $uid, $create = false) { function retriever_get_retriever_item($id) { $retriever_items = q("SELECT * FROM `retriever_item` WHERE id = %d", intval($id)); if (count($retriever_items) != 1) { - logger('retriever_get_retriever_item: unable to find retriever_item ' . $id, LOGGER_INFO); + Logger::log('retriever_get_retriever_item: unable to find retriever_item ' . 
$id, Logger::INFO); return; } return $retriever_items[0]; @@ -350,15 +351,15 @@ function retriever_get_item($retriever_item) { intval($retriever_item['item-uid']), intval($retriever_item['contact-id'])); if (count($items) != 1) { - logger('retriever_get_item: unexpected number of results ' . - count($items) . " when searching for item $uri $uid $cid", LOGGER_INFO); + Logger::log('retriever_get_item: unexpected number of results ' . + count($items) . " when searching for item $uri $uid $cid", Logger::INFO); return; } return $items[0]; } function retriever_item_completed($retriever_item_id, $resource, $a) { - logger('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url'], LOGGER_DEBUG); + Logger::log('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url'], Logger::DEBUG); $retriever_item = retriever_get_retriever_item($retriever_item_id); if (!$retriever_item) { @@ -379,7 +380,7 @@ function retriever_item_completed($retriever_item_id, $resource, $a) { } function retriever_resource_completed($resource, $a) { - logger('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url'], LOGGER_DEBUG); + Logger::log('retriever_resource_completed: id ' . $resource['id'] . ' url ' . 
$resource['url'], Logger::DEBUG); $r = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource['id']); foreach ($r as $rr) { retriever_item_completed($rr['id'], $resource, $a); @@ -398,7 +399,7 @@ function apply_retrospective($a, $retriever, $num) { function retriever_on_item_insert($a, $retriever, &$item) { if (!$retriever || !$retriever['id']) { - logger('retriever_on_item_insert: No retriever supplied', LOGGER_INFO); + Logger::log('retriever_on_item_insert: No retriever supplied', Logger::INFO); return; } if (!$retriever["data"]['enable'] == "on") { @@ -406,7 +407,7 @@ function retriever_on_item_insert($a, $retriever, &$item) { } if (array_key_exists('pattern', $retriever["data"]) && $retriever["data"]['pattern']) { $url = preg_replace('/' . $retriever["data"]['pattern'] . '/', $retriever["data"]['replace'], $item['plink']); - logger('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url, LOGGER_DATA); + Logger::log('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url, Logger::DATA); } else { $url = $item['plink']; @@ -417,7 +418,7 @@ function retriever_on_item_insert($a, $retriever, &$item) { } function add_retriever_resource($a, $url, $binary = false) { - logger('add_retriever_resource: ' . $url, LOGGER_DEBUG); + Logger::log('add_retriever_resource: ' . $url, Logger::DEBUG); $scheme = parse_url($url, PHP_URL_SCHEME); if ($scheme == 'data') { @@ -431,11 +432,11 @@ function add_retriever_resource($a, $url, $binary = false) { $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url)); $resource = $r[0]; if (count($r)) { - logger('add_retriever_resource: Resource ' . $url . ' already requested', LOGGER_DEBUG); + Logger::log('add_retriever_resource: Resource ' . $url . ' already requested', Logger::DEBUG); return $resource; } - logger('retrieve_resource: got data URL type ' . $resource['type'], LOGGER_DEBUG); + Logger::log('retrieve_resource: got data URL type ' . 
$resource['type'], Logger::DEBUG); q("INSERT INTO `retriever_resource` (`type`, `binary`, `url`, `completed`, `data`) " . "VALUES ('%s', %d, '%s', now(), '%s')", DBA::escape($type), @@ -451,12 +452,12 @@ function add_retriever_resource($a, $url, $binary = false) { } if (strlen($url) > 800) { - logger('add_retriever_resource: URL is longer than 800 characters', LOGGER_INFO); + Logger::log('add_retriever_resource: URL is longer than 800 characters', Logger::INFO); } $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url)); if (count($r)) { - logger('add_retriever_resource: Resource ' . $url . ' already requested', LOGGER_DEBUG); + Logger::log('add_retriever_resource: Resource ' . $url . ' already requested', Logger::DEBUG); return $r[0]; } @@ -467,7 +468,7 @@ function add_retriever_resource($a, $url, $binary = false) { } function add_retriever_item(&$item, $resource) { - logger('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG); + Logger::log('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG); q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " . "VALUES ('%s', %d, %d, %d)", @@ -476,12 +477,12 @@ function add_retriever_item(&$item, $resource) { "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d ORDER BY id DESC", DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id'])); if (!count($r)) { - logger("add_retriever_item: couldn't create retriever item for " . + Logger::log("add_retriever_item: couldn't create retriever item for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], - LOGGER_INFO); + Logger::INFO); return; } - logger('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . 
$item['contact-id'], LOGGER_DEBUG); + Logger::log('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG); return $r[0]['id']; } @@ -495,12 +496,12 @@ function retriever_get_encoding($resource) { function retriever_apply_xslt_text($xslt_text, $doc) { if (!$xslt_text) { - logger('retriever_apply_xslt_text: empty XSLT text', LOGGER_INFO); + Logger::log('retriever_apply_xslt_text: empty XSLT text', Logger::INFO); return $doc; } $xslt_doc = new DOMDocument(); if (!$xslt_doc->loadXML($xslt_text)) { - logger('retriever_apply_xslt_text: could not load XML', LOGGER_INFO); + Logger::log('retriever_apply_xslt_text: could not load XML', Logger::INFO); return $doc; } $xp = new XsltProcessor(); @@ -510,14 +511,14 @@ function retriever_apply_xslt_text($xslt_text, $doc) { } function retriever_apply_dom_filter($retriever, &$item, $resource) { - logger('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'], LOGGER_DEBUG); + Logger::log('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . 
$item['contact-id'], Logger::DEBUG); if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) { - logger('retriever_apply_dom_filter: no include and no customxslt', LOGGER_INFO); + Logger::log('retriever_apply_dom_filter: no include and no customxslt', Logger::INFO); return; } if (!$resource['data']) { - logger('retriever_apply_dom_filter: no text to work with', LOGGER_INFO); + Logger::log('retriever_apply_dom_filter: no text to work with', Logger::INFO); return; } @@ -541,7 +542,7 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { $doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc); } if (!$doc) { - logger('retriever_apply_dom_filter: failed to apply extract XSLT template', LOGGER_INFO); + Logger::log('retriever_apply_dom_filter: failed to apply extract XSLT template', Logger::INFO); return; } @@ -553,13 +554,13 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { $fix_urls_xslt = replace_macros($fix_urls_template, $params); $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc); if (!$doc) { - logger('retriever_apply_dom_filter: failed to apply fix urls XSLT template', LOGGER_INFO); + Logger::log('retriever_apply_dom_filter: failed to apply fix urls XSLT template', Logger::INFO); return; } $item['body'] = HTML::toBBCode($doc->saveHTML()); if (!strlen($item['body'])) { - logger('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', LOGGER_INFO); + Logger::log('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', Logger::INFO); return; } $item['body'] .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . 
': [url='; @@ -575,9 +576,9 @@ function retrieve_images(&$item, $a) { $matches2 = array(); preg_match_all("/\[img\](.*?)\[\/img\]/ism", $item["body"], $matches2); $matches = array_merge($matches1[3], $matches2[1]); - logger('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG); + Logger::log('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG); foreach ($matches as $url) { - if (strpos($url, get_app()->get_baseurl()) === FALSE) { + if (strpos($url, get_app()->getBaseUrl()) === FALSE) { $resource = add_retriever_resource($a, $url, true); if (!$resource['completed']) { add_retriever_item($item, $resource); @@ -596,12 +597,12 @@ function retriever_check_item_completed(&$item) DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id'])); $waiting = $r[0]['count(*)']; - logger('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] - . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources', LOGGER_DEBUG); + Logger::log('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] + . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources', Logger::DEBUG); $old_visible = $item['visible']; $item['visible'] = $waiting ? 0 : 1; if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) { - logger('retriever_check_item_completed: changing visible flag to ' . $item['visible'] . ' and invoking notifier ("edit_post", ' . $item['id'] . ')', LOGGER_DEBUG); + Logger::log('retriever_check_item_completed: changing visible flag to ' . $item['visible'] . ' and invoking notifier ("edit_post", ' . $item['id'] . 
')', Logger::DEBUG); q("UPDATE `item` SET `visible` = %d WHERE `id` = %d", intval($item['visible']), intval($item['id'])); @@ -612,9 +613,9 @@ function retriever_check_item_completed(&$item) } function retriever_apply_completed_resource_to_item($retriever, &$item, $resource, $a) { - logger('retriever_apply_completed_resource_to_item: retriever ' . + Logger::log('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . - ' resource ' . $resource['url'] . ' plink ' . $item['plink'], LOGGER_DEBUG); + ' resource ' . $resource['url'] . ' plink ' . $item['plink'], Logger::DEBUG); if (strpos($resource['type'], 'image') !== false) { retriever_transform_images($a, $item, $resource); } @@ -632,24 +633,24 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc function retriever_transform_images($a, &$item, $resource) { if (!$resource["data"]) { - logger('retriever_transform_images: no data available for ' - . $resource['id'] . ' ' . $resource['url'], LOGGER_INFO); + Logger::log('retriever_transform_images: no data available for ' + . $resource['id'] . ' ' . $resource['url'], Logger::INFO); return; } try { $photo = Image::storePhoto($a, $item['uid'], $resource['data'], $resource['url']); } catch (Exception $e) { - logger('retriever_transform_images caught exception ' . $e->getMessage()); + Logger::log('retriever_transform_images caught exception ' . $e->getMessage()); return; } if (!array_key_exists('full', $photo)) { - logger('retriever_transform_images: no replacement URL for image ' . $resource['url']); + Logger::log('retriever_transform_images: no replacement URL for image ' . $resource['url']); return; } $new_url = $photo['full']; - logger('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . - $new_url . ' in item ' . $item['plink'], LOGGER_DEBUG); + Logger::log('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . + $new_url . ' in item ' . 
$item['plink'], Logger::DEBUG); $transformed = str_replace($resource["url"], $new_url, $item['body']); if ($transformed === $item['body']) { return; @@ -672,7 +673,7 @@ function retriever_content($a) { $feeds = q("SELECT `id`, `name`, `thumb` FROM contact WHERE `uid` = %d AND `network` = 'feed'", local_user()); foreach ($feeds as $k=>$v) { - $feeds[$k]['url'] = $a->get_baseurl() . '/retriever/' . $v['id']; + $feeds[$k]['url'] = $a->getBaseUrl() . '/retriever/' . $v['id']; } $template = get_markup_template('/help.tpl', 'addon/retriever/'); $a->page['content'] .= replace_macros($template, array( @@ -776,7 +777,7 @@ function retriever_contact_photo_menu($a, &$args) { } function retriever_post_remote_hook(&$a, &$item) { - logger('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG); + Logger::log('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG); $retriever = get_retriever($item['contact-id'], $item["uid"], false); if ($retriever) { From aa12fec3f5238055f9b49e8e242da51766e2e629 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sat, 20 Jul 2019 14:45:10 +0100 Subject: [PATCH 04/96] more fixes --- retriever/retriever.php | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 18351f1e..3072a743 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -11,6 +11,7 @@ use Friendica\Core\Addon; use Friendica\Core\Config; use Friendica\Core\PConfig; use Friendica\Core\Logger; +use Friendica\Core\Renderer; use Friendica\Content\Text\HTML; use Friendica\Content\Text\BBCode; use Friendica\Object\Image; @@ -533,8 +534,8 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { } $params = array('$spec' => $retriever['data']); - $extract_template = get_markup_template('extract.tpl', 'addon/retriever/'); - $extract_xslt = 
replace_macros($extract_template, $params); + $extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/'); + $extract_xslt = Renderer::replaceMacros($extract_template, $params); if ($retriever['data']['include']) { $doc = retriever_apply_xslt_text($extract_xslt, $doc); } @@ -550,8 +551,8 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { $rooturl = $components['scheme'] . "://" . $components['host']; $dirurl = $rooturl . dirname($components['path']) . "/"; $params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl); - $fix_urls_template = get_markup_template('fix-urls.tpl', 'addon/retriever/'); - $fix_urls_xslt = replace_macros($fix_urls_template, $params); + $fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/'); + $fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params); $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc); if (!$doc) { Logger::log('retriever_apply_dom_filter: failed to apply fix urls XSLT template', Logger::INFO); @@ -675,8 +676,8 @@ function retriever_content($a) { foreach ($feeds as $k=>$v) { $feeds[$k]['url'] = $a->getBaseUrl() . '/retriever/' . $v['id']; } - $template = get_markup_template('/help.tpl', 'addon/retriever/'); - $a->page['content'] .= replace_macros($template, array( + $template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/'); + $a->page['content'] .= Renderer::replaceMacros($template, array( '$config' => $a->get_baseurl() . '/settings/addon', '$feeds' => $feeds)); return; @@ -718,8 +719,8 @@ function retriever_content($a) { $a->page['content'] .= ".

"; } - $template = get_markup_template('/rule-config.tpl', 'addon/retriever/'); - $a->page['content'] .= replace_macros($template, array( + $template = Renderer::getMarkupTemplate('/rule-config.tpl', 'addon/retriever/'); + $a->page['content'] .= Renderer::replaceMacros($template, array( '$enable' => array( 'retriever_enable', L10n::t('Enabled'), @@ -801,8 +802,8 @@ function retriever_post_remote_hook(&$a, &$item) { function retriever_plugin_settings(&$a,&$s) { $all_photos = PConfig::get(local_user(), 'retriever', 'all_photos'); $oembed = PConfig::get(local_user(), 'retriever', 'oembed'); - $template = get_markup_template('/settings.tpl', 'addon/retriever/'); - $s .= replace_macros($template, array( + $template = Renderer::getMarkupTemplate('/settings.tpl', 'addon/retriever/'); + $s .= Renderer::replaceMacros($template, array( '$allphotos' => array( 'retriever_all_photos', L10n::t('All Photos'), From 9e434957b0ebd5245afbe66ed6082f94d6d53edc Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 21 Jul 2019 18:27:14 +0100 Subject: [PATCH 05/96] now working retriever --- retriever/retriever.php | 147 +++++++--------------------------------- 1 file changed, 23 insertions(+), 124 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 3072a743..97f29694 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -18,6 +18,7 @@ use Friendica\Object\Image; use Friendica\Util\Network; use Friendica\Core\L10n; use Friendica\Database\DBA; +use Friendica\Model\ItemURI; function retriever_install() { Addon::registerHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings'); @@ -27,116 +28,6 @@ function retriever_install() { Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); $r = q("SELECT `id` FROM `pconfig` WHERE `cat` LIKE 'retriever_%%'"); - if (count($r) || (Config::get('retriever', 'dbversion') == '0.1')) { - $retrievers = array(); - $r = q("SELECT SUBSTRING(`cat`, 10) AS 
`contact`, `k`, `v` FROM `pconfig` WHERE `cat` LIKE 'retriever%%'"); - foreach ($r as $rr) { - $retrievers[$rr['contact']][$rr['k']] = $rr['v']; - } - foreach ($retrievers as $k => $v) { - $rr = q("SELECT `uid` FROM `contact` WHERE `id` = %d", intval($k)); - $uid = $rr[0]['uid']; - $v['images'] = 'on'; - q("INSERT INTO `retriever_rule` (`uid`, `contact-id`, `data`) VALUES (%d, %d, '%s')", - intval($uid), intval($k), DBA::escape(json_encode($v))); - } - q("DELETE FROM `pconfig` WHERE `cat` LIKE 'retriever_%%'"); - Config::set('retriever', 'dbversion', '0.2'); - } - if (Config::get('retriever', 'dbversion') == '0.2') { - q("ALTER TABLE `retriever_resource` DROP COLUMN `retriever`"); - Config::set('retriever', 'dbversion', '0.3'); - } - if (Config::get('retriever', 'dbversion') == '0.3') { - q("ALTER TABLE `retriever_item` MODIFY COLUMN `item-uri` varchar(800) CHARACTER SET ascii NOT NULL"); - q("ALTER TABLE `retriever_resource` MODIFY COLUMN `url` varchar(800) CHARACTER SET ascii NOT NULL"); - Config::set('retriever', 'dbversion', '0.4'); - } - if (Config::get('retriever', 'dbversion') == '0.4') { - q("ALTER TABLE `retriever_item` ADD COLUMN `finished` tinyint(1) unsigned NOT NULL DEFAULT '0'"); - Config::set('retriever', 'dbversion', '0.5'); - } - if (Config::get('retriever', 'dbversion') == '0.5') { - q('ALTER TABLE `retriever_resource` CHANGE `created` `created` timestamp NOT NULL DEFAULT now()'); - q('ALTER TABLE `retriever_resource` CHANGE `completed` `completed` timestamp NULL DEFAULT NULL'); - q('ALTER TABLE `retriever_resource` CHANGE `last-try` `last-try` timestamp NULL DEFAULT NULL'); - q('ALTER TABLE `retriever_item` DROP KEY `all`'); - q('ALTER TABLE `retriever_item` ADD KEY `all` (`item-uri`, `item-uid`, `contact-id`)'); - Config::set('retriever', 'dbversion', '0.6'); - } - if (Config::get('retriever', 'dbversion') == '0.6') { - q('ALTER TABLE `retriever_item` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin'); - q('ALTER TABLE `retriever_item` CHANGE 
`item-uri` `item-uri` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL'); - q('ALTER TABLE `retriever_resource` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin'); - q('ALTER TABLE `retriever_resource` CHANGE `url` `url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL'); - q('ALTER TABLE `retriever_rule` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin'); - Config::set('retriever', 'dbversion', '0.7'); - } - if (Config::get('retriever', 'dbversion') == '0.7') { - $r = q("SELECT `id`, `data` FROM `retriever_rule`"); - foreach ($r as $rr) { - Logger::log('retriever_install: retriever ' . $rr['id'] . ' old config ' . $rr['data'], Logger::DATA); - $data = json_decode($rr['data'], true); - if ($data['pattern']) { - $matches = array(); - if (preg_match("/\/(.*)\//", $data['pattern'], $matches)) { - $data['pattern'] = $matches[1]; - } - } - if ($data['match']) { - $include = array(); - foreach (explode('|', $data['match']) as $component) { - $matches = array(); - if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[@([A-Za-z][a-z0-9]*)='([^']*)'\]/", $component, $matches)) { - $include[] = array( - 'element' => $matches[1], - 'attribute' => $matches[2], - 'value' => $matches[3]); - } - if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[contains(concat(' ',normalize-space(@class),' '),' ([^ ']+) ')]/", $component, $matches)) { - $include[] = array( - 'element' => $matches[1], - 'attribute' => $matches[2], - 'value' => $matches[3]); - } - } - $data['include'] = $include; - unset($data['match']); - } - if ($data['remove']) { - $exclude = array(); - foreach (explode('|', $data['remove']) as $component) { - $matches = array(); - if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[@([A-Za-z][a-z0-9]*)='([^']*)'\]/", $component, $matches)) { - $exclude[] = array( - 'element' => $matches[1], - 'attribute' => $matches[2], - 'value' => $matches[3]); - } - if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[contains(concat(' ',normalize-space(@class),' '),' ([^ ']+) ')]/", $component, $matches)) { - 
$exclude[] = array( - 'element' => $matches[1], - 'attribute' => $matches[2], - 'value' => $matches[3]); - } - } - $data['exclude'] = $exclude; - unset($data['remove']); - } - $r = q('UPDATE `retriever_rule` SET `data` = "%s" WHERE `id` = %d', DBA::escape(json_encode($data)), $rr['id']); - Logger::log('retriever_install: retriever ' . $rr['id'] . ' new config ' . json_encode($data), Logger::DATA); - } - Config::set('retriever', 'dbversion', '0.8'); - } - if (Config::get('retriever', 'dbversion') == '0.8') { - q("ALTER TABLE `retriever_resource` ADD COLUMN `http-code` smallint(1) unsigned NULL DEFAULT NULL"); - Config::set('retriever', 'dbversion', '0.9'); - } - if (Config::get('retriever', 'dbversion') == '0.9') { - q("ALTER TABLE `retriever_item` DROP COLUMN `parent`"); - q("ALTER TABLE `retriever_resource` ADD COLUMN `redirect-url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NULL DEFAULT NULL"); - Config::set('retriever', 'dbversion', '0.10'); - } if (Config::get('retriever', 'dbversion') == '0.10') { q("ALTER TABLE `retriever_resource` MODIFY COLUMN `type` char(255) NULL DEFAULT NULL"); q("ALTER TABLE `retriever_resource` MODIFY COLUMN `data` mediumblob NULL DEFAULT NULL"); @@ -347,6 +238,7 @@ function retriever_get_retriever_item($id) { } function retriever_get_item($retriever_item) { + // @@@ Need to replace this with Item::selectFirst $items = q("SELECT * FROM `item` WHERE `uri` = '%s' AND `uid` = %d AND `contact-id` = %d", DBA::escape($retriever_item['item-uri']), intval($retriever_item['item-uid']), @@ -537,9 +429,11 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { $extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/'); $extract_xslt = Renderer::replaceMacros($extract_template, $params); if ($retriever['data']['include']) { + Logger::log('retriever_apply_dom_filter: applying include/exclude template \"' . $extract_xslt . 
'\"', Logger::DEBUG); $doc = retriever_apply_xslt_text($extract_xslt, $doc); } if (array_key_exists('customxslt', $retriever['data']) && $retriever['data']['customxslt']) { + Logger::log('retriever_apply_dom_filter: applying custom XSLT \"' . $retriever['data']['customxslt'] . '\"', Logger::DEBUG); $doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc); } if (!$doc) { @@ -559,16 +453,21 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { return; } - $item['body'] = HTML::toBBCode($doc->saveHTML()); - if (!strlen($item['body'])) { + $body = HTML::toBBCode($doc->saveHTML()); + if (!strlen($body)) { Logger::log('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', Logger::INFO); return; } - $item['body'] .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . ': [url='; - $item['body'] .= $item['plink']; - $item['body'] .= ']' . $item['plink'] . '[/url]'; - DBA::update('item', ['body' => $item['body']], ['id' => $item['id']]); - DBA::update('item-content', ['body' => $item['body']], ['uri' => $item['uri']]); + $body .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . ': [url='; + $body .= $item['plink']; + $body .= ']' . $item['plink'] . '[/url]'; + + $uri_id = ItemURI::getIdByURI($item['uri']); + //@@@ remove this + $item['body'] = $body; + Logger::log('retriever_apply_dom_filter: XSLT result \"' . $body . '\"', Logger::DATA); + DBA::update('item', ['body' => $body], ['id' => $item['id']]); + DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); } function retrieve_images(&$item, $a) { @@ -678,18 +577,18 @@ function retriever_content($a) { } $template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/'); $a->page['content'] .= Renderer::replaceMacros($template, array( - '$config' => $a->get_baseurl() . '/settings/addon', + '$config' => $a->getBaseUrl() . 
'/settings/addon', '$feeds' => $feeds)); return; } if ($a->argv[1]) { $retriever = get_retriever($a->argv[1], local_user(), false); - if (x($_POST["id"])) { + if (!empty($_POST["id"])) { $retriever = get_retriever($a->argv[1], local_user(), true); $retriever["data"] = array(); foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt') as $setting) { - if (x($_POST['retriever_' . $setting])) { + if (!empty($_POST['retriever_' . $setting])) { $retriever["data"][$setting] = $_POST['retriever_' . $setting]; } } @@ -712,7 +611,7 @@ function retriever_content($a) { q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d", DBA::escape(json_encode($retriever["data"])), intval($retriever["id"])); $a->page['content'] .= "

Settings Updated"; - if (x($_POST["retriever_retrospective"])) { + if (!empty($_POST["retriever_retrospective"])) { apply_retrospective($a, $retriever, $_POST["retriever_retrospective"]); $a->page['content'] .= " and retrospectively applied to " . $_POST["apply"] . " posts"; } @@ -750,7 +649,7 @@ function retriever_content($a) { $retriever['data']['customxslt'], L10n::t("When standard rules aren't enough, apply custom XSLT to the article")), '$title' => L10n::t('Retrieve Feed Content'), - '$help' => $a->get_baseurl() . '/retriever/help', + '$help' => $a->getBaseUrl() . '/retriever/help', '$help_t' => L10n::t('Get Help'), '$submit_t' => L10n::t('Submit'), '$submit' => L10n::t('Save Settings'), @@ -773,7 +672,7 @@ function retriever_contact_photo_menu($a, &$args) { return; } if ($args["contact"]["network"] == "feed") { - $args["menu"][ 'retriever' ] = array(L10n::t('Retriever'), $a->get_baseurl() . '/retriever/' . $args["contact"]['id']); + $args["menu"][ 'retriever' ] = array(L10n::t('Retriever'), $a->getBaseUrl() . '/retriever/' . $args["contact"]['id']); } } @@ -816,7 +715,7 @@ function retriever_plugin_settings(&$a,&$s) { L10n::t('Check this to attempt to retrieve embedded content for all posts - useful e.g. for Facebook posts')), '$submit' => L10n::t('Save Settings'), '$title' => L10n::t('Retriever Settings'), - '$help' => $a->get_baseurl() . '/retriever/help')); + '$help' => $a->getBaseUrl() . 
'/retriever/help')); } function retriever_plugin_settings_post($a,$post) { From d086f2b3bb45dd8ef1a6beb75252e7b8b5ec6395 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 21 Jul 2019 18:27:55 +0100 Subject: [PATCH 06/96] beginnings of persistent cookiejar support --- retriever/retriever.php | 5 +++++ retriever/templates/rule-config.tpl | 1 + 2 files changed, 6 insertions(+) diff --git a/retriever/retriever.php b/retriever/retriever.php index 97f29694..78a79a0e 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -643,6 +643,11 @@ function retriever_content($a) { L10n::t('Retrospectively Apply'), '0', L10n::t('Reapply the rules to this number of posts')), + '$cookies' => array( + 'retriever_cookies', + L10n::t('Cookies'), + $retriever['data']['cookies'], + L10n::t("Persistent cookies for this feed. Netscape cookie file format.")), '$customxslt' => array( 'retriever_customxslt', L10n::t('Custom XSLT'), diff --git a/retriever/templates/rule-config.tpl b/retriever/templates/rule-config.tpl index 228d0326..847d9c3f 100644 --- a/retriever/templates/rule-config.tpl +++ b/retriever/templates/rule-config.tpl @@ -106,6 +106,7 @@ function retriever_remove_row(id, number) +{{include file="field_textarea.tpl" field=$cookies}} {{include file="field_textarea.tpl" field=$customxslt}} From c4543de56197576be8fe0ac8854f712e3fea9b39 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 21 Jul 2019 19:32:30 +0100 Subject: [PATCH 07/96] More preparation for persistent cookies --- retriever/retriever.php | 17 +++++++++++------ retriever/templates/rule-config.tpl | 19 ++++++++++++++++++- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 78a79a0e..bb3460a1 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -587,7 +587,7 @@ function retriever_content($a) { if (!empty($_POST["id"])) { $retriever = get_retriever($a->argv[1], local_user(), true); $retriever["data"] = array(); - 
foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt') as $setting) { + foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) { if (!empty($_POST['retriever_' . $setting])) { $retriever["data"][$setting] = $_POST['retriever_' . $setting]; } @@ -643,11 +643,16 @@ function retriever_content($a) { L10n::t('Retrospectively Apply'), '0', L10n::t('Reapply the rules to this number of posts')), - '$cookies' => array( - 'retriever_cookies', - L10n::t('Cookies'), - $retriever['data']['cookies'], - L10n::t("Persistent cookies for this feed. Netscape cookie file format.")), + 'storecookies' => array( + 'retriever_storecookies', + L10n::t('Store cookies'), + $retriever['data']['storecookies'], + L10n::t("Preserve cookie data across fetches.")), + '$cookiedata' => array( + 'retriever_cookiedata', + L10n::t('Cookie Data'), + $retriever['data']['cookiedata'], + L10n::t("Latest cookie data for this feed. Netscape cookie file format.")), '$customxslt' => array( 'retriever_customxslt', L10n::t('Custom XSLT'), diff --git a/retriever/templates/rule-config.tpl b/retriever/templates/rule-config.tpl index 847d9c3f..9061d1ff 100644 --- a/retriever/templates/rule-config.tpl +++ b/retriever/templates/rule-config.tpl @@ -40,6 +40,22 @@ function retriever_remove_row(id, number) var row = document.getElementById(id + '-' + number); tbody.removeChild(row); } + +function retriever_toggle_cookiedata_block() +{ + var div = document.querySelector("#id_retriever_cookiedata").parentNode; + if (document.querySelector("#id_retriever_storecookies").checked) { + div.style.display = "block"; + } + else { + div.style.display = "none"; + } +} + +document.addEventListener('DOMContentLoaded', function() { + retriever_toggle_cookiedata_block(); + document.querySelector("#id_retriever_storecookies").addEventListener('change', retriever_toggle_cookiedata_block, false); +}, false);

{{$title}}

{{$help_t}}

@@ -106,8 +122,9 @@ function retriever_remove_row(id, number) -{{include file="field_textarea.tpl" field=$cookies}} {{include file="field_textarea.tpl" field=$customxslt}} +{{include file="field_checkbox.tpl" field=$storecookies}} +{{include file="field_textarea.tpl" field=$cookiedata}} From 049fcc0494d088654723c46d4dd33626feb89ae0 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 24 Jul 2019 06:48:23 +0100 Subject: [PATCH 08/96] tentative database work --- retriever/database.sql | 1 + retriever/retriever.php | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/retriever/database.sql b/retriever/database.sql index 340e33eb..2a0db966 100644 --- a/retriever/database.sql +++ b/retriever/database.sql @@ -24,6 +24,7 @@ CREATE TABLE IF NOT EXISTS `retriever_item` ( CREATE TABLE IF NOT EXISTS `retriever_resource` ( `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `contact-id` int(10) unsigned NOT NULL DEFAULT '0', `type` char(255) NULL DEFAULT NULL, `binary` int(1) NOT NULL DEFAULT 0, `url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL, diff --git a/retriever/retriever.php b/retriever/retriever.php index bb3460a1..adf9681e 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -41,6 +41,10 @@ function retriever_install() { q("ALTER TABLE `retriever_item` ADD INDEX `item-uid` (`item-uid`)"); Config::set('retriever', 'dbversion', '0.12'); } + /* if (Config::get('retriever', 'dbversion') == '0.12') { */ + /* q("ALTER TABLE `retriever_resource` ADD COLUMN `contact-id` int(10) unsigned NULL AFTER `id`"); */ + /* Config::set('retriever', 'dbversion', '0.13'); */ + /* } */ if (Config::get('retriever', 'dbversion') != '0.12') { $schema = file_get_contents(dirname(__file__).'/database.sql'); $arr = explode(';', $schema); From 4050928be71b7fd9b78c2aaf6d6e4eb4e16a9381 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Fri, 26 Jul 2019 05:49:53 +0100 Subject: [PATCH 09/96] fix --- retriever/retriever.php | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index adf9681e..65471be9 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -189,9 +189,9 @@ function retrieve_resource($resource) { try { Logger::log('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], Logger::DEBUG); - $redirects; + $redirects = 0; $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); - $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, array('cookiejar' => $cookiejar)); + $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar); unlink($cookiejar); $resource['data'] = $fetch_result->getBody(); $resource['http-code'] = $fetch_result->getReturnCode(); From df1a5a8a3f93609ff515e94461fd7cbf2fef2ba5 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 15 Sep 2019 09:26:25 +0100 Subject: [PATCH 10/96] fixed a bug and commented on another --- retriever/retriever.php | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/retriever/retriever.php b/retriever/retriever.php index 65471be9..c70f906e 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -367,6 +367,15 @@ function add_retriever_resource($a, $url, $binary = false) { function add_retriever_item(&$item, $resource) { Logger::log('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG); + $r = q("SELECT COUNT(*) FROM `retriever_item` WHERE " . + "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d", + DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id'])); + if ($r[0]['COUNT(*)'] > 0) { + Logger::log("add_retriever_item: retriever item already present for " . + $item['uri'] . ' ' . $item['uid'] . ' ' . 
$item['contact-id'], + Logger::INFO); + return; + } q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " . "VALUES ('%s', %d, %d, %d)", DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource["id"])); @@ -536,6 +545,7 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc } function retriever_transform_images($a, &$item, $resource) { + return; //@@@ not working if (!$resource["data"]) { Logger::log('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url'], Logger::INFO); From cc438120ac18dd0a2472ed60bdbbb20dee59b816 Mon Sep 17 00:00:00 2001 From: Administrator Date: Sun, 22 Sep 2019 11:47:30 +0200 Subject: [PATCH 11/96] this is working OK --- retriever/database.sql | 1 + retriever/retriever.php | 387 +++++++++++++++++++++++++--------------- 2 files changed, 247 insertions(+), 141 deletions(-) diff --git a/retriever/database.sql b/retriever/database.sql index 2a0db966..a29135e7 100644 --- a/retriever/database.sql +++ b/retriever/database.sql @@ -24,6 +24,7 @@ CREATE TABLE IF NOT EXISTS `retriever_item` ( CREATE TABLE IF NOT EXISTS `retriever_resource` ( `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `item-uid` int(10) unsigned NOT NULL DEFAULT '0', `contact-id` int(10) unsigned NOT NULL DEFAULT '0', `type` char(255) NULL DEFAULT NULL, `binary` int(1) NOT NULL DEFAULT 0, diff --git a/retriever/retriever.php b/retriever/retriever.php index c70f906e..5644952a 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -12,13 +12,16 @@ use Friendica\Core\Config; use Friendica\Core\PConfig; use Friendica\Core\Logger; use Friendica\Core\Renderer; +use Friendica\Core\System; use Friendica\Content\Text\HTML; use Friendica\Content\Text\BBCode; +use Friendica\Model\Photo; use Friendica\Object\Image; use Friendica\Util\Network; use Friendica\Core\L10n; use Friendica\Database\DBA; use Friendica\Model\ItemURI; +use 
Friendica\Model\Item; function retriever_install() { Addon::registerHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings'); @@ -41,17 +44,18 @@ function retriever_install() { q("ALTER TABLE `retriever_item` ADD INDEX `item-uid` (`item-uid`)"); Config::set('retriever', 'dbversion', '0.12'); } - /* if (Config::get('retriever', 'dbversion') == '0.12') { */ - /* q("ALTER TABLE `retriever_resource` ADD COLUMN `contact-id` int(10) unsigned NULL AFTER `id`"); */ - /* Config::set('retriever', 'dbversion', '0.13'); */ - /* } */ - if (Config::get('retriever', 'dbversion') != '0.12') { + if (Config::get('retriever', 'dbversion') == '0.12') { + q("ALTER TABLE `retriever_resource` ADD COLUMN `contact-id` int(10) unsigned NOT NULL DEFAULT '0' AFTER `id`"); + q("ALTER TABLE `retriever_resource` ADD COLUMN `item-uid` int(10) unsigned NOT NULL DEFAULT '0' AFTER `id`"); + Config::set('retriever', 'dbversion', '0.13'); + } + if (Config::get('retriever', 'dbversion') != '0.13') { $schema = file_get_contents(dirname(__file__).'/database.sql'); $arr = explode(';', $schema); foreach ($arr as $a) { $r = q($a); } - Config::set('retriever', 'dbversion', '0.12'); + Config::set('retriever', 'dbversion', '0.13'); } } @@ -68,7 +72,11 @@ function retriever_uninstall() { function retriever_module() {} function retriever_cron($a, $b) { - // 100 is a nice sane number. Maybe this should be configurable. + // 100 is a nice sane number. Maybe this should be configurable. 
@@@ + + // Do this first, otherwise it can interfere with retreiver_retrieve_items + retriever_clean_up_completed_resources(100, $a); + retriever_retrieve_items(100, $a); retriever_tidy(); } @@ -76,6 +84,7 @@ function retriever_cron($a, $b) { $retriever_item_count = 0; function retriever_retrieve_items($max_items, $a) { + Logger::log('@@@ retriever_retrieve_items', Logger::INFO); global $retriever_item_count; $retriever_schedule = array(array(1,'minute'), @@ -98,56 +107,61 @@ function retriever_retrieve_items($max_items, $a) { $retrieve_items = $max_items - $retriever_item_count; Logger::log('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items, Logger::DEBUG); do { - $r = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d", + Logger::log('@@@ retriever_retrieve_items loop max ' . $max_items . ' count ' . $retriever_item_count, Logger::INFO); + Logger::log("@@@ SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR " . implode($schedule_clauses, ' OR ') . ") ORDER BY `last-try` ASC LIMIT " . $retrieve_items, Logger::INFO); + $retriever_resources = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d", DBA::escape(implode($schedule_clauses, ' OR ')), intval($retrieve_items)); - if (!is_array($r)) { + if (!is_array($retriever_resources)) { break; } - if (count($r) == 0) { + if (count($retriever_resources) == 0) { break; } - Logger::log('retriever_retrieve_items: found ' . count($r) . ' waiting resources in database', Logger::DEBUG); - foreach ($r as $rr) { - retrieve_resource($rr); + Logger::log('retriever_retrieve_items: found ' . count($retriever_resources) . 
' waiting resources in database', Logger::DEBUG); + foreach ($retriever_resources as $retriever_resource) { + Logger::log('@@@ need to get the retriever config here cid ' . $retriever_resource['contact-id'] . ' uid ' . $retriever_resource['item-uid'], Logger::INFO); + retrieve_resource($retriever_resource); $retriever_item_count++; } $retrieve_items = $max_items - $retriever_item_count; } while ($retrieve_items > 0); + // @@@ todo: when items add further items (i.e. images), do the new images go round this loop again? + Logger::log('@@@ retriever_retrieve_items: finished retrieving items', Logger::INFO); +} - /* Look for items that are waiting even though the resource has - * completed. This usually happens because we've been asked to - * retrospectively apply a config change. It could also happen - * due to a cron job dying or something. */ +/* Look for items that are waiting even though the resource has + * completed. This usually happens because we've been asked to + * retrospectively apply a config change. It could also happen due to + * a cron job dying or something. */ +function retriever_clean_up_completed_resources($max_items, $a) { $r = q("SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d", - intval($retrieve_items)); + intval($max_items)); if (!$r) { $r = array(); } - Logger::log('retriever_retrieve_items: items waiting even though resource has completed: ' . count($r), Logger::DEBUG); + Logger::log('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . 
count($r), Logger::DEBUG); foreach ($r as $rr) { $resource = q("SELECT * FROM retriever_resource WHERE `id` = %d", $rr['resource']); $retriever_item = retriever_get_retriever_item($rr['item']); - if (!$retriever_item) { - Logger::log('retriever_retrieve_items: no retriever item with id ' . $rr['item'], Logger::INFO); + if (!DBA::isResult($retriever_item)) { + Logger::log('retriever_clean_up_completed_resources: no retriever item with id ' . $rr['item'], Logger::WARNING); continue; } $item = retriever_get_item($retriever_item); if (!$item) { - Logger::log('retriever_retrieve_items: no item ' . $retriever_item['item-uri'], Logger::INFO); + Logger::log('retriever_clean_up_completed_resources: no item ' . $retriever_item['item-uri'], Logger::WARNING); continue; } - $retriever = get_retriever($item['contact-id'], $item['uid']); - if (!$retriever) { - Logger::log('retriever_retrieve_items: no retriever for item ' . - $retriever_item['item-uri'] . ' ' . $retriever_item['uid'] . ' ' . $item['contact-id'], - Logger::INFO); + $retriever_rule = get_retriever_rule($retriever_item['contact-id'], $item['uid']); + if (!$retriever_rule) { + Logger::log('retriever_clean_up_completed_resources: no retriever for uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['uid'] . ' ' . 
$retriever_item['contact-id'], Logger::WARNING); continue; } - retriever_apply_completed_resource_to_item($retriever, $item, $resource[0], $a); - q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", - intval($retriever_item['id'])); + Logger::log('@@@ retriever_clean_up_completed_resources: about to retriever_apply_completed_resource_to_item', Logger::INFO); + retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource[0], $a); + q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", intval($retriever_item['id'])); retriever_check_item_completed($item); } } @@ -157,7 +171,7 @@ function retriever_tidy() { q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)"); $r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); - Logger::log('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource'); + Logger::log('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource', Logger::INFO); foreach ($r as $rr) { q('DELETE FROM retriever_item WHERE id = %d', intval($rr['id'])); } @@ -165,7 +179,7 @@ function retriever_tidy() { function retrieve_dataurl_resource($resource) { if (!preg_match("/date:(.*);base64,(.*)/", $resource['url'], $matches)) { - Logger::log('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern'); + Logger::log('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern', Logger::INFO); } else { $resource['type'] = $matches[1]; $resource['data'] = base64url_decode($matches[2]); @@ -180,28 +194,36 @@ function retrieve_dataurl_resource($resource) { } function retrieve_resource($resource) { + Logger::log('@@@ retrieve_resource: url ' . $resource['url'] . ' uid ' . $resource['item-uid'] . ' cid ' . 
$resource['contact-id'], Logger::INFO); + if (substr($resource['url'], 0, 5) == "data:") { return retrieve_dataurl_resource($resource); } $a = get_app(); + $retriever_rule = get_retriever_rule($resource['contact-id'], $resource['item-uid']); + try { - Logger::log('retrieve_resource: ' . ($resource['num-tries'] + 1) . - ' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], Logger::DEBUG); + Logger::log('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], Logger::DEBUG); $redirects = 0; $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); + if ($retriever_rule['storecookies']) { + file_put_contents($cookiejar, $retriever_rule['cookiedata']); + } $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar); + if ($retriever_rule['storecookies']) { + $retriever_rule['cookiedata'] = file_get_contents($cookiejar); + //@@@ do the store here + } unlink($cookiejar); $resource['data'] = $fetch_result->getBody(); $resource['http-code'] = $fetch_result->getReturnCode(); $resource['type'] = $fetch_result->getContentType(); $resource['redirect-url'] = $fetch_result->getRedirectUrl(); - Logger::log('retrieve_resource: got code ' . $resource['http-code'] . - ' retrieving resource ' . $resource['id'] . - ' final url ' . $resource['redirect-url'], Logger::DEBUG); + Logger::log('retrieve_resource: got code ' . $resource['http-code'] . ' retrieving resource ' . $resource['id'] . ' final url ' . $resource['redirect-url'], Logger::DEBUG); } catch (Exception $e) { - Logger::log('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage()); + Logger::log('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . 
$e->getMessage(), Logger::INFO); } q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `http-code` = %d, `redirect-url` = '%s' WHERE id = %d", intval($resource['http-code']), @@ -214,13 +236,17 @@ function retrieve_resource($resource) { intval($resource['id'])); retriever_resource_completed($resource, $a); } + Logger::log('@@@ retrieve_resource finished: ' . $resource['url'], Logger::INFO); } -function get_retriever($contact_id, $uid, $create = false) { +function get_retriever_rule($contact_id, $uid, $create = false) { + Logger::log('@@@ get_retriever_rule ' . "SELECT * FROM `retriever_rule` WHERE `contact-id` = " . intval($contact_id) . " AND `uid` = " . intval($uid), Logger::INFO); $r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d", intval($contact_id), intval($uid)); + Logger::log('@@@ get_retriever_rule count is ' . count($r), Logger::INFO); if (count($r)) { $r[0]['data'] = json_decode($r[0]['data'], true); + Logger::log('@@@ get_retriever_rule returning an actual thing', Logger::INFO); return $r[0]; } if ($create) { @@ -233,43 +259,62 @@ function get_retriever($contact_id, $uid, $create = false) { } function retriever_get_retriever_item($id) { - $retriever_items = q("SELECT * FROM `retriever_item` WHERE id = %d", intval($id)); - if (count($retriever_items) != 1) { - Logger::log('retriever_get_retriever_item: unable to find retriever_item ' . 
$id, Logger::INFO); - return; + return DBA::selectFirst('retriever_item', [], ['id' => intval($id)]); +} + +function retriever_class_of_item($item) { //@@@ + if (!$item) { + return 'false'; } - return $retriever_items[0]; + if (array_key_exists('finished', $item)) { + Logger::log('@@@ oh no this is a bad thing', Logger::INFO); + return 'retriever_item'; + } + if (array_key_exists('moderated', $item)) { + return 'friendica_item'; + } + return 'unknown'; +} + +function mat_test($item) { //@@@ + return 'mat_test'; } function retriever_get_item($retriever_item) { - // @@@ Need to replace this with Item::selectFirst - $items = q("SELECT * FROM `item` WHERE `uri` = '%s' AND `uid` = %d AND `contact-id` = %d", - DBA::escape($retriever_item['item-uri']), - intval($retriever_item['item-uid']), - intval($retriever_item['contact-id'])); - if (count($items) != 1) { - Logger::log('retriever_get_item: unexpected number of results ' . - count($items) . " when searching for item $uri $uid $cid", Logger::INFO); - return; + // @@@ add contact id as a search term + Logger::log('@@@ retriever_get_item uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['item-uid'] . ' cid ' . $retriever_item['contact-id'], Logger::INFO); + try {//@@@ not necessary + $item = Item::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid'])]); + Logger::log('@@@ 1 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); + if (!DBA::isResult($item)) { + Logger::log('retriever_get_item: no item found for uri ' . $retriever_item['item-uri'], Logger::INFO); + return; + } + Logger::log('@@@ retriever_get_item: yay item found for uri ' . $retriever_item['item-uri'] . ' guid ' . $item['guid'] . ' plink ' . $item['plink'], Logger::INFO); + return $item; + } catch (Exception $e) { + Logger::log('retriever_get_item: exception ' . 
$e->getMessage(), Logger::INFO); } - return $items[0]; } function retriever_item_completed($retriever_item_id, $resource, $a) { Logger::log('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url'], Logger::DEBUG); $retriever_item = retriever_get_retriever_item($retriever_item_id); - if (!$retriever_item) { + if (!DBA::isResult($retriever_item)) { + Logger::log('retriever_item_completed: no retriever item with id ' . $retriever_item_id, Logger::INFO); + return; + } + $item = retriever_get_item($retriever_item); + Logger::log('@@@ 2 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); + if (!$item) { + Logger::log('retriever_item_completed: no item ' . $retriever_item['item-uri'], Logger::INFO); return; } // Note: the retriever might be null. Doesn't matter. - $retriever = get_retriever($retriever_item['contact-id'], $retriever_item['item-uid']); - $item = retriever_get_item($retriever_item); - if (!$item) { - return; - } + $retriever_rule = get_retriever_rule($retriever_item['contact-id'], $retriever_item['item-uid']); - retriever_apply_completed_resource_to_item($retriever, $item, $resource, $a); + retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a); q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", intval($retriever_item['id'])); @@ -288,18 +333,24 @@ function apply_retrospective($a, $retriever, $num) { $r = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d", intval($retriever['contact-id']), intval($num)); foreach ($r as $item) { + Logger::log('@@@ 3 item class is ' . retriever_class_of_item($item) . ' ' . 
mat_test($item)); //@@@ already know this is wrong q('UPDATE `item` SET `visible` = 0 WHERE `id` = %d', $item['id']); q('UPDATE `thread` SET `visible` = 0 WHERE `iid` = %d', $item['id']); retriever_on_item_insert($a, $retriever, $item); } } +//@@@ make this trigger a retriever immediately somehow +//@@@ need a lock to say something is doing something function retriever_on_item_insert($a, $retriever, &$item) { + Logger::log('@@@ 4 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); + Logger::log('@@@ retriever_on_item_insert start ' . $item['plink'], Logger::INFO); if (!$retriever || !$retriever['id']) { Logger::log('retriever_on_item_insert: No retriever supplied', Logger::INFO); return; } if (!$retriever["data"]['enable'] == "on") { + Logger::log('@@@ retriever_on_item_insert: Disabled', Logger::INFO); return; } if (array_key_exists('pattern', $retriever["data"]) && $retriever["data"]['pattern']) { @@ -310,12 +361,13 @@ function retriever_on_item_insert($a, $retriever, &$item) { $url = $item['plink']; } - $resource = add_retriever_resource($a, $url); + Logger::log('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id'], Logger::DEBUG); + $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id']); $retriever_item_id = add_retriever_item($item, $resource); } -function add_retriever_resource($a, $url, $binary = false) { - Logger::log('add_retriever_resource: ' . $url, Logger::DEBUG); +function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { + Logger::log('add_retriever_resource: url ' . $url . ' uid ' . $uid . ' contact-id ' . $cid, Logger::DEBUG); $scheme = parse_url($url, PHP_URL_SCHEME); if ($scheme == 'data') { @@ -326,7 +378,7 @@ function add_retriever_resource($a, $url, $binary = false) { fclose($fp); $url = 'md5://' . 
hash('md5', $url); - $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url)); + $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid)); $resource = $r[0]; if (count($r)) { Logger::log('add_retriever_resource: Resource ' . $url . ' already requested', Logger::DEBUG); @@ -334,8 +386,10 @@ function add_retriever_resource($a, $url, $binary = false) { } Logger::log('retrieve_resource: got data URL type ' . $resource['type'], Logger::DEBUG); - q("INSERT INTO `retriever_resource` (`type`, `binary`, `url`, `completed`, `data`) " . - "VALUES ('%s', %d, '%s', now(), '%s')", + q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `type`, `binary`, `url`, `completed`, `data`) " . + "VALUES (%d, %d, '%s', %d, '%s', now(), '%s')", + intval($uid), + intval($cid), DBA::escape($type), intval($binary ? 1 : 0), DBA::escape($url), @@ -349,31 +403,30 @@ function add_retriever_resource($a, $url, $binary = false) { } if (strlen($url) > 800) { - Logger::log('add_retriever_resource: URL is longer than 800 characters', Logger::INFO); + Logger::log('add_retriever_resource: URL is longer than 800 characters', Logger::WARNING); } - $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url)); + $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid)); if (count($r)) { - Logger::log('add_retriever_resource: Resource ' . $url . ' already requested', Logger::DEBUG); + Logger::log('add_retriever_resource: Resource ' . $url . ' uid ' . $uid . ' cid ' . $cid . ' already requested', Logger::DEBUG); return $r[0]; } - q("INSERT INTO `retriever_resource` (`binary`, `url`) " . - "VALUES (%d, '%s')", intval($binary ? 1 : 0), DBA::escape($url)); + q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `binary`, `url`) " . 
+ "VALUES (%d, %d, %d, '%s')", intval($uid), intval($cid), intval($binary ? 1 : 0), DBA::escape($url)); $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url)); return $r[0]; } function add_retriever_item(&$item, $resource) { + Logger::log('@@@ 5 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG); Logger::log('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG); $r = q("SELECT COUNT(*) FROM `retriever_item` WHERE " . "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d", DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id'])); if ($r[0]['COUNT(*)'] > 0) { - Logger::log("add_retriever_item: retriever item already present for " . - $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], - Logger::INFO); + Logger::log("add_retriever_item: retriever item already present for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::INFO); return; } q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " . @@ -383,9 +436,7 @@ function add_retriever_item(&$item, $resource) { "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d ORDER BY id DESC", DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id'])); if (!count($r)) { - Logger::log("add_retriever_item: couldn't create retriever item for " . - $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], - Logger::INFO); + Logger::log("add_retriever_item: couldn't create retriever item for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::INFO); return; } Logger::log('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . 
$item['contact-id'], Logger::DEBUG); @@ -416,7 +467,9 @@ function retriever_apply_xslt_text($xslt_text, $doc) { return $result; } +//@@@ is that an item or a resource_item? I really want an item here so I can update it function retriever_apply_dom_filter($retriever, &$item, $resource) { + Logger::log('@@@ 6 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG); Logger::log('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'], Logger::DEBUG); if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) { @@ -454,18 +507,23 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { return; } + Logger::log('@@@ retriever_apply_dom_filter: 1', Logger::INFO); $components = parse_url($resource['redirect-url']); $rooturl = $components['scheme'] . "://" . $components['host']; $dirurl = $rooturl . dirname($components['path']) . "/"; + Logger::log('@@@ retriever_apply_dom_filter: 2', Logger::INFO); $params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl); $fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/'); $fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params); + Logger::log('@@@ retriever_apply_dom_filter: 3', Logger::INFO); $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc); + Logger::log('@@@ retriever_apply_dom_filter: 4', Logger::INFO); if (!$doc) { Logger::log('retriever_apply_dom_filter: failed to apply fix urls XSLT template', Logger::INFO); return; } + Logger::log('@@@ retriever_apply_dom_filter: 5', Logger::INFO); $body = HTML::toBBCode($doc->saveHTML()); if (!strlen($body)) { Logger::log('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', Logger::INFO); @@ -475,47 +533,66 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { $body .= $item['plink']; $body .= ']' . 
$item['plink'] . '[/url]'; - $uri_id = ItemURI::getIdByURI($item['uri']); - //@@@ remove this - $item['body'] = $body; + Logger::log('@@@ retriever_apply_dom_filter: 6', Logger::INFO); + $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? + Logger::log('@@@ retriever_apply_dom_filter: item id is ' . $item['id'] . ' uri id is ' . $uri_id, Logger::INFO); Logger::log('retriever_apply_dom_filter: XSLT result \"' . $body . '\"', Logger::DATA); - DBA::update('item', ['body' => $body], ['id' => $item['id']]); - DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); + DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that? + //@@@ probably Item::updateContent } function retrieve_images(&$item, $a) { + $blah_item_class = retriever_class_of_item($item) . ' ' . mat_test($item); + Logger::log('@@@ 7 item class is ' . $blah_item_class, Logger::DEBUG); + + $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? + + $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]); + $body = $content['body']; + if (!strlen($body)) { + Logger::log('retrieve_images: no body for uri-id ' . $uri_id, Logger::WARNING); + return; + } + + Logger::log('@@@ retrieve_images start looking in body "' . $body . '"', Logger::INFO); $matches1 = array(); - preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $item["body"], $matches1); + preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $body, $matches1); $matches2 = array(); - preg_match_all("/\[img\](.*?)\[\/img\]/ism", $item["body"], $matches2); + preg_match_all("/\[img\](.*?)\[\/img\]/ism", $body, $matches2); $matches = array_merge($matches1[3], $matches2[1]); Logger::log('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . 
$item['contact-id'], Logger::DEBUG); foreach ($matches as $url) { + Logger::log('@@@ retrieve_images: url ' . $url, Logger::DEBUG); if (strpos($url, get_app()->getBaseUrl()) === FALSE) { - $resource = add_retriever_resource($a, $url, true); + Logger::log('@@@ retrieve_images: it is from somewhere else', Logger::DEBUG); + Logger::log('@@@ retrieve_images: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id'], Logger::DEBUG); + $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id'], true); if (!$resource['completed']) { + Logger::log('@@@ retrieve_images: do not have it yet, get it later', Logger::DEBUG); add_retriever_item($item, $resource); } else { + Logger::log('@@@ retrieve_images: got it already, transform', Logger::DEBUG); retriever_transform_images($a, $item, $resource); } } } + Logger::log('@@@ retrieve_images end', Logger::INFO); } function retriever_check_item_completed(&$item) { + Logger::log('@@@ 9 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG); $r = q('SELECT count(*) FROM retriever_item WHERE `item-uri` = "%s" ' . 'AND `item-uid` = %d AND `contact-id` = %d AND `finished` = 0', DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id'])); $waiting = $r[0]['count(*)']; - Logger::log('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] - . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources', Logger::DEBUG); + Logger::log('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources', Logger::DEBUG); $old_visible = $item['visible']; $item['visible'] = $waiting ? 0 : 1; if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) { - Logger::log('retriever_check_item_completed: changing visible flag to ' . $item['visible'] . ' and invoking notifier ("edit_post", ' . $item['id'] . 
')', Logger::DEBUG); + Logger::log('retriever_check_item_completed: changing visible flag to ' . $item['visible'], Logger::DEBUG); q("UPDATE `item` SET `visible` = %d WHERE `id` = %d", intval($item['visible']), intval($item['id'])); @@ -526,10 +603,10 @@ function retriever_check_item_completed(&$item) } function retriever_apply_completed_resource_to_item($retriever, &$item, $resource, $a) { - Logger::log('retriever_apply_completed_resource_to_item: retriever ' . - ($retriever ? $retriever['id'] : 'none') . - ' resource ' . $resource['url'] . ' plink ' . $item['plink'], Logger::DEBUG); + Logger::log('@@@ 10 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG); + Logger::log('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . $item['plink'], Logger::DEBUG); if (strpos($resource['type'], 'image') !== false) { + Logger::log('@@@ retriever_apply_completed_resource_to_item this is an image must transform', Logger::INFO); retriever_transform_images($a, $item, $resource); } if (!$retriever) { @@ -544,38 +621,61 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc } } +//@@@ todo: change all Logger::log to Logger::info etc +//@@@ todo: what is this reference for? document if needed delete if not function retriever_transform_images($a, &$item, $resource) { - return; //@@@ not working + Logger::log('@@@ 11 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG); + Logger::log('@@@ retriever_transform_images', Logger::INFO); if (!$resource["data"]) { - Logger::log('retriever_transform_images: no data available for ' - . $resource['id'] . ' ' . $resource['url'], Logger::INFO); + Logger::log('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . 
$resource['url'], Logger::INFO); return; } - try { - $photo = Image::storePhoto($a, $item['uid'], $resource['data'], $resource['url']); + $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? + + try { //@@@ probably can get rid of this try/catch + $data = $resource['data']; + $type = $resource['type']; + $uid = $item['uid']; + $cid = $item['contact-id']; + $rid = Photo::newResource(); + $path = parse_url($resource['url'], PHP_URL_PATH); + $parts = pathinfo($path); + $filename = $parts['filename'] . (array_key_exists('extension', $parts) ? '.' . $parts['extension'] : ''); + Logger::log('@@@ retriever_transform_images url ' . $resource['url'] . ' path ' . $path . ' filename ' . $parts['filename'], Logger::INFO); + $album = 'Wall Photos'; + $scale = 0; + $desc = ''; // TODO: store alt text with resource when it's requested so we can fill this in + Logger::log('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . $uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . $album . ' scale ' . $scale . ' desc ' . $desc, Logger::DEBUG); + Logger::log('@@@ retriever_transform_images before new Image', Logger::INFO); + $image = new Image($data, $type); + Logger::log('@@@ retriever_transform_images after new Image', Logger::INFO); + Logger::log('@@@ retriever_transform_images before Photo::store', Logger::INFO); + $photo = Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc); + Logger::log('@@@ retriever_transform_images after Photo::store', Logger::INFO); + $new_url = System::baseUrl() . '/photo/' . $rid . '-0.' . $image->getExt(); + Logger::log('@@@ retriever_transform_images new url ' . $new_url . ' rid ' . $rid . ' ext ' . $image->getExt(), Logger::INFO); + if (!strlen($new_url)) { + Logger::log('retriever_transform_images: no replacement URL for image ' . 
$resource['url'], Logger::WARNING); + return; + } + + $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]); + $body = $content['body']; + Logger::log('@@@ retriever_transform_images: found body for uri id ' . $uri_id . ': ' . $body, Logger::INFO); + + Logger::log('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . $item['uri'], Logger::DEBUG); + Logger::log('@@@ retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in body ' . $body, Logger::DEBUG); + $body = str_replace($resource["url"], $new_url, $body); + + Logger::log('@@@ retriever_transform_images: result \"' . $body . '\"', Logger::INFO); + DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that? + //@@@ probably Item::updateContent + //@@ actually no, Item::update } catch (Exception $e) { - Logger::log('retriever_transform_images caught exception ' . $e->getMessage()); + Logger::log('retriever_transform_images caught exception ' . $e->getMessage(), Logger::INFO); return; } - if (!array_key_exists('full', $photo)) { - Logger::log('retriever_transform_images: no replacement URL for image ' . $resource['url']); - return; - } - $new_url = $photo['full']; - Logger::log('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . - $new_url . ' in item ' . 
$item['plink'], Logger::DEBUG); - $transformed = str_replace($resource["url"], $new_url, $item['body']); - if ($transformed === $item['body']) { - return; - } - - $item['body'] = $transformed; - q("UPDATE `item` SET `body` = '%s' WHERE `plink` = '%s' AND `uid` = %d AND `contact-id` = %d", - DBA::escape($item['body']), - DBA::escape($item['plink']), - intval($item['uid']), - intval($item['contact-id'])); } function retriever_content($a) { @@ -596,37 +696,37 @@ function retriever_content($a) { return; } if ($a->argv[1]) { - $retriever = get_retriever($a->argv[1], local_user(), false); + $retriever_rule = get_retriever_rule($a->argv[1], local_user(), false); if (!empty($_POST["id"])) { - $retriever = get_retriever($a->argv[1], local_user(), true); - $retriever["data"] = array(); + $retriever_rule = get_retriever_rule($a->argv[1], local_user(), true); + $retriever_rule["data"] = array(); foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) { if (!empty($_POST['retriever_' . $setting])) { - $retriever["data"][$setting] = $_POST['retriever_' . $setting]; + $retriever_rule["data"][$setting] = $_POST['retriever_' . 
$setting]; } } foreach ($_POST as $k=>$v) { if (preg_match("/retriever-(include|exclude)-(\d+)-(element|attribute|value)/", $k, $matches)) { - $retriever['data'][$matches[1]][intval($matches[2])][$matches[3]] = $v; + $retriever_rule['data'][$matches[1]][intval($matches[2])][$matches[3]] = $v; } } // You've gotta have an element, even if it's just "*" - foreach ($retriever['data']['include'] as $k=>$clause) { + foreach ($retriever_rule['data']['include'] as $k=>$clause) { if (!$clause['element']) { - unset($retriever['data']['include'][$k]); + unset($retriever_rule['data']['include'][$k]); } } - foreach ($retriever['data']['exclude'] as $k=>$clause) { + foreach ($retriever_rule['data']['exclude'] as $k=>$clause) { if (!$clause['element']) { - unset($retriever['data']['exclude'][$k]); + unset($retriever_rule['data']['exclude'][$k]); } } q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d", - DBA::escape(json_encode($retriever["data"])), intval($retriever["id"])); + DBA::escape(json_encode($retriever_rule["data"])), intval($retriever_rule["id"])); $a->page['content'] .= "

Settings Updated"; if (!empty($_POST["retriever_retrospective"])) { - apply_retrospective($a, $retriever, $_POST["retriever_retrospective"]); + apply_retrospective($a, $retriever_rule, $_POST["retriever_retrospective"]); $a->page['content'] .= " and retrospectively applied to " . $_POST["apply"] . " posts"; } $a->page['content'] .= ".

"; @@ -637,21 +737,21 @@ function retriever_content($a) { '$enable' => array( 'retriever_enable', L10n::t('Enabled'), - $retriever['data']['enable']), + $retriever_rule['data']['enable']), '$pattern' => array( 'retriever_pattern', L10n::t('URL Pattern'), - $retriever["data"]['pattern'], + $retriever_rule["data"]['pattern'], L10n::t('Regular expression matching part of the URL to replace')), '$replace' => array( 'retriever_replace', L10n::t('URL Replace'), - $retriever["data"]['replace'], + $retriever_rule["data"]['replace'], L10n::t('Text to replace matching part of above regular expression')), '$images' => array( 'retriever_images', L10n::t('Download Images'), - $retriever['data']['images']), + $retriever_rule['data']['images']), '$retrospective' => array( 'retriever_retrospective', L10n::t('Retrospectively Apply'), @@ -660,33 +760,33 @@ function retriever_content($a) { 'storecookies' => array( 'retriever_storecookies', L10n::t('Store cookies'), - $retriever['data']['storecookies'], + $retriever_rule['data']['storecookies'], L10n::t("Preserve cookie data across fetches.")), '$cookiedata' => array( 'retriever_cookiedata', L10n::t('Cookie Data'), - $retriever['data']['cookiedata'], + $retriever_rule['data']['cookiedata'], L10n::t("Latest cookie data for this feed. Netscape cookie file format.")), '$customxslt' => array( 'retriever_customxslt', L10n::t('Custom XSLT'), - $retriever['data']['customxslt'], + $retriever_rule['data']['customxslt'], L10n::t("When standard rules aren't enough, apply custom XSLT to the article")), '$title' => L10n::t('Retrieve Feed Content'), '$help' => $a->getBaseUrl() . '/retriever/help', '$help_t' => L10n::t('Get Help'), '$submit_t' => L10n::t('Submit'), '$submit' => L10n::t('Save Settings'), - '$id' => ($retriever["id"] ? $retriever["id"] : "create"), + '$id' => ($retriever_rule["id"] ? 
$retriever_rule["id"] : "create"), '$tag_t' => L10n::t('Tag'), '$attribute_t' => L10n::t('Attribute'), '$value_t' => L10n::t('Value'), '$add_t' => L10n::t('Add'), '$remove_t' => L10n::t('Remove'), '$include_t' => L10n::t('Include'), - '$include' => $retriever['data']['include'], + '$include' => $retriever_rule['data']['include'], '$exclude_t' => L10n::t('Exclude'), - '$exclude' => $retriever["data"]['exclude'])); + '$exclude' => $retriever_rule["data"]['exclude'])); return; } } @@ -701,18 +801,23 @@ function retriever_contact_photo_menu($a, &$args) { } function retriever_post_remote_hook(&$a, &$item) { + Logger::log('@@@ 12 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); Logger::log('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG); - $retriever = get_retriever($item['contact-id'], $item["uid"], false); - if ($retriever) { - retriever_on_item_insert($a, $retriever, $item); + $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? + $retriever_rule = get_retriever_rule($item['contact-id'], $item["uid"], false); + if ($retriever_rule) { + retriever_on_item_insert($a, $retriever_rule, $item); } else { if (PConfig::get($item["uid"], 'retriever', 'oembed')) { // Convert to HTML and back to take advantage of bbcode's resolution of oembeds. - $body = HTML::toBBCode(BBCode::convert($item['body'])); + $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]); + $body = HTML::toBBCode(BBCode::convert($content['body'])); + Logger::log('@@@ retriever_post_remote_hook item uri-id ' . $uri_id . ' body "' . $item['body'] . '" item content body "' . $body . '"', Logger::DEBUG); if ($body) { $item['body'] = $body; + DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that? 
} } if (PConfig::get($item["uid"], 'retriever', 'all_photos')) { From 68d3e6c7cfd57890e2b60447d80e5a32b37aba92 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 22 Sep 2019 17:05:23 +0200 Subject: [PATCH 12/96] Improvement --- retriever/retriever.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/retriever/retriever.php b/retriever/retriever.php index 5644952a..704bff34 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -650,6 +650,10 @@ function retriever_transform_images($a, &$item, $resource) { Logger::log('@@@ retriever_transform_images before new Image', Logger::INFO); $image = new Image($data, $type); Logger::log('@@@ retriever_transform_images after new Image', Logger::INFO); + if (!$image->isValid()) { + Logger::log('retriever_transform_images: invalid image found at URL ' . $resource['url'] ' for item ' . $item['id'], Logger::WARNING); + return; + } Logger::log('@@@ retriever_transform_images before Photo::store', Logger::INFO); $photo = Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc); Logger::log('@@@ retriever_transform_images after Photo::store', Logger::INFO); From cdb3ac82e5f9f5465629d3fa24efdb5c20441d0a Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 22 Sep 2019 19:55:07 +0200 Subject: [PATCH 13/96] Change logging functions --- retriever/retriever.php | 210 ++++++++++++++++++++-------------------- 1 file changed, 105 insertions(+), 105 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 704bff34..ac6b321a 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -84,7 +84,7 @@ function retriever_cron($a, $b) { $retriever_item_count = 0; function retriever_retrieve_items($max_items, $a) { - Logger::log('@@@ retriever_retrieve_items', Logger::INFO); + Logger::info('@@@ retriever_retrieve_items'); global $retriever_item_count; $retriever_schedule = array(array(1,'minute'), @@ -105,10 +105,10 @@ function 
retriever_retrieve_items($max_items, $a) { } $retrieve_items = $max_items - $retriever_item_count; - Logger::log('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items, Logger::DEBUG); + Logger::debug('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items); do { - Logger::log('@@@ retriever_retrieve_items loop max ' . $max_items . ' count ' . $retriever_item_count, Logger::INFO); - Logger::log("@@@ SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR " . implode($schedule_clauses, ' OR ') . ") ORDER BY `last-try` ASC LIMIT " . $retrieve_items, Logger::INFO); + Logger::info('@@@ retriever_retrieve_items loop max ' . $max_items . ' count ' . $retriever_item_count); + Logger::info("@@@ SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR " . implode($schedule_clauses, ' OR ') . ") ORDER BY `last-try` ASC LIMIT " . $retrieve_items); $retriever_resources = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d", DBA::escape(implode($schedule_clauses, ' OR ')), intval($retrieve_items)); @@ -118,9 +118,9 @@ function retriever_retrieve_items($max_items, $a) { if (count($retriever_resources) == 0) { break; } - Logger::log('retriever_retrieve_items: found ' . count($retriever_resources) . ' waiting resources in database', Logger::DEBUG); + Logger::debug('retriever_retrieve_items: found ' . count($retriever_resources) . ' waiting resources in database'); foreach ($retriever_resources as $retriever_resource) { - Logger::log('@@@ need to get the retriever config here cid ' . $retriever_resource['contact-id'] . ' uid ' . $retriever_resource['item-uid'], Logger::INFO); + Logger::info('@@@ need to get the retriever config here cid ' . 
$retriever_resource['contact-id'] . ' uid ' . $retriever_resource['item-uid']); retrieve_resource($retriever_resource); $retriever_item_count++; } @@ -128,7 +128,7 @@ function retriever_retrieve_items($max_items, $a) { } while ($retrieve_items > 0); // @@@ todo: when items add further items (i.e. images), do the new images go round this loop again? - Logger::log('@@@ retriever_retrieve_items: finished retrieving items', Logger::INFO); + Logger::info('@@@ retriever_retrieve_items: finished retrieving items'); } /* Look for items that are waiting even though the resource has @@ -141,25 +141,25 @@ function retriever_clean_up_completed_resources($max_items, $a) { if (!$r) { $r = array(); } - Logger::log('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . count($r), Logger::DEBUG); + Logger::debug('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . count($r)); foreach ($r as $rr) { $resource = q("SELECT * FROM retriever_resource WHERE `id` = %d", $rr['resource']); $retriever_item = retriever_get_retriever_item($rr['item']); if (!DBA::isResult($retriever_item)) { - Logger::log('retriever_clean_up_completed_resources: no retriever item with id ' . $rr['item'], Logger::WARNING); + Logger::warning('retriever_clean_up_completed_resources: no retriever item with id ' . $rr['item']); continue; } $item = retriever_get_item($retriever_item); if (!$item) { - Logger::log('retriever_clean_up_completed_resources: no item ' . $retriever_item['item-uri'], Logger::WARNING); + Logger::warning('retriever_clean_up_completed_resources: no item ' . $retriever_item['item-uri']); continue; } $retriever_rule = get_retriever_rule($retriever_item['contact-id'], $item['uid']); if (!$retriever_rule) { - Logger::log('retriever_clean_up_completed_resources: no retriever for uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['uid'] . ' ' . 
$retriever_item['contact-id'], Logger::WARNING); + Logger::warning('retriever_clean_up_completed_resources: no retriever for uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['uid'] . ' ' . $retriever_item['contact-id']); continue; } - Logger::log('@@@ retriever_clean_up_completed_resources: about to retriever_apply_completed_resource_to_item', Logger::INFO); + Logger::info('@@@ retriever_clean_up_completed_resources: about to retriever_apply_completed_resource_to_item'); retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource[0], $a); q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", intval($retriever_item['id'])); retriever_check_item_completed($item); @@ -171,7 +171,7 @@ function retriever_tidy() { q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)"); $r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); - Logger::log('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource', Logger::INFO); + Logger::info('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource'); foreach ($r as $rr) { q('DELETE FROM retriever_item WHERE id = %d', intval($rr['id'])); } @@ -179,7 +179,7 @@ function retriever_tidy() { function retrieve_dataurl_resource($resource) { if (!preg_match("/date:(.*);base64,(.*)/", $resource['url'], $matches)) { - Logger::log('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern', Logger::INFO); + Logger::info('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern'); } else { $resource['type'] = $matches[1]; $resource['data'] = base64url_decode($matches[2]); @@ -194,7 +194,7 @@ function retrieve_dataurl_resource($resource) { } function retrieve_resource($resource) { - Logger::log('@@@ retrieve_resource: url ' . $resource['url'] . 
' uid ' . $resource['item-uid'] . ' cid ' . $resource['contact-id'], Logger::INFO); + Logger::info('@@@ retrieve_resource: url ' . $resource['url'] . ' uid ' . $resource['item-uid'] . ' cid ' . $resource['contact-id']); if (substr($resource['url'], 0, 5) == "data:") { return retrieve_dataurl_resource($resource); @@ -205,14 +205,14 @@ function retrieve_resource($resource) { $retriever_rule = get_retriever_rule($resource['contact-id'], $resource['item-uid']); try { - Logger::log('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], Logger::DEBUG); + Logger::debug('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url']); $redirects = 0; $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); - if ($retriever_rule['storecookies']) { + if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) { file_put_contents($cookiejar, $retriever_rule['cookiedata']); } $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar); - if ($retriever_rule['storecookies']) { + if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) { $retriever_rule['cookiedata'] = file_get_contents($cookiejar); //@@@ do the store here } @@ -221,9 +221,9 @@ function retrieve_resource($resource) { $resource['http-code'] = $fetch_result->getReturnCode(); $resource['type'] = $fetch_result->getContentType(); $resource['redirect-url'] = $fetch_result->getRedirectUrl(); - Logger::log('retrieve_resource: got code ' . $resource['http-code'] . ' retrieving resource ' . $resource['id'] . ' final url ' . $resource['redirect-url'], Logger::DEBUG); + Logger::debug('retrieve_resource: got code ' . $resource['http-code'] . ' retrieving resource ' . $resource['id'] . ' final url ' . 
$resource['redirect-url']); } catch (Exception $e) { - Logger::log('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage(), Logger::INFO); + Logger::info('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage()); } q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `http-code` = %d, `redirect-url` = '%s' WHERE id = %d", intval($resource['http-code']), @@ -236,17 +236,17 @@ function retrieve_resource($resource) { intval($resource['id'])); retriever_resource_completed($resource, $a); } - Logger::log('@@@ retrieve_resource finished: ' . $resource['url'], Logger::INFO); + Logger::info('@@@ retrieve_resource finished: ' . $resource['url']); } function get_retriever_rule($contact_id, $uid, $create = false) { - Logger::log('@@@ get_retriever_rule ' . "SELECT * FROM `retriever_rule` WHERE `contact-id` = " . intval($contact_id) . " AND `uid` = " . intval($uid), Logger::INFO); + Logger::info('@@@ get_retriever_rule ' . "SELECT * FROM `retriever_rule` WHERE `contact-id` = " . intval($contact_id) . " AND `uid` = " . intval($uid)); $r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d", intval($contact_id), intval($uid)); - Logger::log('@@@ get_retriever_rule count is ' . count($r), Logger::INFO); + Logger::info('@@@ get_retriever_rule count is ' . 
count($r)); if (count($r)) { $r[0]['data'] = json_decode($r[0]['data'], true); - Logger::log('@@@ get_retriever_rule returning an actual thing', Logger::INFO); + Logger::info('@@@ get_retriever_rule returning an actual thing'); return $r[0]; } if ($create) { @@ -267,7 +267,7 @@ function retriever_class_of_item($item) { //@@@ return 'false'; } if (array_key_exists('finished', $item)) { - Logger::log('@@@ oh no this is a bad thing', Logger::INFO); + Logger::info('@@@ oh no this is a bad thing'); return 'retriever_item'; } if (array_key_exists('moderated', $item)) { @@ -282,33 +282,33 @@ function mat_test($item) { //@@@ function retriever_get_item($retriever_item) { // @@@ add contact id as a search term - Logger::log('@@@ retriever_get_item uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['item-uid'] . ' cid ' . $retriever_item['contact-id'], Logger::INFO); + Logger::info('@@@ retriever_get_item uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['item-uid'] . ' cid ' . $retriever_item['contact-id']); try {//@@@ not necessary $item = Item::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid'])]); Logger::log('@@@ 1 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); if (!DBA::isResult($item)) { - Logger::log('retriever_get_item: no item found for uri ' . $retriever_item['item-uri'], Logger::INFO); + Logger::log('retriever_get_item: no item found for uri ' . $retriever_item['item-uri']); return; } - Logger::log('@@@ retriever_get_item: yay item found for uri ' . $retriever_item['item-uri'] . ' guid ' . $item['guid'] . ' plink ' . $item['plink'], Logger::INFO); + Logger::info('@@@ retriever_get_item: yay item found for uri ' . $retriever_item['item-uri'] . ' guid ' . $item['guid'] . ' plink ' . $item['plink']); return $item; } catch (Exception $e) { - Logger::log('retriever_get_item: exception ' . 
$e->getMessage(), Logger::INFO); + Logger::info('retriever_get_item: exception ' . $e->getMessage()); } } function retriever_item_completed($retriever_item_id, $resource, $a) { - Logger::log('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url'], Logger::DEBUG); + Logger::debug('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url']); $retriever_item = retriever_get_retriever_item($retriever_item_id); if (!DBA::isResult($retriever_item)) { - Logger::log('retriever_item_completed: no retriever item with id ' . $retriever_item_id, Logger::INFO); + Logger::info('retriever_item_completed: no retriever item with id ' . $retriever_item_id); return; } $item = retriever_get_item($retriever_item); Logger::log('@@@ 2 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); if (!$item) { - Logger::log('retriever_item_completed: no item ' . $retriever_item['item-uri'], Logger::INFO); + Logger::log('retriever_item_completed: no item ' . $retriever_item['item-uri']); return; } // Note: the retriever might be null. Doesn't matter. @@ -322,7 +322,7 @@ function retriever_item_completed($retriever_item_id, $resource, $a) { } function retriever_resource_completed($resource, $a) { - Logger::log('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url'], Logger::DEBUG); + Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url']); $r = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource['id']); foreach ($r as $rr) { retriever_item_completed($rr['id'], $resource, $a); @@ -343,31 +343,31 @@ function apply_retrospective($a, $retriever, $num) { //@@@ make this trigger a retriever immediately somehow //@@@ need a lock to say something is doing something function retriever_on_item_insert($a, $retriever, &$item) { - Logger::log('@@@ 4 item class is ' . retriever_class_of_item($item) . ' ' . 
mat_test($item)); - Logger::log('@@@ retriever_on_item_insert start ' . $item['plink'], Logger::INFO); + Logger::info('@@@ 4 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); + Logger::info('@@@ retriever_on_item_insert start ' . $item['plink']); if (!$retriever || !$retriever['id']) { - Logger::log('retriever_on_item_insert: No retriever supplied', Logger::INFO); + Logger::info('retriever_on_item_insert: No retriever supplied'); return; } if (!$retriever["data"]['enable'] == "on") { - Logger::log('@@@ retriever_on_item_insert: Disabled', Logger::INFO); + Logger::info('@@@ retriever_on_item_insert: Disabled'); return; } if (array_key_exists('pattern', $retriever["data"]) && $retriever["data"]['pattern']) { $url = preg_replace('/' . $retriever["data"]['pattern'] . '/', $retriever["data"]['replace'], $item['plink']); - Logger::log('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url, Logger::DATA); + Logger::debug('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url); } else { $url = $item['plink']; } - Logger::log('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id'], Logger::DEBUG); + Logger::debug('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id']); $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id']); $retriever_item_id = add_retriever_item($item, $resource); } function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { - Logger::log('add_retriever_resource: url ' . $url . ' uid ' . $uid . ' contact-id ' . $cid, Logger::DEBUG); + Logger::debug('add_retriever_resource: url ' . $url . ' uid ' . $uid . ' contact-id ' . 
$cid); $scheme = parse_url($url, PHP_URL_SCHEME); if ($scheme == 'data') { @@ -381,11 +381,11 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid)); $resource = $r[0]; if (count($r)) { - Logger::log('add_retriever_resource: Resource ' . $url . ' already requested', Logger::DEBUG); + Logger::debug('add_retriever_resource: Resource ' . $url . ' already requested'); return $resource; } - Logger::log('retrieve_resource: got data URL type ' . $resource['type'], Logger::DEBUG); + Logger::debug('retrieve_resource: got data URL type ' . $resource['type']); q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `type`, `binary`, `url`, `completed`, `data`) " . "VALUES (%d, %d, '%s', %d, '%s', now(), '%s')", intval($uid), @@ -403,12 +403,12 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { } if (strlen($url) > 800) { - Logger::log('add_retriever_resource: URL is longer than 800 characters', Logger::WARNING); + Logger::warning('add_retriever_resource: URL is longer than 800 characters'); } $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid)); if (count($r)) { - Logger::log('add_retriever_resource: Resource ' . $url . ' uid ' . $uid . ' cid ' . $cid . ' already requested', Logger::DEBUG); + Logger::debug('add_retriever_resource: Resource ' . $url . ' uid ' . $uid . ' cid ' . $cid . ' already requested'); return $r[0]; } @@ -419,14 +419,14 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { } function add_retriever_item(&$item, $resource) { - Logger::log('@@@ 5 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG); - Logger::log('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . 
' ' . $item['contact-id'], Logger::DEBUG); + Logger::debug('@@@ 5 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); + Logger::debug('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); $r = q("SELECT COUNT(*) FROM `retriever_item` WHERE " . "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d", DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id'])); if ($r[0]['COUNT(*)'] > 0) { - Logger::log("add_retriever_item: retriever item already present for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::INFO); + Logger::info("add_retriever_item: retriever item already present for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); return; } q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " . @@ -436,10 +436,10 @@ function add_retriever_item(&$item, $resource) { "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d ORDER BY id DESC", DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id'])); if (!count($r)) { - Logger::log("add_retriever_item: couldn't create retriever item for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::INFO); + Logger::info("add_retriever_item: couldn't create retriever item for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); return; } - Logger::log('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG); + Logger::debug('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . 
$item['contact-id']); return $r[0]['id']; } @@ -453,12 +453,12 @@ function retriever_get_encoding($resource) { function retriever_apply_xslt_text($xslt_text, $doc) { if (!$xslt_text) { - Logger::log('retriever_apply_xslt_text: empty XSLT text', Logger::INFO); + Logger::info('retriever_apply_xslt_text: empty XSLT text'); return $doc; } $xslt_doc = new DOMDocument(); if (!$xslt_doc->loadXML($xslt_text)) { - Logger::log('retriever_apply_xslt_text: could not load XML', Logger::INFO); + Logger::info('retriever_apply_xslt_text: could not load XML'); return $doc; } $xp = new XsltProcessor(); @@ -469,15 +469,15 @@ function retriever_apply_xslt_text($xslt_text, $doc) { //@@@ is that an item or a resource_item? I really want an item here so I can update it function retriever_apply_dom_filter($retriever, &$item, $resource) { - Logger::log('@@@ 6 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG); - Logger::log('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'], Logger::DEBUG); + Logger::debug('@@@ 6 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); + Logger::debug('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . 
$item['contact-id']); if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) { - Logger::log('retriever_apply_dom_filter: no include and no customxslt', Logger::INFO); + Logger::info('retriever_apply_dom_filter: no include and no customxslt'); return; } if (!$resource['data']) { - Logger::log('retriever_apply_dom_filter: no text to work with', Logger::INFO); + Logger::info('retriever_apply_dom_filter: no text to work with'); return; } @@ -495,104 +495,104 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { $extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/'); $extract_xslt = Renderer::replaceMacros($extract_template, $params); if ($retriever['data']['include']) { - Logger::log('retriever_apply_dom_filter: applying include/exclude template \"' . $extract_xslt . '\"', Logger::DEBUG); + Logger::debug('retriever_apply_dom_filter: applying include/exclude template \"' . $extract_xslt . '\"'); $doc = retriever_apply_xslt_text($extract_xslt, $doc); } if (array_key_exists('customxslt', $retriever['data']) && $retriever['data']['customxslt']) { - Logger::log('retriever_apply_dom_filter: applying custom XSLT \"' . $retriever['data']['customxslt'] . '\"', Logger::DEBUG); + Logger::debug('retriever_apply_dom_filter: applying custom XSLT \"' . $retriever['data']['customxslt'] . '\"'); $doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc); } if (!$doc) { - Logger::log('retriever_apply_dom_filter: failed to apply extract XSLT template', Logger::INFO); + Logger::info('retriever_apply_dom_filter: failed to apply extract XSLT template'); return; } - Logger::log('@@@ retriever_apply_dom_filter: 1', Logger::INFO); + Logger::info('@@@ retriever_apply_dom_filter: 1'); $components = parse_url($resource['redirect-url']); $rooturl = $components['scheme'] . "://" . $components['host']; $dirurl = $rooturl . dirname($components['path']) . 
"/"; - Logger::log('@@@ retriever_apply_dom_filter: 2', Logger::INFO); + Logger::info('@@@ retriever_apply_dom_filter: 2'); $params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl); $fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/'); $fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params); - Logger::log('@@@ retriever_apply_dom_filter: 3', Logger::INFO); + Logger::info('@@@ retriever_apply_dom_filter: 3'); $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc); - Logger::log('@@@ retriever_apply_dom_filter: 4', Logger::INFO); + Logger::info('@@@ retriever_apply_dom_filter: 4'); if (!$doc) { - Logger::log('retriever_apply_dom_filter: failed to apply fix urls XSLT template', Logger::INFO); + Logger::info('retriever_apply_dom_filter: failed to apply fix urls XSLT template'); return; } - Logger::log('@@@ retriever_apply_dom_filter: 5', Logger::INFO); + Logger::info('@@@ retriever_apply_dom_filter: 5'); $body = HTML::toBBCode($doc->saveHTML()); if (!strlen($body)) { - Logger::log('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', Logger::INFO); + Logger::info('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty'); return; } $body .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . ': [url='; $body .= $item['plink']; $body .= ']' . $item['plink'] . '[/url]'; - Logger::log('@@@ retriever_apply_dom_filter: 6', Logger::INFO); + Logger::info('@@@ retriever_apply_dom_filter: 6'); $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? - Logger::log('@@@ retriever_apply_dom_filter: item id is ' . $item['id'] . ' uri id is ' . $uri_id, Logger::INFO); - Logger::log('retriever_apply_dom_filter: XSLT result \"' . $body . '\"', Logger::DATA); + Logger::info('@@@ retriever_apply_dom_filter: item id is ' . $item['id'] . ' uri id is ' . 
$uri_id); + Logger::debug('retriever_apply_dom_filter: XSLT result \"' . $body . '\"'); DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that? //@@@ probably Item::updateContent } function retrieve_images(&$item, $a) { $blah_item_class = retriever_class_of_item($item) . ' ' . mat_test($item); - Logger::log('@@@ 7 item class is ' . $blah_item_class, Logger::DEBUG); + Logger::debug('@@@ 7 item class is ' . $blah_item_class); $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]); $body = $content['body']; if (!strlen($body)) { - Logger::log('retrieve_images: no body for uri-id ' . $uri_id, Logger::WARNING); + Logger::warning('retrieve_images: no body for uri-id ' . $uri_id); return; } - Logger::log('@@@ retrieve_images start looking in body "' . $body . '"', Logger::INFO); + Logger::info('@@@ retrieve_images start looking in body "' . $body . '"'); $matches1 = array(); preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $body, $matches1); $matches2 = array(); preg_match_all("/\[img\](.*?)\[\/img\]/ism", $body, $matches2); $matches = array_merge($matches1[3], $matches2[1]); - Logger::log('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG); + Logger::debug('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); foreach ($matches as $url) { - Logger::log('@@@ retrieve_images: url ' . $url, Logger::DEBUG); + Logger::debug('@@@ retrieve_images: url ' . $url); if (strpos($url, get_app()->getBaseUrl()) === FALSE) { - Logger::log('@@@ retrieve_images: it is from somewhere else', Logger::DEBUG); - Logger::log('@@@ retrieve_images: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . 
$item['contact-id'], Logger::DEBUG); + Logger::debug('@@@ retrieve_images: it is from somewhere else'); + Logger::debug('@@@ retrieve_images: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id']); $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id'], true); if (!$resource['completed']) { - Logger::log('@@@ retrieve_images: do not have it yet, get it later', Logger::DEBUG); + Logger::debug('@@@ retrieve_images: do not have it yet, get it later'); add_retriever_item($item, $resource); } else { - Logger::log('@@@ retrieve_images: got it already, transform', Logger::DEBUG); + Logger::debug('@@@ retrieve_images: got it already, transform'); retriever_transform_images($a, $item, $resource); } } } - Logger::log('@@@ retrieve_images end', Logger::INFO); + Logger::info('@@@ retrieve_images end'); } function retriever_check_item_completed(&$item) { - Logger::log('@@@ 9 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG); + Logger::debug('@@@ 9 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); $r = q('SELECT count(*) FROM retriever_item WHERE `item-uri` = "%s" ' . 'AND `item-uid` = %d AND `contact-id` = %d AND `finished` = 0', DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id'])); $waiting = $r[0]['count(*)']; - Logger::log('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources', Logger::DEBUG); + Logger::debug('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources'); $old_visible = $item['visible']; $item['visible'] = $waiting ? 0 : 1; if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) { - Logger::log('retriever_check_item_completed: changing visible flag to ' . 
$item['visible'], Logger::DEBUG); + Logger::debug('retriever_check_item_completed: changing visible flag to ' . $item['visible']); q("UPDATE `item` SET `visible` = %d WHERE `id` = %d", intval($item['visible']), intval($item['id'])); @@ -603,10 +603,10 @@ function retriever_check_item_completed(&$item) } function retriever_apply_completed_resource_to_item($retriever, &$item, $resource, $a) { - Logger::log('@@@ 10 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG); - Logger::log('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . $item['plink'], Logger::DEBUG); + Logger::debug('@@@ 10 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); + Logger::debug('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . $item['plink']); if (strpos($resource['type'], 'image') !== false) { - Logger::log('@@@ retriever_apply_completed_resource_to_item this is an image must transform', Logger::INFO); + Logger::info('@@@ retriever_apply_completed_resource_to_item this is an image must transform'); retriever_transform_images($a, $item, $resource); } if (!$retriever) { @@ -621,13 +621,13 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc } } -//@@@ todo: change all Logger::log to Logger::info etc +//@@@ todo: change all Logger::info t etc //@@@ todo: what is this reference for? document if needed delete if not function retriever_transform_images($a, &$item, $resource) { - Logger::log('@@@ 11 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG); - Logger::log('@@@ retriever_transform_images', Logger::INFO); + Logger::debug('@@@ 11 item class is ' . retriever_class_of_item($item) . ' ' . 
mat_test($item)); + Logger::info('@@@ retriever_transform_images'); if (!$resource["data"]) { - Logger::log('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url'], Logger::INFO); + Logger::info('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url']); return; } @@ -642,42 +642,42 @@ function retriever_transform_images($a, &$item, $resource) { $path = parse_url($resource['url'], PHP_URL_PATH); $parts = pathinfo($path); $filename = $parts['filename'] . (array_key_exists('extension', $parts) ? '.' . $parts['extension'] : ''); - Logger::log('@@@ retriever_transform_images url ' . $resource['url'] . ' path ' . $path . ' filename ' . $parts['filename'], Logger::INFO); + Logger::info('@@@ retriever_transform_images url ' . $resource['url'] . ' path ' . $path . ' filename ' . $parts['filename']); $album = 'Wall Photos'; $scale = 0; $desc = ''; // TODO: store alt text with resource when it's requested so we can fill this in - Logger::log('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . $uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . $album . ' scale ' . $scale . ' desc ' . $desc, Logger::DEBUG); - Logger::log('@@@ retriever_transform_images before new Image', Logger::INFO); + Logger::debug('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . $uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . $album . ' scale ' . $scale . ' desc ' . $desc); + Logger::info('@@@ retriever_transform_images before new Image'); $image = new Image($data, $type); - Logger::log('@@@ retriever_transform_images after new Image', Logger::INFO); + Logger::info('@@@ retriever_transform_images after new Image'); if (!$image->isValid()) { - Logger::log('retriever_transform_images: invalid image found at URL ' . $resource['url'] ' for item ' . 
$item['id'], Logger::WARNING); + Logger::warning('retriever_transform_images: invalid image found at URL ' . $resource['url'] . ' for item ' . $item['id']); return; } - Logger::log('@@@ retriever_transform_images before Photo::store', Logger::INFO); + Logger::info('@@@ retriever_transform_images before Photo::store'); $photo = Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc); - Logger::log('@@@ retriever_transform_images after Photo::store', Logger::INFO); + Logger::info('@@@ retriever_transform_images after Photo::store'); $new_url = System::baseUrl() . '/photo/' . $rid . '-0.' . $image->getExt(); - Logger::log('@@@ retriever_transform_images new url ' . $new_url . ' rid ' . $rid . ' ext ' . $image->getExt(), Logger::INFO); + Logger::info('@@@ retriever_transform_images new url ' . $new_url . ' rid ' . $rid . ' ext ' . $image->getExt()); if (!strlen($new_url)) { - Logger::log('retriever_transform_images: no replacement URL for image ' . $resource['url'], Logger::WARNING); + Logger::warning('retriever_transform_images: no replacement URL for image ' . $resource['url']); return; } $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]); $body = $content['body']; - Logger::log('@@@ retriever_transform_images: found body for uri id ' . $uri_id . ': ' . $body, Logger::INFO); + Logger::info('@@@ retriever_transform_images: found body for uri id ' . $uri_id . ': ' . $body); - Logger::log('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . $item['uri'], Logger::DEBUG); - Logger::log('@@@ retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in body ' . $body, Logger::DEBUG); + Logger::debug('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . $item['uri']); + Logger::debug('@@@ retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in body ' . 
$body); $body = str_replace($resource["url"], $new_url, $body); - Logger::log('@@@ retriever_transform_images: result \"' . $body . '\"', Logger::INFO); + Logger::info('@@@ retriever_transform_images: result \"' . $body . '\"'); DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that? //@@@ probably Item::updateContent //@@ actually no, Item::update } catch (Exception $e) { - Logger::log('retriever_transform_images caught exception ' . $e->getMessage(), Logger::INFO); + Logger::info('retriever_transform_images caught exception ' . $e->getMessage()); return; } } @@ -805,8 +805,8 @@ function retriever_contact_photo_menu($a, &$args) { } function retriever_post_remote_hook(&$a, &$item) { - Logger::log('@@@ 12 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); - Logger::log('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG); + Logger::info('@@@ 12 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); + Logger::info('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? $retriever_rule = get_retriever_rule($item['contact-id'], $item["uid"], false); @@ -818,7 +818,7 @@ function retriever_post_remote_hook(&$a, &$item) { // Convert to HTML and back to take advantage of bbcode's resolution of oembeds. $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]); $body = HTML::toBBCode(BBCode::convert($content['body'])); - Logger::log('@@@ retriever_post_remote_hook item uri-id ' . $uri_id . ' body "' . $item['body'] . '" item content body "' . $body . '"', Logger::DEBUG); + Logger::debug('@@@ retriever_post_remote_hook item uri-id ' . $uri_id . ' body "' . $item['body'] . '" item content body "' . $body . 
'"'); if ($body) { $item['body'] = $body; DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that? From eab0467e35070a2ab11c02bc641be96d3eeb8f73 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Fri, 27 Sep 2019 22:05:00 +0200 Subject: [PATCH 14/96] retriever stuff --- retriever/retriever.php | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index ac6b321a..56852e45 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -30,7 +30,6 @@ function retriever_install() { Addon::registerHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); - $r = q("SELECT `id` FROM `pconfig` WHERE `cat` LIKE 'retriever_%%'"); if (Config::get('retriever', 'dbversion') == '0.10') { q("ALTER TABLE `retriever_resource` MODIFY COLUMN `type` char(255) NULL DEFAULT NULL"); q("ALTER TABLE `retriever_resource` MODIFY COLUMN `data` mediumblob NULL DEFAULT NULL"); @@ -537,8 +536,7 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? Logger::info('@@@ retriever_apply_dom_filter: item id is ' . $item['id'] . ' uri id is ' . $uri_id); Logger::debug('retriever_apply_dom_filter: XSLT result \"' . $body . '\"'); - DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that? - //@@@ probably Item::updateContent + Item::update(['body' => $body], ['uri-id' => $uri_id]); } function retrieve_images(&$item, $a) { @@ -673,9 +671,7 @@ function retriever_transform_images($a, &$item, $resource) { $body = str_replace($resource["url"], $new_url, $body); Logger::info('@@@ retriever_transform_images: result \"' . $body . 
'\"'); - DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that? - //@@@ probably Item::updateContent - //@@ actually no, Item::update + Item::update(['body' => $body], ['uri-id' => $uri_id]); } catch (Exception $e) { Logger::info('retriever_transform_images caught exception ' . $e->getMessage()); return; @@ -821,7 +817,7 @@ function retriever_post_remote_hook(&$a, &$item) { Logger::debug('@@@ retriever_post_remote_hook item uri-id ' . $uri_id . ' body "' . $item['body'] . '" item content body "' . $body . '"'); if ($body) { $item['body'] = $body; - DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that? + Item::update(['body' => $body], ['uri-id' => $uri_id]); } } if (PConfig::get($item["uid"], 'retriever', 'all_photos')) { From dc068b16eca6f9e839d64227684d6e9104ebc0d6 Mon Sep 17 00:00:00 2001 From: Administrator Date: Fri, 27 Sep 2019 21:02:52 +0200 Subject: [PATCH 15/96] Fix retriever database problems --- retriever/database.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/retriever/database.sql b/retriever/database.sql index a29135e7..68480cfd 100644 --- a/retriever/database.sql +++ b/retriever/database.sql @@ -36,7 +36,7 @@ CREATE TABLE IF NOT EXISTS `retriever_resource` ( `data` mediumblob NULL DEFAULT NULL, `http-code` smallint(1) unsigned NULL DEFAULT NULL, `redirect-url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NULL DEFAULT NULL, - KEY `retriever_resource` ADD INDEX `url` (`url`), - KEY `retriever_resource` ADD INDEX `completed` (`completed`), + KEY `url` (`url`), + KEY `completed` (`completed`), PRIMARY KEY (`id`) ) DEFAULT CHARSET=utf8 COLLATE=utf8_bin From 2153e7059b0d070cbca118341d7080217c03130f Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Fri, 27 Sep 2019 22:05:22 +0200 Subject: [PATCH 16/96] more retriever stuff --- retriever/retriever.php | 34 +++++++++++++++++++++++++--------- 1 file 
changed, 25 insertions(+), 9 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 56852e45..3b557d80 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -4,7 +4,6 @@ * Description: Follow the permalink of RSS/Atom feed items and replace the summary with the full content. * Version: 1.1 * Author: Matthew Exon - * Status: Unsupported */ use Friendica\Core\Addon; @@ -53,6 +52,7 @@ function retriever_install() { $arr = explode(';', $schema); foreach ($arr as $a) { $r = q($a); + //@@@ check for errors } Config::set('retriever', 'dbversion', '0.13'); } @@ -332,7 +332,6 @@ function apply_retrospective($a, $retriever, $num) { $r = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d", intval($retriever['contact-id']), intval($num)); foreach ($r as $item) { - Logger::log('@@@ 3 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); //@@@ already know this is wrong q('UPDATE `item` SET `visible` = 0 WHERE `id` = %d', $item['id']); q('UPDATE `thread` SET `visible` = 0 WHERE `iid` = %d', $item['id']); retriever_on_item_insert($a, $retriever, $item); @@ -343,7 +342,10 @@ function apply_retrospective($a, $retriever, $num) { //@@@ need a lock to say something is doing something function retriever_on_item_insert($a, $retriever, &$item) { Logger::info('@@@ 4 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); - Logger::info('@@@ retriever_on_item_insert start ' . $item['plink']); + foreach ($item as $key => $value) { + Logger::info("@@@ $key => $value"); + } + Logger::info('@@@ retriever_on_item_insert start ' . ' plink ' . 
$item['plink']); if (!$retriever || !$retriever['id']) { Logger::info('retriever_on_item_insert: No retriever supplied'); return; @@ -352,15 +354,29 @@ function retriever_on_item_insert($a, $retriever, &$item) { Logger::info('@@@ retriever_on_item_insert: Disabled'); return; } - if (array_key_exists('pattern', $retriever["data"]) && $retriever["data"]['pattern']) { - $url = preg_replace('/' . $retriever["data"]['pattern'] . '/', $retriever["data"]['replace'], $item['plink']); - Logger::debug('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url); - } - else { + if (array_key_exists('plink', $item)) { $url = $item['plink']; } + else { + if (!array_key_exists('uri_id', $item)) { + Logger::warning('retriever_on_item_insert: item ' . ' has no plink and no uri-id'); + // @@@ find an identifier and put it in warning + Logger::warning('@@@ retriever_on_item_insert: item has: ' . print_r($item, true)); + foreach ($item as $key => $value) { + Logger::warning("@@@ $key => $value"); + } + return; + } + $content = DBA::selectFirst('item-content', [], ['uri-id' => $item['uri_id']]); + $url = $content['plink']; + } - Logger::debug('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id']); + if (array_key_exists('pattern', $retriever["data"]) && $retriever["data"]['pattern']) { + $url = preg_replace('/' . $retriever["data"]['pattern'] . '/', $retriever["data"]['replace'], $url); + Logger::debug('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url); + } + + Logger::debug('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id'] . ' url ' . 
$url); $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id']); $retriever_item_id = add_retriever_item($item, $resource); } From 928911ea6d861c0b3d85cebadc1d9af78da80c42 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 29 Sep 2019 17:01:46 +0200 Subject: [PATCH 17/96] Fix bugs in retriever retrospective stuff --- retriever/retriever.php | 100 ++++++++++++++++---------------- retriever/templates/extract.tpl | 18 ++---- 2 files changed, 55 insertions(+), 63 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 3b557d80..4d701276 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -30,16 +30,16 @@ function retriever_install() { Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); if (Config::get('retriever', 'dbversion') == '0.10') { - q("ALTER TABLE `retriever_resource` MODIFY COLUMN `type` char(255) NULL DEFAULT NULL"); - q("ALTER TABLE `retriever_resource` MODIFY COLUMN `data` mediumblob NULL DEFAULT NULL"); - q("ALTER TABLE `retriever_rule` MODIFY COLUMN `data` mediumtext NULL DEFAULT NULL"); + q('ALTER TABLE `retriever_resource` MODIFY COLUMN `type` char(255) NULL DEFAULT NULL'); + q('ALTER TABLE `retriever_resource` MODIFY COLUMN `data` mediumblob NULL DEFAULT NULL'); + q('ALTER TABLE `retriever_rule` MODIFY COLUMN `data` mediumtext NULL DEFAULT NULL'); Config::set('retriever', 'dbversion', '0.11'); } if (Config::get('retriever', 'dbversion') == '0.11') { - q("ALTER TABLE `retriever_resource` ADD INDEX `url` (`url`)"); - q("ALTER TABLE `retriever_resource` ADD INDEX `completed` (`completed`)"); - q("ALTER TABLE `retriever_item` ADD INDEX `finished` (`finished`)"); - q("ALTER TABLE `retriever_item` ADD INDEX `item-uid` (`item-uid`)"); + q('ALTER TABLE `retriever_resource` ADD INDEX `url` (`url`)'); + q('ALTER TABLE `retriever_resource` ADD INDEX `completed` (`completed`)'); + q('ALTER TABLE `retriever_item` ADD INDEX `finished` (`finished`)'); + q('ALTER TABLE 
`retriever_item` ADD INDEX `item-uid` (`item-uid`)'); Config::set('retriever', 'dbversion', '0.12'); } if (Config::get('retriever', 'dbversion') == '0.12') { @@ -206,16 +206,19 @@ function retrieve_resource($resource) { try { Logger::debug('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url']); $redirects = 0; - $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); + $cookiejar = ''; if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) { + $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); file_put_contents($cookiejar, $retriever_rule['cookiedata']); } $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar); if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) { $retriever_rule['cookiedata'] = file_get_contents($cookiejar); - //@@@ do the store here + Logger::debug('@@@ retriever_resource update cookie ' . json_encode($retriever_rule['data'] . ' id ' . $retriever_rule['id'])); + q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d", + DBA::escape(json_encode($retriever_rule['data'])), intval($retriever_rule["id"])); + unlink($cookiejar); } - unlink($cookiejar); $resource['data'] = $fetch_result->getBody(); $resource['http-code'] = $fetch_result->getReturnCode(); $resource['type'] = $fetch_result->getContentType(); @@ -323,17 +326,25 @@ function retriever_item_completed($retriever_item_id, $resource, $a) { function retriever_resource_completed($resource, $a) { Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . 
$resource['url']); $r = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource['id']); - foreach ($r as $rr) { - retriever_item_completed($rr['id'], $resource, $a); + foreach (DBA::select('retriever_item', ['id'], ['resource' => intval($resource['id'])]) as $retriever_item) { + Logger::debug('@@@ retriever_resource_completed got item id ' . $retriever_item['id']); + retriever_item_completed($retriever_item['id'], $resource, $a); } } function apply_retrospective($a, $retriever, $num) { + Logger::info('@@@ apply_retrospective'); $r = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d", intval($retriever['contact-id']), intval($num)); foreach ($r as $item) { + Logger::info('@@@ apply_retrospective item ' . $item['id']); q('UPDATE `item` SET `visible` = 0 WHERE `id` = %d', $item['id']); q('UPDATE `thread` SET `visible` = 0 WHERE `iid` = %d', $item['id']); + foreach (DBA::select('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]) as $retriever_item) { + Logger::info('@@@ about to delete retriever_item id ' . $retriever_item['id'] . ' uri ' . $item['uri'] . ' uid ' . $item['uid'] . ' contact ' . $item['contact-id']); + DBA::delete('retriever_resource', ['id' => $retriever_item['resource']]); + DBA::delete('retriever_item', ['id' => $retriever_item['id']]); + } retriever_on_item_insert($a, $retriever, $item); } } @@ -341,39 +352,31 @@ function apply_retrospective($a, $retriever, $num) { //@@@ make this trigger a retriever immediately somehow //@@@ need a lock to say something is doing something function retriever_on_item_insert($a, $retriever, &$item) { - Logger::info('@@@ 4 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); - foreach ($item as $key => $value) { - Logger::info("@@@ $key => $value"); - } - Logger::info('@@@ retriever_on_item_insert start ' . ' plink ' . 
$item['plink']); + Logger::info('@@@ retriever_on_item_insert start plink ' . $item['plink'] . ' id ' . $item['id']); if (!$retriever || !$retriever['id']) { Logger::info('retriever_on_item_insert: No retriever supplied'); return; } - if (!$retriever["data"]['enable'] == "on") { + if (!$retriever['data']['enable'] == "on") { Logger::info('@@@ retriever_on_item_insert: Disabled'); return; } - if (array_key_exists('plink', $item)) { + if (array_key_exists('plink', $item) && strlen($item['plink'])) { $url = $item['plink']; } else { - if (!array_key_exists('uri_id', $item)) { - Logger::warning('retriever_on_item_insert: item ' . ' has no plink and no uri-id'); - // @@@ find an identifier and put it in warning - Logger::warning('@@@ retriever_on_item_insert: item has: ' . print_r($item, true)); - foreach ($item as $key => $value) { - Logger::warning("@@@ $key => $value"); - } + if (!array_key_exists('uri-id', $item)) { + Logger::warning('retriever_on_item_insert: item ' . $item['id'] . ' has no plink and no uri-id'); return; } - $content = DBA::selectFirst('item-content', [], ['uri-id' => $item['uri_id']]); + $content = DBA::selectFirst('item-content', [], ['uri-id' => $item['uri-id']]); $url = $content['plink']; } - if (array_key_exists('pattern', $retriever["data"]) && $retriever["data"]['pattern']) { - $url = preg_replace('/' . $retriever["data"]['pattern'] . '/', $retriever["data"]['replace'], $url); - Logger::debug('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url); + if (array_key_exists('pattern', $retriever['data']) && $retriever['data']['pattern']) { + $orig_url = $url; + $url = preg_replace('/' . $retriever['data']['pattern'] . '/', $retriever['data']['replace'], $orig_url); + Logger::debug('retriever_on_item_insert: Changed ' . $orig_url . ' to ' . $url); } Logger::debug('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id'] . ' url ' . 
$url); @@ -476,15 +479,14 @@ function retriever_apply_xslt_text($xslt_text, $doc) { Logger::info('retriever_apply_xslt_text: could not load XML'); return $doc; } + Logger::debug('@@@ retriever_apply_xslt_text: ' . $xslt_text); $xp = new XsltProcessor(); $xp->importStylesheet($xslt_doc); $result = $xp->transformToDoc($doc); return $result; } -//@@@ is that an item or a resource_item? I really want an item here so I can update it function retriever_apply_dom_filter($retriever, &$item, $resource) { - Logger::debug('@@@ 6 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); Logger::debug('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id']); if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) { @@ -496,6 +498,7 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { return; } + //@@@ break this bit into separate function $encoding = retriever_get_encoding($resource); $content = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $encoding); $doc = new DOMDocument('1.0', 'UTF-8'); @@ -522,23 +525,19 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { return; } - Logger::info('@@@ retriever_apply_dom_filter: 1'); + //@@@ break this bit into separate function $components = parse_url($resource['redirect-url']); $rooturl = $components['scheme'] . "://" . $components['host']; $dirurl = $rooturl . dirname($components['path']) . 
"/"; - Logger::info('@@@ retriever_apply_dom_filter: 2'); $params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl); $fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/'); $fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params); - Logger::info('@@@ retriever_apply_dom_filter: 3'); $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc); - Logger::info('@@@ retriever_apply_dom_filter: 4'); if (!$doc) { Logger::info('retriever_apply_dom_filter: failed to apply fix urls XSLT template'); return; } - Logger::info('@@@ retriever_apply_dom_filter: 5'); $body = HTML::toBBCode($doc->saveHTML()); if (!strlen($body)) { Logger::info('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty'); @@ -548,9 +547,7 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { $body .= $item['plink']; $body .= ']' . $item['plink'] . '[/url]'; - Logger::info('@@@ retriever_apply_dom_filter: 6'); - $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? - Logger::info('@@@ retriever_apply_dom_filter: item id is ' . $item['id'] . ' uri id is ' . $uri_id); + $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? Consider using item['id'] instead Logger::debug('retriever_apply_dom_filter: XSLT result \"' . $body . 
'\"'); Item::update(['body' => $body], ['uri-id' => $uri_id]); } @@ -629,7 +626,7 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc if ((strpos($resource['type'], 'html') !== false) || (strpos($resource['type'], 'xml') !== false)) { retriever_apply_dom_filter($retriever, $item, $resource); - if ($retriever["data"]['images'] ) { + if ($retriever['data']['images'] ) { retrieve_images($item, $a); } } @@ -640,7 +637,7 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc function retriever_transform_images($a, &$item, $resource) { Logger::debug('@@@ 11 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); Logger::info('@@@ retriever_transform_images'); - if (!$resource["data"]) { + if (!$resource['data']) { Logger::info('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url']); return; } @@ -716,10 +713,13 @@ function retriever_content($a) { if (!empty($_POST["id"])) { $retriever_rule = get_retriever_rule($a->argv[1], local_user(), true); - $retriever_rule["data"] = array(); + $retriever_rule['data'] = array(); foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) { - if (!empty($_POST['retriever_' . $setting])) { - $retriever_rule["data"][$setting] = $_POST['retriever_' . $setting]; + if (empty($_POST['retriever_' . $setting])) { + $retriever_rule['data'][$setting] = NULL; + } + else { + $retriever_rule['data'][$setting] = $_POST['retriever_' . $setting]; } } foreach ($_POST as $k=>$v) { @@ -739,11 +739,11 @@ function retriever_content($a) { } } q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d", - DBA::escape(json_encode($retriever_rule["data"])), intval($retriever_rule["id"])); + DBA::escape(json_encode($retriever_rule['data'])), intval($retriever_rule["id"])); $a->page['content'] .= "

Settings Updated"; if (!empty($_POST["retriever_retrospective"])) { apply_retrospective($a, $retriever_rule, $_POST["retriever_retrospective"]); - $a->page['content'] .= " and retrospectively applied to " . $_POST["apply"] . " posts"; + $a->page['content'] .= " and retrospectively applied to " . $_POST["retriever_retrospective"] . " posts"; } $a->page['content'] .= ".

"; } @@ -757,12 +757,12 @@ function retriever_content($a) { '$pattern' => array( 'retriever_pattern', L10n::t('URL Pattern'), - $retriever_rule["data"]['pattern'], + $retriever_rule['data']['pattern'], L10n::t('Regular expression matching part of the URL to replace')), '$replace' => array( 'retriever_replace', L10n::t('URL Replace'), - $retriever_rule["data"]['replace'], + $retriever_rule['data']['replace'], L10n::t('Text to replace matching part of above regular expression')), '$images' => array( 'retriever_images', @@ -802,7 +802,7 @@ function retriever_content($a) { '$include_t' => L10n::t('Include'), '$include' => $retriever_rule['data']['include'], '$exclude_t' => L10n::t('Exclude'), - '$exclude' => $retriever_rule["data"]['exclude'])); + '$exclude' => $retriever_rule['data']['exclude'])); return; } } diff --git a/retriever/templates/extract.tpl b/retriever/templates/extract.tpl index f24a860d..ca67f683 100644 --- a/retriever/templates/extract.tpl +++ b/retriever/templates/extract.tpl @@ -3,25 +3,17 @@ - -{{function clause_xpath}} -{{if !$clause.attribute}} -{{$clause.element}}{{elseif $clause.attribute == 'class'}} -{{$clause.element}}[contains(concat(' ', normalize-space(@class), ' '), '{{$clause.value}}')]{{else}} -{{$clause.element}}[@{{$clause.attribute}}='{{$clause.value}}']{{/if}} -{{/function}} - +{{function clause_xpath}}{{if !$clause.attribute}}{{$clause.element}}{{elseif $clause.attribute == 'class'}}{{$clause.element}}[contains(concat(' ', normalize-space(@class), ' '), '{{$clause.value}}')]{{else}}{{$clause.element}}[@{{$clause.attribute}}='{{$clause.value}}']{{/if}}{{/function}} {{foreach $spec.include as $clause}} + - -{{/foreach}} - + {{/foreach}} {{foreach $spec.exclude as $clause}} - -{{/foreach}} + + {{/foreach}} From 0e3805bb3c9a64c269be0a4e78ebd88ca7a74684 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 29 Sep 2019 17:04:34 +0200 Subject: [PATCH 18/96] fakerei2 --- retriever/retriever.php | 11 +++++++---- 1 file changed, 7 
insertions(+), 4 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 4d701276..294fba67 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -51,8 +51,10 @@ function retriever_install() { $schema = file_get_contents(dirname(__file__).'/database.sql'); $arr = explode(';', $schema); foreach ($arr as $a) { - $r = q($a); - //@@@ check for errors + if (!DBA::e($a)) { + Logger::warning('Unable to create database table: ' . DBA::errorMessage()); + return; + } } Config::set('retriever', 'dbversion', '0.13'); } @@ -142,7 +144,8 @@ function retriever_clean_up_completed_resources($max_items, $a) { } Logger::debug('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . count($r)); foreach ($r as $rr) { - $resource = q("SELECT * FROM retriever_resource WHERE `id` = %d", $rr['resource']); + $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]); + Logger::info('@@@ retriever_clean_up_completed_resources did alternate thing resource type ' . $resource['type']); $retriever_item = retriever_get_retriever_item($rr['item']); if (!DBA::isResult($retriever_item)) { Logger::warning('retriever_clean_up_completed_resources: no retriever item with id ' . 
$rr['item']); @@ -159,7 +162,7 @@ function retriever_clean_up_completed_resources($max_items, $a) { continue; } Logger::info('@@@ retriever_clean_up_completed_resources: about to retriever_apply_completed_resource_to_item'); - retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource[0], $a); + retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a); q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", intval($retriever_item['id'])); retriever_check_item_completed($item); } From 26f231f39584927b9094095f252c189509fb8576 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 29 Sep 2019 17:09:11 +0200 Subject: [PATCH 19/96] more dba stuff --- retriever/retriever.php | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 294fba67..bbe138b9 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -144,8 +144,6 @@ function retriever_clean_up_completed_resources($max_items, $a) { } Logger::debug('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . count($r)); foreach ($r as $rr) { - $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]); - Logger::info('@@@ retriever_clean_up_completed_resources did alternate thing resource type ' . $resource['type']); $retriever_item = retriever_get_retriever_item($rr['item']); if (!DBA::isResult($retriever_item)) { Logger::warning('retriever_clean_up_completed_resources: no retriever item with id ' . $rr['item']); @@ -161,7 +159,7 @@ function retriever_clean_up_completed_resources($max_items, $a) { Logger::warning('retriever_clean_up_completed_resources: no retriever for uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['uid'] . ' ' . 
$retriever_item['contact-id']); continue; } - Logger::info('@@@ retriever_clean_up_completed_resources: about to retriever_apply_completed_resource_to_item'); + $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]); retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a); q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", intval($retriever_item['id'])); retriever_check_item_completed($item); From 3abf8c6850a93a401ea3230e17d73ec61174ccc9 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 29 Sep 2019 20:59:14 +0200 Subject: [PATCH 20/96] fixed image regex --- retriever/retriever.php | 70 +++++++++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 20 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index bbe138b9..20ab1ee8 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -47,7 +47,10 @@ function retriever_install() { q("ALTER TABLE `retriever_resource` ADD COLUMN `item-uid` int(10) unsigned NOT NULL DEFAULT '0' AFTER `id`"); Config::set('retriever', 'dbversion', '0.13'); } - if (Config::get('retriever', 'dbversion') != '0.13') { + if (Config::get('retriever', 'dbversion') == '0.13') { + Config::set('retriever', 'downloads_per_cron', '100'); + } + if (Config::get('retriever', 'dbversion') != '0.14') { $schema = file_get_contents(dirname(__file__).'/database.sql'); $arr = explode(';', $schema); foreach ($arr as $a) { @@ -56,7 +59,8 @@ function retriever_install() { return; } } - Config::set('retriever', 'dbversion', '0.13'); + Config::set('retriever', 'downloads_per_cron', '100'); + Config::set('retriever', 'dbversion', '0.14'); } } @@ -72,20 +76,37 @@ function retriever_uninstall() { function retriever_module() {} +function retriever_addon_admin(&$a, &$o) { + $downloads_per_cron = Config::get('retriever', 'downloads_per_cron'); + $template = Renderer::getMarkupTemplate('admin.tpl', 'addon/retriever/'); + $config = 
['downloads_per_cron', + L10n::t('Downloads per Cron'), + $downloads_per_cron, + L10n::t('Maximum number of downloads to attempt during each run of the cron job.')]; + $o .= Renderer::replaceMacros($template, [ + '$downloads_per_cron' => $config, + '$submit' => L10n::t('Save Settings')]); +} + +function retriever_addon_admin_post ($a) { + if (!empty($_POST['downloads_per_cron'])) { + Config::set('retriever', 'downloads_per_cron', $_POST['downloads_per_cron']); + } +} + function retriever_cron($a, $b) { - // 100 is a nice sane number. Maybe this should be configurable. @@@ + $downloads_per_cron = Config::get('retriever', 'downloads_per_cron'); - // Do this first, otherwise it can interfere with retreiver_retrieve_items - retriever_clean_up_completed_resources(100, $a); + // Do this first, otherwise it can interfere with retriever_retrieve_items + retriever_clean_up_completed_resources($downloads_per_cron, $a); - retriever_retrieve_items(100, $a); + retriever_retrieve_items($downloads_per_cron, $a); retriever_tidy(); } $retriever_item_count = 0; function retriever_retrieve_items($max_items, $a) { - Logger::info('@@@ retriever_retrieve_items'); global $retriever_item_count; $retriever_schedule = array(array(1,'minute'), @@ -108,8 +129,7 @@ function retriever_retrieve_items($max_items, $a) { $retrieve_items = $max_items - $retriever_item_count; Logger::debug('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items); do { - Logger::info('@@@ retriever_retrieve_items loop max ' . $max_items . ' count ' . $retriever_item_count); - Logger::info("@@@ SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR " . implode($schedule_clauses, ' OR ') . ") ORDER BY `last-try` ASC LIMIT " . 
$retrieve_items); + // TODO: figure out how to do this with DBA module $retriever_resources = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d", DBA::escape(implode($schedule_clauses, ' OR ')), intval($retrieve_items)); @@ -121,7 +141,6 @@ function retriever_retrieve_items($max_items, $a) { } Logger::debug('retriever_retrieve_items: found ' . count($retriever_resources) . ' waiting resources in database'); foreach ($retriever_resources as $retriever_resource) { - Logger::info('@@@ need to get the retriever config here cid ' . $retriever_resource['contact-id'] . ' uid ' . $retriever_resource['item-uid']); retrieve_resource($retriever_resource); $retriever_item_count++; } @@ -129,7 +148,7 @@ function retriever_retrieve_items($max_items, $a) { } while ($retrieve_items > 0); // @@@ todo: when items add further items (i.e. images), do the new images go round this loop again? - Logger::info('@@@ retriever_retrieve_items: finished retrieving items'); + Logger::debug('retriever_retrieve_items: finished retrieving items'); } /* Look for items that are waiting even though the resource has @@ -137,7 +156,8 @@ function retriever_retrieve_items($max_items, $a) { * retrospectively apply a config change. It could also happen due to * a cron job dying or something. 
*/ function retriever_clean_up_completed_resources($max_items, $a) { - $r = q("SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d", + // TODO: figure out how to do this with DBA module + $r = q('SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d', intval($max_items)); if (!$r) { $r = array(); @@ -161,6 +181,7 @@ function retriever_clean_up_completed_resources($max_items, $a) { } $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]); retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a); + //@@@ next one to do q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", intval($retriever_item['id'])); retriever_check_item_completed($item); } @@ -208,8 +229,10 @@ function retrieve_resource($resource) { Logger::debug('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url']); $redirects = 0; $cookiejar = ''; + Logger::debug('@@@ retrieve_resource storecookies ' . $retriever_rule['storecookies']); if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) { $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); + Logger::debug('@@@ retrieve_resource cookie file ' . $cookiejar . ' content ' . 
$retriever_rule['cookiedata']); file_put_contents($cookiejar, $retriever_rule['cookiedata']); } $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar); @@ -218,7 +241,7 @@ function retrieve_resource($resource) { Logger::debug('@@@ retriever_resource update cookie ' . json_encode($retriever_rule['data'] . ' id ' . $retriever_rule['id'])); q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d", DBA::escape(json_encode($retriever_rule['data'])), intval($retriever_rule["id"])); - unlink($cookiejar); + /* unlink($cookiejar); */ //@@@ } $resource['data'] = $fetch_result->getBody(); $resource['http-code'] = $fetch_result->getReturnCode(); @@ -350,8 +373,8 @@ function apply_retrospective($a, $retriever, $num) { } } -//@@@ make this trigger a retriever immediately somehow -//@@@ need a lock to say something is doing something +// TODO: Currently this waits until the next cron before actually downloading. Should do it immediately. +// TODO: This queries then inserts. It should use some kind of lock to avoid requesting the same resource twice. function retriever_on_item_insert($a, $retriever, &$item) { Logger::info('@@@ retriever_on_item_insert start plink ' . $item['plink'] . ' id ' . $item['id']); if (!$retriever || !$retriever['id']) { @@ -397,6 +420,7 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { fclose($fp); $url = 'md5://' . hash('md5', $url); + //@@@ fix this $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid)); $resource = $r[0]; if (count($r)) { @@ -405,6 +429,7 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { } Logger::debug('retrieve_resource: got data URL type ' . $resource['type']); + //@@@ fix this q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `type`, `binary`, `url`, `completed`, `data`) " . 
"VALUES (%d, %d, '%s', %d, '%s', now(), '%s')", intval($uid), @@ -425,6 +450,7 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { Logger::warning('add_retriever_resource: URL is longer than 800 characters'); } + //@@@ fix this $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid)); if (count($r)) { Logger::debug('add_retriever_resource: Resource ' . $url . ' uid ' . $uid . ' cid ' . $cid . ' already requested'); @@ -554,24 +580,29 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { } function retrieve_images(&$item, $a) { + // Note that $item doesn't necessarily contain all the fields you would expect, in particular 'id' $blah_item_class = retriever_class_of_item($item) . ' ' . mat_test($item); Logger::debug('@@@ 7 item class is ' . $blah_item_class); + Logger::debug('@@@ retrieve_images start item '. $item['id'] . ' uri ' . $item['uri'] . ' uri id ' . $item['uri-id'] . ' plink ' . $item['plink'] . ' guid ' . $item['guid']); $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? - $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]); + $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $uri_id]); $body = $content['body']; if (!strlen($body)) { Logger::warning('retrieve_images: no body for uri-id ' . $uri_id); return; } - Logger::info('@@@ retrieve_images start looking in body "' . $body . '"'); + Logger::info('@@@ retrieve_images looking in body "' . $body . '"'); + // I suspect that matches1 and matches2 are not used any more? 
$matches1 = array(); preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $body, $matches1); $matches2 = array(); preg_match_all("/\[img\](.*?)\[\/img\]/ism", $body, $matches2); - $matches = array_merge($matches1[3], $matches2[1]); + $matches3 = array(); + preg_match_all("/\[img\=([^\]]*)\]([^[]*)\[\/img\]/ism", $body, $matches3); + $matches = array_merge($matches1[3], $matches2[1], $matches3[1]); Logger::debug('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); foreach ($matches as $url) { Logger::debug('@@@ retrieve_images: url ' . $url); @@ -615,7 +646,6 @@ function retriever_check_item_completed(&$item) } function retriever_apply_completed_resource_to_item($retriever, &$item, $resource, $a) { - Logger::debug('@@@ 10 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); Logger::debug('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . $item['plink']); if (strpos($resource['type'], 'image') !== false) { Logger::info('@@@ retriever_apply_completed_resource_to_item this is an image must transform'); @@ -676,7 +706,7 @@ function retriever_transform_images($a, &$item, $resource) { return; } - $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]); + $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $uri_id]); $body = $content['body']; Logger::info('@@@ retriever_transform_images: found body for uri id ' . $uri_id . ': ' . 
$body); From 396f64a476f1f3d48a24082bfca9dafbf3cc39af Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 29 Sep 2019 22:05:49 +0200 Subject: [PATCH 21/96] Stuff in retriever --- retriever/retriever.php | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 20ab1ee8..f43504e6 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -147,14 +147,10 @@ function retriever_retrieve_items($max_items, $a) { $retrieve_items = $max_items - $retriever_item_count; } while ($retrieve_items > 0); - // @@@ todo: when items add further items (i.e. images), do the new images go round this loop again? Logger::debug('retriever_retrieve_items: finished retrieving items'); } -/* Look for items that are waiting even though the resource has - * completed. This usually happens because we've been asked to - * retrospectively apply a config change. It could also happen due to - * a cron job dying or something. */ +// Look for items that are waiting even though the resource has completed. This shouldn't happen, but is worth cleaning up if it does. 
function retriever_clean_up_completed_resources($max_items, $a) { // TODO: figure out how to do this with DBA module $r = q('SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d', @@ -181,13 +177,14 @@ function retriever_clean_up_completed_resources($max_items, $a) { } $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]); retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a); - //@@@ next one to do - q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", intval($retriever_item['id'])); + Logger::info('@@@ retriever_clean_up_completed_resources tried to update id ' . $retriver_item['id'] . ' to finished, better check that it really worked!'); + DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished']); retriever_check_item_completed($item); } } function retriever_tidy() { + // TODO: figure out how to do this with DBA module q("DELETE FROM retriever_resource WHERE completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)"); q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)"); @@ -581,8 +578,6 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { function retrieve_images(&$item, $a) { // Note that $item doesn't necessarily contain all the fields you would expect, in particular 'id' - $blah_item_class = retriever_class_of_item($item) . ' ' . mat_test($item); - Logger::debug('@@@ 7 item class is ' . $blah_item_class); Logger::debug('@@@ retrieve_images start item '. $item['id'] . ' uri ' . $item['uri'] . ' uri id ' . $item['uri-id'] . 
' plink ' . $item['plink'] . ' guid ' . $item['guid']); $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? @@ -595,27 +590,21 @@ function retrieve_images(&$item, $a) { } Logger::info('@@@ retrieve_images looking in body "' . $body . '"'); - // I suspect that matches1 and matches2 are not used any more? - $matches1 = array(); - preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $body, $matches1); - $matches2 = array(); - preg_match_all("/\[img\](.*?)\[\/img\]/ism", $body, $matches2); - $matches3 = array(); - preg_match_all("/\[img\=([^\]]*)\]([^[]*)\[\/img\]/ism", $body, $matches3); + // I suspect that the first two are not used any more? + preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $item["body"], $matches1); + preg_match_all("/\[img\](.*?)\[\/img\]/ism", $item["body"], $matches2); + preg_match_all("/\[img\=([^\]]*)\]([^[]*)\[\/img\]/ism", $item["body"], $matches3); $matches = array_merge($matches1[3], $matches2[1], $matches3[1]); Logger::debug('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); foreach ($matches as $url) { Logger::debug('@@@ retrieve_images: url ' . $url); if (strpos($url, get_app()->getBaseUrl()) === FALSE) { - Logger::debug('@@@ retrieve_images: it is from somewhere else'); Logger::debug('@@@ retrieve_images: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . 
$item['contact-id']); $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id'], true); if (!$resource['completed']) { - Logger::debug('@@@ retrieve_images: do not have it yet, get it later'); add_retriever_item($item, $resource); } else { - Logger::debug('@@@ retrieve_images: got it already, transform'); retriever_transform_images($a, $item, $resource); } } From d19d96b5802b5646f9e2dcc6dc1a5b946411b3fd Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 30 Sep 2019 08:25:00 +0200 Subject: [PATCH 22/96] update version number --- retriever/retriever.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index f43504e6..fcd45b46 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -1,8 +1,8 @@ */ From 9a859040030b769633b05b1eafa51b7531b77e7e Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 30 Sep 2019 08:25:16 +0200 Subject: [PATCH 23/96] configurable number of requests --- retriever/templates/admin.tpl | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 retriever/templates/admin.tpl diff --git a/retriever/templates/admin.tpl b/retriever/templates/admin.tpl new file mode 100644 index 00000000..b5a35961 --- /dev/null +++ b/retriever/templates/admin.tpl @@ -0,0 +1,8 @@ +{{* + * AUTOMATICALLY GENERATED TEMPLATE + * DO NOT EDIT THIS FILE, CHANGES WILL BE OVERWRITTEN + * + *}} +{{include file="field_input.tpl" field=$downloads_per_cron}} +
+ From 86ea938621a3c66421d85d0db836d06d873c24c4 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 30 Sep 2019 20:52:05 +0200 Subject: [PATCH 24/96] Add phototrack and publicise --- phototrack/database.sql | 23 ++ phototrack/phototrack.php | 258 ++++++++++++++++++++ publicise/publicise.php | 431 ++++++++++++++++++++++++++++++++++ publicise/templates/admin.tpl | 39 +++ 4 files changed, 751 insertions(+) create mode 100644 phototrack/database.sql create mode 100644 phototrack/phototrack.php create mode 100644 publicise/publicise.php create mode 100644 publicise/templates/admin.tpl diff --git a/phototrack/database.sql b/phototrack/database.sql new file mode 100644 index 00000000..f1b58f6b --- /dev/null +++ b/phototrack/database.sql @@ -0,0 +1,23 @@ +CREATE TABLE IF NOT EXISTS `phototrack_photo_use` ( + `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `resource-id` char(64) NOT NULL, + `table` char(64) NOT NULL, + `field` char(64) NOT NULL, + `row-id` int(11) NOT NULL, + `checked` timestamp NOT NULL DEFAULT now(), + PRIMARY KEY (`id`), + INDEX `resource-id` (`resource-id`), + INDEX `row` (`table`,`field`,`row-id`) +) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; + +CREATE TABLE IF NOT EXISTS `phototrack_row_check` ( + `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `table` char(64) NOT NULL, + `row-id` int(11) NOT NULL, + `checked` timestamp NOT NULL DEFAULT now(), + PRIMARY KEY (`id`), + INDEX `row` (`table`,`row-id`), + INDEX `checked` (`checked`) +) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; + +SELECT TRUE diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php new file mode 100644 index 00000000..8b909f5d --- /dev/null +++ b/phototrack/phototrack.php @@ -0,0 +1,258 @@ + + */ + +/* + * List of tables and the fields that are checked: + * + * contact: photo thumb micro about + * fcontact: photo + * fsuggest: photo + * gcontact: photo about + * item: body + * item-content: body + * mail: from-photo + * notify: photo + * profile: photo 
thumb about + */ + +use Friendica\Core\Addon; +use Friendica\Core\Config; +use Friendica\Core\Logger; +use Friendica\Object\Image; +use Friendica\Database\DBA; + +if (!defined('PHOTOTRACK_DEFAULT_BATCH_SIZE')) { + define('PHOTOTRACK_DEFAULT_BATCH_SIZE', 1000); +} +// Time in *minutes* between searching for photo uses +if (!defined('PHOTOTRACK_DEFAULT_SEARCH_INTERVAL')) { + define('PHOTOTRACK_DEFAULT_SEARCH_INTERVAL', 10); +} + +function phototrack_install() { + global $db; + + Addon::registerHook('post_local_end', 'addon/phototrack/phototrack.php', 'phototrack_post_local_end'); + Addon::registerHook('post_remote_end', 'addon/phototrack/phototrack.php', 'phototrack_post_remote_end'); + Addon::registerHook('notifier_end', 'addon/phototrack/phototrack.php', 'phototrack_notifier_end'); + Addon::registerHook('cron', 'addon/phototrack/phototrack.php', 'phototrack_cron'); + + if (Config::get('phototrack', 'dbversion') != '0.1') { + $schema = file_get_contents(dirname(__file__).'/database.sql'); + $arr = explode(';', $schema); + foreach ($arr as $a) { + if (!DBA::e($a)) { + Logger::warning('Unable to create database table: ' . 
DBA::errorMessage()); + return; + } + } + Config::set('phototrack', 'dbversion', '0.1'); + } +} + +function phototrack_uninstall() { + Addon::unregisterHook('post_local_end', 'addon/phototrack/phototrack.php', 'phototrack_post_local_end'); + Addon::unregisterHook('post_remote_end', 'addon/phototrack/phototrack.php', 'phototrack_post_remote_end'); + Addon::unregisterHook('notifier_end', 'addon/phototrack/phototrack.php', 'phototrack_notifier_end'); + Addon::unregisterHook('cron', 'addon/phototrack/phototrack.php', 'phototrack_cron'); +} + +function phototrack_module() {} + +function phototrack_finished_row($table, $id) { + $existing = DBA::selectFirst('phototrack_row_check', ['id'], ['table' => $table, 'row-id' => $id]); + if (!is_bool($existing)) { + q("UPDATE phototrack_row_check SET checked = NOW() WHERE `table` = '$table' AND `row-id` = '$id'"); + } + else { + q("INSERT INTO phototrack_row_check (`table`, `row-id`, `checked`) VALUES ('$table', '$id', NOW())"); + } +} + +function phototrack_photo_use($photo, $table, $field, $id) { + Logger::debug('@@@ phototrack_photo_use ' . 
$photo); + foreach (Image::supportedTypes() as $m => $e) { + $photo = str_replace(".$e", '', $photo); + } + if (substr($photo, -2, 1) == '-') { + $resolution = intval(substr($photo,-1,1)); + $photo = substr($photo,0,-2); + } + if (strlen($photo) != 32) { + return; + } + $r = q("SELECT `resource-id` FROM `photo` WHERE `resource-id` = '%s' LIMIT 1", DBA::escape($photo)); + if (!count($r)) { + return; + } + $rid = $r[0]['resource-id']; + $existing = q("SELECT id FROM phototrack_photo_use WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); + if (count($existing)) { + q("UPDATE phototrack_photo_use SET checked = NOW() WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); + } + else { + q("INSERT INTO phototrack_photo_use (`resource-id`, `table`, `field`, `row-id`, `checked`) VALUES ('$rid', '$table', '$field', '$id', NOW())"); + } +} + +function phototrack_check_field_url($a, $table, $field, $id, $url) { + Logger::info('@@@ phototrack_check_field_url table ' . $table . ' field ' . $field . ' id ' . $id . ' url ' . $url); + $baseurl = $a->getBaseURL(); + if (strpos($url, $baseurl) !== FALSE) { + $url = substr($url, strlen($baseurl)); + Logger::info('@@@ phototrack_check_field_url funny url stuff ' . $url . ' base ' . $baseurl); + } + if (strpos($url, '/photo/') !== FALSE) { + $rid = substr($url, strlen('/photo/')); + Logger::info('@@@ phototrack_check_field_url rid ' . 
$rid); + phototrack_photo_use($rid, $table, $field, $id); + } +} + +function phototrack_check_field_bbcode($a, $table, $field, $id, $value) { + $baseurl = $a->getBaseURL(); + $matches = array(); + preg_match_all("/\[img(\=([0-9]*)x([0-9]*))?\](.*?)\[\/img\]/ism", $value, $matches); + foreach ($matches[4] as $url) { + phototrack_check_field_url($a, $table, $field, $id, $url); + } +} + +function phototrack_post_local_end(&$a, &$item) { + phototrack_check_row($a, 'item', $item); + phototrack_check_row($a, 'item-content', $item); +} + +function phototrack_post_remote_end(&$a, &$item) { + phototrack_check_row($a, 'item', $item); + phototrack_check_row($a, 'item-content', $item); +} + +function phototrack_notifier_end($item) { + $a = get_app(); +} + +function phototrack_check_row($a, $table, $row) { + switch ($table) { + case 'item': + $fields = array( + 'body' => 'bbcode'); + break; + case 'item-content': + $fields = array( + 'body' => 'bbcode'); + break; + case 'contact': + $fields = array( + 'photo' => 'url', + 'thumb' => 'url', + 'micro' => 'url', + 'about' => 'bbcode'); + break; + case 'fcontact': + $fields = array( + 'photo' => 'url'); + break; + case 'fsuggest': + $fields = array( + 'photo' => 'url'); + break; + case 'gcontact': + $fields = array( + 'photo' => 'url', + 'about' => 'bbcode'); + break; + default: $fields = array(); break; + } + foreach ($fields as $field => $type) { + switch ($type) { + case 'bbcode': phototrack_check_field_bbcode($a, $table, $field, $row['id'], $row[$field]); break; + case 'url': phototrack_check_field_url($a, $table, $field, $row['id'], $row[$field]); break; + } + } + phototrack_finished_row($table, $row['id']); +} + +function phototrack_batch_size() { + $batch_size = Config::get('phototrack', 'batch_size'); + if ($batch_size > 0) { + return $batch_size; + } + return PHOTOTRACK_DEFAULT_BATCH_SIZE; +} + +function phototrack_search_table($a, $table) { + $batch_size = phototrack_batch_size(); + $rows = q("SELECT `$table`.* FROM 
`$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) ) ORDER BY phototrack_row_check.checked LIMIT $batch_size"); + foreach ($rows as $row) { + phototrack_check_row($a, $table, $row); + } + $r = q("SELECT COUNT(*) FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) )"); + $remaining = $r[0]['COUNT(*)']; + Logger::info('phototrack: searched ' . count($rows) . ' rows in table ' . $table . ', ' . $remaining . ' still remaining to search'); + return $remaining; +} + +function phototrack_cron_time() { + $prev_remaining = Config::get('phototrack', 'remaining_items'); + if ($prev_remaining > 10 * phototrack_batch_size()) { + Logger::debug('phototrack: more than ' . (10 * phototrack_batch_size()) . 
' items remaining'); + return true; + } + $last = Config::get('phototrack', 'last_search'); + $search_interval = intval(Config::get('phototrack', 'search_interval')); + if (!$search_interval) { + $search_interval = PHOTOTRACK_DEFAULT_SEARCH_INTERVAL; + } + if ($last) { + $next = $last + ($search_interval * 60); + if ($next > time()) { + Logger::debug('phototrack: search interval not reached'); + return false; + } + } + return true; +} + +function phototrack_cron($a, $b) { + if (!phototrack_cron_time()) { + return; + } + Config::set('phototrack', 'last_search', time()); + + $remaining = 0; + $remaining += phototrack_search_table($a, 'item'); + $remaining += phototrack_search_table($a, 'item-content'); + $remaining += phototrack_search_table($a, 'contact'); + $remaining += phototrack_search_table($a, 'fcontact'); + $remaining += phototrack_search_table($a, 'fsuggest'); + $remaining += phototrack_search_table($a, 'gcontact'); + + Config::set('phototrack', 'remaining_items', $remaining); + if ($remaining === 0) { + phototrack_tidy(); + } +} + +function phototrack_tidy() { + $batch_size = phototrack_batch_size(); + q('CREATE TABLE IF NOT EXISTS `phototrack-temp` (`resource-id` char(255) not null)'); + q('INSERT INTO `phototrack-temp` SELECT DISTINCT(`resource-id`) FROM photo WHERE photo.`created` < DATE_SUB(NOW(), INTERVAL 2 MONTH)'); + $rows = q('SELECT `phototrack-temp`.`resource-id` FROM `phototrack-temp` LEFT OUTER JOIN phototrack_photo_use ON (`phototrack-temp`.`resource-id` = phototrack_photo_use.`resource-id`) WHERE phototrack_photo_use.id IS NULL limit ' . /*$batch_size*/1000); + foreach ($rows as $row) { + Logger::debug('phototrack: remove photo ' . $row['resource-id']); + q('DELETE FROM photo WHERE `resource-id` = "' . $row['resource-id'] . '"'); + } + q('DROP TABLE `phototrack-temp`'); + Logger::info('phototrack_tidy: deleted ' . count($rows) . 
' photos'); + $rows = q('SELECT id FROM phototrack_photo_use WHERE checked < DATE_SUB(NOW(), INTERVAL 14 DAY)'); + foreach ($rows as $row) { + q('DELETE FROM phototrack_photo_use WHERE id = ' . $row['id']); + } + Logger::info('phototrack_tidy: deleted ' . count($rows) . ' phototrack_photo_use rows'); +} diff --git a/publicise/publicise.php b/publicise/publicise.php new file mode 100644 index 00000000..d27eefd4 --- /dev/null +++ b/publicise/publicise.php @@ -0,0 +1,431 @@ + + */ + +use Friendica\Core\Addon; +use Friendica\Core\Logger; +use Friendica\Core\Renderer; +use Friendica\Core\L10n; +use Friendica\Database\DBA; + +function publicise_install() { + Addon::registerHook('post_remote', 'addon/publicise/publicise.php', 'publicise_post_remote_hook'); +} + +function publicise_uninstall() { + Addon::unregisterHook('post_remote', 'addon/publicise/publicise.php', 'publicise_post_remote_hook'); + Addon::unregisterHook('post_remote_end', 'addon/publicise/publicise.php', 'publicise_post_remote_end_hook'); +} + +function publicise_get_contacts() { + $query = <<$v) { + $enabled = ($v['reason'] === 'publicise') ? 1 : NULL; + $expire = 30; + $comments = 1; + $url = $v['url']; + if ($enabled) { + $r = q('SELECT * FROM `user` WHERE `uid` = %d', intval($v['uid'])); + $expire = $r[0]['expire']; + $url = $a->get_baseurl() . '/profile/' . $v['nick']; + if ($r[0]['page-flags'] == PAGE_SOAPBOX) { + $comments = NULL; + } + if ($r[0]['account_expired']) { + $enabled = NULL; + } + } + $contacts[$k]['enabled'] = array('publicise-enabled-' . $v['id'], NULL, $enabled); + $contacts[$k]['comments'] = array('publicise-comments-' . 
$v['id'], NULL, $comments); + $contacts[$k]['expire'] = $expire; + $contacts[$k]['url'] = $url; + } + $template = Renderer::getMarkupTemplate('admin.tpl', 'addon/publicise/'); + $o .= Renderer::replaceMacros($template, array( + '$feeds' => $contacts, + '$feed_t' => L10n::t('Feed'), + '$publicised_t' => L10n::t('Publicised'), + '$comments_t' => L10n::t('Allow Comments/Likes'), + '$expire_t' => L10n::t('Expire Articles After (Days)'), + '$submit_t' => L10n::t('Submit'))); +} + +function publicise_make_string($in) { + return "'" . DBA::escape($in) . "'"; +} + +function publicise_make_int($in) { + return intval($in) ? $in : 0; +} + +function publicise_create_user($owner, $contact) { + + $nick = $contact['nick']; + if (!$nick) { + notice(sprintf(t("Can't publicise feed \"%s\" because it doesn't have a nickname"), $contact['name']) . EOL); + return; + } + Logger::info('Publicise: create user, beginning key generation...'); + $res=openssl_pkey_new(array( + 'digest_alg' => 'sha1', + 'private_key_bits' => 4096, + 'encrypt_key' => false )); + $prvkey = ''; + openssl_pkey_export($res, $prvkey); + $pkey = openssl_pkey_get_details($res); + $pubkey = $pkey["key"]; + $sres=openssl_pkey_new(array( + 'digest_alg' => 'sha1', + 'private_key_bits' => 512, + 'encrypt_key' => false )); + $sprvkey = ''; + openssl_pkey_export($sres, $sprvkey); + $spkey = openssl_pkey_get_details($sres); + $spubkey = $spkey["key"]; + $guid = generate_user_guid(); + + $newuser = array( + 'guid' => publicise_make_string($guid), + 'username' => publicise_make_string($contact['name']), + 'password' => publicise_make_string($owner['password']), + 'nickname' => publicise_make_string($contact['nick']), + 'email' => publicise_make_string($owner['email']), + 'openid' => publicise_make_string($owner['openid']), + 'timezone' => publicise_make_string($owner['timezone']), + 'language' => publicise_make_string($owner['language']), + 'register_date' => publicise_make_string(datetime_convert()), + 'default-location' => 
publicise_make_string($owner['default-location']), + 'allow_location' => publicise_make_string($owner['allow_location']), + 'theme' => publicise_make_string($owner['theme']), + 'pubkey' => publicise_make_string($pubkey), + 'prvkey' => publicise_make_string($prvkey), + 'spubkey' => publicise_make_string($spubkey), + 'sprvkey' => publicise_make_string($sprvkey), + 'verified' => publicise_make_int($owner['verified']), + 'blocked' => publicise_make_int(0), + 'blockwall' => publicise_make_int(1), + 'hidewall' => publicise_make_int(0), + 'blocktags' => publicise_make_int(0), + 'notify-flags' => publicise_make_int($owner['notifyflags']), + 'page-flags' => publicise_make_int($comments ? PAGE_COMMUNITY : PAGE_SOAPBOX), + 'expire' => publicise_make_int($expire), + ); + Logger::debug('Publicise: creating user ' . print_r($newuser, true)); + $r = q("INSERT INTO `user` (`" + . implode("`, `", array_keys($newuser)) + . "`) VALUES (" + . implode(", ", array_values($newuser)) + . ")" ); + if (!$r) { + Logger::warning('Publicise: create user failed'); + return; + } + $r = q('SELECT * FROM `user` WHERE `guid` = "%s"', DBA::escape($guid)); + if (count($r) != 1) { + Logger::warning('Publicise: unexpected number of uids returned'); + return; + } + Logger::debug('Publicise: created user ID ' . $r[0]); + return $r[0]; +} + +function publicise_create_self_contact($a, $contact, $uid) { + $newcontact = array( + 'uid' => $uid, + 'created' => publicise_make_string(datetime_convert()), + 'self' => publicise_make_int(1), + 'name' => publicise_make_string($contact['name']), + 'nick' => publicise_make_string($contact['nick']), + 'photo' => publicise_make_string($contact['photo']), + 'thumb' => publicise_make_string($contact['thumb']), + 'micro' => publicise_make_string($contact['micro']), + 'blocked' => publicise_make_int(0), + 'pending' => publicise_make_int(0), + 'url' => publicise_make_string($a->get_baseurl() . '/profile/' . 
$contact['nick']), + 'nurl' => publicise_make_string($a->get_baseurl() . '/profile/' . $contact['nick']), + 'request' => publicise_make_string($a->get_baseurl() . '/dfrn_request/' . $contact['nick']), + 'notify' => publicise_make_string($a->get_baseurl() . '/dfrn_notify/' . $contact['nick']), + 'poll' => publicise_make_string($a->get_baseurl() . '/dfrn_poll/' . $contact['nick']), + 'confirm' => publicise_make_string($a->get_baseurl() . '/dfrn_confirm/' . $contact['nick']), + 'poco' => publicise_make_string($a->get_baseurl() . '/poco/' . $contact['nick']), + 'uri-date' => publicise_make_string(datetime_convert()), + 'avatar-date' => publicise_make_string(datetime_convert()), + 'closeness' => publicise_make_int(0), + ); + $existing = q("SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1", intval($uid)); + if (count($existing)) { + $newcontact = $existing[0]; + Logger::debug('Publicise: self contact already exists for user ' . $uid . ' id ' . $newcontact['id']); + } else { + Logger::debug('Publicise: create contact ' . print_r($newcontact, true)); + q("INSERT INTO `contact` (`" + . implode("`, `", array_keys($newcontact)) + . "`) VALUES (" + . implode(", ", array_values($newcontact)) + . ")" ); + $results = q("SELECT `id` FROM `contact` WHERE `uid` = %d AND `self` = 1", intval($uid)); + if (count($results) != 1) { + Logger::warning('Publicise: create self contact failed, will delete uid ' . $uid); + $r = q("DELETE FROM `user` WHERE `uid` = %d", intval($uid)); + return; + } + $newcontact = $results[0]; + Logger::debug('Publicise: created self contact for user ' . $uid . ' id ' . $newcontact['id']); + } + Logger::debug('Publicise: self contact for ' . $uid . ' nick ' . $contact['nick'] . ' is ' . 
$newcontact['id']); + return $newcontact['id']; +} + +function publicise_create_profile($contact, $uid) { + $newprofile = array( + 'uid' => $uid, + 'profile-name' => publicise_make_string('default'), + 'is-default' => publicise_make_int(1), + 'name' => publicise_make_string($contact['name']), + 'photo' => publicise_make_string($contact['photo']), + 'thumb' => publicise_make_string($contact['thumb']), + 'homepage' => publicise_make_string($contact['url']), + 'publish' => publicise_make_int(1), + 'net-publish' => publicise_make_int(1), + ); + Logger::debug('Publicise: create profile ' . print_r($newprofile, true)); + $r = q("INSERT INTO `profile` (`" + . implode("`, `", array_keys($newprofile)) + . "`) VALUES (" + . implode(", ", array_values($newprofile)) + . ")" ); + if (!$r) { + Logger::warning('Publicise: create profile failed'); + } + $newprofile = q('SELECT `id` FROM `profile` WHERE `uid` = %d AND `is-default` = 1', intval($uid)); + if (count($newprofile) != 1) { + Logger::warning('Publicise: create profile produced unexpected number of results'); + return; + } + Logger::debug('Publicise: created profile ' . $newprofile[0]['id']); + return $newprofile[0]['id']; +} + +function publicise_set_up_user($a, $contact, $owner) { + $user = publicise_create_user($owner, $contact); + if (!$user) { + notice(sprintf(t("Failed to create user for feed \"%s\""), $contact['name']) . EOL); + return; + } + $self_contact = publicise_create_self_contact($a, $contact, $user['uid']); + if (!$self_contact) { + notice(sprintf(t("Failed to create self contact for user \"%s\""), $contact['name']) . EOL); + Logger::warning("Publicise: unable to create self contact, deleting user " . $user['uid']); + q('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid'])); + return; + } + $profile = publicise_create_profile($contact, $user['uid']); + if (!$profile) { + notice(sprintf(t("Failed to create profile for user \"%s\""), $contact['name']) . 
EOL); + Logger::warning("Publicise: unable to create profile, deleting user $uid contact $self_contact"); + q('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid'])); + q('DELETE FROM `contact` WHERE `id` = %d', intval($self_contact)); + return; + } + return $user; +} + +function publicise($a, &$contact, &$owner) { + Logger::info('@@@ Publicise: publicise'); + if (!is_site_admin()) { + notice(t("Only admin users can publicise feeds")); + Logger::warning('Publicise: non-admin tried to publicise'); + return; + } + + // Check if we're changing our mind about a feed we earlier depublicised + Logger::info('@@@ Publicise: ' . 'SELECT * FROM `user` WHERE `account_expires_on` != "0000-00-00 00:00:00" AND `nickname` = "' . $contact['nick'] . '" AND `email` = "' . $owner['email'] . '" AND `page-flags` in (' . intval(PAGE_COMMUNITY) . ', ' . intval(PAGE_SOAPBOX) . ')'); + $existing = q('SELECT * FROM `user` WHERE `account_expires_on` != "0000-00-00 00:00:00" AND `nickname` = "%s" AND `email` = "%s" AND `page-flags` in (%d, %d)', + DBA::escape($contact['nick']), DBA::escape($owner['email']), intval(PAGE_COMMUNITY), intval(PAGE_SOAPBOX)); + if (count($existing) == 1) { + Logger::info('@@@ Publicise: there is existing'); + $owner = $existing[0]; + q('UPDATE `user` SET `account_expires_on` = "0000-00-00 00:00:00", `account_removed` = 0, `account_expired` = 0 WHERE `uid` = %d', intval($owner['uid'])); + q('UPDATE `profile` SET `publish` = 1, `net-publish` = 1 WHERE `uid` = %d AND `is-default` = 1', intval($owner['uid'])); + Logger::debug('Publicise: recycled previous user ' . $owner['uid']); + } + else { + Logger::info('@@@ Publicise: there is not existing'); + $owner = publicise_set_up_user($a, $contact, $owner); + if (!$owner) { + return; + } + Logger::debug("Publicise: created new user " . $owner['uid']); + } + Logger::info('Publicise: new contact user is ' . 
$owner['uid']); + + $r = q("UPDATE `contact` SET `uid` = %d, `reason` = 'publicise', `hidden` = 1 WHERE id = %d", intval($owner['uid']), intval($contact['id'])); + if (!$r) { + Logger::warning('Publicise: update contact failed, user is probably in a bad state ' . $user['uid']); + } + $contact['uid'] = $owner['uid']; + $contact['reason'] = 'publicise'; + $contact['hidden'] = 1; + $r = q("UPDATE `item` SET `uid` = %d, type = 'wall', wall = 1, private = 0 WHERE `contact-id` = %d", + intval($owner['uid']), intval($contact['id'])); + Logger::debug('Publicise: moved items from contact ' . $contact['id'] . ' to uid ' . $owner['uid']); + + // Update the retriever config + $r = q("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d", + intval($owner['uid']), intval($contact['id'])); + + info(sprintf(t("Moved feed \"%s\" to dedicated account"), $contact['name']) . EOL); + return true; +} + +function publicise_self_contact($uid) { + $r = q('SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1', intval($uid)); + if (count($r) != 1) { + Logger::warning('Publicise: unexpected number of self contacts for user ' . $uid); + return; + } + return $r[0]; +} + +function depublicise($a, $contact, $user) { + require_once('include/Contact.php'); + + if (!is_site_admin()) { + notice("Only admin users can depublicise feeds"); + Logger::warning('Publicise: non-admin tried to depublicise'); + return; + } + + Logger::debug('Publicise: about to depublicise contact ' . $contact['id'] . ' user ' . $user['uid']); + + $self_contact = publicise_self_contact($user['uid']); + + // If the local_user() is subscribed to the feed, take ownership + // of the feed and all its items and photos. Otherwise they will + // be deleted when the account expires. + $r = q('SELECT * FROM `contact` WHERE `uid` = %d AND `url` = "%s"', + intval(local_user()), DBA::escape($self_contact['url'])); + if (count($r)) { + // Delete the contact to the feed user and any + // copies of its items. 
These will be replaced by the originals, + // which will be brought back into the local_user's feed along + // with the feed contact itself. + foreach ($r as $my_contact) { + q('DELETE FROM `item` WHERE `contact-id` = %d', intval($my_contact['id'])); + q('DELETE FROM `contact` WHERE `id` = %d', intval($my_contact['id'])); + } + + // Move the feed contact to local_user. Existing items stay + // attached to the original feed contact, but must have their uid + // updated. Also update the fields we scribbled over in + // publicise_post_remote_hook. + q('UPDATE `contact` SET `uid` = %d, `reason` = "", hidden = 0 WHERE id = %d', + intval(local_user()), intval($contact['id'])); + q('UPDATE `item` SET `uid` = %d, `wall` = 0, `type` = "remote", `private` = 2 WHERE `contact-id` = %d', + intval(local_user()), intval($contact['id'])); + + // Take ownership of any photos created by the feed user + q('UPDATE `photo` SET `uid` = %d WHERE `uid` = %d', + intval(local_user()), intval($user['uid'])); + + // Update the retriever config + $r = q("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d", + intval($owner['uid']), intval($contact['id'])); + } + + // Set the account to removed and expired right now. It will be cleaned up by cron after 3 days, giving a chance to change your mind + q('UPDATE `user` SET `account_removed` = 1, `account_expired` = 1, `account_expires_on` = UTC_TIMESTAMP() WHERE `uid` = %d', + intval($user['uid'])); + q('UPDATE `profile` SET `publish` = 0, `net-publish` = 0 WHERE `uid` = %d AND `is-default` = 1', intval($user['uid'])); + + info(sprintf(t("Removed dedicated account for feed \"%s\""), $contact['name']) . EOL); +} + +function publicise_addon_admin_post ($a) { + Logger::info('@@@ publicise_addon_admin_post'); + if (!is_site_admin()) { + Logger::warning('Publicise: non-admin tried to do admin post'); + return; + } + + foreach (publicise_get_contacts() as $contact) { + Logger::info('@@@ publicise_addon_admin_post contact ' . $contact['id'] . 
' ' . $contact['name']); + $user = publicise_get_user($contact['uid']); + if (!$_POST['publicise-enabled-' . $contact['id']]) { + if ($contact['reason'] === 'publicise') { + Logger::info('@@@ depublicise'); + depublicise($a, $contact, $user); + } + } + else { + if ($contact['reason'] !== 'publicise') { + Logger::info('@@@ publicise'); + if (!publicise($a, $contact, $user)) { + Logger::warning('Publicise: failed to publicise contact ' . $contact['id']); + continue; + } + } + if ($_POST['publicise-expire-' . $contact['id']] != $user['expire']) { + q('UPDATE `user` SET `expire` = %d WHERE `uid` = %d', + intval($_POST['publicise-expire-' . $contact['id']]), intval($user['uid'])); + } + if ($_POST['publicise-comments-' . $contact['id']]) { + if ($user['page-flags'] != PAGE_COMMUNITY) { + q('UPDATE `user` SET `page-flags` = %d WHERE `uid` = %d', + intval(PAGE_COMMUNITY), intval($user['uid'])); + q('UPDATE `contact` SET `rel` = %d WHERE `uid` = %d AND `network` = "dfrn"', + intval(CONTACT_IS_SHARING), intval($user['uid'])); + } + } + else { + if ($user['page-flags'] != PAGE_SOAPBOX) { + q('UPDATE `user` SET `page-flags` = %d WHERE `uid` = %d', + intval(PAGE_SOAPBOX), intval($user['uid'])); + q('UPDATE `contact` SET `rel` = %d WHERE `uid` = %d AND `network` = "dfrn"', + intval(CONTACT_IS_FOLLOWER), intval($user['uid'])); + } + } + } + } +} + +function publicise_post_remote_hook(&$a, &$item) { + $r1 = q("SELECT `uid` FROM `contact` WHERE `id` = %d AND `reason` = 'publicise'", intval($item['contact-id'])); + if (!$r1) { + return; + } + + Logger::debug('Publicise: moving to wall: ' . $item['uid'] . ' ' . $item['contact-id'] . ' ' . 
$item['uri']); + $item['type'] = 'wall'; + $item['wall'] = 1; + $item['private'] = 0; +} + diff --git a/publicise/templates/admin.tpl b/publicise/templates/admin.tpl new file mode 100644 index 00000000..b10c3546 --- /dev/null +++ b/publicise/templates/admin.tpl @@ -0,0 +1,39 @@ +{{* + * AUTOMATICALLY GENERATED TEMPLATE + * DO NOT EDIT THIS FILE, CHANGES WILL BE OVERWRITTEN + * + *}} +
+ + + + + + + + + + +{{foreach $feeds as $f}} + + + + + + +{{/foreach}} + +
{{$feed_t}}{{$publicised_t}}{{$comments_t}}{{$expire_t}}
+ + + {{$f.name}} + + +{{include file="field_yesno.tpl" field=$f.enabled}} + +{{include file="field_yesno.tpl" field=$f.comments}} + + +
+ +
From c287de8f5982045f8035d92ddce00e04067ebe67 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 30 Sep 2019 20:52:51 +0200 Subject: [PATCH 25/96] retriever tweaks --- retriever/retriever.php | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index fcd45b46..f495578b 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -308,9 +308,8 @@ function retriever_get_item($retriever_item) { Logger::info('@@@ retriever_get_item uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['item-uid'] . ' cid ' . $retriever_item['contact-id']); try {//@@@ not necessary $item = Item::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid'])]); - Logger::log('@@@ 1 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); if (!DBA::isResult($item)) { - Logger::log('retriever_get_item: no item found for uri ' . $retriever_item['item-uri']); + Logger::warning('retriever_get_item: no item found for uri ' . $retriever_item['item-uri']); return; } Logger::info('@@@ retriever_get_item: yay item found for uri ' . $retriever_item['item-uri'] . ' guid ' . $item['guid'] . ' plink ' . $item['plink']); @@ -329,9 +328,9 @@ function retriever_item_completed($retriever_item_id, $resource, $a) { return; } $item = retriever_get_item($retriever_item); - Logger::log('@@@ 2 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); + Logger::info('@@@ 2 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); if (!$item) { - Logger::log('retriever_item_completed: no item ' . $retriever_item['item-uri']); + Logger::warning('retriever_item_completed: no item ' . $retriever_item['item-uri']); return; } // Note: the retriever might be null. Doesn't matter. @@ -348,21 +347,17 @@ function retriever_resource_completed($resource, $a) { Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . 
$resource['url']); $r = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource['id']); foreach (DBA::select('retriever_item', ['id'], ['resource' => intval($resource['id'])]) as $retriever_item) { - Logger::debug('@@@ retriever_resource_completed got item id ' . $retriever_item['id']); retriever_item_completed($retriever_item['id'], $resource, $a); } } function apply_retrospective($a, $retriever, $num) { - Logger::info('@@@ apply_retrospective'); $r = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d", intval($retriever['contact-id']), intval($num)); foreach ($r as $item) { - Logger::info('@@@ apply_retrospective item ' . $item['id']); q('UPDATE `item` SET `visible` = 0 WHERE `id` = %d', $item['id']); q('UPDATE `thread` SET `visible` = 0 WHERE `iid` = %d', $item['id']); foreach (DBA::select('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]) as $retriever_item) { - Logger::info('@@@ about to delete retriever_item id ' . $retriever_item['id'] . ' uri ' . $item['uri'] . ' uid ' . $item['uid'] . ' contact ' . 
$item['contact-id']); DBA::delete('retriever_resource', ['id' => $retriever_item['resource']]); DBA::delete('retriever_item', ['id' => $retriever_item['id']]); } @@ -378,7 +373,7 @@ function retriever_on_item_insert($a, $retriever, &$item) { Logger::info('retriever_on_item_insert: No retriever supplied'); return; } - if (!$retriever['data']['enable'] == "on") { + if (!array_key_exists('enable', $retriever['data']) || !$retriever['data']['enable'] == "on") { Logger::info('@@@ retriever_on_item_insert: Disabled'); return; } From 32056ea578e91d2b66c4d3779ff4fb5ef95f0166 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Oct 2019 07:19:59 +0200 Subject: [PATCH 26/96] extensive refactoring --- retriever/retriever.php | 305 +++++++++++++--------------- retriever/templates/rule-config.tpl | 32 ++- 2 files changed, 164 insertions(+), 173 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index f495578b..6ace5e98 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -177,14 +177,14 @@ function retriever_clean_up_completed_resources($max_items, $a) { } $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]); retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a); - Logger::info('@@@ retriever_clean_up_completed_resources tried to update id ' . $retriver_item['id'] . ' to finished, better check that it really worked!'); + Logger::info('@@@ retriever_clean_up_completed_resources tried to update id ' . $retriever_item['id'] . 
' to finished, better check that it really worked!'); DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished']); retriever_check_item_completed($item); } } function retriever_tidy() { - // TODO: figure out how to do this with DBA module + // TODO: figure out how to do this with DBA module @@@ it is possible q("DELETE FROM retriever_resource WHERE completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)"); q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)"); @@ -212,8 +212,6 @@ function retrieve_dataurl_resource($resource) { } function retrieve_resource($resource) { - Logger::info('@@@ retrieve_resource: url ' . $resource['url'] . ' uid ' . $resource['item-uid'] . ' cid ' . $resource['contact-id']); - if (substr($resource['url'], 0, 5) == "data:") { return retrieve_dataurl_resource($resource); } @@ -221,24 +219,22 @@ function retrieve_resource($resource) { $a = get_app(); $retriever_rule = get_retriever_rule($resource['contact-id'], $resource['item-uid']); + $rule_data = $retriever_rule['data']; try { Logger::debug('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url']); $redirects = 0; $cookiejar = ''; - Logger::debug('@@@ retrieve_resource storecookies ' . $retriever_rule['storecookies']); - if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) { + if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) { $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); - Logger::debug('@@@ retrieve_resource cookie file ' . $cookiejar . ' content ' . 
$retriever_rule['cookiedata']); - file_put_contents($cookiejar, $retriever_rule['cookiedata']); + file_put_contents($cookiejar, $rule_data['cookiedata']); } $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar); - if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) { - $retriever_rule['cookiedata'] = file_get_contents($cookiejar); - Logger::debug('@@@ retriever_resource update cookie ' . json_encode($retriever_rule['data'] . ' id ' . $retriever_rule['id'])); - q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d", - DBA::escape(json_encode($retriever_rule['data'])), intval($retriever_rule["id"])); - /* unlink($cookiejar); */ //@@@ + if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) { + $retriever_rule['data']['cookiedata'] = file_get_contents($cookiejar); + DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])]); + //@@@ check the update worked + unlink($cookiejar); } $resource['data'] = $fetch_result->getBody(); $resource['http-code'] = $fetch_result->getReturnCode(); @@ -248,36 +244,33 @@ function retrieve_resource($resource) { } catch (Exception $e) { Logger::info('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . 
$e->getMessage()); } + // TODO: figure out how to do this with DBA module q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `http-code` = %d, `redirect-url` = '%s' WHERE id = %d", intval($resource['http-code']), DBA::escape($resource['redirect-url']), intval($resource['id'])); if ($resource['data']) { + // TODO: figure out how to do this with DBA module q("UPDATE `retriever_resource` SET `completed` = now(), `data` = '%s', `type` = '%s' WHERE id = %d", DBA::escape($resource['data']), DBA::escape($resource['type']), intval($resource['id'])); retriever_resource_completed($resource, $a); } - Logger::info('@@@ retrieve_resource finished: ' . $resource['url']); } function get_retriever_rule($contact_id, $uid, $create = false) { - Logger::info('@@@ get_retriever_rule ' . "SELECT * FROM `retriever_rule` WHERE `contact-id` = " . intval($contact_id) . " AND `uid` = " . intval($uid)); - $r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d", - intval($contact_id), intval($uid)); - Logger::info('@@@ get_retriever_rule count is ' . 
count($r)); - if (count($r)) { - $r[0]['data'] = json_decode($r[0]['data'], true); + $retriever_rule = DBA::selectFirst('retriever_rule', [], ['contact-id' => intval($contact_id), 'uid' => intval($uid)]); + //@@@ check that this worked + if ($retriever_rule) { + $retriever_rule['data'] = json_decode($retriever_rule['data'], true); Logger::info('@@@ get_retriever_rule returning an actual thing'); - return $r[0]; + return $retriever_rule; } if ($create) { - q("INSERT INTO `retriever_rule` (`uid`, `contact-id`) VALUES (%d, %d)", - intval($uid), intval($contact_id)); - $r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d", - intval($contact_id), intval($uid)); - return $r[0]; + DBA::insert('retriever_rule', ['uid' => intval($uid), 'contact-id' => intval($contact_id)]); + //@@@ check that this worked + return DBA::selectFirst('retriever_rule', [], ['contact-id' => intval($contact_id), 'uid' => intval($uid)]); } } @@ -285,38 +278,13 @@ function retriever_get_retriever_item($id) { return DBA::selectFirst('retriever_item', [], ['id' => intval($id)]); } -function retriever_class_of_item($item) { //@@@ - if (!$item) { - return 'false'; - } - if (array_key_exists('finished', $item)) { - Logger::info('@@@ oh no this is a bad thing'); - return 'retriever_item'; - } - if (array_key_exists('moderated', $item)) { - return 'friendica_item'; - } - return 'unknown'; -} - -function mat_test($item) { //@@@ - return 'mat_test'; -} - function retriever_get_item($retriever_item) { - // @@@ add contact id as a search term - Logger::info('@@@ retriever_get_item uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['item-uid'] . ' cid ' . $retriever_item['contact-id']); - try {//@@@ not necessary - $item = Item::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid'])]); - if (!DBA::isResult($item)) { - Logger::warning('retriever_get_item: no item found for uri ' . 
$retriever_item['item-uri']); - return; - } - Logger::info('@@@ retriever_get_item: yay item found for uri ' . $retriever_item['item-uri'] . ' guid ' . $item['guid'] . ' plink ' . $item['plink']); - return $item; - } catch (Exception $e) { - Logger::info('retriever_get_item: exception ' . $e->getMessage()); + $item = Item::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid']), 'contact-id' => intval($retriever_item['contact-id'])]); + if (!DBA::isResult($item)) { + Logger::warning('retriever_get_item: no item found for uri ' . $retriever_item['item-uri']); + return; } + return $item; } function retriever_item_completed($retriever_item_id, $resource, $a) { @@ -328,7 +296,6 @@ function retriever_item_completed($retriever_item_id, $resource, $a) { return; } $item = retriever_get_item($retriever_item); - Logger::info('@@@ 2 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); if (!$item) { Logger::warning('retriever_item_completed: no item ' . $retriever_item['item-uri']); return; @@ -338,25 +305,23 @@ function retriever_item_completed($retriever_item_id, $resource, $a) { retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a); - q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", - intval($retriever_item['id'])); + DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished']); retriever_check_item_completed($item); } function retriever_resource_completed($resource, $a) { Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . 
$resource['url']); - $r = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource['id']); foreach (DBA::select('retriever_item', ['id'], ['resource' => intval($resource['id'])]) as $retriever_item) { retriever_item_completed($retriever_item['id'], $resource, $a); } } function apply_retrospective($a, $retriever, $num) { - $r = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d", - intval($retriever['contact-id']), intval($num)); - foreach ($r as $item) { - q('UPDATE `item` SET `visible` = 0 WHERE `id` = %d', $item['id']); - q('UPDATE `thread` SET `visible` = 0 WHERE `iid` = %d', $item['id']); + Logger::debug('@@@ apply_retrospective'); + foreach (Item::select([], ['contact-id' => intval($retriever['contact-id'])], ['order' => ['received' => true], 'limit' => $num]) as $item) { + Logger::debug('@@@ apply_retrospective got item id ' . $item['id'] . ' uri ' . $item['uri']); + Item::update(['visible' => 0], ['id' => intval($item['id'])]); + //@@@ check that this works foreach (DBA::select('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]) as $retriever_item) { DBA::delete('retriever_resource', ['id' => $retriever_item['resource']]); DBA::delete('retriever_item', ['id' => $retriever_item['id']]); @@ -368,13 +333,11 @@ function apply_retrospective($a, $retriever, $num) { // TODO: Currently this waits until the next cron before actually downloading. Should do it immediately. // TODO: This queries then inserts. It should use some kind of lock to avoid requesting the same resource twice. function retriever_on_item_insert($a, $retriever, &$item) { - Logger::info('@@@ retriever_on_item_insert start plink ' . $item['plink'] . ' id ' . 
$item['id']); if (!$retriever || !$retriever['id']) { Logger::info('retriever_on_item_insert: No retriever supplied'); return; } if (!array_key_exists('enable', $retriever['data']) || !$retriever['data']['enable'] == "on") { - Logger::info('@@@ retriever_on_item_insert: Disabled'); return; } if (array_key_exists('plink', $item) && strlen($item['plink'])) { @@ -389,13 +352,12 @@ function retriever_on_item_insert($a, $retriever, &$item) { $url = $content['plink']; } - if (array_key_exists('pattern', $retriever['data']) && $retriever['data']['pattern']) { + if ($retriever['data']['modurl']) { $orig_url = $url; $url = preg_replace('/' . $retriever['data']['pattern'] . '/', $retriever['data']['replace'], $orig_url); Logger::debug('retriever_on_item_insert: Changed ' . $orig_url . ' to ' . $url); } - Logger::debug('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id'] . ' url ' . $url); $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id']); $retriever_item_id = add_retriever_item($item, $resource); } @@ -412,16 +374,15 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { fclose($fp); $url = 'md5://' . hash('md5', $url); - //@@@ fix this - $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid)); - $resource = $r[0]; - if (count($r)) { + if (DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)])) { + //@@@ test that this really happens - it should sometimes Logger::debug('add_retriever_resource: Resource ' . $url . ' already requested'); return $resource; } Logger::debug('retrieve_resource: got data URL type ' . 
$resource['type']); - //@@@ fix this + // TODO: figure out how to do this with DBA module + // @@@ DBA::update('workerqueue', ['executed' => DateTimeFormat::utcNow()], ['pid' => $mypid, 'done' => false]); q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `type`, `binary`, `url`, `completed`, `data`) " . "VALUES (%d, %d, '%s', %d, '%s', now(), '%s')", intval($uid), @@ -430,6 +391,7 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { intval($binary ? 1 : 0), DBA::escape($url), DBA::escape($data)); + //@@@ fix this $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url)); $resource = $r[0]; if (count($r)) { @@ -449,16 +411,18 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { return $r[0]; } + //@@@ fix this q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `binary`, `url`) " . "VALUES (%d, %d, %d, '%s')", intval($uid), intval($cid), intval($binary ? 1 : 0), DBA::escape($url)); + //@@@ fix this $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url)); return $r[0]; } function add_retriever_item(&$item, $resource) { - Logger::debug('@@@ 5 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); Logger::debug('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); + //@@@ can use selectFirst $r = q("SELECT COUNT(*) FROM `retriever_item` WHERE " . "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d", DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id'])); @@ -466,9 +430,11 @@ function add_retriever_item(&$item, $resource) { Logger::info("add_retriever_item: retriever item already present for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); return; } + //@@@ fix this q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " . 
"VALUES ('%s', %d, %d, %d)", DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource["id"])); + //@@@ fix this $r = q("SELECT id FROM `retriever_item` WHERE " . "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d ORDER BY id DESC", DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id'])); @@ -505,8 +471,10 @@ function retriever_apply_xslt_text($xslt_text, $doc) { return $result; } +//@@@ I think this is supposed to update the $item, but it doesn't function retriever_apply_dom_filter($retriever, &$item, $resource) { - Logger::debug('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id']); + //@@@ check if id and uri-id are there //@@@ uri-id definitely is not + Logger::debug('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'] . ' uri-id ' . $item['uri-id']); if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) { Logger::info('retriever_apply_dom_filter: no include and no customxslt'); @@ -517,41 +485,15 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { return; } - //@@@ break this bit into separate function - $encoding = retriever_get_encoding($resource); - $content = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $encoding); - $doc = new DOMDocument('1.0', 'UTF-8'); - if (strpos($resource['type'], 'html') !== false) { - @$doc->loadHTML($content); - } - else { - $doc->loadXML($content); - } + $doc = retriever_load_into_dom($resource); - $params = array('$spec' => $retriever['data']); - $extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/'); - $extract_xslt = Renderer::replaceMacros($extract_template, $params); - if ($retriever['data']['include']) { - Logger::debug('retriever_apply_dom_filter: applying 
include/exclude template \"' . $extract_xslt . '\"'); - $doc = retriever_apply_xslt_text($extract_xslt, $doc); - } - if (array_key_exists('customxslt', $retriever['data']) && $retriever['data']['customxslt']) { - Logger::debug('retriever_apply_dom_filter: applying custom XSLT \"' . $retriever['data']['customxslt'] . '\"'); - $doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc); - } + $doc = retriever_extract($doc, $retriever); if (!$doc) { Logger::info('retriever_apply_dom_filter: failed to apply extract XSLT template'); return; } - //@@@ break this bit into separate function - $components = parse_url($resource['redirect-url']); - $rooturl = $components['scheme'] . "://" . $components['host']; - $dirurl = $rooturl . dirname($components['path']) . "/"; - $params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl); - $fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/'); - $fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params); - $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc); + $doc = retriever_globalise_urls($doc, $resource); if (!$doc) { Logger::info('retriever_apply_dom_filter: failed to apply fix urls XSLT template'); return; @@ -571,10 +513,56 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { Item::update(['body' => $body], ['uri-id' => $uri_id]); } +function retriever_load_into_dom($resource) { + Logger::info('@@@ retriever_load_into_dom start'); + $encoding = retriever_get_encoding($resource); + $content = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $encoding); + $doc = new DOMDocument('1.0', 'UTF-8'); + if (strpos($resource['type'], 'html') !== false) { + @$doc->loadHTML($content); + } + else { + $doc->loadXML($content); + } + Logger::info('@@@ retriever_load_into_dom end'); + return $doc; +} + +function retriever_extract($doc, $retriever) { + Logger::info('@@@ retriever_extract start'); + $params = array('$spec' => $retriever['data']); + 
$extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/'); + $extract_xslt = Renderer::replaceMacros($extract_template, $params); + if ($retriever['data']['include']) { + Logger::debug('retriever_apply_dom_filter: applying include/exclude template \"' . $extract_xslt . '\"'); + $doc = retriever_apply_xslt_text($extract_xslt, $doc); + } + if (array_key_exists('customxslt', $retriever['data']) && $retriever['data']['customxslt']) { + Logger::debug('retriever_extract: applying custom XSLT \"' . $retriever['data']['customxslt'] . '\"'); + $doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc); + } + Logger::info('@@@ retriever_extract end'); + return $doc; +} + +function retriever_globalise_urls($doc, $resource) { + Logger::info('@@@ retriever_globalise_urls start'); + $components = parse_url($resource['redirect-url']); + $rooturl = $components['scheme'] . "://" . $components['host']; + $dirurl = $rooturl . dirname($components['path']) . "/"; + $params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl); + $fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/'); + $fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params); + $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc); + Logger::info('@@@ retriever_globalise_urls end'); + return $doc; +} + function retrieve_images(&$item, $a) { // Note that $item doesn't necessarily contain all the fields you would expect, in particular 'id' + //@@@ doe sit contain uri-id? //@@@ it definitely does not - Logger::debug('@@@ retrieve_images start item '. $item['id'] . ' uri ' . $item['uri'] . ' uri id ' . $item['uri-id'] . ' plink ' . $item['plink'] . ' guid ' . $item['guid']); + Logger::debug('@@@ retrieve_images start item id '. (array_key_exists('id', $item) ? $item['id'] : 'undef') . ' uri ' . $item['uri'] . ' uri id ' . $item['uri-id'] . ' plink ' . $item['plink'] . ' guid ' . 
$item['guid']); $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $uri_id]); @@ -584,7 +572,6 @@ function retrieve_images(&$item, $a) { return; } - Logger::info('@@@ retrieve_images looking in body "' . $body . '"'); // I suspect that the first two are not used any more? preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $item["body"], $matches1); preg_match_all("/\[img\](.*?)\[\/img\]/ism", $item["body"], $matches2); @@ -592,9 +579,7 @@ function retrieve_images(&$item, $a) { $matches = array_merge($matches1[3], $matches2[1], $matches3[1]); Logger::debug('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); foreach ($matches as $url) { - Logger::debug('@@@ retrieve_images: url ' . $url); if (strpos($url, get_app()->getBaseUrl()) === FALSE) { - Logger::debug('@@@ retrieve_images: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id']); $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id'], true); if (!$resource['completed']) { add_retriever_item($item, $resource); @@ -604,12 +589,11 @@ function retrieve_images(&$item, $a) { } } } - Logger::info('@@@ retrieve_images end'); } function retriever_check_item_completed(&$item) { - Logger::debug('@@@ 9 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); + // TODO: figure out how to do this with DBA module $r = q('SELECT count(*) FROM retriever_item WHERE `item-uri` = "%s" ' . 'AND `item-uid` = %d AND `contact-id` = %d AND `finished` = 0', DBA::escape($item['uri']), intval($item['uid']), @@ -620,12 +604,7 @@ function retriever_check_item_completed(&$item) $item['visible'] = $waiting ? 
0 : 1; if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) { Logger::debug('retriever_check_item_completed: changing visible flag to ' . $item['visible']); - q("UPDATE `item` SET `visible` = %d WHERE `id` = %d", - intval($item['visible']), - intval($item['id'])); - q("UPDATE `thread` SET `visible` = %d WHERE `iid` = %d", - intval($item['visible']), - intval($item['id'])); + Item::update(['visible' => 0], ['id' => intval($item['id'])]); } } @@ -647,11 +626,8 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc } } -//@@@ todo: change all Logger::info t etc //@@@ todo: what is this reference for? document if needed delete if not function retriever_transform_images($a, &$item, $resource) { - Logger::debug('@@@ 11 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); - Logger::info('@@@ retriever_transform_images'); if (!$resource['data']) { Logger::info('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url']); return; @@ -659,51 +635,37 @@ function retriever_transform_images($a, &$item, $resource) { $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? - try { //@@@ probably can get rid of this try/catch - $data = $resource['data']; - $type = $resource['type']; - $uid = $item['uid']; - $cid = $item['contact-id']; - $rid = Photo::newResource(); - $path = parse_url($resource['url'], PHP_URL_PATH); - $parts = pathinfo($path); - $filename = $parts['filename'] . (array_key_exists('extension', $parts) ? '.' . $parts['extension'] : ''); - Logger::info('@@@ retriever_transform_images url ' . $resource['url'] . ' path ' . $path . ' filename ' . $parts['filename']); - $album = 'Wall Photos'; - $scale = 0; - $desc = ''; // TODO: store alt text with resource when it's requested so we can fill this in - Logger::debug('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . 
$uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . $album . ' scale ' . $scale . ' desc ' . $desc); - Logger::info('@@@ retriever_transform_images before new Image'); - $image = new Image($data, $type); - Logger::info('@@@ retriever_transform_images after new Image'); - if (!$image->isValid()) { - Logger::warning('retriever_transform_images: invalid image found at URL ' . $resource['url'] . ' for item ' . $item['id']); - return; - } - Logger::info('@@@ retriever_transform_images before Photo::store'); - $photo = Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc); - Logger::info('@@@ retriever_transform_images after Photo::store'); - $new_url = System::baseUrl() . '/photo/' . $rid . '-0.' . $image->getExt(); - Logger::info('@@@ retriever_transform_images new url ' . $new_url . ' rid ' . $rid . ' ext ' . $image->getExt()); - if (!strlen($new_url)) { - Logger::warning('retriever_transform_images: no replacement URL for image ' . $resource['url']); - return; - } - - $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $uri_id]); - $body = $content['body']; - Logger::info('@@@ retriever_transform_images: found body for uri id ' . $uri_id . ': ' . $body); - - Logger::debug('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . $item['uri']); - Logger::debug('@@@ retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in body ' . $body); - $body = str_replace($resource["url"], $new_url, $body); - - Logger::info('@@@ retriever_transform_images: result \"' . $body . '\"'); - Item::update(['body' => $body], ['uri-id' => $uri_id]); - } catch (Exception $e) { - Logger::info('retriever_transform_images caught exception ' . 
$e->getMessage()); + $data = $resource['data']; + $type = $resource['type']; + $uid = $item['uid']; + $cid = $item['contact-id']; + $rid = Photo::newResource(); + $path = parse_url($resource['url'], PHP_URL_PATH); + $parts = pathinfo($path); + $filename = $parts['filename'] . (array_key_exists('extension', $parts) ? '.' . $parts['extension'] : ''); + $album = 'Wall Photos'; + $scale = 0; + $desc = ''; // TODO: store alt text with resource when it's requested so we can fill this in + Logger::debug('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . $uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . $album . ' scale ' . $scale . ' desc ' . $desc); + $image = new Image($data, $type); + if (!$image->isValid()) { + Logger::warning('retriever_transform_images: invalid image found at URL ' . $resource['url'] . ' for item ' . $item['id']); return; } + $photo = Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc); + $new_url = System::baseUrl() . '/photo/' . $rid . '-0.' . $image->getExt(); + if (!strlen($new_url)) { + Logger::warning('retriever_transform_images: no replacement URL for image ' . $resource['url']); + return; + } + + $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $uri_id]); + $body = $content['body']; + + Logger::debug('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . 
$item['uri']); + $body = str_replace($resource["url"], $new_url, $body); + + Item::update(['body' => $body], ['uri-id' => $uri_id]); } function retriever_content($a) { @@ -712,6 +674,7 @@ function retriever_content($a) { return; } if ($a->argv[1] === 'help') { + //@@@ fix me $feeds = q("SELECT `id`, `name`, `thumb` FROM contact WHERE `uid` = %d AND `network` = 'feed'", local_user()); foreach ($feeds as $k=>$v) { @@ -729,7 +692,7 @@ function retriever_content($a) { if (!empty($_POST["id"])) { $retriever_rule = get_retriever_rule($a->argv[1], local_user(), true); $retriever_rule['data'] = array(); - foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) { + foreach (array('modurl', 'pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) { if (empty($_POST['retriever_' . $setting])) { $retriever_rule['data'][$setting] = NULL; } @@ -753,6 +716,7 @@ function retriever_content($a) { unset($retriever_rule['data']['exclude'][$k]); } } + //@@@ fix me q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d", DBA::escape(json_encode($retriever_rule['data'])), intval($retriever_rule["id"])); $a->page['content'] .= "

Settings Updated"; @@ -769,6 +733,11 @@ function retriever_content($a) { 'retriever_enable', L10n::t('Enabled'), $retriever_rule['data']['enable']), + '$modurl' => array( + 'retriever_modurl', + L10n::t('Modify URL'), + $retriever_rule['data']['modurl'], + L10n::t("Modify each article's URL with regular expressions before retrieving.")), '$pattern' => array( 'retriever_pattern', L10n::t('URL Pattern'), @@ -832,7 +801,8 @@ function retriever_contact_photo_menu($a, &$args) { } function retriever_post_remote_hook(&$a, &$item) { - Logger::info('@@@ 12 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); + // Note that $item doesn't necessarily contain all the fields you would expect, in particular 'id' + Logger::info('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? @@ -845,14 +815,13 @@ function retriever_post_remote_hook(&$a, &$item) { // Convert to HTML and back to take advantage of bbcode's resolution of oembeds. $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]); $body = HTML::toBBCode(BBCode::convert($content['body'])); - Logger::debug('@@@ retriever_post_remote_hook item uri-id ' . $uri_id . ' body "' . $item['body'] . '" item content body "' . $body . 
'"'); if ($body) { $item['body'] = $body; Item::update(['body' => $body], ['uri-id' => $uri_id]); } } if (PConfig::get($item["uid"], 'retriever', 'all_photos')) { - retrieve_images($item, $a); + retrieve_images($item, $a); //@@@ backwards } } retriever_check_item_completed($item); diff --git a/retriever/templates/rule-config.tpl b/retriever/templates/rule-config.tpl index 9061d1ff..171054de 100644 --- a/retriever/templates/rule-config.tpl +++ b/retriever/templates/rule-config.tpl @@ -41,6 +41,25 @@ function retriever_remove_row(id, number) tbody.removeChild(row); } +function retriever_toggle_url_block() +{ + var pattern = document.querySelector("#id_retriever_pattern").parentNode; + if (document.querySelector("#id_retriever_modurl").checked) { + pattern.style.display = "block"; + } + else { + pattern.style.display = "none"; + } + + var replace = document.querySelector("#id_retriever_replace").parentNode; + if (document.querySelector("#id_retriever_modurl").checked) { + replace.style.display = "block"; + } + else { + replace.style.display = "none"; + } +} + function retriever_toggle_cookiedata_block() { var div = document.querySelector("#id_retriever_cookiedata").parentNode; @@ -53,6 +72,8 @@ function retriever_toggle_cookiedata_block() } document.addEventListener('DOMContentLoaded', function() { + retriever_toggle_url_block(); + document.querySelector("#id_retriever_modurl").addEventListener('change', retriever_toggle_url_block, false); retriever_toggle_cookiedata_block(); document.querySelector("#id_retriever_storecookies").addEventListener('change', retriever_toggle_cookiedata_block, false); }, false); @@ -62,10 +83,6 @@ document.addEventListener('DOMContentLoaded', function() {

{{include file="field_checkbox.tpl" field=$enable}} -{{include file="field_input.tpl" field=$pattern}} -{{include file="field_input.tpl" field=$replace}} -{{include file="field_checkbox.tpl" field=$images}} -{{include file="field_input.tpl" field=$retrospective}}

{{$include_t}}:

@@ -98,7 +115,7 @@ document.addEventListener('DOMContentLoaded', function() {
- + {{if $exclude}} @@ -122,9 +139,14 @@ document.addEventListener('DOMContentLoaded', function() {
TagAttributeValue
{{$tag_t}}{{$attribute_t}}{{$value_t}}
+{{include file="field_checkbox.tpl" field=$modurl}} +{{include file="field_input.tpl" field=$pattern}} +{{include file="field_input.tpl" field=$replace}} +{{include file="field_checkbox.tpl" field=$images}} {{include file="field_textarea.tpl" field=$customxslt}} {{include file="field_checkbox.tpl" field=$storecookies}} {{include file="field_textarea.tpl" field=$cookiedata}} +{{include file="field_input.tpl" field=$retrospective}} From 29888f9e87a9547bcbe9f8bc80eaa7d4573405e4 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Tue, 8 Oct 2019 07:29:59 +0200 Subject: [PATCH 27/96] Now retriever works again --- retriever/retriever.php | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 6ace5e98..988bbc43 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -127,8 +127,9 @@ function retriever_retrieve_items($max_items, $a) { } $retrieve_items = $max_items - $retriever_item_count; - Logger::debug('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items); do { + //@@@ check this looks sane after moving inside the loop + Logger::debug('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items); // TODO: figure out how to do this with DBA module $retriever_resources = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d", DBA::escape(implode($schedule_clauses, ' OR ')), @@ -178,7 +179,7 @@ function retriever_clean_up_completed_resources($max_items, $a) { $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]); retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a); Logger::info('@@@ retriever_clean_up_completed_resources tried to update id ' . 
$retriever_item['id'] . ' to finished, better check that it really worked!'); - DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished']); + DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished' => 0]); retriever_check_item_completed($item); } } @@ -232,7 +233,7 @@ function retrieve_resource($resource) { $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar); if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) { $retriever_rule['data']['cookiedata'] = file_get_contents($cookiejar); - DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])]); + DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], $retriever_rule); //@@@ check the update worked unlink($cookiejar); } @@ -305,24 +306,22 @@ function retriever_item_completed($retriever_item_id, $resource, $a) { retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a); - DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished']); + DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished' => 0]); retriever_check_item_completed($item); } function retriever_resource_completed($resource, $a) { Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . 
$resource['url']); - foreach (DBA::select('retriever_item', ['id'], ['resource' => intval($resource['id'])]) as $retriever_item) { - retriever_item_completed($retriever_item['id'], $resource, $a); + foreach (DBA::selectToArray('retriever_item', ['id'], ['resource' => intval($resource['id'])]) as $retriever_item) { + retriever_item_completed($retriever_item['id'], $resource, $a); //@@@ args in wrong order } } function apply_retrospective($a, $retriever, $num) { - Logger::debug('@@@ apply_retrospective'); - foreach (Item::select([], ['contact-id' => intval($retriever['contact-id'])], ['order' => ['received' => true], 'limit' => $num]) as $item) { - Logger::debug('@@@ apply_retrospective got item id ' . $item['id'] . ' uri ' . $item['uri']); + foreach (Item::selectToArray([], ['contact-id' => intval($retriever['contact-id'])], ['order' => ['received' => true], 'limit' => $num]) as $item) { Item::update(['visible' => 0], ['id' => intval($item['id'])]); //@@@ check that this works - foreach (DBA::select('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]) as $retriever_item) { + foreach (DBA::selectToArray('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]) as $retriever_item) { DBA::delete('retriever_resource', ['id' => $retriever_item['resource']]); DBA::delete('retriever_item', ['id' => $retriever_item['id']]); } @@ -593,7 +592,7 @@ function retrieve_images(&$item, $a) { function retriever_check_item_completed(&$item) { - // TODO: figure out how to do this with DBA module + // TODO: figure out how to do this with DBA module //@@@ selectFirst works $r = q('SELECT count(*) FROM retriever_item WHERE `item-uri` = "%s" ' . 'AND `item-uid` = %d AND `contact-id` = %d AND `finished` = 0', DBA::escape($item['uri']), intval($item['uid']), @@ -604,7 +603,7 @@ function retriever_check_item_completed(&$item) $item['visible'] = $waiting ? 
0 : 1; if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) { Logger::debug('retriever_check_item_completed: changing visible flag to ' . $item['visible']); - Item::update(['visible' => 0], ['id' => intval($item['id'])]); + Item::update(['visible' => $item['visible']], ['id' => intval($item['id'])]); } } @@ -615,6 +614,8 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc retriever_transform_images($a, $item, $resource); } if (!$retriever) { + //@@@ log line here: how normal is this? + Logger::info('@@@ retriever_apply_completed_resource_to_item no retriever'); return; } if ((strpos($resource['type'], 'html') !== false) || @@ -674,12 +675,11 @@ function retriever_content($a) { return; } if ($a->argv[1] === 'help') { - //@@@ fix me - $feeds = q("SELECT `id`, `name`, `thumb` FROM contact WHERE `uid` = %d AND `network` = 'feed'", - local_user()); - foreach ($feeds as $k=>$v) { - $feeds[$k]['url'] = $a->getBaseUrl() . '/retriever/' . $v['id']; + $feeds = DBA::selectToArray('contact', ['id', 'name', 'thumb'], ['uid' => local_user(), 'network' => 'feed']); + for ($i = 0; $i < count($feeds); ++$i) { + $feeds[$i]['url'] = $a->getBaseUrl() . '/retriever/' . $feeds[$i]['id']; } + //@@@ this is broken $template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/'); $a->page['content'] .= Renderer::replaceMacros($template, array( '$config' => $a->getBaseUrl() . 
'/settings/addon', From 86eeb59ae58ce8e10807a0be7e5b91cc2ba3a0d9 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Tue, 8 Oct 2019 18:55:34 +0200 Subject: [PATCH 28/96] maybe broken again --- retriever/retriever.php | 45 +++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 988bbc43..128fc80e 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -262,10 +262,8 @@ function retrieve_resource($resource) { function get_retriever_rule($contact_id, $uid, $create = false) { $retriever_rule = DBA::selectFirst('retriever_rule', [], ['contact-id' => intval($contact_id), 'uid' => intval($uid)]); - //@@@ check that this worked if ($retriever_rule) { $retriever_rule['data'] = json_decode($retriever_rule['data'], true); - Logger::info('@@@ get_retriever_rule returning an actual thing'); return $retriever_rule; } if ($create) { @@ -288,7 +286,7 @@ function retriever_get_item($retriever_item) { return $item; } -function retriever_item_completed($retriever_item_id, $resource, $a) { +function retriever_item_completed($a, $retriever_item_id, $resource) { Logger::debug('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url']); $retriever_item = retriever_get_retriever_item($retriever_item_id); @@ -313,7 +311,7 @@ function retriever_item_completed($retriever_item_id, $resource, $a) { function retriever_resource_completed($resource, $a) { Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . 
$resource['url']); foreach (DBA::selectToArray('retriever_item', ['id'], ['resource' => intval($resource['id'])]) as $retriever_item) { - retriever_item_completed($retriever_item['id'], $resource, $a); //@@@ args in wrong order + retriever_item_completed($a, $retriever_item['id'], $resource); } } @@ -358,6 +356,7 @@ function retriever_on_item_insert($a, $retriever, &$item) { } $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id']); + Logger::debug('@@@ check this makes sense: ' . $resource['id'] . ' url ' . $resource['url']); $retriever_item_id = add_retriever_item($item, $resource); } @@ -374,7 +373,6 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { $url = 'md5://' . hash('md5', $url); if (DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)])) { - //@@@ test that this really happens - it should sometimes Logger::debug('add_retriever_resource: Resource ' . $url . ' already requested'); return $resource; } @@ -390,10 +388,7 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { intval($binary ? 
1 : 0), DBA::escape($url), DBA::escape($data)); - //@@@ fix this - $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url)); - $resource = $r[0]; - if (count($r)) { + if (DBA::selectFirst('retriever_resource', [], ['url' => $url])) { retriever_resource_completed($resource, $a); } return $resource; @@ -403,19 +398,15 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { Logger::warning('add_retriever_resource: URL is longer than 800 characters'); } - //@@@ fix this - $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid)); - if (count($r)) { + if (DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)])) { Logger::debug('add_retriever_resource: Resource ' . $url . ' uid ' . $uid . ' cid ' . $cid . ' already requested'); return $r[0]; } - //@@@ fix this - q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `binary`, `url`) " . - "VALUES (%d, %d, %d, '%s')", intval($uid), intval($cid), intval($binary ? 1 : 0), DBA::escape($url)); - //@@@ fix this - $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url)); - return $r[0]; + DBA::insert('retriever_rule', ['item-uid' => intval($uid), 'contact-id' => intval($cid), 'binary' => ($binary ? 1 : 0), 'url' => $url]); + Logge::debug('@@@ add_retriever_resource inserting resource ' . $url . ' uid ' . $uid . ' cid ' . 
$cid); + //@@@ check the insert worked + return DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); } function add_retriever_item(&$item, $resource) { @@ -557,7 +548,7 @@ function retriever_globalise_urls($doc, $resource) { return $doc; } -function retrieve_images(&$item, $a) { +function retrieve_images($a, &$item) { // Note that $item doesn't necessarily contain all the fields you would expect, in particular 'id' //@@@ doe sit contain uri-id? //@@@ it definitely does not @@ -580,6 +571,7 @@ function retrieve_images(&$item, $a) { foreach ($matches as $url) { if (strpos($url, get_app()->getBaseUrl()) === FALSE) { $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id'], true); + Logger::debug('@@@ check this makes sense 2: ' . $resource['id'] . ' url ' . $resource['url']); if (!$resource['completed']) { add_retriever_item($item, $resource); } @@ -592,6 +584,8 @@ function retrieve_images(&$item, $a) { function retriever_check_item_completed(&$item) { + $waiting = DBA::selectFirst('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'contact-id' => intval($item['contact-id']), 'finished' => 0]); + Logger::debug('@@@ waiting is ' . $waiting); // TODO: figure out how to do this with DBA module //@@@ selectFirst works $r = q('SELECT count(*) FROM retriever_item WHERE `item-uri` = "%s" ' . 'AND `item-uid` = %d AND `contact-id` = %d AND `finished` = 0', @@ -610,19 +604,17 @@ function retriever_check_item_completed(&$item) function retriever_apply_completed_resource_to_item($retriever, &$item, $resource, $a) { Logger::debug('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . 
$item['plink']); if (strpos($resource['type'], 'image') !== false) { - Logger::info('@@@ retriever_apply_completed_resource_to_item this is an image must transform'); retriever_transform_images($a, $item, $resource); } if (!$retriever) { - //@@@ log line here: how normal is this? - Logger::info('@@@ retriever_apply_completed_resource_to_item no retriever'); + Logger::warning('retriever_apply_completed_resource_to_item: no retriever'); return; } if ((strpos($resource['type'], 'html') !== false) || (strpos($resource['type'], 'xml') !== false)) { retriever_apply_dom_filter($retriever, $item, $resource); if ($retriever['data']['images'] ) { - retrieve_images($item, $a); + retrieve_images($a, $item); } } } @@ -716,9 +708,8 @@ function retriever_content($a) { unset($retriever_rule['data']['exclude'][$k]); } } - //@@@ fix me - q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d", - DBA::escape(json_encode($retriever_rule['data'])), intval($retriever_rule["id"])); + //@@@ check that this works + DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], ['data' => '']); $a->page['content'] .= "

Settings Updated"; if (!empty($_POST["retriever_retrospective"])) { apply_retrospective($a, $retriever_rule, $_POST["retriever_retrospective"]); @@ -821,7 +812,7 @@ function retriever_post_remote_hook(&$a, &$item) { } } if (PConfig::get($item["uid"], 'retriever', 'all_photos')) { - retrieve_images($item, $a); //@@@ backwards + retrieve_images($a, $item); } } retriever_check_item_completed($item); From 418426e8a8165dcb57d02430edc3dc7a72fc6de6 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 9 Oct 2019 20:54:39 +0200 Subject: [PATCH 29/96] working much better --- retriever/retriever.php | 160 +++++++++++++++++------------------ retriever/templates/help.tpl | 15 +++- 2 files changed, 92 insertions(+), 83 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 128fc80e..b8af7d3d 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -128,8 +128,7 @@ function retriever_retrieve_items($max_items, $a) { $retrieve_items = $max_items - $retriever_item_count; do { - //@@@ check this looks sane after moving inside the loop - Logger::debug('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items); + Logger::debug('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . intval($retriever_item_count) . ', retrieve ' . 
$retrieve_items); // TODO: figure out how to do this with DBA module $retriever_resources = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d", DBA::escape(implode($schedule_clauses, ' OR ')), @@ -185,9 +184,11 @@ function retriever_clean_up_completed_resources($max_items, $a) { } function retriever_tidy() { - // TODO: figure out how to do this with DBA module @@@ it is possible - q("DELETE FROM retriever_resource WHERE completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)"); - q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)"); + DBA::delete('retriever_resource', ['completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)']); + DBA::delete('retriever_resource', ['completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)']); + // @@@ check that this worked + /* q("DELETE FROM retriever_resource WHERE completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)"); */ + /* q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)"); */ $r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); Logger::info('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource'); @@ -286,7 +287,7 @@ function retriever_get_item($retriever_item) { return $item; } -function retriever_item_completed($a, $retriever_item_id, $resource) { +function retriever_item_completed($retriever_item_id, $resource) { Logger::debug('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url']); $retriever_item = retriever_get_retriever_item($retriever_item_id); @@ -302,20 +303,20 @@ function retriever_item_completed($a, $retriever_item_id, $resource) { // Note: the retriever might be null. Doesn't matter. 
$retriever_rule = get_retriever_rule($retriever_item['contact-id'], $retriever_item['item-uid']); - retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a); + retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource); DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished' => 0]); retriever_check_item_completed($item); } -function retriever_resource_completed($resource, $a) { +function retriever_resource_completed($resource) { Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url']); foreach (DBA::selectToArray('retriever_item', ['id'], ['resource' => intval($resource['id'])]) as $retriever_item) { - retriever_item_completed($a, $retriever_item['id'], $resource); + retriever_item_completed($retriever_item['id'], $resource); } } -function apply_retrospective($a, $retriever, $num) { +function apply_retrospective($retriever, $num) { foreach (Item::selectToArray([], ['contact-id' => intval($retriever['contact-id'])], ['order' => ['received' => true], 'limit' => $num]) as $item) { Item::update(['visible' => 0], ['id' => intval($item['id'])]); //@@@ check that this works @@ -323,13 +324,15 @@ function apply_retrospective($a, $retriever, $num) { DBA::delete('retriever_resource', ['id' => $retriever_item['resource']]); DBA::delete('retriever_item', ['id' => $retriever_item['id']]); } - retriever_on_item_insert($a, $retriever, $item); + retriever_on_item_insert($retriever, $item); } } // TODO: Currently this waits until the next cron before actually downloading. Should do it immediately. +//@@@ I think the above statement is wrong. Check! // TODO: This queries then inserts. It should use some kind of lock to avoid requesting the same resource twice. 
-function retriever_on_item_insert($a, $retriever, &$item) { +function retriever_on_item_insert($retriever, &$item) { + Logger::debug('@@@ retriever_on_item_insert start'); if (!$retriever || !$retriever['id']) { Logger::info('retriever_on_item_insert: No retriever supplied'); return; @@ -349,18 +352,19 @@ function retriever_on_item_insert($a, $retriever, &$item) { $url = $content['plink']; } - if ($retriever['data']['modurl']) { + if (array_key_exists('modurl', $retriever['data']) && $retriever['data']['modurl']) { $orig_url = $url; $url = preg_replace('/' . $retriever['data']['pattern'] . '/', $retriever['data']['replace'], $orig_url); Logger::debug('retriever_on_item_insert: Changed ' . $orig_url . ' to ' . $url); } - $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id']); + $resource = add_retriever_resource($url, $item['uid'], $item['contact-id']); Logger::debug('@@@ check this makes sense: ' . $resource['id'] . ' url ' . $resource['url']); + Logger::debug('@@@ it does not make sense ' . print_r($resource, true)); $retriever_item_id = add_retriever_item($item, $resource); } -function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { +function add_retriever_resource($url, $uid, $cid, $binary = false) { Logger::debug('add_retriever_resource: url ' . $url . ' uid ' . $uid . ' contact-id ' . $cid); $scheme = parse_url($url, PHP_URL_SCHEME); @@ -378,18 +382,10 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { } Logger::debug('retrieve_resource: got data URL type ' . $resource['type']); - // TODO: figure out how to do this with DBA module - // @@@ DBA::update('workerqueue', ['executed' => DateTimeFormat::utcNow()], ['pid' => $mypid, 'done' => false]); - q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `type`, `binary`, `url`, `completed`, `data`) " . - "VALUES (%d, %d, '%s', %d, '%s', now(), '%s')", - intval($uid), - intval($cid), - DBA::escape($type), - intval($binary ? 
1 : 0), - DBA::escape($url), - DBA::escape($data)); + DBA::insert('retriever_resource', ['item-uid' => intval($uid), 'contact-id' => intval($cid), 'type' => $type, 'binary' => ($binary ? 1 : 0), 'url' => $url, 'completed' => DateTimeFormat::utcNow(), 'data' => $data]); + // @@@ check that this makes sense if (DBA::selectFirst('retriever_resource', [], ['url' => $url])) { - retriever_resource_completed($resource, $a); + retriever_resource_completed($resource); } return $resource; } @@ -403,37 +399,31 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { return $r[0]; } - DBA::insert('retriever_rule', ['item-uid' => intval($uid), 'contact-id' => intval($cid), 'binary' => ($binary ? 1 : 0), 'url' => $url]); - Logge::debug('@@@ add_retriever_resource inserting resource ' . $url . ' uid ' . $uid . ' cid ' . $cid); - //@@@ check the insert worked + DBA::insert('retriever_resource', ['item-uid' => intval($uid), 'contact-id' => intval($cid), 'binary' => ($binary ? 1 : 0), 'url' => $url]); return DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); } function add_retriever_item(&$item, $resource) { Logger::debug('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); - //@@@ can use selectFirst - $r = q("SELECT COUNT(*) FROM `retriever_item` WHERE " . 
- "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d", - DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id'])); - if ($r[0]['COUNT(*)'] > 0) { + if (!array_key_exists('id', $resource) || !$resource['id']) { + Logger::warning('add_retriever_item: resource is empty'); + //@@@ check that this does not happen + return; + } + if (DBA::selectFirst('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'resource' => intval($resource['id'])])) { + //@@@ check that this worked Logger::info("add_retriever_item: retriever item already present for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); return; } - //@@@ fix this - q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " . - "VALUES ('%s', %d, %d, %d)", - DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource["id"])); - //@@@ fix this - $r = q("SELECT id FROM `retriever_item` WHERE " . - "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d ORDER BY id DESC", - DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id'])); - if (!count($r)) { + DBA::insert('retriever_item', ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'contact-id' => intval($item['contact-id']), 'resource' => intval($resource['id'])]); + $retriever_item = DBA::selectFirst('retriever_item', ['id'], ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'resource' => intval($resource['id'])]); + if (!$retriever_item) { Logger::info("add_retriever_item: couldn't create retriever item for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); return; } - Logger::debug('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . 
$item['contact-id']); - return $r[0]['id']; + Logger::debug('add_retriever_item: created retriever_item ' . $retriever_item['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); + return $retriever_item['id']; } function retriever_get_encoding($resource) { @@ -454,17 +444,14 @@ function retriever_apply_xslt_text($xslt_text, $doc) { Logger::info('retriever_apply_xslt_text: could not load XML'); return $doc; } - Logger::debug('@@@ retriever_apply_xslt_text: ' . $xslt_text); $xp = new XsltProcessor(); $xp->importStylesheet($xslt_doc); $result = $xp->transformToDoc($doc); return $result; } -//@@@ I think this is supposed to update the $item, but it doesn't function retriever_apply_dom_filter($retriever, &$item, $resource) { - //@@@ check if id and uri-id are there //@@@ uri-id definitely is not - Logger::debug('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'] . ' uri-id ' . $item['uri-id']); + Logger::debug('retriever_apply_dom_filter: applying XSLT to uri ' . $item['uri'] . ' uid ' . $item['uid'] . ' contact ' . $item['contact-id']); if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) { Logger::info('retriever_apply_dom_filter: no include and no customxslt'); @@ -498,13 +485,16 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { $body .= $item['plink']; $body .= ']' . $item['plink'] . '[/url]'; - $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? Consider using item['id'] instead Logger::debug('retriever_apply_dom_filter: XSLT result \"' . $body . '\"'); - Item::update(['body' => $body], ['uri-id' => $uri_id]); + $item['body'] = $body; + if (array_key_exists('id', $item) && $item['id']) { //@@@ this should be a separate function + //@@@ check that this works + Logger::debug('@@@ retriever_apply_dom_filter updating item by id ' . 
$item['id']); + Item::update(['body' => $body], ['id' => $item['id']]); + } } function retriever_load_into_dom($resource) { - Logger::info('@@@ retriever_load_into_dom start'); $encoding = retriever_get_encoding($resource); $content = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $encoding); $doc = new DOMDocument('1.0', 'UTF-8'); @@ -514,12 +504,10 @@ function retriever_load_into_dom($resource) { else { $doc->loadXML($content); } - Logger::info('@@@ retriever_load_into_dom end'); return $doc; } function retriever_extract($doc, $retriever) { - Logger::info('@@@ retriever_extract start'); $params = array('$spec' => $retriever['data']); $extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/'); $extract_xslt = Renderer::replaceMacros($extract_template, $params); @@ -531,12 +519,10 @@ function retriever_extract($doc, $retriever) { Logger::debug('retriever_extract: applying custom XSLT \"' . $retriever['data']['customxslt'] . '\"'); $doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc); } - Logger::info('@@@ retriever_extract end'); return $doc; } function retriever_globalise_urls($doc, $resource) { - Logger::info('@@@ retriever_globalise_urls start'); $components = parse_url($resource['redirect-url']); $rooturl = $components['scheme'] . "://" . $components['host']; $dirurl = $rooturl . dirname($components['path']) . "/"; @@ -544,18 +530,19 @@ function retriever_globalise_urls($doc, $resource) { $fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/'); $fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params); $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc); - Logger::info('@@@ retriever_globalise_urls end'); return $doc; } -function retrieve_images($a, &$item) { - // Note that $item doesn't necessarily contain all the fields you would expect, in particular 'id' - //@@@ doe sit contain uri-id? 
//@@@ it definitely does not +function retrieve_images(&$item) { + // Note that $item might not yet have an id or a uri-id - Logger::debug('@@@ retrieve_images start item id '. (array_key_exists('id', $item) ? $item['id'] : 'undef') . ' uri ' . $item['uri'] . ' uri id ' . $item['uri-id'] . ' plink ' . $item['plink'] . ' guid ' . $item['guid']); $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? - $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $uri_id]); + $content = DBA::selectFirst('item-content', [], ['body'], ['uri-id' => $uri_id]); + if ($content['body'] != $item['body']) { + Logger::warning('@@@ this is probably bad right 3?'); + //@@@ check for this. + } $body = $content['body']; if (!strlen($body)) { Logger::warning('retrieve_images: no body for uri-id ' . $uri_id); @@ -570,13 +557,13 @@ function retrieve_images($a, &$item) { Logger::debug('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); foreach ($matches as $url) { if (strpos($url, get_app()->getBaseUrl()) === FALSE) { - $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id'], true); + $resource = add_retriever_resource($url, $item['uid'], $item['contact-id'], true); Logger::debug('@@@ check this makes sense 2: ' . $resource['id'] . ' url ' . $resource['url']); if (!$resource['completed']) { add_retriever_item($item, $resource); } else { - retriever_transform_images($a, $item, $resource); + retriever_transform_images($item, $resource); } } } @@ -585,14 +572,7 @@ function retrieve_images($a, &$item) { function retriever_check_item_completed(&$item) { $waiting = DBA::selectFirst('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'contact-id' => intval($item['contact-id']), 'finished' => 0]); - Logger::debug('@@@ waiting is ' . 
$waiting); - // TODO: figure out how to do this with DBA module //@@@ selectFirst works - $r = q('SELECT count(*) FROM retriever_item WHERE `item-uri` = "%s" ' . - 'AND `item-uid` = %d AND `contact-id` = %d AND `finished` = 0', - DBA::escape($item['uri']), intval($item['uid']), - intval($item['contact-id'])); - $waiting = $r[0]['count(*)']; - Logger::debug('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources'); + Logger::debug('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] . ' '. $item['contact-id'] . ' waiting for resources'); $old_visible = $item['visible']; $item['visible'] = $waiting ? 0 : 1; if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) { @@ -601,10 +581,10 @@ function retriever_check_item_completed(&$item) } } -function retriever_apply_completed_resource_to_item($retriever, &$item, $resource, $a) { +function retriever_apply_completed_resource_to_item($retriever, &$item, $resource) { Logger::debug('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . $item['plink']); if (strpos($resource['type'], 'image') !== false) { - retriever_transform_images($a, $item, $resource); + retriever_transform_images($item, $resource); } if (!$retriever) { Logger::warning('retriever_apply_completed_resource_to_item: no retriever'); @@ -614,13 +594,19 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc (strpos($resource['type'], 'xml') !== false)) { retriever_apply_dom_filter($retriever, $item, $resource); if ($retriever['data']['images'] ) { - retrieve_images($a, $item); + retrieve_images($item); } } } -//@@@ todo: what is this reference for? 
document if needed delete if not -function retriever_transform_images($a, &$item, $resource) { +/** + * @brief Stores the image downloaded in the supplied resource and updates the item body by replacing the remote URL with the local URL. The body will be updated in the supplied item array. If the item has already been stored, and therefore has an ID already, the row in the database will be updated too. + * + * @param array &$item Row from the item table (by ref) + * @param array $resource Row from the resource table containing successfully downloaded image + */ +// TODO: split this into two functions, one to store the image, the other to change the item body +function retriever_transform_images(&$item, $resource) { if (!$resource['data']) { Logger::info('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url']); return; @@ -652,11 +638,16 @@ function retriever_transform_images($a, &$item, $resource) { return; } - $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $uri_id]); + $content = DBA::selectFirst('item-content', [], ['body'], ['uri-id' => $uri_id]); $body = $content['body']; + if ($body != $item['body']) { + Logger::warning('@@@ this is probably bad right 1?'); + //@@@ check for this. + } Logger::debug('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . $item['uri']); $body = str_replace($resource["url"], $new_url, $body); + $item['body'] = $body; Item::update(['body' => $body], ['uri-id' => $uri_id]); } @@ -671,7 +662,6 @@ function retriever_content($a) { for ($i = 0; $i < count($feeds); ++$i) { $feeds[$i]['url'] = $a->getBaseUrl() . '/retriever/' . $feeds[$i]['id']; } - //@@@ this is broken $template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/'); $a->page['content'] .= Renderer::replaceMacros($template, array( '$config' => $a->getBaseUrl() . 
'/settings/addon', @@ -799,20 +789,26 @@ function retriever_post_remote_hook(&$a, &$item) { $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? $retriever_rule = get_retriever_rule($item['contact-id'], $item["uid"], false); if ($retriever_rule) { - retriever_on_item_insert($a, $retriever_rule, $item); + retriever_on_item_insert($retriever_rule, $item); } else { if (PConfig::get($item["uid"], 'retriever', 'oembed')) { // Convert to HTML and back to take advantage of bbcode's resolution of oembeds. $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]); + if ($content['body'] != $item['body']) { + Logger::warning('@@@ this is probably bad right 2?'); + //@@@ check for this. + } $body = HTML::toBBCode(BBCode::convert($content['body'])); if ($body) { $item['body'] = $body; - Item::update(['body' => $body], ['uri-id' => $uri_id]); + if (array_key_exists('id', $item) && $item['id']) { + Item::update(['body' => $body], ['id' => $item['id']]); + } } } if (PConfig::get($item["uid"], 'retriever', 'all_photos')) { - retrieve_images($a, $item); + retrieve_images($item); } } retriever_check_item_completed($item); diff --git a/retriever/templates/help.tpl b/retriever/templates/help.tpl index 10b421d0..b96ec63c 100644 --- a/retriever/templates/help.tpl +++ b/retriever/templates/help.tpl @@ -143,6 +143,19 @@ Photos" box in the "Retriever Settings" section and click "Submit".

Configure Feeds:

{{foreach $feeds as $feed}} -{{include file='contact_template.tpl' contact=$feed}} +
+ +
+
+ {{$feed.name}} +
+
+
+
+ {{$feed.name}} +
+
+
+
{{/foreach}}
From c4e2590891ec31cf971c2607002b73acf874c56b Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 9 Oct 2019 20:56:46 +0200 Subject: [PATCH 30/96] small cleanup --- retriever/retriever.php | 3 --- 1 file changed, 3 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index b8af7d3d..029b0cff 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -332,7 +332,6 @@ function apply_retrospective($retriever, $num) { //@@@ I think the above statement is wrong. Check! // TODO: This queries then inserts. It should use some kind of lock to avoid requesting the same resource twice. function retriever_on_item_insert($retriever, &$item) { - Logger::debug('@@@ retriever_on_item_insert start'); if (!$retriever || !$retriever['id']) { Logger::info('retriever_on_item_insert: No retriever supplied'); return; @@ -359,8 +358,6 @@ function retriever_on_item_insert($retriever, &$item) { } $resource = add_retriever_resource($url, $item['uid'], $item['contact-id']); - Logger::debug('@@@ check this makes sense: ' . $resource['id'] . ' url ' . $resource['url']); - Logger::debug('@@@ it does not make sense ' . print_r($resource, true)); $retriever_item_id = add_retriever_item($item, $resource); } From 0d2b2fd6b8c4a86316ff50732cf5f30ee8f7103e Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 9 Oct 2019 21:03:45 +0200 Subject: [PATCH 31/96] small addition --- retriever/retriever.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/retriever/retriever.php b/retriever/retriever.php index 029b0cff..4538e031 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -538,6 +538,7 @@ function retrieve_images(&$item) { $content = DBA::selectFirst('item-content', [], ['body'], ['uri-id' => $uri_id]); if ($content['body'] != $item['body']) { Logger::warning('@@@ this is probably bad right 3?'); + Logger::warning('@@@ content: ' . $content['body'] . ' item ' . $item['body']); //@@@ check for this. 
} $body = $content['body']; @@ -553,6 +554,9 @@ function retrieve_images(&$item) { $matches = array_merge($matches1[3], $matches2[1], $matches3[1]); Logger::debug('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); foreach ($matches as $url) { + if (!$url) { + continue; + } if (strpos($url, get_app()->getBaseUrl()) === FALSE) { $resource = add_retriever_resource($url, $item['uid'], $item['contact-id'], true); Logger::debug('@@@ check this makes sense 2: ' . $resource['id'] . ' url ' . $resource['url']); @@ -794,6 +798,7 @@ function retriever_post_remote_hook(&$a, &$item) { $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]); if ($content['body'] != $item['body']) { Logger::warning('@@@ this is probably bad right 2?'); + Logger::warning('@@@ content: ' . $content['body'] . ' item ' . $item['body']); //@@@ check for this. } $body = HTML::toBBCode(BBCode::convert($content['body'])); From de1d3a803272d0242a311cf2b2f8dd6e6beffad2 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Fri, 11 Oct 2019 18:47:32 +0200 Subject: [PATCH 32/96] I think this works --- retriever/retriever.php | 134 +++++++++++++++++++++++++--------------- 1 file changed, 84 insertions(+), 50 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 4538e031..42c4a55a 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -21,6 +21,7 @@ use Friendica\Core\L10n; use Friendica\Database\DBA; use Friendica\Model\ItemURI; use Friendica\Model\Item; +use Friendica\Util\DateTimeFormat; function retriever_install() { Addon::registerHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings'); @@ -129,7 +130,8 @@ function retriever_retrieve_items($max_items, $a) { $retrieve_items = $max_items - $retriever_item_count; do { Logger::debug('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . 
intval($retriever_item_count) . ', retrieve ' . $retrieve_items); - // TODO: figure out how to do this with DBA module + // TODO: figure out how to do this with DBA module //@@@ this is possible + $retriever_resources2 = DBA::selectToArray('retriever_resource', [], ['`completed` IS NULL AND (`last-try` IS NULL OR ' . implode($schedule_clauses, ' OR ') . ')'], ['order' => ['last-try' => 0], 'limit' => $retrieve_items]); $retriever_resources = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d", DBA::escape(implode($schedule_clauses, ' OR ')), intval($retrieve_items)); @@ -140,6 +142,7 @@ function retriever_retrieve_items($max_items, $a) { break; } Logger::debug('retriever_retrieve_items: found ' . count($retriever_resources) . ' waiting resources in database'); + Logger::debug('@@@ retriever_retrieve_items: alternative found ' . count($retriever_resources2) . ': ' . print_r($retriever_resources2, true)); foreach ($retriever_resources as $retriever_resource) { retrieve_resource($retriever_resource); $retriever_item_count++; @@ -186,9 +189,6 @@ function retriever_clean_up_completed_resources($max_items, $a) { function retriever_tidy() { DBA::delete('retriever_resource', ['completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)']); DBA::delete('retriever_resource', ['completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)']); - // @@@ check that this worked - /* q("DELETE FROM retriever_resource WHERE completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)"); */ - /* q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)"); */ $r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); Logger::info('retriever_tidy: found ' . count($r) . 
' retriever_items with no retriever_resource'); @@ -221,7 +221,15 @@ function retrieve_resource($resource) { $a = get_app(); $retriever_rule = get_retriever_rule($resource['contact-id'], $resource['item-uid']); + if (!$retriever_rule) { + Logger::warning('retrieve_resource: no rule found for contact ' . $resource['contact-id'] . ' item ' . $resource['item-uid']); + return; + } $rule_data = $retriever_rule['data']; + if (!$rule_data) { + Logger::warning('retrieve_resource: no rule data found for contact ' . $resource['contact-id'] . ' item ' . $resource['item-uid']); + return; + } try { Logger::debug('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url']); @@ -269,8 +277,8 @@ function get_retriever_rule($contact_id, $uid, $create = false) { } if ($create) { DBA::insert('retriever_rule', ['uid' => intval($uid), 'contact-id' => intval($contact_id)]); - //@@@ check that this worked - return DBA::selectFirst('retriever_rule', [], ['contact-id' => intval($contact_id), 'uid' => intval($uid)]); + $retriever_rule = DBA::selectFirst('retriever_rule', [], ['contact-id' => intval($contact_id), 'uid' => intval($uid)]); + return $retriever_rule; } } @@ -373,15 +381,16 @@ function add_retriever_resource($url, $uid, $cid, $binary = false) { fclose($fp); $url = 'md5://' . hash('md5', $url); - if (DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)])) { + $resource = DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); + if ($resource) { Logger::debug('add_retriever_resource: Resource ' . $url . ' already requested'); return $resource; } - Logger::debug('retrieve_resource: got data URL type ' . $resource['type']); DBA::insert('retriever_resource', ['item-uid' => intval($uid), 'contact-id' => intval($cid), 'type' => $type, 'binary' => ($binary ? 
1 : 0), 'url' => $url, 'completed' => DateTimeFormat::utcNow(), 'data' => $data]); // @@@ check that this makes sense - if (DBA::selectFirst('retriever_resource', [], ['url' => $url])) { + $resource = DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); + if ($resource) { retriever_resource_completed($resource); } return $resource; @@ -391,9 +400,10 @@ function add_retriever_resource($url, $uid, $cid, $binary = false) { Logger::warning('add_retriever_resource: URL is longer than 800 characters'); } - if (DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)])) { + $resource = DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); + if ($resource) { Logger::debug('add_retriever_resource: Resource ' . $url . ' uid ' . $uid . ' cid ' . $cid . ' already requested'); - return $r[0]; + return $resource; } DBA::insert('retriever_resource', ['item-uid' => intval($uid), 'contact-id' => intval($cid), 'binary' => ($binary ? 1 : 0), 'url' => $url]); @@ -530,27 +540,67 @@ function retriever_globalise_urls($doc, $resource) { return $doc; } +function retriever_get_body($item) { + if (array_key_exists('id', $item) && $item['id']) { + // item has already been stored in database + if (!array_key_exists('uri-id', $item) || !$item['uri-id']) { + Logger::warning('retriever_get_body: item uri ' . $item['uri'] . ' has id but no uri-id'); + //@@@ check never happens + return $item['body']; + } + $content = DBA::selectFirst('item-content', [], ['body'], ['uri-id' => $item['uri-id']]); + if (!$content) { + Logger::warning('retriever_get_body: item-content uri-id ' . $item['uri-id'] . ' has no content'); + //@@@ check never happens + return $item['body']; + } + if (!$content['body']) { + Logger::warning('retriever_get_body: item-content uri-id ' . $item['uri-id'] . 
' has no body'); + //@@@ check never happens + return $item['body']; + } + if ($content['body'] != $item['body']) { + Logger::warning('@@@ this is probably bad content: ' . $content['body'] . ' item ' . $item['body']); + //@@@ check for this. + } + Logger::debug('@@@ retriever_get_body uri-id ' . $item['uri-id'] . ' body: ' . $content['body']); + return $content['body']; + } + // item has not yet been stored in database + Logger::debug('@@@ retriever_get_body id ' . $item['id'] . ' body: ' . $item['body']); + return $item['body']; +} + +function retriever_set_body(&$item, $body, $allow_empty = false) { + if (!$body && !$allow_empty) { + Logger::debug('retriever_set_body: will not set empty body in item id ' . $item['id'] . ' uri ' . $item['uri']); + return; + } + $item['body'] = $body; + Logger::debug('@@@ retriever_set_body set array value to ' . $body); + if (array_key_exists('id', $item) && $item['id']) { + // item has already been stored in database + Logger::debug('@@@ retriever_set_body updating item ' . print_r($item, true) . ' to ' . $body); + Item::update(['body' => $body], ['id' => intval($item['id'])]); + } +} + +/** + * @brief @@@ + * + * @param array &$item Row from the item table (by ref) + */ function retrieve_images(&$item) { - // Note that $item might not yet have an id or a uri-id - - $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? - - $content = DBA::selectFirst('item-content', [], ['body'], ['uri-id' => $uri_id]); - if ($content['body'] != $item['body']) { - Logger::warning('@@@ this is probably bad right 3?'); - Logger::warning('@@@ content: ' . $content['body'] . ' item ' . $item['body']); - //@@@ check for this. - } - $body = $content['body']; + $body = retriever_get_body($item); if (!strlen($body)) { - Logger::warning('retrieve_images: no body for uri-id ' . $uri_id); + Logger::warning('retrieve_images: no body for item ' . 
$item['uri']); return; } // I suspect that the first two are not used any more? - preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $item["body"], $matches1); - preg_match_all("/\[img\](.*?)\[\/img\]/ism", $item["body"], $matches2); - preg_match_all("/\[img\=([^\]]*)\]([^[]*)\[\/img\]/ism", $item["body"], $matches3); + preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $body, $matches1); + preg_match_all("/\[img\](.*?)\[\/img\]/ism", $body, $matches2); + preg_match_all("/\[img\=([^\]]*)\]([^[]*)\[\/img\]/ism", $body, $matches3); $matches = array_merge($matches1[3], $matches2[1], $matches3[1]); Logger::debug('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); foreach ($matches as $url) { @@ -639,18 +689,11 @@ function retriever_transform_images(&$item, $resource) { return; } - $content = DBA::selectFirst('item-content', [], ['body'], ['uri-id' => $uri_id]); - $body = $content['body']; - if ($body != $item['body']) { - Logger::warning('@@@ this is probably bad right 1?'); - //@@@ check for this. - } + $body = retriever_get_body($item); Logger::debug('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . $item['uri']); $body = str_replace($resource["url"], $new_url, $body); - $item['body'] = $body; - - Item::update(['body' => $body], ['uri-id' => $uri_id]); + retriever_set_body($item, $body); } function retriever_content($a) { @@ -699,7 +742,6 @@ function retriever_content($a) { unset($retriever_rule['data']['exclude'][$k]); } } - //@@@ check that this works DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], ['data' => '']); $a->page['content'] .= "

Settings Updated"; if (!empty($_POST["retriever_retrospective"])) { @@ -783,7 +825,9 @@ function retriever_contact_photo_menu($a, &$args) { } function retriever_post_remote_hook(&$a, &$item) { - // Note that $item doesn't necessarily contain all the fields you would expect, in particular 'id' + // @@@ I believe this should either never have the id, or always should. This needs more investigation. + // @@@ and if it does not, does it have a content row? + Logger::debug('@@@ retriever_post_remote_hook uri ' . $item['uri'] . ' has id ' . array_key_exists('id', $item) . ' has uri-id ' . array_key_exists('uri-id', $item)); Logger::info('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); @@ -795,19 +839,9 @@ function retriever_post_remote_hook(&$a, &$item) { else { if (PConfig::get($item["uid"], 'retriever', 'oembed')) { // Convert to HTML and back to take advantage of bbcode's resolution of oembeds. - $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]); - if ($content['body'] != $item['body']) { - Logger::warning('@@@ this is probably bad right 2?'); - Logger::warning('@@@ content: ' . $content['body'] . ' item ' . $item['body']); - //@@@ check for this. 
- } - $body = HTML::toBBCode(BBCode::convert($content['body'])); - if ($body) { - $item['body'] = $body; - if (array_key_exists('id', $item) && $item['id']) { - Item::update(['body' => $body], ['id' => $item['id']]); - } - } + $body = retriever_get_body($item); + $body = HTML::toBBCode(BBCode::convert($body)); + retriever_set_body($item, $body); } if (PConfig::get($item["uid"], 'retriever', 'all_photos')) { retrieve_images($item); From 3dbf7aec36b4d4fb455586033a1468c514a31152 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sat, 12 Oct 2019 19:08:11 +0200 Subject: [PATCH 33/96] working much better --- retriever/retriever.php | 177 ++++++++++++++++++++-------------------- 1 file changed, 87 insertions(+), 90 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 42c4a55a..a71f302c 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -23,6 +23,9 @@ use Friendica\Model\ItemURI; use Friendica\Model\Item; use Friendica\Util\DateTimeFormat; +/** + * @brief Installation hook for retriever plugin + */ function retriever_install() { Addon::registerHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings'); Addon::registerHook('plugin_settings_post', 'addon/retriever/retriever.php', 'retriever_plugin_settings_post'); @@ -53,9 +56,9 @@ function retriever_install() { } if (Config::get('retriever', 'dbversion') != '0.14') { $schema = file_get_contents(dirname(__file__).'/database.sql'); - $arr = explode(';', $schema); - foreach ($arr as $a) { - if (!DBA::e($a)) { + $tables = explode(';', $schema); + foreach ($tables as $table) { + if (!DBA::e($table)) { Logger::warning('Unable to create database table: ' . 
DBA::errorMessage()); return; } @@ -65,6 +68,9 @@ function retriever_install() { } } +/** + * @brief Uninstallation hook for retriever plugin + */ function retriever_uninstall() { Addon::unregisterHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings'); Addon::unregisterHook('plugin_settings_post', 'addon/retriever/retriever.php', 'retriever_plugin_settings_post'); @@ -75,9 +81,17 @@ function retriever_uninstall() { Addon::unregisterHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); } +/** + * @brief Module hook for retriever plugin + * + * TODO: figure out what this should be used for + */ function retriever_module() {} -function retriever_addon_admin(&$a, &$o) { +/** + * @brief Admin page hook for retriever plugin + */ +function retriever_addon_admin() { $downloads_per_cron = Config::get('retriever', 'downloads_per_cron'); $template = Renderer::getMarkupTemplate('admin.tpl', 'addon/retriever/'); $config = ['downloads_per_cron', @@ -89,25 +103,36 @@ function retriever_addon_admin(&$a, &$o) { '$submit' => L10n::t('Save Settings')]); } -function retriever_addon_admin_post ($a) { +/** + * @brief Admin page post hook for retriever plugin + */ +function retriever_addon_admin_post () { if (!empty($_POST['downloads_per_cron'])) { Config::set('retriever', 'downloads_per_cron', $_POST['downloads_per_cron']); } } -function retriever_cron($a, $b) { +/** + * @brief Cron jobs for retriever plugin + */ +function retriever_cron() { $downloads_per_cron = Config::get('retriever', 'downloads_per_cron'); // Do this first, otherwise it can interfere with retriever_retrieve_items - retriever_clean_up_completed_resources($downloads_per_cron, $a); + retriever_clean_up_completed_resources($downloads_per_cron); - retriever_retrieve_items($downloads_per_cron, $a); + retriever_retrieve_items($downloads_per_cron); retriever_tidy(); } $retriever_item_count = 0; -function retriever_retrieve_items($max_items, $a) { +/** + * @brief Searches for items 
in the retriever_items table that should be retrieved and attempts to retrieve them + * + * @param int $max_items Maximum number of items to retrieve in this call + */ +function retriever_retrieve_items($max_items) { global $retriever_item_count; $retriever_schedule = array(array(1,'minute'), @@ -130,11 +155,7 @@ function retriever_retrieve_items($max_items, $a) { $retrieve_items = $max_items - $retriever_item_count; do { Logger::debug('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . intval($retriever_item_count) . ', retrieve ' . $retrieve_items); - // TODO: figure out how to do this with DBA module //@@@ this is possible - $retriever_resources2 = DBA::selectToArray('retriever_resource', [], ['`completed` IS NULL AND (`last-try` IS NULL OR ' . implode($schedule_clauses, ' OR ') . ')'], ['order' => ['last-try' => 0], 'limit' => $retrieve_items]); - $retriever_resources = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d", - DBA::escape(implode($schedule_clauses, ' OR ')), - intval($retrieve_items)); + $retriever_resources = DBA::selectToArray('retriever_resource', [], ['`completed` IS NULL AND (`last-try` IS NULL OR ' . implode($schedule_clauses, ' OR ') . ')'], ['order' => ['last-try' => 0], 'limit' => $retrieve_items]); if (!is_array($retriever_resources)) { break; } @@ -142,7 +163,6 @@ function retriever_retrieve_items($max_items, $a) { break; } Logger::debug('retriever_retrieve_items: found ' . count($retriever_resources) . ' waiting resources in database'); - Logger::debug('@@@ retriever_retrieve_items: alternative found ' . count($retriever_resources2) . ': ' . 
print_r($retriever_resources2, true)); foreach ($retriever_resources as $retriever_resource) { retrieve_resource($retriever_resource); $retriever_item_count++; @@ -153,8 +173,12 @@ function retriever_retrieve_items($max_items, $a) { Logger::debug('retriever_retrieve_items: finished retrieving items'); } -// Look for items that are waiting even though the resource has completed. This shouldn't happen, but is worth cleaning up if it does. -function retriever_clean_up_completed_resources($max_items, $a) { +/** + * @brief Looks for items that are waiting even though the resource has completed. This shouldn't happen, but is worth cleaning up if it does. + * + * @param int $max_items Maximum number of items to retrieve in this call + */ +function retriever_clean_up_completed_resources($max_items) { // TODO: figure out how to do this with DBA module $r = q('SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d', intval($max_items)); @@ -179,13 +203,15 @@ function retriever_clean_up_completed_resources($max_items, $a) { continue; } $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]); - retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a); - Logger::info('@@@ retriever_clean_up_completed_resources tried to update id ' . $retriever_item['id'] . 
' to finished, better check that it really worked!'); + retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource); DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished' => 0]); retriever_check_item_completed($item); } } +/** + * @brief Deletes old rows from the retriever_item and retriever_resource table that are unlikely to be needed + */ function retriever_tidy() { DBA::delete('retriever_resource', ['completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)']); DBA::delete('retriever_resource', ['completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)']); @@ -199,7 +225,7 @@ function retriever_tidy() { function retrieve_dataurl_resource($resource) { if (!preg_match("/date:(.*);base64,(.*)/", $resource['url'], $matches)) { - Logger::info('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern'); + Logger::warning('retrieve_dataurl_resource: resource ' . $resource['id'] . ' does not match pattern'); } else { $resource['type'] = $matches[1]; $resource['data'] = base64url_decode($matches[2]); @@ -210,7 +236,7 @@ function retrieve_dataurl_resource($resource) { DBA::escape($resource['data']), DBA::escape($resource['type']), intval($resource['id'])); - retriever_resource_completed($resource, $a); + retriever_resource_completed($resource); } function retrieve_resource($resource) { @@ -218,8 +244,6 @@ function retrieve_resource($resource) { return retrieve_dataurl_resource($resource); } - $a = get_app(); - $retriever_rule = get_retriever_rule($resource['contact-id'], $resource['item-uid']); if (!$retriever_rule) { Logger::warning('retrieve_resource: no rule found for contact ' . $resource['contact-id'] . ' item ' . 
$resource['item-uid']); @@ -243,7 +267,6 @@ function retrieve_resource($resource) { if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) { $retriever_rule['data']['cookiedata'] = file_get_contents($cookiejar); DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], $retriever_rule); - //@@@ check the update worked unlink($cookiejar); } $resource['data'] = $fetch_result->getBody(); @@ -254,18 +277,10 @@ function retrieve_resource($resource) { } catch (Exception $e) { Logger::info('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage()); } - // TODO: figure out how to do this with DBA module - q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `http-code` = %d, `redirect-url` = '%s' WHERE id = %d", - intval($resource['http-code']), - DBA::escape($resource['redirect-url']), - intval($resource['id'])); + DBA::update('retriever_resource', ['id' => intval($resource['id'])], ['last-try' => DateTimeFormat::utcNow(), 'num-tries' => intval($resource['num-tries']) + 1, 'http-code' => intval($resource['http-code']), 'redirect-url' => $resource['redirect-url']], ['last-try' => false]); if ($resource['data']) { - // TODO: figure out how to do this with DBA module - q("UPDATE `retriever_resource` SET `completed` = now(), `data` = '%s', `type` = '%s' WHERE id = %d", - DBA::escape($resource['data']), - DBA::escape($resource['type']), - intval($resource['id'])); - retriever_resource_completed($resource, $a); + DBA::update('retriever_resource', ['id' => intval($resource['id'])], ['completed' => DateTimeFormat::utcNow(), 'data' => $resource['data'], 'type' => $resource['type']], ['completed' => false]); + retriever_resource_completed($resource); } } @@ -327,7 +342,6 @@ function retriever_resource_completed($resource) { function apply_retrospective($retriever, $num) { foreach (Item::selectToArray([], ['contact-id' => 
intval($retriever['contact-id'])], ['order' => ['received' => true], 'limit' => $num]) as $item) { Item::update(['visible' => 0], ['id' => intval($item['id'])]); - //@@@ check that this works foreach (DBA::selectToArray('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]) as $retriever_item) { DBA::delete('retriever_resource', ['id' => $retriever_item['resource']]); DBA::delete('retriever_item', ['id' => $retriever_item['id']]); @@ -336,9 +350,14 @@ function apply_retrospective($retriever, $num) { } } -// TODO: Currently this waits until the next cron before actually downloading. Should do it immediately. -//@@@ I think the above statement is wrong. Check! -// TODO: This queries then inserts. It should use some kind of lock to avoid requesting the same resource twice. +/** + * @brief Queues an item for retrieval. It does not actually perform the retrieval. + * + * @param array $retriever Retriever rule configuration for this contact + * @param array $item Item that should be retrieved. This may or may not have been already stored in the database. + * + * TODO: This queries then inserts. It should use some kind of lock to avoid requesting the same resource twice. + */ function retriever_on_item_insert($retriever, &$item) { if (!$retriever || !$retriever['id']) { Logger::info('retriever_on_item_insert: No retriever supplied'); @@ -388,7 +407,6 @@ function add_retriever_resource($url, $uid, $cid, $binary = false) { } DBA::insert('retriever_resource', ['item-uid' => intval($uid), 'contact-id' => intval($cid), 'type' => $type, 'binary' => ($binary ? 
1 : 0), 'url' => $url, 'completed' => DateTimeFormat::utcNow(), 'data' => $data]); - // @@@ check that this makes sense $resource = DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); if ($resource) { retriever_resource_completed($resource); @@ -396,6 +414,7 @@ function add_retriever_resource($url, $uid, $cid, $binary = false) { return $resource; } + // 800 characters is the size of this field in the database if (strlen($url) > 800) { Logger::warning('add_retriever_resource: URL is longer than 800 characters'); } @@ -419,7 +438,6 @@ function add_retriever_item(&$item, $resource) { return; } if (DBA::selectFirst('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'resource' => intval($resource['id'])])) { - //@@@ check that this worked Logger::info("add_retriever_item: retriever item already present for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); return; } @@ -493,12 +511,7 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { $body .= ']' . $item['plink'] . '[/url]'; Logger::debug('retriever_apply_dom_filter: XSLT result \"' . $body . '\"'); - $item['body'] = $body; - if (array_key_exists('id', $item) && $item['id']) { //@@@ this should be a separate function - //@@@ check that this works - Logger::debug('@@@ retriever_apply_dom_filter updating item by id ' . $item['id']); - Item::update(['body' => $body], ['id' => $item['id']]); - } + retriever_set_body($item, $body); } function retriever_load_into_dom($resource) { @@ -541,34 +554,27 @@ function retriever_globalise_urls($doc, $resource) { } function retriever_get_body($item) { - if (array_key_exists('id', $item) && $item['id']) { - // item has already been stored in database - if (!array_key_exists('uri-id', $item) || !$item['uri-id']) { - Logger::warning('retriever_get_body: item uri ' . $item['uri'] . 
' has id but no uri-id'); - //@@@ check never happens - return $item['body']; - } - $content = DBA::selectFirst('item-content', [], ['body'], ['uri-id' => $item['uri-id']]); - if (!$content) { - Logger::warning('retriever_get_body: item-content uri-id ' . $item['uri-id'] . ' has no content'); - //@@@ check never happens - return $item['body']; - } - if (!$content['body']) { - Logger::warning('retriever_get_body: item-content uri-id ' . $item['uri-id'] . ' has no body'); - //@@@ check never happens - return $item['body']; - } - if ($content['body'] != $item['body']) { - Logger::warning('@@@ this is probably bad content: ' . $content['body'] . ' item ' . $item['body']); - //@@@ check for this. - } - Logger::debug('@@@ retriever_get_body uri-id ' . $item['uri-id'] . ' body: ' . $content['body']); - return $content['body']; + if (!array_key_exists('uri-id', $item) || !$item['uri-id']) { + // item has not yet been stored in database + return $item['body']; } - // item has not yet been stored in database - Logger::debug('@@@ retriever_get_body id ' . $item['id'] . ' body: ' . $item['body']); - return $item['body']; + + // item has been stored in database, body is stored in the item-content table + $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $item['uri-id']]); + if (!$content) { + Logger::warning('retriever_get_body: item-content uri-id ' . $item['uri-id'] . ' has no content'); + return $item['body']; + } + if (!$content['body']) { + Logger::warning('retriever_get_body: item-content uri-id ' . $item['uri-id'] . ' has no body'); + //@@@ check never happens + return $item['body']; + } + if ($content['body'] != $item['body']) { + Logger::warning('@@@ this is probably bad @@@ content: ' . $content['body'] . ' @@@ item: ' . $item['body']); + //@@@ check for this. 
+ } + return $content['body']; } function retriever_set_body(&$item, $body, $allow_empty = false) { @@ -577,16 +583,15 @@ function retriever_set_body(&$item, $body, $allow_empty = false) { return; } $item['body'] = $body; - Logger::debug('@@@ retriever_set_body set array value to ' . $body); - if (array_key_exists('id', $item) && $item['id']) { - // item has already been stored in database - Logger::debug('@@@ retriever_set_body updating item ' . print_r($item, true) . ' to ' . $body); - Item::update(['body' => $body], ['id' => intval($item['id'])]); + if (!array_key_exists('id', $item) || !$item['id']) { + // item has not yet been stored in database + return; } + Item::update(['body' => $body], ['id' => intval($item['id'])]); } /** - * @brief @@@ + * @brief Searches for images in the item and adds corresponding retriever_items. If the images have already been downloaded, updates the body in the supplied item array. * * @param array &$item Row from the item table (by ref) */ @@ -609,7 +614,6 @@ function retrieve_images(&$item) { } if (strpos($url, get_app()->getBaseUrl()) === FALSE) { $resource = add_retriever_resource($url, $item['uid'], $item['contact-id'], true); - Logger::debug('@@@ check this makes sense 2: ' . $resource['id'] . ' url ' . $resource['url']); if (!$resource['completed']) { add_retriever_item($item, $resource); } @@ -663,8 +667,6 @@ function retriever_transform_images(&$item, $resource) { return; } - $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? - $data = $resource['data']; $type = $resource['type']; $uid = $item['uid']; @@ -745,7 +747,7 @@ function retriever_content($a) { DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], ['data' => '']); $a->page['content'] .= "

Settings Updated"; if (!empty($_POST["retriever_retrospective"])) { - apply_retrospective($a, $retriever_rule, $_POST["retriever_retrospective"]); + apply_retrospective($retriever_rule, $_POST["retriever_retrospective"]); $a->page['content'] .= " and retrospectively applied to " . $_POST["retriever_retrospective"] . " posts"; } $a->page['content'] .= ".

"; @@ -825,13 +827,8 @@ function retriever_contact_photo_menu($a, &$args) { } function retriever_post_remote_hook(&$a, &$item) { - // @@@ I believe this should either never have the id, or always should. This needs more investigation. - // @@@ and if it does not, does it have a content row? - Logger::debug('@@@ retriever_post_remote_hook uri ' . $item['uri'] . ' has id ' . array_key_exists('id', $item) . ' has uri-id ' . array_key_exists('uri-id', $item)); - Logger::info('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); - $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? $retriever_rule = get_retriever_rule($item['contact-id'], $item["uid"], false); if ($retriever_rule) { retriever_on_item_insert($retriever_rule, $item); @@ -850,7 +847,7 @@ function retriever_post_remote_hook(&$a, &$item) { retriever_check_item_completed($item); } -function retriever_plugin_settings(&$a,&$s) { +function retriever_plugin_settings(&$a, &$s) { $all_photos = PConfig::get(local_user(), 'retriever', 'all_photos'); $oembed = PConfig::get(local_user(), 'retriever', 'oembed'); $template = Renderer::getMarkupTemplate('/settings.tpl', 'addon/retriever/'); From 038952461b4cdc1c59def465b1ca6502cbc8334f Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 13 Oct 2019 10:40:24 +0200 Subject: [PATCH 34/96] Almost finished, maybe not working --- retriever/retriever.php | 1492 +++++++++++++++------------ retriever/templates/admin.tpl | 1 + retriever/templates/rule-config.tpl | 2 + retriever/templates/settings.tpl | 19 +- 4 files changed, 834 insertions(+), 680 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index a71f302c..33f9a40e 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -1,10 +1,10 @@ - */ + /** + * Name: Retriever + * Description: Follow the permalink of RSS/Atom feed items and replace the summary with the full content. 
+ * Version: 1.0 + * Author: Matthew Exon + */ use Friendica\Core\Addon; use Friendica\Core\Config; @@ -27,58 +27,37 @@ use Friendica\Util\DateTimeFormat; * @brief Installation hook for retriever plugin */ function retriever_install() { - Addon::registerHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings'); - Addon::registerHook('plugin_settings_post', 'addon/retriever/retriever.php', 'retriever_plugin_settings_post'); - Addon::registerHook('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook'); - Addon::registerHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); - Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); + Addon::registerHook('addon_settings', 'addon/retriever/retriever.php', 'retriever_addon_settings'); + Addon::registerHook('addon_settings_post', 'addon/retriever/retriever.php', 'retriever_addon_settings_post'); + Addon::registerHook('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook'); + Addon::registerHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); + Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); - if (Config::get('retriever', 'dbversion') == '0.10') { - q('ALTER TABLE `retriever_resource` MODIFY COLUMN `type` char(255) NULL DEFAULT NULL'); - q('ALTER TABLE `retriever_resource` MODIFY COLUMN `data` mediumblob NULL DEFAULT NULL'); - q('ALTER TABLE `retriever_rule` MODIFY COLUMN `data` mediumtext NULL DEFAULT NULL'); - Config::set('retriever', 'dbversion', '0.11'); - } - if (Config::get('retriever', 'dbversion') == '0.11') { - q('ALTER TABLE `retriever_resource` ADD INDEX `url` (`url`)'); - q('ALTER TABLE `retriever_resource` ADD INDEX `completed` (`completed`)'); - q('ALTER TABLE `retriever_item` ADD INDEX `finished` (`finished`)'); - q('ALTER TABLE `retriever_item` ADD INDEX `item-uid` (`item-uid`)'); - 
Config::set('retriever', 'dbversion', '0.12'); - } - if (Config::get('retriever', 'dbversion') == '0.12') { - q("ALTER TABLE `retriever_resource` ADD COLUMN `contact-id` int(10) unsigned NOT NULL DEFAULT '0' AFTER `id`"); - q("ALTER TABLE `retriever_resource` ADD COLUMN `item-uid` int(10) unsigned NOT NULL DEFAULT '0' AFTER `id`"); - Config::set('retriever', 'dbversion', '0.13'); - } - if (Config::get('retriever', 'dbversion') == '0.13') { - Config::set('retriever', 'downloads_per_cron', '100'); - } - if (Config::get('retriever', 'dbversion') != '0.14') { - $schema = file_get_contents(dirname(__file__).'/database.sql'); - $tables = explode(';', $schema); - foreach ($tables as $table) { - if (!DBA::e($table)) { - Logger::warning('Unable to create database table: ' . DBA::errorMessage()); - return; - } - } - Config::set('retriever', 'downloads_per_cron', '100'); - Config::set('retriever', 'dbversion', '0.14'); - } + if (Config::get('retriever', 'dbversion') != '0.14') { + $schema = file_get_contents(dirname(__file__).'/database.sql'); + $tables = explode(';', $schema); + foreach ($tables as $table) { + if (!DBA::e($table)) { + Logger::warning('Unable to create database table: ' . 
DBA::errorMessage()); + return; + } + } + Config::set('retriever', 'downloads_per_cron', '100'); + Config::set('retriever', 'dbversion', '0.14'); + } } /** * @brief Uninstallation hook for retriever plugin */ function retriever_uninstall() { - Addon::unregisterHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings'); - Addon::unregisterHook('plugin_settings_post', 'addon/retriever/retriever.php', 'retriever_plugin_settings_post'); - Addon::unregisterHook('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook'); - Addon::unregisterHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings'); - Addon::unregisterHook('plugin_settings_post', 'addon/retriever/retriever.php', 'retriever_plugin_settings_post'); - Addon::unregisterHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); - Addon::unregisterHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); + Addon::unregisterHook('addon_settings', 'addon/retriever/retriever.php', 'retriever_addon_settings'); + Addon::unregisterHook('addon_settings_post', 'addon/retriever/retriever.php', 'retriever_addon_settings_post'); + Addon::unregisterHook('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook'); + Addon::unregisterHook('addon_settings', 'addon/retriever/retriever.php', 'retriever_addon_settings'); + Addon::unregisterHook('addon_settings_post', 'addon/retriever/retriever.php', 'retriever_addon_settings_post'); + Addon::unregisterHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); + Addon::unregisterHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); } /** @@ -90,41 +69,55 @@ function retriever_module() {} /** * @brief Admin page hook for retriever plugin + * + * @param App $a App object (by ref) + * @param string $o HTML to append content to (by ref) */ -function retriever_addon_admin() { - $downloads_per_cron = 
Config::get('retriever', 'downloads_per_cron'); - $template = Renderer::getMarkupTemplate('admin.tpl', 'addon/retriever/'); - $config = ['downloads_per_cron', - L10n::t('Downloads per Cron'), - $downloads_per_cron, - L10n::t('Maximum number of downloads to attempt during each run of the cron job.')]; - $o .= Renderer::replaceMacros($template, [ - '$downloads_per_cron' => $config, - '$submit' => L10n::t('Save Settings')]); +function retriever_addon_admin(&$a, &$o) { + $template = Renderer::getMarkupTemplate('admin.tpl', 'addon/retriever/'); + + $downloads_per_cron = Config::get('retriever', 'downloads_per_cron'); + $downloads_per_cron_config = ['downloads_per_cron', + L10n::t('Downloads per Cron'), + $downloads_per_cron, + L10n::t('Maximum number of downloads to attempt during each run of the cron job.')]; + + $allow_images = Config::get('retriever', 'allow_images'); + $allow_images_config = ['allow_images', + L10n::t('Allow Retrieving Images'), + $allow_images, + L10n::t('Allow users to request images be downloaded as well as text.
Warning: the images are not automatically deleted and may fill up your database.')]; + + $o .= Renderer::replaceMacros($template, [ + '$downloads_per_cron' => $downloads_per_cron_config, + '$allow_images' => $allow_images_config, + '$submit' => L10n::t('Save Settings')]); } /** * @brief Admin page post hook for retriever plugin */ function retriever_addon_admin_post () { - if (!empty($_POST['downloads_per_cron'])) { - Config::set('retriever', 'downloads_per_cron', $_POST['downloads_per_cron']); - } + if (!empty($_POST['downloads_per_cron'])) { + Config::set('retriever', 'downloads_per_cron', $_POST['downloads_per_cron']); + } + Config::set('retriever', 'allow_images', $_POST['allow_images']); } /** * @brief Cron jobs for retriever plugin */ function retriever_cron() { - $downloads_per_cron = Config::get('retriever', 'downloads_per_cron'); + $downloads_per_cron = Config::get('retriever', 'downloads_per_cron'); - // Do this first, otherwise it can interfere with retriever_retrieve_items - retriever_clean_up_completed_resources($downloads_per_cron); + // Do this first, otherwise it can interfere with retriever_retrieve_items + retriever_clean_up_completed_resources($downloads_per_cron); - retriever_retrieve_items($downloads_per_cron); - retriever_tidy(); + retriever_retrieve_items($downloads_per_cron); + retriever_tidy(); } +// This global variable is used to track the number of items that have been retrieved during the course of this process $retriever_item_count = 0; /** @@ -133,44 +126,44 @@ $retriever_item_count = 0; * @param int $max_items Maximum number of items to retrieve in this call */ function retriever_retrieve_items($max_items) { - global $retriever_item_count; + global $retriever_item_count; - $retriever_schedule = array(array(1,'minute'), - array(10,'minute'), - array(1,'hour'), - array(1,'day'), - array(2,'day'), - array(1,'week'), - array(1,'month')); + $retriever_schedule = array(array(1,'minute'), + array(10,'minute'), + array(1,'hour'), + 
array(1,'day'), + array(2,'day'), + array(1,'week'), + array(1,'month')); - $schedule_clauses = array(); - for ($i = 0; $i < count($retriever_schedule); $i++) { - $num = $retriever_schedule[$i][0]; - $unit = $retriever_schedule[$i][1]; - array_push($schedule_clauses, - '(`num-tries` = ' . $i . ' AND TIMESTAMPADD(' . DBA::escape($unit) . - ', ' . intval($num) . ', `last-try`) < now())'); - } + $schedule_clauses = array(); + for ($i = 0; $i < count($retriever_schedule); $i++) { + $num = $retriever_schedule[$i][0]; + $unit = $retriever_schedule[$i][1]; + array_push($schedule_clauses, + '(`num-tries` = ' . $i . ' AND TIMESTAMPADD(' . DBA::escape($unit) . + ', ' . intval($num) . ', `last-try`) < now())'); + } - $retrieve_items = $max_items - $retriever_item_count; - do { - Logger::debug('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . intval($retriever_item_count) . ', retrieve ' . $retrieve_items); - $retriever_resources = DBA::selectToArray('retriever_resource', [], ['`completed` IS NULL AND (`last-try` IS NULL OR ' . implode($schedule_clauses, ' OR ') . ')'], ['order' => ['last-try' => 0], 'limit' => $retrieve_items]); - if (!is_array($retriever_resources)) { - break; - } - if (count($retriever_resources) == 0) { - break; - } - Logger::debug('retriever_retrieve_items: found ' . count($retriever_resources) . ' waiting resources in database'); - foreach ($retriever_resources as $retriever_resource) { - retrieve_resource($retriever_resource); - $retriever_item_count++; - } - $retrieve_items = $max_items - $retriever_item_count; - } - while ($retrieve_items > 0); - Logger::debug('retriever_retrieve_items: finished retrieving items'); + $retrieve_items = $max_items - $retriever_item_count; + do { + Logger::debug('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . intval($retriever_item_count) . ', retrieve ' . 
$retrieve_items); + $retriever_resources = DBA::selectToArray('retriever_resource', [], ['`completed` IS NULL AND (`last-try` IS NULL OR ' . implode($schedule_clauses, ' OR ') . ')'], ['order' => ['last-try' => 0], 'limit' => $retrieve_items]); + if (!is_array($retriever_resources)) { + break; + } + if (count($retriever_resources) == 0) { + break; + } + Logger::debug('retriever_retrieve_items: found ' . count($retriever_resources) . ' waiting resources in database'); + foreach ($retriever_resources as $retriever_resource) { + retrieve_resource($retriever_resource); + $retriever_item_count++; + } + $retrieve_items = $max_items - $retriever_item_count; + } + while ($retrieve_items > 0); + Logger::debug('retriever_retrieve_items: finished retrieving items'); } /** @@ -179,175 +172,221 @@ function retriever_retrieve_items($max_items) { * @param int $max_items Maximum number of items to retrieve in this call */ function retriever_clean_up_completed_resources($max_items) { - // TODO: figure out how to do this with DBA module - $r = q('SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d', - intval($max_items)); - if (!$r) { - $r = array(); - } - Logger::debug('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . count($r)); - foreach ($r as $rr) { - $retriever_item = retriever_get_retriever_item($rr['item']); - if (!DBA::isResult($retriever_item)) { - Logger::warning('retriever_clean_up_completed_resources: no retriever item with id ' . $rr['item']); - continue; - } - $item = retriever_get_item($retriever_item); - if (!$item) { - Logger::warning('retriever_clean_up_completed_resources: no item ' . 
$retriever_item['item-uri']); - continue; - } - $retriever_rule = get_retriever_rule($retriever_item['contact-id'], $item['uid']); - if (!$retriever_rule) { - Logger::warning('retriever_clean_up_completed_resources: no retriever for uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['uid'] . ' ' . $retriever_item['contact-id']); - continue; - } - $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]); - retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource); - DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished' => 0]); - retriever_check_item_completed($item); - } + // TODO: figure out how to do this with DBA module + $r = q('SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d', + intval($max_items)); + if (!$r) { + $r = array(); + } + Logger::debug('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . count($r)); + foreach ($r as $rr) { + $retriever_item = DBA::selectFirst('retriever_item', [], ['id' => intval($rr['item'])]); + if (!DBA::isResult($retriever_item)) { + Logger::warning('retriever_clean_up_completed_resources: no retriever item with id ' . $rr['item']); + continue; + } + $item = retriever_get_item($retriever_item); + if (!$item) { + Logger::warning('retriever_clean_up_completed_resources: no item ' . $retriever_item['item-uri']); + continue; + } + $retriever_rule = get_retriever_rule($retriever_item['contact-id'], $item['uid'], false); + if (!$retriever_rule) { + Logger::warning('retriever_clean_up_completed_resources: no retriever for uri ' . 
$retriever_item['item-uri'] . ' uid ' . $retriever_item['uid'] . ' ' . $retriever_item['contact-id']); + continue; + } + $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]); + retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource); + // TODO: I don't really get how the $old_fields argument to DBA::update works + DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished' => 0]); + retriever_check_item_completed($item); + } } /** * @brief Deletes old rows from the retriever_item and retriever_resource table that are unlikely to be needed */ function retriever_tidy() { - DBA::delete('retriever_resource', ['completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)']); - DBA::delete('retriever_resource', ['completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)']); + DBA::delete('retriever_resource', ['completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)']); + DBA::delete('retriever_resource', ['completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)']); - $r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); - Logger::info('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource'); - foreach ($r as $rr) { - q('DELETE FROM retriever_item WHERE id = %d', intval($rr['id'])); - } + $r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); + Logger::info('retriever_tidy: found ' . count($r) . 
' retriever_items with no retriever_resource'); + foreach ($r as $rr) { + q('DELETE FROM retriever_item WHERE id = %d', intval($rr['id'])); + } } +/** + * @brief Special case of retrieving a resource: if the URL is a data URL, do not use cURL, decode the URL directly + * + * @param array $resource The row from the retriever_resource table + */ function retrieve_dataurl_resource($resource) { - if (!preg_match("/date:(.*);base64,(.*)/", $resource['url'], $matches)) { - Logger::warning('retrieve_dataurl_resource: resource ' . $resource['id'] . ' does not match pattern'); - } else { - $resource['type'] = $matches[1]; - $resource['data'] = base64url_decode($matches[2]); - } + if (!preg_match("/date:(.*);base64,(.*)/", $resource['url'], $matches)) { + Logger::warning('retrieve_dataurl_resource: resource ' . $resource['id'] . ' does not match pattern'); + } else { + $resource['type'] = $matches[1]; + $resource['data'] = base64url_decode($matches[2]); + } - // Succeed or fail, there's no point retrying - q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `completed` = now(), `data` = '%s', `type` = '%s' WHERE id = %d", - DBA::escape($resource['data']), - DBA::escape($resource['type']), - intval($resource['id'])); - retriever_resource_completed($resource); + // Succeed or fail, there's no point retrying + DBA::update('retriever_resource', ['id' => intval($resource['id'])], ['last-try' => DateTimeFormat::utcNow(), 'num-tries' => intval($resource['num-tries']) + 1, 'completed' => DateTimeFormat::utcNow(), 'data' => $resource['data'], 'type' => $resource['type']], ['last-try' => false]); + retriever_resource_completed($resource); } +/** + * @brief Makes an attempt to retrieve the supplied resource, and updates the row in the table with the results + * + * @param array $resource The row from the retriever_resource table + */ function retrieve_resource($resource) { - if (substr($resource['url'], 0, 5) == "data:") { - return 
retrieve_dataurl_resource($resource); - } + $components = parse_url($resource['url']); + if ($components['scheme'] == "data") { + return retrieve_dataurl_resource($resource); + } + if (($components['scheme'] != "http") && ($components['scheme'] != "https")) { + Logger::warning('retrieve_resource: URL scheme not supported for ' . $resource['url']); + DBA::update('retriever_resource', ['completed' => DateTimeFormat::utcNow()], ['id' => intval($resource['id'])], ['completed' => false]); + retriever_resource_completed($resource); + return; + } - $retriever_rule = get_retriever_rule($resource['contact-id'], $resource['item-uid']); - if (!$retriever_rule) { - Logger::warning('retrieve_resource: no rule found for contact ' . $resource['contact-id'] . ' item ' . $resource['item-uid']); - return; - } - $rule_data = $retriever_rule['data']; - if (!$rule_data) { - Logger::warning('retrieve_resource: no rule data found for contact ' . $resource['contact-id'] . ' item ' . $resource['item-uid']); - return; - } + $retriever_rule = get_retriever_rule($resource['contact-id'], $resource['item-uid'], false); + if (!$retriever_rule) { + Logger::warning('retrieve_resource: no rule found for resource id ' . $resource['id'] . ' contact ' . $resource['contact-id'] . ' item ' . $resource['item-uid']); + DBA::update('retriever_resource', ['completed' => DateTimeFormat::utcNow()], ['id' => intval($resource['id'])], ['completed' => false]); + retriever_resource_completed($resource); + return; + } + $rule_data = $retriever_rule['data']; + if (!$rule_data) { + Logger::warning('retrieve_resource: no rule data found for resource id ' . $resource['id'] . ' contact ' . $resource['contact-id'] . ' item ' . $resource['item-uid']); + DBA::update('retriever_resource', ['completed' => DateTimeFormat::utcNow()], ['id' => intval($resource['id'])], ['completed' => false]); + retriever_resource_completed($resource); + return; + } - try { - Logger::debug('retrieve_resource: ' . ($resource['num-tries'] + 1) . 
' attempt at resource ' . $resource['id'] . ' ' . $resource['url']); - $redirects = 0; - $cookiejar = ''; - if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) { - $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); - file_put_contents($cookiejar, $rule_data['cookiedata']); - } - $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar); - if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) { - $retriever_rule['data']['cookiedata'] = file_get_contents($cookiejar); - DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], $retriever_rule); - unlink($cookiejar); - } - $resource['data'] = $fetch_result->getBody(); - $resource['http-code'] = $fetch_result->getReturnCode(); - $resource['type'] = $fetch_result->getContentType(); - $resource['redirect-url'] = $fetch_result->getRedirectUrl(); - Logger::debug('retrieve_resource: got code ' . $resource['http-code'] . ' retrieving resource ' . $resource['id'] . ' final url ' . $resource['redirect-url']); - } catch (Exception $e) { - Logger::info('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage()); - } - DBA::update('retriever_resource', ['id' => intval($resource['id'])], ['last-try' => DateTimeFormat::utcNow(), 'num-tries' => intval($resource['num-tries']) + 1, 'http-code' => intval($resource['http-code']), 'redirect-url' => $resource['redirect-url']], ['last-try' => false]); - if ($resource['data']) { - DBA::update('retriever_resource', ['id' => intval($resource['id'])], ['completed' => DateTimeFormat::utcNow(), 'data' => $resource['data'], 'type' => $resource['type']], ['completed' => false]); - retriever_resource_completed($resource); - } + try { + Logger::debug('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . 
$resource['url']); + $redirects = 0; + $cookiejar = ''; + if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) { + $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); + file_put_contents($cookiejar, $rule_data['cookiedata']); + } + $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar); + if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) { + $retriever_rule['data']['cookiedata'] = file_get_contents($cookiejar); + DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], $retriever_rule); + unlink($cookiejar); + } + $resource['data'] = $fetch_result->getBody(); + $resource['http-code'] = $fetch_result->getReturnCode(); + $resource['type'] = $fetch_result->getContentType(); + $resource['redirect-url'] = $fetch_result->getRedirectUrl(); + Logger::debug('retrieve_resource: got code ' . $resource['http-code'] . ' retrieving resource ' . $resource['id'] . ' final url ' . $resource['redirect-url']); + } catch (Exception $e) { + Logger::info('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . 
$e->getMessage()); + } + DBA::update('retriever_resource', ['last-try' => DateTimeFormat::utcNow(), 'num-tries' => intval($resource['num-tries']) + 1, 'http-code' => intval($resource['http-code']), 'redirect-url' => $resource['redirect-url']], ['id' => intval($resource['id'])], ['last-try' => false]); + if ($resource['data']) { + DBA::update('retriever_resource', ['completed' => DateTimeFormat::utcNow(), 'data' => $resource['data'], 'type' => $resource['type']], ['id' => intval($resource['id'])], ['completed' => false]); + retriever_resource_completed($resource); + } } -function get_retriever_rule($contact_id, $uid, $create = false) { - $retriever_rule = DBA::selectFirst('retriever_rule', [], ['contact-id' => intval($contact_id), 'uid' => intval($uid)]); - if ($retriever_rule) { - $retriever_rule['data'] = json_decode($retriever_rule['data'], true); - return $retriever_rule; - } - if ($create) { - DBA::insert('retriever_rule', ['uid' => intval($uid), 'contact-id' => intval($contact_id)]); - $retriever_rule = DBA::selectFirst('retriever_rule', [], ['contact-id' => intval($contact_id), 'uid' => intval($uid)]); - return $retriever_rule; - } -} - -function retriever_get_retriever_item($id) { - return DBA::selectFirst('retriever_item', [], ['id' => intval($id)]); +/** + * @brief Gets the retriever configuration for a particular contact. Optionally, will create a blank configuration. 
+ * + * @param int $contact_id The Contact ID of the retriever configuration + * @param int $uid The User ID of the retriever configuration + * @param boolean $create Whether to create a new configuration if none exists already + * @return array The row from the retriever_rule database for this configuration + */ +function get_retriever_rule($contact_id, $uid, $create) { + $retriever_rule = DBA::selectFirst('retriever_rule', [], ['contact-id' => intval($contact_id), 'uid' => intval($uid)]); + if ($retriever_rule) { + $retriever_rule['data'] = json_decode($retriever_rule['data'], true); + return $retriever_rule; + } + if ($create) { + DBA::insert('retriever_rule', ['uid' => intval($uid), 'contact-id' => intval($contact_id)]); + $retriever_rule = DBA::selectFirst('retriever_rule', [], ['contact-id' => intval($contact_id), 'uid' => intval($uid)]); + return $retriever_rule; + } } +/** + * @brief Looks up the item from the database that corresponds to the retriever_item + * + * @param array $retriever_item Row from the retriever_item table + * @return array Item that was found, or undef if no item could be found + */ function retriever_get_item($retriever_item) { - $item = Item::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid']), 'contact-id' => intval($retriever_item['contact-id'])]); - if (!DBA::isResult($item)) { - Logger::warning('retriever_get_item: no item found for uri ' . $retriever_item['item-uri']); - return; - } - return $item; + $item = Item::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid']), 'contact-id' => intval($retriever_item['contact-id'])]); + if (!DBA::isResult($item)) { + Logger::warning('retriever_get_item: no item found for uri ' . 
$retriever_item['item-uri']); + return; + } + return $item; } +/** + * @brief This function should be called when a resource is completed to trigger all next steps, based on the corresponding retriever item + * + * @param int $retriever_item_id ID of the retriever item corresponding to this resource + * @param array $resource The full details of the completed resource + */ function retriever_item_completed($retriever_item_id, $resource) { - Logger::debug('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url']); + Logger::debug('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url']); - $retriever_item = retriever_get_retriever_item($retriever_item_id); - if (!DBA::isResult($retriever_item)) { - Logger::info('retriever_item_completed: no retriever item with id ' . $retriever_item_id); - return; - } - $item = retriever_get_item($retriever_item); - if (!$item) { - Logger::warning('retriever_item_completed: no item ' . $retriever_item['item-uri']); - return; - } - // Note: the retriever might be null. Doesn't matter. - $retriever_rule = get_retriever_rule($retriever_item['contact-id'], $retriever_item['item-uid']); + $retriever_item = DBA::selectFirst('retriever_item', [], ['id' => intval($retriever_item_id)]); + if (!DBA::isResult($retriever_item)) { + Logger::info('retriever_item_completed: no retriever item with id ' . $retriever_item_id); + return; + } + $item = retriever_get_item($retriever_item); + if (!$item) { + Logger::warning('retriever_item_completed: no item ' . $retriever_item['item-uri']); + return; + } + // Note: the retriever might be null. Doesn't matter. 
+ $retriever_rule = get_retriever_rule($retriever_item['contact-id'], $retriever_item['item-uid'], false); - retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource); + retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource); - DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished' => 0]); - retriever_check_item_completed($item); + DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished' => 0]); + retriever_check_item_completed($item); } +/** + * @brief This function should be called when a resource is completed to trigger all next steps + * + * @param array $resource The full details of the completed resource + */ function retriever_resource_completed($resource) { - Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url']); - foreach (DBA::selectToArray('retriever_item', ['id'], ['resource' => intval($resource['id'])]) as $retriever_item) { - retriever_item_completed($retriever_item['id'], $resource); - } + Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url']); + foreach (DBA::selectToArray('retriever_item', ['id'], ['resource' => intval($resource['id'])]) as $retriever_item) { + retriever_item_completed($retriever_item['id'], $resource); + } } +/** + * @brief For a retriever config for a particular contact, remove existing artifacts for a number of completed items and queue them to be tried again. Will make the items invisible until they are again completed. The items chosen will be the most recently received. 
+ * + * @param array $retriever The row from the retriever_rule table for the contact + * @param int $num The number of existing items to queue for retrieval + */ function apply_retrospective($retriever, $num) { - foreach (Item::selectToArray([], ['contact-id' => intval($retriever['contact-id'])], ['order' => ['received' => true], 'limit' => $num]) as $item) { - Item::update(['visible' => 0], ['id' => intval($item['id'])]); - foreach (DBA::selectToArray('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]) as $retriever_item) { - DBA::delete('retriever_resource', ['id' => $retriever_item['resource']]); - DBA::delete('retriever_item', ['id' => $retriever_item['id']]); - } - retriever_on_item_insert($retriever, $item); - } + foreach (Item::selectToArray([], ['contact-id' => intval($retriever['contact-id'])], ['order' => ['received' => true], 'limit' => $num]) as $item) { + Item::update(['visible' => 0], ['id' => intval($item['id'])]); + foreach (DBA::selectToArray('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]) as $retriever_item) { + DBA::delete('retriever_resource', ['id' => $retriever_item['resource']]); + DBA::delete('retriever_item', ['id' => $retriever_item['id']]); + } + retriever_on_item_insert($retriever, $item); + } } /** @@ -359,299 +398,374 @@ function apply_retrospective($retriever, $num) { * TODO: This queries then inserts. It should use some kind of lock to avoid requesting the same resource twice. 
*/ function retriever_on_item_insert($retriever, &$item) { - if (!$retriever || !$retriever['id']) { - Logger::info('retriever_on_item_insert: No retriever supplied'); - return; - } - if (!array_key_exists('enable', $retriever['data']) || !$retriever['data']['enable'] == "on") { - return; - } - if (array_key_exists('plink', $item) && strlen($item['plink'])) { - $url = $item['plink']; - } - else { - if (!array_key_exists('uri-id', $item)) { - Logger::warning('retriever_on_item_insert: item ' . $item['id'] . ' has no plink and no uri-id'); - return; - } - $content = DBA::selectFirst('item-content', [], ['uri-id' => $item['uri-id']]); - $url = $content['plink']; - } + if (!$retriever || !$retriever['id']) { + Logger::info('retriever_on_item_insert: No retriever supplied'); + return; + } + if (!array_key_exists('enable', $retriever['data']) || !$retriever['data']['enable'] == "on") { + return; + } + if (array_key_exists('plink', $item) && strlen($item['plink'])) { + $url = $item['plink']; + } + else { + if (!array_key_exists('uri-id', $item)) { + Logger::warning('retriever_on_item_insert: item ' . $item['id'] . ' has no plink and no uri-id'); + return; + } + $content = DBA::selectFirst('item-content', [], ['uri-id' => $item['uri-id']]); + $url = $content['plink']; + } - if (array_key_exists('modurl', $retriever['data']) && $retriever['data']['modurl']) { - $orig_url = $url; - $url = preg_replace('/' . $retriever['data']['pattern'] . '/', $retriever['data']['replace'], $orig_url); - Logger::debug('retriever_on_item_insert: Changed ' . $orig_url . ' to ' . $url); - } + if (array_key_exists('modurl', $retriever['data']) && $retriever['data']['modurl']) { + $orig_url = $url; + $url = preg_replace('/' . $retriever['data']['pattern'] . '/', $retriever['data']['replace'], $orig_url); + Logger::debug('retriever_on_item_insert: Changed ' . $orig_url . ' to ' . 
$url); + } - $resource = add_retriever_resource($url, $item['uid'], $item['contact-id']); - $retriever_item_id = add_retriever_item($item, $resource); + $resource = add_retriever_resource($url, $item['uid'], $item['contact-id']); + $retriever_item_id = add_retriever_item($item, $resource); } +/** + * @brief Creates a new resource to be downloaded from the supplied URL. Unique resources are created for each URL, UID and contact ID, because different contact IDs may have different rules for how to retrieve them. If the URL is actually a data URL, the resource is completed immediately. + * + * @param string $url URL of the resource to be downloaded + * @param int $uid User ID that this resource is being downloaded fore + * @param int $cid Contact ID of the item that triggered the downloading of this resource + * @param boolean $binary Specifies if this download should be done in binary mode + * @return array The created resource + */ function add_retriever_resource($url, $uid, $cid, $binary = false) { - Logger::debug('add_retriever_resource: url ' . $url . ' uid ' . $uid . ' contact-id ' . $cid); + Logger::debug('add_retriever_resource: url ' . $url . ' uid ' . $uid . ' contact-id ' . $cid); - $scheme = parse_url($url, PHP_URL_SCHEME); - if ($scheme == 'data') { - $fp = fopen($url, 'r'); - $meta = stream_get_meta_data($fp); - $type = $meta['mediatype']; - $data = stream_get_contents($fp); - fclose($fp); + $scheme = parse_url($url, PHP_URL_SCHEME); + if ($scheme == 'data') { + $fp = fopen($url, 'r'); + $meta = stream_get_meta_data($fp); + $type = $meta['mediatype']; + $data = stream_get_contents($fp); + fclose($fp); - $url = 'md5://' . hash('md5', $url); - $resource = DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); - if ($resource) { - Logger::debug('add_retriever_resource: Resource ' . $url . ' already requested'); - return $resource; - } + $url = 'md5://' . 
hash('md5', $url); + $resource = DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); + if ($resource) { + Logger::debug('add_retriever_resource: Resource ' . $url . ' already requested'); + return $resource; + } - DBA::insert('retriever_resource', ['item-uid' => intval($uid), 'contact-id' => intval($cid), 'type' => $type, 'binary' => ($binary ? 1 : 0), 'url' => $url, 'completed' => DateTimeFormat::utcNow(), 'data' => $data]); - $resource = DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); - if ($resource) { - retriever_resource_completed($resource); - } - return $resource; - } + DBA::insert('retriever_resource', ['item-uid' => intval($uid), 'contact-id' => intval($cid), 'type' => $type, 'binary' => ($binary ? 1 : 0), 'url' => $url, 'completed' => DateTimeFormat::utcNow(), 'data' => $data]); + $resource = DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); + if ($resource) { + retriever_resource_completed($resource); + } + return $resource; + } - // 800 characters is the size of this field in the database - if (strlen($url) > 800) { - Logger::warning('add_retriever_resource: URL is longer than 800 characters'); - } + // 800 characters is the size of this field in the database + if (strlen($url) > 800) { + Logger::warning('add_retriever_resource: URL is longer than 800 characters'); + } - $resource = DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); - if ($resource) { - Logger::debug('add_retriever_resource: Resource ' . $url . ' uid ' . $uid . ' cid ' . $cid . 
' already requested'); - return $resource; - } + $resource = DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); + if ($resource) { + Logger::debug('add_retriever_resource: Resource ' . $url . ' uid ' . $uid . ' cid ' . $cid . ' already requested'); + return $resource; + } - DBA::insert('retriever_resource', ['item-uid' => intval($uid), 'contact-id' => intval($cid), 'binary' => ($binary ? 1 : 0), 'url' => $url]); - return DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); + DBA::insert('retriever_resource', ['item-uid' => intval($uid), 'contact-id' => intval($cid), 'binary' => ($binary ? 1 : 0), 'url' => $url]); + return DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); } -function add_retriever_item(&$item, $resource) { - Logger::debug('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); +/** + * @brief Adds a retriever item for the supplied resource and item, to mark that this item should wait for the resource to be completed. Does not create a retriever item if a matching one already exists. + * + * @param array $item Item that is waiting for the resource. This may or may not have been already stored in the database. + * @param array $resource Resource that the item needs to wait for. This must have already been stored in the database. + * @return int ID of the retriever item that was created, or the existing one if present + */ +function add_retriever_item($item, $resource) { + Logger::debug('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . 
$item['contact-id']); - if (!array_key_exists('id', $resource) || !$resource['id']) { - Logger::warning('add_retriever_item: resource is empty'); - //@@@ check that this does not happen - return; - } - if (DBA::selectFirst('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'resource' => intval($resource['id'])])) { - Logger::info("add_retriever_item: retriever item already present for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); - return; - } - DBA::insert('retriever_item', ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'contact-id' => intval($item['contact-id']), 'resource' => intval($resource['id'])]); - $retriever_item = DBA::selectFirst('retriever_item', ['id'], ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'resource' => intval($resource['id'])]); - if (!$retriever_item) { - Logger::info("add_retriever_item: couldn't create retriever item for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); - return; - } - Logger::debug('add_retriever_item: created retriever_item ' . $retriever_item['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); - return $retriever_item['id']; + if (!array_key_exists('id', $resource) || !$resource['id']) { + Logger::warning('add_retriever_item: resource is empty'); + return; + } + if (DBA::selectFirst('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'resource' => intval($resource['id'])])) { + Logger::info("add_retriever_item: retriever item already present for " . $item['uri'] . ' ' . $item['uid'] . ' ' . 
$item['contact-id']); + return; + } + DBA::insert('retriever_item', ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'contact-id' => intval($item['contact-id']), 'resource' => intval($resource['id'])]); + $retriever_item = DBA::selectFirst('retriever_item', ['id'], ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'resource' => intval($resource['id'])]); + if (!$retriever_item) { + Logger::info("add_retriever_item: couldn't create retriever item for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); + return; + } + Logger::debug('add_retriever_item: created retriever_item ' . $retriever_item['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); + return $retriever_item['id']; } +/** + * @brief Analyse a completed text resource (such as HTML) for the character encoding used + * + * @param array $resource The completed resource + * @return string Character encoding, e.g. "utf-8" or "iso-8859-1" + */ function retriever_get_encoding($resource) { - $matches = array(); - if (preg_match('/charset=(.*)/', $resource['type'], $matches)) { - return trim(array_pop($matches)); - } - return 'utf-8'; + $matches = array(); + if (preg_match('/charset=(.*)/', $resource['type'], $matches)) { + return trim(array_pop($matches)); + } + return 'utf-8'; } +/** + * @brief Apply the XSLT template to the DOM document + * + * @param string $xslt_text Text of the XSLT template + * @param DOMDocument $doc Input to the XSLT template + * @return DOMDocument Result of applying the template + */ function retriever_apply_xslt_text($xslt_text, $doc) { - if (!$xslt_text) { - Logger::info('retriever_apply_xslt_text: empty XSLT text'); - return $doc; - } - $xslt_doc = new DOMDocument(); - if (!$xslt_doc->loadXML($xslt_text)) { - Logger::info('retriever_apply_xslt_text: could not load XML'); - return $doc; - } - $xp = new XsltProcessor(); - $xp->importStylesheet($xslt_doc); - $result = $xp->transformToDoc($doc); - return 
$result; + if (!$xslt_text) { + Logger::info('retriever_apply_xslt_text: empty XSLT text'); + return $doc; + } + $xslt_doc = new DOMDocument(); + if (!$xslt_doc->loadXML($xslt_text)) { + Logger::info('retriever_apply_xslt_text: could not load XML'); + return $doc; + } + $xp = new XsltProcessor(); + $xp->importStylesheet($xslt_doc); + $result = $xp->transformToDoc($doc); + return $result; } +/** + * @brief Applies the retriever rules to the downloaded resource, and stores the results as the new body text of the item + * + * @param array $retriever Retriever rules as stored in the database, with the "data" element already decoded from JSON + * @param array &$item Item to be in which to store the new body (by ref). This may or may not be already stored in the database. + * @param array $resource Newly completed resource, which should be text (HTML or XML) + */ function retriever_apply_dom_filter($retriever, &$item, $resource) { - Logger::debug('retriever_apply_dom_filter: applying XSLT to uri ' . $item['uri'] . ' uid ' . $item['uid'] . ' contact ' . $item['contact-id']); + Logger::debug('retriever_apply_dom_filter: applying XSLT to uri ' . $item['uri'] . ' uid ' . $item['uid'] . ' contact ' . 
$item['contact-id']); - if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) { - Logger::info('retriever_apply_dom_filter: no include and no customxslt'); - return; - } - if (!$resource['data']) { - Logger::info('retriever_apply_dom_filter: no text to work with'); - return; - } + if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) { + Logger::info('retriever_apply_dom_filter: no include and no customxslt'); + return; + } + if (!$resource['data']) { + Logger::info('retriever_apply_dom_filter: no text to work with'); + return; + } - $doc = retriever_load_into_dom($resource); + $doc = retriever_load_into_dom($resource); - $doc = retriever_extract($doc, $retriever); - if (!$doc) { - Logger::info('retriever_apply_dom_filter: failed to apply extract XSLT template'); - return; - } + $doc = retriever_extract($doc, $retriever); + if (!$doc) { + Logger::info('retriever_apply_dom_filter: failed to apply extract XSLT template'); + return; + } - $doc = retriever_globalise_urls($doc, $resource); - if (!$doc) { - Logger::info('retriever_apply_dom_filter: failed to apply fix urls XSLT template'); - return; - } + $doc = retriever_globalise_urls($doc, $resource); + if (!$doc) { + Logger::info('retriever_apply_dom_filter: failed to apply fix urls XSLT template'); + return; + } - $body = HTML::toBBCode($doc->saveHTML()); - if (!strlen($body)) { - Logger::info('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty'); - return; - } - $body .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . ': [url='; - $body .= $item['plink']; - $body .= ']' . $item['plink'] . '[/url]'; + $body = HTML::toBBCode($doc->saveHTML()); + if (!strlen($body)) { + Logger::info('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty'); + return; + } + $body .= "\n\n" . L10n::t('Retrieved') . ' ' . 
date("Y-m-d") . ': [url='; + $body .= $item['plink']; + $body .= ']' . $item['plink'] . '[/url]'; - Logger::debug('retriever_apply_dom_filter: XSLT result \"' . $body . '\"'); - retriever_set_body($item, $body); + Logger::debug('retriever_apply_dom_filter: XSLT result \"' . $body . '\"'); + retriever_set_body($item, $body); } +/** + * @brief Converts the completed resource, which must be HTML or XML, into a DOM document + * + * @param array $resource The resource containing the text content + */ function retriever_load_into_dom($resource) { - $encoding = retriever_get_encoding($resource); - $content = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $encoding); - $doc = new DOMDocument('1.0', 'UTF-8'); - if (strpos($resource['type'], 'html') !== false) { - @$doc->loadHTML($content); - } - else { - $doc->loadXML($content); - } - return $doc; + $encoding = retriever_get_encoding($resource); + $content = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $encoding); + $doc = new DOMDocument('1.0', 'UTF-8'); + if (strpos($resource['type'], 'html') !== false) { + @$doc->loadHTML($content); + } + else { + $doc->loadXML($content); + } + return $doc; } +/** + * @brief Applies the retriever rules, including configuration for included and excluded portions, to the DOM document + * + * @param DOMDocument $doc The original DOM document downloaded from the link + * @param array $retriever The retriever configuration for this contact + * @return DOMDocument New DOM document containing only the desired content + */ function retriever_extract($doc, $retriever) { - $params = array('$spec' => $retriever['data']); - $extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/'); - $extract_xslt = Renderer::replaceMacros($extract_template, $params); - if ($retriever['data']['include']) { - Logger::debug('retriever_apply_dom_filter: applying include/exclude template \"' . $extract_xslt . 
'\"'); - $doc = retriever_apply_xslt_text($extract_xslt, $doc); - } - if (array_key_exists('customxslt', $retriever['data']) && $retriever['data']['customxslt']) { - Logger::debug('retriever_extract: applying custom XSLT \"' . $retriever['data']['customxslt'] . '\"'); - $doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc); - } - return $doc; + $params = array('$spec' => $retriever['data']); + $extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/'); + $extract_xslt = Renderer::replaceMacros($extract_template, $params); + if ($retriever['data']['include']) { + Logger::debug('retriever_apply_dom_filter: applying include/exclude template \"' . $extract_xslt . '\"'); + $doc = retriever_apply_xslt_text($extract_xslt, $doc); + } + if (array_key_exists('customxslt', $retriever['data']) && $retriever['data']['customxslt']) { + Logger::debug('retriever_extract: applying custom XSLT \"' . $retriever['data']['customxslt'] . '\"'); + $doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc); + } + return $doc; } +/** + * @brief Converts local URLs in the DOM document to global URLs + * + * @param DOMDocument $doc DOM document potentially containing links + * @param array $resource Completed resource which contains the text in the DOM document + * @return DOMDocument New DOM document with global URLs + */ function retriever_globalise_urls($doc, $resource) { - $components = parse_url($resource['redirect-url']); - $rooturl = $components['scheme'] . "://" . $components['host']; - $dirurl = $rooturl . dirname($components['path']) . "/"; - $params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl); - $fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/'); - $fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params); - $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc); - return $doc; + $components = parse_url($resource['redirect-url']); + $rooturl = $components['scheme'] . 
"://" . $components['host']; + $dirurl = $rooturl . dirname($components['path']) . "/"; + $params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl); + $fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/'); + $fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params); + $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc); + return $doc; } +/** + * @brief Returns the body text for the supplied item. If the item has already been stored in the database, this will fetch the content from the database rather than from the supplied array. + * + * @param array $item Row from the item table + */ function retriever_get_body($item) { - if (!array_key_exists('uri-id', $item) || !$item['uri-id']) { - // item has not yet been stored in database - return $item['body']; - } + if (!array_key_exists('uri-id', $item) || !$item['uri-id']) { + // item has not yet been stored in database + return $item['body']; + } - // item has been stored in database, body is stored in the item-content table - $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $item['uri-id']]); - if (!$content) { - Logger::warning('retriever_get_body: item-content uri-id ' . $item['uri-id'] . ' has no content'); - return $item['body']; - } - if (!$content['body']) { - Logger::warning('retriever_get_body: item-content uri-id ' . $item['uri-id'] . ' has no body'); - //@@@ check never happens - return $item['body']; - } - if ($content['body'] != $item['body']) { - Logger::warning('@@@ this is probably bad @@@ content: ' . $content['body'] . ' @@@ item: ' . $item['body']); - //@@@ check for this. - } - return $content['body']; + // item has been stored in database, body is stored in the item-content table + $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $item['uri-id']]); + if (!$content) { + Logger::warning('retriever_get_body: item-content uri-id ' . $item['uri-id'] . 
' has no content'); + return $item['body']; + } + if (!$content['body']) { + Logger::warning('retriever_get_body: item-content uri-id ' . $item['uri-id'] . ' has no body'); + return $item['body']; + } + if ($content['body'] != $item['body']) { + Logger::warning('@@@ this is probably bad @@@ content: ' . $content['body'] . ' @@@ item: ' . $item['body']); + } + return $content['body']; } -function retriever_set_body(&$item, $body, $allow_empty = false) { - if (!$body && !$allow_empty) { - Logger::debug('retriever_set_body: will not set empty body in item id ' . $item['id'] . ' uri ' . $item['uri']); - return; - } - $item['body'] = $body; - if (!array_key_exists('id', $item) || !$item['id']) { - // item has not yet been stored in database - return; - } - Item::update(['body' => $body], ['id' => intval($item['id'])]); +/** + * @brief Updates the item with the supplied body text. If the item has already been stored in the database, this will update the database too. + * + * @param array &$item Item in which to set the body (by ref). This may or may not be already stored in the database. + * @param string $body New body content + */ +function retriever_set_body(&$item, $body) { + $item['body'] = $body; + if (!array_key_exists('id', $item) || !$item['id']) { + // item has not yet been stored in database + return; + } + Item::update(['body' => $body], ['id' => intval($item['id'])]); } /** * @brief Searches for images in the item and adds corresponding retriever_items. If the images have already been downloaded, updates the body in the supplied item array. * - * @param array &$item Row from the item table (by ref) + * @param array &$item Item to be searched for images and updated (by ref). This may or may not be already stored in the database. */ function retrieve_images(&$item) { - $body = retriever_get_body($item); - if (!strlen($body)) { - Logger::warning('retrieve_images: no body for item ' . 
$item['uri']); - return; - } + if (!Config::get('retriever', 'allow_images')) { + return; + } - // I suspect that the first two are not used any more? - preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $body, $matches1); - preg_match_all("/\[img\](.*?)\[\/img\]/ism", $body, $matches2); - preg_match_all("/\[img\=([^\]]*)\]([^[]*)\[\/img\]/ism", $body, $matches3); - $matches = array_merge($matches1[3], $matches2[1], $matches3[1]); - Logger::debug('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); - foreach ($matches as $url) { - if (!$url) { - continue; - } - if (strpos($url, get_app()->getBaseUrl()) === FALSE) { - $resource = add_retriever_resource($url, $item['uid'], $item['contact-id'], true); - if (!$resource['completed']) { - add_retriever_item($item, $resource); - } - else { - retriever_transform_images($item, $resource); - } - } - } + $body = retriever_get_body($item); + if (!strlen($body)) { + Logger::warning('retrieve_images: no body for item ' . $item['uri']); + return; + } + + // I suspect that the first two are not used any more? + preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $body, $matches1); + preg_match_all("/\[img\](.*?)\[\/img\]/ism", $body, $matches2); + preg_match_all("/\[img\=([^\]]*)\]([^[]*)\[\/img\]/ism", $body, $matches3); + $matches = array_merge($matches1[3], $matches2[1], $matches3[1]); + Logger::debug('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . 
$item['contact-id']); + foreach ($matches as $url) { + if (!$url) { + continue; + } + if (strpos($url, System::baseUrl()) === FALSE) { + $resource = add_retriever_resource($url, $item['uid'], $item['contact-id'], true); + if (!$resource['completed']) { + add_retriever_item($item, $resource); + } + else { + retriever_transform_images($item, $resource); + } + } + } } +/** + * @brief Checks if an item has been completed, i.e. all its associated retriever_item rows have been retrieved. If so, update the item to be visible again. + * + * @param array &$item Row from the item table (by ref) + */ function retriever_check_item_completed(&$item) { - $waiting = DBA::selectFirst('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'contact-id' => intval($item['contact-id']), 'finished' => 0]); - Logger::debug('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] . ' '. $item['contact-id'] . ' waiting for resources'); - $old_visible = $item['visible']; - $item['visible'] = $waiting ? 0 : 1; - if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) { - Logger::debug('retriever_check_item_completed: changing visible flag to ' . $item['visible']); - Item::update(['visible' => $item['visible']], ['id' => intval($item['id'])]); - } + $waiting = DBA::selectFirst('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'contact-id' => intval($item['contact-id']), 'finished' => 0]); + Logger::debug('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] . ' '. $item['contact-id'] . ' waiting for resources'); + $old_visible = $item['visible']; + $item['visible'] = $waiting ? 0 : 1; + if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) { + Logger::debug('retriever_check_item_completed: changing visible flag to ' . 
$item['visible']); + Item::update(['visible' => $item['visible']], ['id' => intval($item['id'])]); + } } +/** + * @brief Updates an item with a completed resource. If the resource was text, update the body with the new content. If the resource was an image, replace remote images in the body with a local version. + * + * @param array $retriever Rule configuration for this contact + * @param array &$item Row from the item table (by ref) + * @param array $resource The resource that has just been completed + */ function retriever_apply_completed_resource_to_item($retriever, &$item, $resource) { - Logger::debug('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . $item['plink']); - if (strpos($resource['type'], 'image') !== false) { - retriever_transform_images($item, $resource); - } - if (!$retriever) { - Logger::warning('retriever_apply_completed_resource_to_item: no retriever'); - return; - } - if ((strpos($resource['type'], 'html') !== false) || - (strpos($resource['type'], 'xml') !== false)) { - retriever_apply_dom_filter($retriever, $item, $resource); - if ($retriever['data']['images'] ) { - retrieve_images($item); - } - } + Logger::debug('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . 
$item['plink']); + if (strpos($resource['type'], 'image') !== false) { + retriever_transform_images($item, $resource); + } + if (!$retriever) { + Logger::warning('retriever_apply_completed_resource_to_item: no retriever'); + return; + } + if ((strpos($resource['type'], 'html') !== false) || + (strpos($resource['type'], 'xml') !== false)) { + retriever_apply_dom_filter($retriever, $item, $resource); + if ($retriever['data']['images'] ) { + retrieve_images($item); + } + } } /** @@ -659,225 +773,255 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc * * @param array &$item Row from the item table (by ref) * @param array $resource Row from the resource table containing successfully downloaded image + * + * TODO: split this into two functions, one to store the image, the other to change the item body */ -// TODO: split this into two functions, one to store the image, the other to change the item body function retriever_transform_images(&$item, $resource) { - if (!$resource['data']) { - Logger::info('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url']); - return; - } + if (!$resource['data']) { + Logger::info('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url']); + return; + } - $data = $resource['data']; - $type = $resource['type']; - $uid = $item['uid']; - $cid = $item['contact-id']; - $rid = Photo::newResource(); - $path = parse_url($resource['url'], PHP_URL_PATH); - $parts = pathinfo($path); - $filename = $parts['filename'] . (array_key_exists('extension', $parts) ? '.' . $parts['extension'] : ''); - $album = 'Wall Photos'; - $scale = 0; - $desc = ''; // TODO: store alt text with resource when it's requested so we can fill this in - Logger::debug('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . $uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . $album . ' scale ' . $scale . 
' desc ' . $desc); - $image = new Image($data, $type); - if (!$image->isValid()) { - Logger::warning('retriever_transform_images: invalid image found at URL ' . $resource['url'] . ' for item ' . $item['id']); - return; - } - $photo = Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc); - $new_url = System::baseUrl() . '/photo/' . $rid . '-0.' . $image->getExt(); - if (!strlen($new_url)) { - Logger::warning('retriever_transform_images: no replacement URL for image ' . $resource['url']); - return; - } + $data = $resource['data']; + $type = $resource['type']; + $uid = $item['uid']; + $cid = $item['contact-id']; + $rid = Photo::newResource(); + $path = parse_url($resource['url'], PHP_URL_PATH); + $parts = pathinfo($path); + $filename = $parts['filename'] . (array_key_exists('extension', $parts) ? '.' . $parts['extension'] : ''); + $album = 'Wall Photos'; + $scale = 0; + $desc = ''; // TODO: store alt text with resource when it's requested so we can fill this in + Logger::debug('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . $uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . $album . ' scale ' . $scale . ' desc ' . $desc); + $image = new Image($data, $type); + if (!$image->isValid()) { + Logger::warning('retriever_transform_images: invalid image found at URL ' . $resource['url'] . ' for item ' . $item['id']); + return; + } + $photo = Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc); + $new_url = System::baseUrl() . '/photo/' . $rid . '-0.' . $image->getExt(); + if (!strlen($new_url)) { + Logger::warning('retriever_transform_images: no replacement URL for image ' . $resource['url']); + return; + } - $body = retriever_get_body($item); + $body = retriever_get_body($item); - Logger::debug('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . 
$item['uri']); - $body = str_replace($resource["url"], $new_url, $body); - retriever_set_body($item, $body); + Logger::debug('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . $item['uri']); + $body = str_replace($resource["url"], $new_url, $body); + retriever_set_body($item, $body); } +/** + * @brief Displays the retriever configuration page for a contact. Alternatively, if the user clicked the "help" button, display the help content. + * + * @param App $a The App object + */ function retriever_content($a) { - if (!local_user()) { - $a->page['content'] .= "

Please log in

"; - return; - } - if ($a->argv[1] === 'help') { - $feeds = DBA::selectToArray('contact', ['id', 'name', 'thumb'], ['uid' => local_user(), 'network' => 'feed']); - for ($i = 0; $i < count($feeds); ++$i) { - $feeds[$i]['url'] = $a->getBaseUrl() . '/retriever/' . $feeds[$i]['id']; - } - $template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/'); - $a->page['content'] .= Renderer::replaceMacros($template, array( - '$config' => $a->getBaseUrl() . '/settings/addon', - '$feeds' => $feeds)); - return; - } - if ($a->argv[1]) { - $retriever_rule = get_retriever_rule($a->argv[1], local_user(), false); + if (!local_user()) { + $a->page['content'] .= "

Please log in

"; + return; + } + if ($a->argv[1] === 'help') { + $feeds = DBA::selectToArray('contact', ['id', 'name', 'thumb'], ['uid' => local_user(), 'network' => 'feed']); + for ($i = 0; $i < count($feeds); ++$i) { + $feeds[$i]['url'] = System::baseUrl() . '/retriever/' . $feeds[$i]['id']; + } + $template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/'); + $a->page['content'] .= Renderer::replaceMacros($template, array( + '$config' => $a->getBaseUrl . '/settings/addon', + '$feeds' => $feeds)); + return; + } + if ($a->argv[1]) { + $retriever_rule = get_retriever_rule($a->argv[1], local_user(), false); - if (!empty($_POST["id"])) { - $retriever_rule = get_retriever_rule($a->argv[1], local_user(), true); - $retriever_rule['data'] = array(); - foreach (array('modurl', 'pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) { - if (empty($_POST['retriever_' . $setting])) { - $retriever_rule['data'][$setting] = NULL; - } - else { - $retriever_rule['data'][$setting] = $_POST['retriever_' . $setting]; - } - } - foreach ($_POST as $k=>$v) { - if (preg_match("/retriever-(include|exclude)-(\d+)-(element|attribute|value)/", $k, $matches)) { - $retriever_rule['data'][$matches[1]][intval($matches[2])][$matches[3]] = $v; - } - } - // You've gotta have an element, even if it's just "*" - foreach ($retriever_rule['data']['include'] as $k=>$clause) { - if (!$clause['element']) { - unset($retriever_rule['data']['include'][$k]); - } - } - foreach ($retriever_rule['data']['exclude'] as $k=>$clause) { - if (!$clause['element']) { - unset($retriever_rule['data']['exclude'][$k]); - } - } - DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], ['data' => '']); - $a->page['content'] .= "

Settings Updated"; - if (!empty($_POST["retriever_retrospective"])) { - apply_retrospective($retriever_rule, $_POST["retriever_retrospective"]); - $a->page['content'] .= " and retrospectively applied to " . $_POST["retriever_retrospective"] . " posts"; - } - $a->page['content'] .= ".

"; - } + if (!empty($_POST["id"])) { + $retriever_rule = get_retriever_rule($a->argv[1], local_user(), true); + $retriever_rule['data'] = array(); + foreach (array('modurl', 'pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) { + if (empty($_POST['retriever_' . $setting])) { + $retriever_rule['data'][$setting] = NULL; + } + else { + $retriever_rule['data'][$setting] = $_POST['retriever_' . $setting]; + } + } + foreach ($_POST as $k=>$v) { + if (preg_match("/retriever-(include|exclude)-(\d+)-(element|attribute|value)/", $k, $matches)) { + $retriever_rule['data'][$matches[1]][intval($matches[2])][$matches[3]] = $v; + } + } + // You've gotta have an element, even if it's just "*" + foreach ($retriever_rule['data']['include'] as $k=>$clause) { + if (!$clause['element']) { + unset($retriever_rule['data']['include'][$k]); + } + } + foreach ($retriever_rule['data']['exclude'] as $k=>$clause) { + if (!$clause['element']) { + unset($retriever_rule['data']['exclude'][$k]); + } + } + DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], ['data' => '']); + $a->page['content'] .= "

Settings Updated"; + if (!empty($_POST["retriever_retrospective"])) { + apply_retrospective($retriever_rule, $_POST["retriever_retrospective"]); + $a->page['content'] .= " and retrospectively applied to " . $_POST["retriever_retrospective"] . " posts"; + } + $a->page['content'] .= ".

"; + } - $template = Renderer::getMarkupTemplate('/rule-config.tpl', 'addon/retriever/'); - $a->page['content'] .= Renderer::replaceMacros($template, array( - '$enable' => array( - 'retriever_enable', - L10n::t('Enabled'), - $retriever_rule['data']['enable']), - '$modurl' => array( - 'retriever_modurl', - L10n::t('Modify URL'), - $retriever_rule['data']['modurl'], - L10n::t("Modify each article's URL with regular expressions before retrieving.")), - '$pattern' => array( - 'retriever_pattern', - L10n::t('URL Pattern'), - $retriever_rule['data']['pattern'], - L10n::t('Regular expression matching part of the URL to replace')), - '$replace' => array( - 'retriever_replace', - L10n::t('URL Replace'), - $retriever_rule['data']['replace'], - L10n::t('Text to replace matching part of above regular expression')), - '$images' => array( - 'retriever_images', - L10n::t('Download Images'), - $retriever_rule['data']['images']), - '$retrospective' => array( - 'retriever_retrospective', - L10n::t('Retrospectively Apply'), - '0', - L10n::t('Reapply the rules to this number of posts')), - 'storecookies' => array( - 'retriever_storecookies', - L10n::t('Store cookies'), - $retriever_rule['data']['storecookies'], - L10n::t("Preserve cookie data across fetches.")), - '$cookiedata' => array( - 'retriever_cookiedata', - L10n::t('Cookie Data'), - $retriever_rule['data']['cookiedata'], - L10n::t("Latest cookie data for this feed. Netscape cookie file format.")), - '$customxslt' => array( - 'retriever_customxslt', - L10n::t('Custom XSLT'), - $retriever_rule['data']['customxslt'], - L10n::t("When standard rules aren't enough, apply custom XSLT to the article")), - '$title' => L10n::t('Retrieve Feed Content'), - '$help' => $a->getBaseUrl() . '/retriever/help', - '$help_t' => L10n::t('Get Help'), - '$submit_t' => L10n::t('Submit'), - '$submit' => L10n::t('Save Settings'), - '$id' => ($retriever_rule["id"] ? 
$retriever_rule["id"] : "create"), - '$tag_t' => L10n::t('Tag'), - '$attribute_t' => L10n::t('Attribute'), - '$value_t' => L10n::t('Value'), - '$add_t' => L10n::t('Add'), - '$remove_t' => L10n::t('Remove'), - '$include_t' => L10n::t('Include'), - '$include' => $retriever_rule['data']['include'], - '$exclude_t' => L10n::t('Exclude'), - '$exclude' => $retriever_rule['data']['exclude'])); - return; - } + $template = Renderer::getMarkupTemplate('/rule-config.tpl', 'addon/retriever/'); + $a->page['content'] .= Renderer::replaceMacros($template, array( + '$enable' => array( + 'retriever_enable', + L10n::t('Enabled'), + $retriever_rule['data']['enable']), + '$modurl' => array( + 'retriever_modurl', + L10n::t('Modify URL'), + $retriever_rule['data']['modurl'], + L10n::t("Modify each article's URL with regular expressions before retrieving.")), + '$pattern' => array( + 'retriever_pattern', + L10n::t('URL Pattern'), + $retriever_rule['data']['pattern'], + L10n::t('Regular expression matching part of the URL to replace')), + '$replace' => array( + 'retriever_replace', + L10n::t('URL Replace'), + $retriever_rule['data']['replace'], + L10n::t('Text to replace matching part of above regular expression')), + '$allow_images' => Config::get('retriever', 'allow_images'), + '$images' => array( + 'retriever_images', + L10n::t('Download Images'), + $retriever_rule['data']['images']), + '$retrospective' => array( + 'retriever_retrospective', + L10n::t('Retrospectively Apply'), + '0', + L10n::t('Reapply the rules to this number of posts')), + 'storecookies' => array( + 'retriever_storecookies', + L10n::t('Store cookies'), + $retriever_rule['data']['storecookies'], + L10n::t("Preserve cookie data across fetches.")), + '$cookiedata' => array( + 'retriever_cookiedata', + L10n::t('Cookie Data'), + $retriever_rule['data']['cookiedata'], + L10n::t("Latest cookie data for this feed. 
Netscape cookie file format.")), + '$customxslt' => array( + 'retriever_customxslt', + L10n::t('Custom XSLT'), + $retriever_rule['data']['customxslt'], + L10n::t("When standard rules aren't enough, apply custom XSLT to the article")), + '$title' => L10n::t('Retrieve Feed Content'), + '$help' => $a->getBaseUrl . '/retriever/help', + '$help_t' => L10n::t('Get Help'), + '$submit_t' => L10n::t('Submit'), + '$submit' => L10n::t('Save Settings'), + '$id' => ($retriever_rule["id"] ? $retriever_rule["id"] : "create"), + '$tag_t' => L10n::t('Tag'), + '$attribute_t' => L10n::t('Attribute'), + '$value_t' => L10n::t('Value'), + '$add_t' => L10n::t('Add'), + '$remove_t' => L10n::t('Remove'), + '$include_t' => L10n::t('Include'), + '$include' => $retriever_rule['data']['include'], + '$exclude_t' => L10n::t('Exclude'), + '$exclude' => $retriever_rule['data']['exclude'])); + return; + } } +/** + * @brief Hook that adds the retriever option to the contact menu + * + * @param App $a The App object + * @param array $args Contact menu details to be filled in (by ref) + */ function retriever_contact_photo_menu($a, &$args) { - if (!$args) { - return; - } - if ($args["contact"]["network"] == "feed") { - $args["menu"][ 'retriever' ] = array(L10n::t('Retriever'), $a->getBaseUrl() . '/retriever/' . $args["contact"]['id']); - } + if (!$args) { + return; + } + if ($args["contact"]["network"] == "feed") { + $args["menu"]['retriever'] = array(L10n::t('Retriever'), System::baseUrl() . '/retriever/' . $args["contact"]['id']); + } } +/** + * @brief Hook for processing new incoming items + * + * @param App $a The App object (by ref) + * @param array $item New item, which has not yet been inserted into database (by ref) + */ function retriever_post_remote_hook(&$a, &$item) { - Logger::info('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); + Logger::info('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . 
$item['contact-id']); - $retriever_rule = get_retriever_rule($item['contact-id'], $item["uid"], false); - if ($retriever_rule) { - retriever_on_item_insert($retriever_rule, $item); - } - else { - if (PConfig::get($item["uid"], 'retriever', 'oembed')) { - // Convert to HTML and back to take advantage of bbcode's resolution of oembeds. - $body = retriever_get_body($item); - $body = HTML::toBBCode(BBCode::convert($body)); - retriever_set_body($item, $body); - } - if (PConfig::get($item["uid"], 'retriever', 'all_photos')) { - retrieve_images($item); - } - } - retriever_check_item_completed($item); + $retriever_rule = get_retriever_rule($item['contact-id'], $item["uid"], false); + if ($retriever_rule) { + retriever_on_item_insert($retriever_rule, $item); + } + else { + if (PConfig::get($item["uid"], 'retriever', 'oembed')) { + // Convert to HTML and back to take advantage of bbcode's resolution of oembeds. + $body = retriever_get_body($item); + $body = HTML::toBBCode(BBCode::convert($body)); + retriever_set_body($item, $body); + } + if (PConfig::get($item["uid"], 'retriever', 'all_photos')) { + retrieve_images($item); + } + } + retriever_check_item_completed($item); } -function retriever_plugin_settings(&$a, &$s) { - $all_photos = PConfig::get(local_user(), 'retriever', 'all_photos'); - $oembed = PConfig::get(local_user(), 'retriever', 'oembed'); - $template = Renderer::getMarkupTemplate('/settings.tpl', 'addon/retriever/'); - $s .= Renderer::replaceMacros($template, array( - '$allphotos' => array( - 'retriever_all_photos', - L10n::t('All Photos'), - $all_photos, - L10n::t('Check this to retrieve photos for all posts')), - '$oembed' => array( - 'retriever_oembed', - L10n::t('Resolve OEmbed'), - $oembed, - L10n::t('Check this to attempt to retrieve embedded content for all posts - useful e.g. for Facebook posts')), - '$submit' => L10n::t('Save Settings'), - '$title' => L10n::t('Retriever Settings'), - '$help' => $a->getBaseUrl() . 
'/retriever/help')); +/** + * @brief Hook for adding per-user retriever settings to the user's settings page + * + * @param App $a The App object (by ref) + * @param string $s HTML string to which to append settings content (by ref) + */ +function retriever_addon_settings(&$a, &$s) { + $all_photos = PConfig::get(local_user(), 'retriever', 'all_photos'); + $oembed = PConfig::get(local_user(), 'retriever', 'oembed'); + $template = Renderer::getMarkupTemplate('/settings.tpl', 'addon/retriever/'); + $config = array('$submit' => L10n::t('Save Settings'), + '$title' => L10n::t('Retriever Settings'), + '$help' => $a->getBaseUrl . '/retriever/help', + '$allow_images' => Config::get('retriever', 'allow_images')); + $config['$allphotos'] = array('retriever_all_photos', + L10n::t('All Photos'), + $all_photos, + L10n::t('Check this to retrieve photos for all posts')); + $config['$oembed'] = array('retriever_oembed', + L10n::t('Resolve OEmbed'), + $oembed, + L10n::t('Check this to attempt to retrieve embedded content for all posts')); + $s .= Renderer::replaceMacros($template, $config); } -function retriever_plugin_settings_post($a,$post) { - if ($_POST['retriever_all_photos']) { - PConfig::set(local_user(), 'retriever', 'all_photos', $_POST['retriever_all_photos']); - } - else { - PConfig::del(local_user(), 'retriever', 'all_photos'); - } - if ($_POST['retriever_oembed']) { - PConfig::set(local_user(), 'retriever', 'oembed', $_POST['retriever_oembed']); - } - else { - PConfig::del(local_user(), 'retriever', 'oembed'); - } +/** + * @brief Hook for processing post results from user's settings page + * + * @param App $a The App object + * @param array $post Posted content + */ +function retriever_addon_settings_post($a, $post) { + if ($post['retriever_all_photos']) { + PConfig::set(local_user(), 'retriever', 'all_photos', $post['retriever_all_photos']); + } + else { + PConfig::delete(local_user(), 'retriever', 'all_photos'); + } + if ($post['retriever_oembed']) { + 
PConfig::set(local_user(), 'retriever', 'oembed', $post['retriever_oembed']); + } + else { + PConfig::delete(local_user(), 'retriever', 'oembed'); + } } diff --git a/retriever/templates/admin.tpl b/retriever/templates/admin.tpl index b5a35961..71c8506e 100644 --- a/retriever/templates/admin.tpl +++ b/retriever/templates/admin.tpl @@ -4,5 +4,6 @@ * *}} {{include file="field_input.tpl" field=$downloads_per_cron}} +{{include file="field_checkbox.tpl" field=$allow_images}}
diff --git a/retriever/templates/rule-config.tpl b/retriever/templates/rule-config.tpl index 171054de..8c1bc130 100644 --- a/retriever/templates/rule-config.tpl +++ b/retriever/templates/rule-config.tpl @@ -142,7 +142,9 @@ document.addEventListener('DOMContentLoaded', function() { {{include file="field_checkbox.tpl" field=$modurl}} {{include file="field_input.tpl" field=$pattern}} {{include file="field_input.tpl" field=$replace}} +{{if $allow_images}} {{include file="field_checkbox.tpl" field=$images}} +{{/if}} {{include file="field_textarea.tpl" field=$customxslt}} {{include file="field_checkbox.tpl" field=$storecookies}} {{include file="field_textarea.tpl" field=$cookiedata}} diff --git a/retriever/templates/settings.tpl b/retriever/templates/settings.tpl index 8bfe8db0..3151fd72 100644 --- a/retriever/templates/settings.tpl +++ b/retriever/templates/settings.tpl @@ -1,9 +1,16 @@ -
-

{{$title}}

-

- Get Help -

+ +

{{$title}}

+
+ From 22da732b99a39aca367fa57826392d81791ef2ad Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 13 Oct 2019 11:27:19 +0200 Subject: [PATCH 35/96] remove help section if images not allowed --- retriever/retriever.php | 1 + retriever/templates/help.tpl | 2 ++ 2 files changed, 3 insertions(+) diff --git a/retriever/retriever.php b/retriever/retriever.php index 33f9a40e..6b71c36e 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -831,6 +831,7 @@ function retriever_content($a) { $template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/'); $a->page['content'] .= Renderer::replaceMacros($template, array( '$config' => $a->getBaseUrl . '/settings/addon', + '$allow_images' => Config::get('retriever', 'allow_images'), '$feeds' => $feeds)); return; } diff --git a/retriever/templates/help.tpl b/retriever/templates/help.tpl index b96ec63c..7298c130 100644 --- a/retriever/templates/help.tpl +++ b/retriever/templates/help.tpl @@ -131,6 +131,7 @@ fails, the plugin will keep trying at progressively longer intervals for up to a month, in case the website is temporarily overloaded or the network is down.

+{{if $allow_images}}

Retrieving Images

Retriever can also optionally download images and store them in the @@ -140,6 +141,7 @@ an RSS feed or not. Go to the "Settings" page and click "Plugin settings". Then check the "All Photos" box in the "Retriever Settings" section and click "Submit".

+{{/if}}

Configure Feeds:

{{foreach $feeds as $feed}} From 9dbc11be1b86e03500959b9475fd78b8e90688f8 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 6 Jan 2020 22:12:47 +0100 Subject: [PATCH 36/96] Fix bug in phototrack --- phototrack/phototrack.php | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index 8b909f5d..e493871d 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -103,12 +103,22 @@ function phototrack_photo_use($photo, $table, $field, $id) { function phototrack_check_field_url($a, $table, $field, $id, $url) { Logger::info('@@@ phototrack_check_field_url table ' . $table . ' field ' . $field . ' id ' . $id . ' url ' . $url); $baseurl = $a->getBaseURL(); - if (strpos($url, $baseurl) !== FALSE) { + if (strpos($url, $baseurl) === FALSE) { + return; + } + else { $url = substr($url, strlen($baseurl)); Logger::info('@@@ phototrack_check_field_url funny url stuff ' . $url . ' base ' . $baseurl); } - if (strpos($url, '/photo/') !== FALSE) { - $rid = substr($url, strlen('/photo/')); + if (strpos($url, '/photo/') === FALSE) { + return; + } + else { + $url = substr($url, strlen('/photo/')); + Logger::info('@@@ phototrack_check_field_url more url stuff ' . $url); + } + if (preg_match('/([0-9a-z]{32})/', $url, $matches)) { + $rid = $matches[0]; Logger::info('@@@ phototrack_check_field_url rid ' . 
$rid); phototrack_photo_use($rid, $table, $field, $id); } From ccd0efd0f77e11ffee37b5b1bcbc0a2227e0f06b Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Thu, 9 Jan 2020 22:07:55 +0100 Subject: [PATCH 37/96] Update for new version --- retriever/retriever.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 6b71c36e..0b78181f 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -714,7 +714,7 @@ function retrieve_images(&$item) { if (!$url) { continue; } - if (strpos($url, System::baseUrl()) === FALSE) { + if (strpos($url, DI::baseUrl()) === FALSE) { $resource = add_retriever_resource($url, $item['uid'], $item['contact-id'], true); if (!$resource['completed']) { add_retriever_item($item, $resource); @@ -800,7 +800,7 @@ function retriever_transform_images(&$item, $resource) { return; } $photo = Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc); - $new_url = System::baseUrl() . '/photo/' . $rid . '-0.' . $image->getExt(); + $new_url = DI::baseUrl() . '/photo/' . $rid . '-0.' . $image->getExt(); if (!strlen($new_url)) { Logger::warning('retriever_transform_images: no replacement URL for image ' . $resource['url']); return; @@ -826,7 +826,7 @@ function retriever_content($a) { if ($a->argv[1] === 'help') { $feeds = DBA::selectToArray('contact', ['id', 'name', 'thumb'], ['uid' => local_user(), 'network' => 'feed']); for ($i = 0; $i < count($feeds); ++$i) { - $feeds[$i]['url'] = System::baseUrl() . '/retriever/' . $feeds[$i]['id']; + $feeds[$i]['url'] = DI::baseUrl() . '/retriever/' . 
$feeds[$i]['id']; } $template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/'); $a->page['content'] .= Renderer::replaceMacros($template, array( @@ -950,7 +950,7 @@ function retriever_contact_photo_menu($a, &$args) { return; } if ($args["contact"]["network"] == "feed") { - $args["menu"]['retriever'] = array(L10n::t('Retriever'), System::baseUrl() . '/retriever/' . $args["contact"]['id']); + $args["menu"]['retriever'] = array(L10n::t('Retriever'), DI::baseUrl() . '/retriever/' . $args["contact"]['id']); } } From 82d1f71d205836ae0d195a290cd4a21d43d65b49 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Fri, 10 Jan 2020 07:32:39 +0100 Subject: [PATCH 38/96] Missing class --- retriever/retriever.php | 1 + 1 file changed, 1 insertion(+) diff --git a/retriever/retriever.php b/retriever/retriever.php index 0b78181f..5da7aff1 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -22,6 +22,7 @@ use Friendica\Database\DBA; use Friendica\Model\ItemURI; use Friendica\Model\Item; use Friendica\Util\DateTimeFormat; +use Friendica\DI; /** * @brief Installation hook for retriever plugin From 891cb4e876003c86de39f227312b6b059e8fb77b Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Fri, 10 Jan 2020 07:34:19 +0100 Subject: [PATCH 39/96] New way of doing baseurl --- phototrack/phototrack.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index e493871d..4df89873 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -25,6 +25,7 @@ use Friendica\Core\Config; use Friendica\Core\Logger; use Friendica\Object\Image; use Friendica\Database\DBA; +use Friendica\DI; if (!defined('PHOTOTRACK_DEFAULT_BATCH_SIZE')) { define('PHOTOTRACK_DEFAULT_BATCH_SIZE', 1000); @@ -102,7 +103,7 @@ function phototrack_photo_use($photo, $table, $field, $id) { function phototrack_check_field_url($a, $table, $field, $id, $url) { Logger::info('@@@ phototrack_check_field_url table ' . 
$table . ' field ' . $field . ' id ' . $id . ' url ' . $url); - $baseurl = $a->getBaseURL(); + $baseurl = DI::baseUrl(); if (strpos($url, $baseurl) === FALSE) { return; } @@ -125,7 +126,7 @@ function phototrack_check_field_url($a, $table, $field, $id, $url) { } function phototrack_check_field_bbcode($a, $table, $field, $id, $value) { - $baseurl = $a->getBaseURL(); + $baseurl = DI::baseUrl(); $matches = array(); preg_match_all("/\[img(\=([0-9]*)x([0-9]*))?\](.*?)\[\/img\]/ism", $value, $matches); foreach ($matches[4] as $url) { From 235fa49d50fdb74f67a2890785978fb1a9ccf945 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Fri, 10 Jan 2020 07:47:08 +0100 Subject: [PATCH 40/96] maybe this way works better --- phototrack/phototrack.php | 4 ++-- retriever/retriever.php | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index 4df89873..25c299ac 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -103,7 +103,7 @@ function phototrack_photo_use($photo, $table, $field, $id) { function phototrack_check_field_url($a, $table, $field, $id, $url) { Logger::info('@@@ phototrack_check_field_url table ' . $table . ' field ' . $field . ' id ' . $id . ' url ' . 
$url); - $baseurl = DI::baseUrl(); + $baseurl = DI::baseUrl()->get(true); if (strpos($url, $baseurl) === FALSE) { return; } @@ -126,7 +126,7 @@ function phototrack_check_field_url($a, $table, $field, $id, $url) { } function phototrack_check_field_bbcode($a, $table, $field, $id, $value) { - $baseurl = DI::baseUrl(); + $baseurl = DI::baseUrl()->get(true); $matches = array(); preg_match_all("/\[img(\=([0-9]*)x([0-9]*))?\](.*?)\[\/img\]/ism", $value, $matches); foreach ($matches[4] as $url) { diff --git a/retriever/retriever.php b/retriever/retriever.php index 5da7aff1..e6defdf5 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -715,7 +715,7 @@ function retrieve_images(&$item) { if (!$url) { continue; } - if (strpos($url, DI::baseUrl()) === FALSE) { + if (strpos($url, DI::baseUrl()->get(true)) === FALSE) { $resource = add_retriever_resource($url, $item['uid'], $item['contact-id'], true); if (!$resource['completed']) { add_retriever_item($item, $resource); @@ -801,7 +801,7 @@ function retriever_transform_images(&$item, $resource) { return; } $photo = Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc); - $new_url = DI::baseUrl() . '/photo/' . $rid . '-0.' . $image->getExt(); + $new_url = DI::baseUrl()->get(true) . '/photo/' . $rid . '-0.' . $image->getExt(); if (!strlen($new_url)) { Logger::warning('retriever_transform_images: no replacement URL for image ' . $resource['url']); return; @@ -827,7 +827,7 @@ function retriever_content($a) { if ($a->argv[1] === 'help') { $feeds = DBA::selectToArray('contact', ['id', 'name', 'thumb'], ['uid' => local_user(), 'network' => 'feed']); for ($i = 0; $i < count($feeds); ++$i) { - $feeds[$i]['url'] = DI::baseUrl() . '/retriever/' . $feeds[$i]['id']; + $feeds[$i]['url'] = DI::baseUrl()->get(true) . '/retriever/' . 
$feeds[$i]['id']; } $template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/'); $a->page['content'] .= Renderer::replaceMacros($template, array( @@ -951,7 +951,7 @@ function retriever_contact_photo_menu($a, &$args) { return; } if ($args["contact"]["network"] == "feed") { - $args["menu"]['retriever'] = array(L10n::t('Retriever'), DI::baseUrl() . '/retriever/' . $args["contact"]['id']); + $args["menu"]['retriever'] = array(L10n::t('Retriever'), DI::baseUrl()->get(true) . '/retriever/' . $args["contact"]['id']); } } From 62292fd29748dc5ccbcad7bacc7974d0b4096f8e Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 16 Aug 2020 13:59:34 +0200 Subject: [PATCH 41/96] Update to new module structure --- retriever/retriever.php | 123 ++++++++++++++++++++-------------------- 1 file changed, 60 insertions(+), 63 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index e6defdf5..2f62c52e 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -7,8 +7,6 @@ */ use Friendica\Core\Addon; -use Friendica\Core\Config; -use Friendica\Core\PConfig; use Friendica\Core\Logger; use Friendica\Core\Renderer; use Friendica\Core\System; @@ -17,7 +15,6 @@ use Friendica\Content\Text\BBCode; use Friendica\Model\Photo; use Friendica\Object\Image; use Friendica\Util\Network; -use Friendica\Core\L10n; use Friendica\Database\DBA; use Friendica\Model\ItemURI; use Friendica\Model\Item; @@ -34,7 +31,7 @@ function retriever_install() { Addon::registerHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); - if (Config::get('retriever', 'dbversion') != '0.14') { + if (DI::config()->get('retriever', 'dbversion') != '0.14') { $schema = file_get_contents(dirname(__file__).'/database.sql'); $tables = explode(';', $schema); foreach ($tables as $table) { @@ -43,8 +40,8 @@ function retriever_install() { return; } } - Config::set('retriever', 
'downloads_per_cron', '100'); - Config::set('retriever', 'dbversion', '0.14'); + DI::config()->set('retriever', 'downloads_per_cron', '100'); + DI::config()->set('retriever', 'dbversion', '0.14'); } } @@ -77,22 +74,22 @@ function retriever_module() {} function retriever_addon_admin(&$a, &$o) { $template = Renderer::getMarkupTemplate('admin.tpl', 'addon/retriever/'); - $downloads_per_cron = Config::get('retriever', 'downloads_per_cron'); + $downloads_per_cron = DI::config()->get('retriever', 'downloads_per_cron'); $downloads_per_cron_config = ['downloads_per_cron', - L10n::t('Downloads per Cron'), + DI::l10n()->t('Downloads per Cron'), $downloads_per_cron, - L10n::t('Maximum number of downloads to attempt during each run of the cron job.')]; + DI::l10n()->t('Maximum number of downloads to attempt during each run of the cron job.')]; - $allow_images = Config::get('retriever', 'allow_images'); + $allow_images = DI::config()->get('retriever', 'allow_images'); $allow_images_config = ['allow_images', - L10n::t('Allow Retrieving Images'), + DI::l10n()->t('Allow Retrieving Images'), $allow_images, - L10n::t('Allow users to request images be downloaded as well as text.
Warning: the images are not automatically deleted and may fill up your database.')]; + DI::l10n()->t('Allow users to request images be downloaded as well as text.
Warning: the images are not automatically deleted and may fill up your database.')]; $o .= Renderer::replaceMacros($template, [ '$downloads_per_cron' => $downloads_per_cron_config, '$allow_images' => $allow_images_config, - '$submit' => L10n::t('Save Settings')]); + '$submit' => DI::l10n()->t('Save Settings')]); } /** @@ -100,16 +97,16 @@ function retriever_addon_admin(&$a, &$o) { */ function retriever_addon_admin_post () { if (!empty($_POST['downloads_per_cron'])) { - Config::set('retriever', 'downloads_per_cron', $_POST['downloads_per_cron']); + DI::config()->set('retriever', 'downloads_per_cron', $_POST['downloads_per_cron']); } - Config::set('retriever', 'allow_images', $_POST['allow_images']); + DI::config()->set('retriever', 'allow_images', $_POST['allow_images']); } /** * @brief Cron jobs for retriever plugin */ function retriever_cron() { - $downloads_per_cron = Config::get('retriever', 'downloads_per_cron'); + $downloads_per_cron = DI::config()->get('retriever', 'downloads_per_cron'); // Do this first, otherwise it can interfere with retriever_retrieve_items retriever_clean_up_completed_resources($downloads_per_cron); @@ -581,7 +578,7 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { Logger::info('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty'); return; } - $body .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . ': [url='; + $body .= "\n\n" . DI::l10n()->t('Retrieved') . ' ' . date("Y-m-d") . ': [url='; $body .= $item['plink']; $body .= ']' . $item['plink'] . '[/url]'; @@ -695,7 +692,7 @@ function retriever_set_body(&$item, $body) { * @param array &$item Item to be searched for images and updated (by ref). This may or may not be already stored in the database. 
*/ function retrieve_images(&$item) { - if (!Config::get('retriever', 'allow_images')) { + if (!DI::config()->get('retriever', 'allow_images')) { return; } @@ -832,7 +829,7 @@ function retriever_content($a) { $template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/'); $a->page['content'] .= Renderer::replaceMacros($template, array( '$config' => $a->getBaseUrl . '/settings/addon', - '$allow_images' => Config::get('retriever', 'allow_images'), + '$allow_images' => DI::config()->get('retriever', 'allow_images'), '$feeds' => $feeds)); return; } @@ -879,62 +876,62 @@ function retriever_content($a) { $a->page['content'] .= Renderer::replaceMacros($template, array( '$enable' => array( 'retriever_enable', - L10n::t('Enabled'), + DI::l10n()->t('Enabled'), $retriever_rule['data']['enable']), '$modurl' => array( 'retriever_modurl', - L10n::t('Modify URL'), + DI::l10n()->t('Modify URL'), $retriever_rule['data']['modurl'], - L10n::t("Modify each article's URL with regular expressions before retrieving.")), + DI::l10n()->t("Modify each article's URL with regular expressions before retrieving.")), '$pattern' => array( 'retriever_pattern', - L10n::t('URL Pattern'), + DI::l10n()->t('URL Pattern'), $retriever_rule['data']['pattern'], - L10n::t('Regular expression matching part of the URL to replace')), + DI::l10n()->t('Regular expression matching part of the URL to replace')), '$replace' => array( 'retriever_replace', - L10n::t('URL Replace'), + DI::l10n()->t('URL Replace'), $retriever_rule['data']['replace'], - L10n::t('Text to replace matching part of above regular expression')), - '$allow_images' => Config::get('retriever', 'allow_images'), + DI::l10n()->t('Text to replace matching part of above regular expression')), + '$allow_images' => DI::config()->get('retriever', 'allow_images'), '$images' => array( 'retriever_images', - L10n::t('Download Images'), + DI::l10n()->t('Download Images'), $retriever_rule['data']['images']), '$retrospective' => array( 
'retriever_retrospective', - L10n::t('Retrospectively Apply'), + DI::l10n()->t('Retrospectively Apply'), '0', - L10n::t('Reapply the rules to this number of posts')), + DI::l10n()->t('Reapply the rules to this number of posts')), 'storecookies' => array( 'retriever_storecookies', - L10n::t('Store cookies'), + DI::l10n()->t('Store cookies'), $retriever_rule['data']['storecookies'], - L10n::t("Preserve cookie data across fetches.")), + DI::l10n()->t("Preserve cookie data across fetches.")), '$cookiedata' => array( 'retriever_cookiedata', - L10n::t('Cookie Data'), + DI::l10n()->t('Cookie Data'), $retriever_rule['data']['cookiedata'], - L10n::t("Latest cookie data for this feed. Netscape cookie file format.")), + DI::l10n()->t("Latest cookie data for this feed. Netscape cookie file format.")), '$customxslt' => array( 'retriever_customxslt', - L10n::t('Custom XSLT'), + DI::l10n()->t('Custom XSLT'), $retriever_rule['data']['customxslt'], - L10n::t("When standard rules aren't enough, apply custom XSLT to the article")), - '$title' => L10n::t('Retrieve Feed Content'), + DI::l10n()->t("When standard rules aren't enough, apply custom XSLT to the article")), + '$title' => DI::l10n()->t('Retrieve Feed Content'), '$help' => $a->getBaseUrl . '/retriever/help', - '$help_t' => L10n::t('Get Help'), - '$submit_t' => L10n::t('Submit'), - '$submit' => L10n::t('Save Settings'), + '$help_t' => DI::l10n()->t('Get Help'), + '$submit_t' => DI::l10n()->t('Submit'), + '$submit' => DI::l10n()->t('Save Settings'), '$id' => ($retriever_rule["id"] ? 
$retriever_rule["id"] : "create"), - '$tag_t' => L10n::t('Tag'), - '$attribute_t' => L10n::t('Attribute'), - '$value_t' => L10n::t('Value'), - '$add_t' => L10n::t('Add'), - '$remove_t' => L10n::t('Remove'), - '$include_t' => L10n::t('Include'), + '$tag_t' => DI::l10n()->t('Tag'), + '$attribute_t' => DI::l10n()->t('Attribute'), + '$value_t' => DI::l10n()->t('Value'), + '$add_t' => DI::l10n()->t('Add'), + '$remove_t' => DI::l10n()->t('Remove'), + '$include_t' => DI::l10n()->t('Include'), '$include' => $retriever_rule['data']['include'], - '$exclude_t' => L10n::t('Exclude'), + '$exclude_t' => DI::l10n()->t('Exclude'), '$exclude' => $retriever_rule['data']['exclude'])); return; } @@ -951,7 +948,7 @@ function retriever_contact_photo_menu($a, &$args) { return; } if ($args["contact"]["network"] == "feed") { - $args["menu"]['retriever'] = array(L10n::t('Retriever'), DI::baseUrl()->get(true) . '/retriever/' . $args["contact"]['id']); + $args["menu"]['retriever'] = array(DI::l10n()->t('Retriever'), DI::baseUrl()->get(true) . '/retriever/' . $args["contact"]['id']); } } @@ -969,13 +966,13 @@ function retriever_post_remote_hook(&$a, &$item) { retriever_on_item_insert($retriever_rule, $item); } else { - if (PConfig::get($item["uid"], 'retriever', 'oembed')) { + if (PDI::config()->get($item["uid"], 'retriever', 'oembed')) { // Convert to HTML and back to take advantage of bbcode's resolution of oembeds. 
$body = retriever_get_body($item); $body = HTML::toBBCode(BBCode::convert($body)); retriever_set_body($item, $body); } - if (PConfig::get($item["uid"], 'retriever', 'all_photos')) { + if (PDI::config()->get($item["uid"], 'retriever', 'all_photos')) { retrieve_images($item); } } @@ -989,21 +986,21 @@ function retriever_post_remote_hook(&$a, &$item) { * @param string $s HTML string to which to append settings content (by ref) */ function retriever_addon_settings(&$a, &$s) { - $all_photos = PConfig::get(local_user(), 'retriever', 'all_photos'); - $oembed = PConfig::get(local_user(), 'retriever', 'oembed'); + $all_photos = PDI::config()->get(local_user(), 'retriever', 'all_photos'); + $oembed = PDI::config()->get(local_user(), 'retriever', 'oembed'); $template = Renderer::getMarkupTemplate('/settings.tpl', 'addon/retriever/'); - $config = array('$submit' => L10n::t('Save Settings'), - '$title' => L10n::t('Retriever Settings'), + $config = array('$submit' => DI::l10n()->t('Save Settings'), + '$title' => DI::l10n()->t('Retriever Settings'), '$help' => $a->getBaseUrl . 
'/retriever/help', - '$allow_images' => Config::get('retriever', 'allow_images')); + '$allow_images' => DI::config()->get('retriever', 'allow_images')); $config['$allphotos'] = array('retriever_all_photos', - L10n::t('All Photos'), + DI::l10n()->t('All Photos'), $all_photos, - L10n::t('Check this to retrieve photos for all posts')); + DI::l10n()->t('Check this to retrieve photos for all posts')); $config['$oembed'] = array('retriever_oembed', - L10n::t('Resolve OEmbed'), + DI::l10n()->t('Resolve OEmbed'), $oembed, - L10n::t('Check this to attempt to retrieve embedded content for all posts')); + DI::l10n()->t('Check this to attempt to retrieve embedded content for all posts')); $s .= Renderer::replaceMacros($template, $config); } @@ -1015,15 +1012,15 @@ function retriever_addon_settings(&$a, &$s) { */ function retriever_addon_settings_post($a, $post) { if ($post['retriever_all_photos']) { - PConfig::set(local_user(), 'retriever', 'all_photos', $post['retriever_all_photos']); + PDI::config()->set(local_user(), 'retriever', 'all_photos', $post['retriever_all_photos']); } else { - PConfig::delete(local_user(), 'retriever', 'all_photos'); + PDI::config()->delete(local_user(), 'retriever', 'all_photos'); } if ($post['retriever_oembed']) { - PConfig::set(local_user(), 'retriever', 'oembed', $post['retriever_oembed']); + PDI::config()->set(local_user(), 'retriever', 'oembed', $post['retriever_oembed']); } else { - PConfig::delete(local_user(), 'retriever', 'oembed'); + PDI::config()->delete(local_user(), 'retriever', 'oembed'); } } From 275e0fcabf71e763cfec70416eba687fad5a2738 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 16 Aug 2020 14:00:31 +0200 Subject: [PATCH 42/96] Use new L10n thing --- publicise/publicise.php | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/publicise/publicise.php b/publicise/publicise.php index d27eefd4..a746d8af 100644 --- a/publicise/publicise.php +++ b/publicise/publicise.php @@ -9,7 +9,6 @@ use 
Friendica\Core\Addon; use Friendica\Core\Logger; use Friendica\Core\Renderer; -use Friendica\Core\L10n; use Friendica\Database\DBA; function publicise_install() { @@ -71,11 +70,11 @@ function publicise_addon_admin(&$a,&$o) { $template = Renderer::getMarkupTemplate('admin.tpl', 'addon/publicise/'); $o .= Renderer::replaceMacros($template, array( '$feeds' => $contacts, - '$feed_t' => L10n::t('Feed'), - '$publicised_t' => L10n::t('Publicised'), - '$comments_t' => L10n::t('Allow Comments/Likes'), - '$expire_t' => L10n::t('Expire Articles After (Days)'), - '$submit_t' => L10n::t('Submit'))); + '$feed_t' => DI::l10n()->t('Feed'), + '$publicised_t' => DI::l10n()->t('Publicised'), + '$comments_t' => DI::l10n()->t('Allow Comments/Likes'), + '$expire_t' => DI::l10n()->t('Expire Articles After (Days)'), + '$submit_t' => DI::l10n()->t('Submit'))); } function publicise_make_string($in) { From 67cf46ae318d34ef73c737911b8af13f0e576d0a Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 23 Aug 2020 21:15:18 +0100 Subject: [PATCH 43/96] Further updates to 2020.03 --- phototrack/phototrack.php | 17 ++++++++--------- retriever/retriever.php | 16 ++++++++-------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index 25c299ac..c67014c1 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -21,7 +21,6 @@ */ use Friendica\Core\Addon; -use Friendica\Core\Config; use Friendica\Core\Logger; use Friendica\Object\Image; use Friendica\Database\DBA; @@ -43,7 +42,7 @@ function phototrack_install() { Addon::registerHook('notifier_end', 'addon/phototrack/phototrack.php', 'phototrack_notifier_end'); Addon::registerHook('cron', 'addon/phototrack/phototrack.php', 'phototrack_cron'); - if (Config::get('phototrack', 'dbversion') != '0.1') { + if (DI::config()->get('phototrack', 'dbversion') != '0.1') { $schema = file_get_contents(dirname(__file__).'/database.sql'); $arr = explode(';', $schema); foreach ($arr as 
$a) { @@ -52,7 +51,7 @@ function phototrack_install() { return; } } - Config::set('phototrack', 'dbversion', '0.1'); + DI::config()->set('phototrack', 'dbversion', '0.1'); } } @@ -190,7 +189,7 @@ function phototrack_check_row($a, $table, $row) { } function phototrack_batch_size() { - $batch_size = Config::get('phototrack', 'batch_size'); + $batch_size = DI::config()->get('phototrack', 'batch_size'); if ($batch_size > 0) { return $batch_size; } @@ -210,13 +209,13 @@ function phototrack_search_table($a, $table) { } function phototrack_cron_time() { - $prev_remaining = Config::get('phototrack', 'remaining_items'); + $prev_remaining = DI::config()->get('phototrack', 'remaining_items'); if ($prev_remaining > 10 * phototrack_batch_size()) { Logger::debug('phototrack: more than ' . (10 * phototrack_batch_size()) . ' items remaining'); return true; } - $last = Config::get('phototrack', 'last_search'); - $search_interval = intval(Config::get('phototrack', 'search_interval')); + $last = DI::config()->get('phototrack', 'last_search'); + $search_interval = intval(DI::config()->get('phototrack', 'search_interval')); if (!$search_interval) { $search_interval = PHOTOTRACK_DEFAULT_SEARCH_INTERVAL; } @@ -234,7 +233,7 @@ function phototrack_cron($a, $b) { if (!phototrack_cron_time()) { return; } - Config::set('phototrack', 'last_search', time()); + DI::config()->set('phototrack', 'last_search', time()); $remaining = 0; $remaining += phototrack_search_table($a, 'item'); @@ -244,7 +243,7 @@ function phototrack_cron($a, $b) { $remaining += phototrack_search_table($a, 'fsuggest'); $remaining += phototrack_search_table($a, 'gcontact'); - Config::set('phototrack', 'remaining_items', $remaining); + DI::config()->set('phototrack', 'remaining_items', $remaining); if ($remaining === 0) { phototrack_tidy(); } diff --git a/retriever/retriever.php b/retriever/retriever.php index 2f62c52e..4097674f 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -966,13 +966,13 @@ function 
retriever_post_remote_hook(&$a, &$item) { retriever_on_item_insert($retriever_rule, $item); } else { - if (PDI::config()->get($item["uid"], 'retriever', 'oembed')) { + if (DI::config()->get($item["uid"], 'retriever', 'oembed')) { // Convert to HTML and back to take advantage of bbcode's resolution of oembeds. $body = retriever_get_body($item); $body = HTML::toBBCode(BBCode::convert($body)); retriever_set_body($item, $body); } - if (PDI::config()->get($item["uid"], 'retriever', 'all_photos')) { + if (DI::config()->get($item["uid"], 'retriever', 'all_photos')) { retrieve_images($item); } } @@ -986,8 +986,8 @@ function retriever_post_remote_hook(&$a, &$item) { * @param string $s HTML string to which to append settings content (by ref) */ function retriever_addon_settings(&$a, &$s) { - $all_photos = PDI::config()->get(local_user(), 'retriever', 'all_photos'); - $oembed = PDI::config()->get(local_user(), 'retriever', 'oembed'); + $all_photos = DI::config()->get(local_user(), 'retriever', 'all_photos'); + $oembed = DI::config()->get(local_user(), 'retriever', 'oembed'); $template = Renderer::getMarkupTemplate('/settings.tpl', 'addon/retriever/'); $config = array('$submit' => DI::l10n()->t('Save Settings'), '$title' => DI::l10n()->t('Retriever Settings'), @@ -1012,15 +1012,15 @@ function retriever_addon_settings(&$a, &$s) { */ function retriever_addon_settings_post($a, $post) { if ($post['retriever_all_photos']) { - PDI::config()->set(local_user(), 'retriever', 'all_photos', $post['retriever_all_photos']); + DI::config()->set(local_user(), 'retriever', 'all_photos', $post['retriever_all_photos']); } else { - PDI::config()->delete(local_user(), 'retriever', 'all_photos'); + DI::config()->delete(local_user(), 'retriever', 'all_photos'); } if ($post['retriever_oembed']) { - PDI::config()->set(local_user(), 'retriever', 'oembed', $post['retriever_oembed']); + DI::config()->set(local_user(), 'retriever', 'oembed', $post['retriever_oembed']); } else { - 
PDI::config()->delete(local_user(), 'retriever', 'oembed'); + DI::config()->delete(local_user(), 'retriever', 'oembed'); } } From c31ed576fc43afe239c1aec144bf1b6f6b2c027d Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Tue, 13 Oct 2020 18:39:01 +0100 Subject: [PATCH 44/96] Update with base url changes and strict key requirements --- retriever/retriever.php | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 4097674f..a98611eb 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -828,7 +828,7 @@ function retriever_content($a) { } $template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/'); $a->page['content'] .= Renderer::replaceMacros($template, array( - '$config' => $a->getBaseUrl . '/settings/addon', + '$config' => DI::baseUrl()->get(true) . '/settings/addon', '$allow_images' => DI::config()->get('retriever', 'allow_images'), '$feeds' => $feeds)); return; @@ -877,27 +877,27 @@ function retriever_content($a) { '$enable' => array( 'retriever_enable', DI::l10n()->t('Enabled'), - $retriever_rule['data']['enable']), + array_key_exists('enable', $retriever_rule['data']) ? $retriever_rule['data']['enable'] : ""), '$modurl' => array( 'retriever_modurl', DI::l10n()->t('Modify URL'), - $retriever_rule['data']['modurl'], + array_key_exists('modurl', $retriever_rule['data']) ? $retriever_rule['data']['modurl'] : "", DI::l10n()->t("Modify each article's URL with regular expressions before retrieving.")), '$pattern' => array( 'retriever_pattern', DI::l10n()->t('URL Pattern'), - $retriever_rule['data']['pattern'], + array_key_exists('pattern', $retriever_rule['data']) ? $retriever_rule['data']['pattern'] : "", DI::l10n()->t('Regular expression matching part of the URL to replace')), '$replace' => array( 'retriever_replace', DI::l10n()->t('URL Replace'), - $retriever_rule['data']['replace'], + array_key_exists('replace', $retriever_rule['data']) ? 
$retriever_rule['data']['replace'] : "", DI::l10n()->t('Text to replace matching part of above regular expression')), '$allow_images' => DI::config()->get('retriever', 'allow_images'), '$images' => array( 'retriever_images', DI::l10n()->t('Download Images'), - $retriever_rule['data']['images']), + array_key_exists('images', $retriever_rule['data']) ? $retriever_rule['data']['images'] : ""), '$retrospective' => array( 'retriever_retrospective', DI::l10n()->t('Retrospectively Apply'), @@ -906,20 +906,20 @@ function retriever_content($a) { 'storecookies' => array( 'retriever_storecookies', DI::l10n()->t('Store cookies'), - $retriever_rule['data']['storecookies'], + array_key_exists('storecookies', $retriever_rule['data']) ? $retriever_rule['data']['storecookies'] : "", DI::l10n()->t("Preserve cookie data across fetches.")), '$cookiedata' => array( 'retriever_cookiedata', DI::l10n()->t('Cookie Data'), - $retriever_rule['data']['cookiedata'], + array_key_exists('cookiedata', $retriever_rule['data']) ? $retriever_rule['data']['cookiedata'] : "", DI::l10n()->t("Latest cookie data for this feed. Netscape cookie file format.")), '$customxslt' => array( 'retriever_customxslt', DI::l10n()->t('Custom XSLT'), - $retriever_rule['data']['customxslt'], + array_key_exists('customxslt', $retriever_rule['data']) ? $retriever_rule['data']['customxslt'] : "", DI::l10n()->t("When standard rules aren't enough, apply custom XSLT to the article")), '$title' => DI::l10n()->t('Retrieve Feed Content'), - '$help' => $a->getBaseUrl . '/retriever/help', + '$help' => DI::baseUrl()->get(true) . 
'/retriever/help', '$help_t' => DI::l10n()->t('Get Help'), '$submit_t' => DI::l10n()->t('Submit'), '$submit' => DI::l10n()->t('Save Settings'), @@ -991,7 +991,7 @@ function retriever_addon_settings(&$a, &$s) { $template = Renderer::getMarkupTemplate('/settings.tpl', 'addon/retriever/'); $config = array('$submit' => DI::l10n()->t('Save Settings'), '$title' => DI::l10n()->t('Retriever Settings'), - '$help' => $a->getBaseUrl . '/retriever/help', + '$help' => DI::baseUrl()->get(true) . '/retriever/help', '$allow_images' => DI::config()->get('retriever', 'allow_images')); $config['$allphotos'] = array('retriever_all_photos', DI::l10n()->t('All Photos'), From a71e3a34e562a6fcdeb1ee043104d300e44cc203 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 21 Oct 2020 16:25:51 +0100 Subject: [PATCH 45/96] Fix page assembly --- retriever/retriever.php | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index a98611eb..7c22071c 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -835,6 +835,9 @@ function retriever_content($a) { } if ($a->argv[1]) { $retriever_rule = get_retriever_rule($a->argv[1], local_user(), false); + if (!$retriever_rule) { + $retriever_rule = ['id' => 0, 'data' => ['enable' => 0, 'modurl' => '', 'pattern' => '', 'replace' => '', 'images' => 0, 'storecookies' => 0, 'cookiedata' => '', 'customxslt' => '', 'include' => '', 'exclude' => '']]; + } if (!empty($_POST["id"])) { $retriever_rule = get_retriever_rule($a->argv[1], local_user(), true); @@ -873,31 +876,31 @@ function retriever_content($a) { } $template = Renderer::getMarkupTemplate('/rule-config.tpl', 'addon/retriever/'); - $a->page['content'] .= Renderer::replaceMacros($template, array( + DI::page()['content'] .= Renderer::replaceMacros($template, array( '$enable' => array( 'retriever_enable', DI::l10n()->t('Enabled'), - array_key_exists('enable', $retriever_rule['data']) ? 
$retriever_rule['data']['enable'] : ""), + $retriever_rule['data']['enable']), '$modurl' => array( 'retriever_modurl', DI::l10n()->t('Modify URL'), - array_key_exists('modurl', $retriever_rule['data']) ? $retriever_rule['data']['modurl'] : "", + $retriever_rule['data']['modurl'], DI::l10n()->t("Modify each article's URL with regular expressions before retrieving.")), '$pattern' => array( 'retriever_pattern', DI::l10n()->t('URL Pattern'), - array_key_exists('pattern', $retriever_rule['data']) ? $retriever_rule['data']['pattern'] : "", + $retriever_rule['data']['pattern'], DI::l10n()->t('Regular expression matching part of the URL to replace')), '$replace' => array( 'retriever_replace', DI::l10n()->t('URL Replace'), - array_key_exists('replace', $retriever_rule['data']) ? $retriever_rule['data']['replace'] : "", + $retriever_rule['data']['replace'], DI::l10n()->t('Text to replace matching part of above regular expression')), '$allow_images' => DI::config()->get('retriever', 'allow_images'), '$images' => array( 'retriever_images', DI::l10n()->t('Download Images'), - array_key_exists('images', $retriever_rule['data']) ? $retriever_rule['data']['images'] : ""), + $retriever_rule['data']['images']), '$retrospective' => array( 'retriever_retrospective', DI::l10n()->t('Retrospectively Apply'), @@ -906,17 +909,17 @@ function retriever_content($a) { 'storecookies' => array( 'retriever_storecookies', DI::l10n()->t('Store cookies'), - array_key_exists('storecookies', $retriever_rule['data']) ? $retriever_rule['data']['storecookies'] : "", + $retriever_rule['data']['storecookies'], DI::l10n()->t("Preserve cookie data across fetches.")), '$cookiedata' => array( 'retriever_cookiedata', DI::l10n()->t('Cookie Data'), - array_key_exists('cookiedata', $retriever_rule['data']) ? $retriever_rule['data']['cookiedata'] : "", + $retriever_rule['data']['cookiedata'], DI::l10n()->t("Latest cookie data for this feed. 
Netscape cookie file format.")), '$customxslt' => array( 'retriever_customxslt', DI::l10n()->t('Custom XSLT'), - array_key_exists('customxslt', $retriever_rule['data']) ? $retriever_rule['data']['customxslt'] : "", + $retriever_rule['data']['customxslt'], DI::l10n()->t("When standard rules aren't enough, apply custom XSLT to the article")), '$title' => DI::l10n()->t('Retrieve Feed Content'), '$help' => DI::baseUrl()->get(true) . '/retriever/help', From 9cb7c4bbffeb289b0401b08e5136dba6d61e65cd Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 20 Dec 2020 20:38:42 +0000 Subject: [PATCH 46/96] Remove unneeded get_app --- phototrack/phototrack.php | 1 - 1 file changed, 1 deletion(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index c67014c1..b5f39a1e 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -144,7 +144,6 @@ function phototrack_post_remote_end(&$a, &$item) { } function phototrack_notifier_end($item) { - $a = get_app(); } function phototrack_check_row($a, $table, $row) { From 88b508ae5dc0da2980b903b4358eed36433b9b15 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 7 Feb 2021 19:37:19 +0100 Subject: [PATCH 47/96] Replace fetchUrlFull with HTTPRequest version --- retriever/retriever.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 7c22071c..0147b0d0 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -273,7 +273,7 @@ function retrieve_resource($resource) { $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); file_put_contents($cookiejar, $rule_data['cookiedata']); } - $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar); + $fetch_result = DI::httpRequest()->fetchFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar); if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) { $retriever_rule['data']['cookiedata'] = 
file_get_contents($cookiejar); DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], $retriever_rule); From a40af26ec538d729f8dd20449a54e1c94ea9f556 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 8 Feb 2021 19:29:45 +0100 Subject: [PATCH 48/96] Remove binary field from httpRequest --- retriever/retriever.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 0147b0d0..c99ce5f8 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -273,7 +273,7 @@ function retrieve_resource($resource) { $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); file_put_contents($cookiejar, $rule_data['cookiedata']); } - $fetch_result = DI::httpRequest()->fetchFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar); + $fetch_result = DI::httpRequest()->fetchFull($resource['url'], $redirects, '', $cookiejar); if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) { $retriever_rule['data']['cookiedata'] = file_get_contents($cookiejar); DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], $retriever_rule); From 3d148ed9bcc417f3e1c7b74b2edc51104e1ffdcd Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 26 Apr 2021 20:11:52 +0200 Subject: [PATCH 49/96] Adapt Item methods to Post methods --- retriever/retriever.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index c99ce5f8..92442c96 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -18,6 +18,7 @@ use Friendica\Util\Network; use Friendica\Database\DBA; use Friendica\Model\ItemURI; use Friendica\Model\Item; +use Friendica\Model\Post; use Friendica\Util\DateTimeFormat; use Friendica\DI; @@ -322,7 +323,7 @@ function get_retriever_rule($contact_id, $uid, $create) { * @return 
array Item that was found, or undef if no item could be found */ function retriever_get_item($retriever_item) { - $item = Item::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid']), 'contact-id' => intval($retriever_item['contact-id'])]); + $item = Post::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid']), 'contact-id' => intval($retriever_item['contact-id'])]); if (!DBA::isResult($item)) { Logger::warning('retriever_get_item: no item found for uri ' . $retriever_item['item-uri']); return; @@ -377,7 +378,7 @@ function retriever_resource_completed($resource) { * @param int $num The number of existing items to queue for retrieval */ function apply_retrospective($retriever, $num) { - foreach (Item::selectToArray([], ['contact-id' => intval($retriever['contact-id'])], ['order' => ['received' => true], 'limit' => $num]) as $item) { + foreach (Post::selectToArray([], ['contact-id' => intval($retriever['contact-id'])], ['order' => ['received' => true], 'limit' => $num]) as $item) { Item::update(['visible' => 0], ['id' => intval($item['id'])]); foreach (DBA::selectToArray('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]) as $retriever_item) { DBA::delete('retriever_resource', ['id' => $retriever_item['resource']]); From 966f0525b7bebce1c54eed919139b5244969eb03 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 26 Apr 2021 20:14:54 +0200 Subject: [PATCH 50/96] another check for empty results --- phototrack/phototrack.php | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index b5f39a1e..9f6c86d1 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -253,9 +253,11 @@ function phototrack_tidy() { q('CREATE TABLE IF NOT EXISTS `phototrack-temp` (`resource-id` char(255) not null)'); q('INSERT INTO `phototrack-temp` 
SELECT DISTINCT(`resource-id`) FROM photo WHERE photo.`created` < DATE_SUB(NOW(), INTERVAL 2 MONTH)'); $rows = q('SELECT `phototrack-temp`.`resource-id` FROM `phototrack-temp` LEFT OUTER JOIN phototrack_photo_use ON (`phototrack-temp`.`resource-id` = phototrack_photo_use.`resource-id`) WHERE phototrack_photo_use.id IS NULL limit ' . /*$batch_size*/1000); - foreach ($rows as $row) { - Logger::debug('phototrack: remove photo ' . $row['resource-id']); - q('DELETE FROM photo WHERE `resource-id` = "' . $row['resource-id'] . '"'); + if (DBA::isResult($ms_item_ids)) { + foreach ($rows as $row) { + Logger::debug('phototrack: remove photo ' . $row['resource-id']); + q('DELETE FROM photo WHERE `resource-id` = "' . $row['resource-id'] . '"'); + } } q('DROP TABLE `phototrack-temp`'); Logger::info('phototrack_tidy: deleted ' . count($rows) . ' photos'); From ba88e7e8cbb7221bb43c32d6ccea734116065f81 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 26 Apr 2021 20:38:00 +0200 Subject: [PATCH 51/96] Fix a typo --- phototrack/phototrack.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index 9f6c86d1..4437443b 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -253,7 +253,7 @@ function phototrack_tidy() { q('CREATE TABLE IF NOT EXISTS `phototrack-temp` (`resource-id` char(255) not null)'); q('INSERT INTO `phototrack-temp` SELECT DISTINCT(`resource-id`) FROM photo WHERE photo.`created` < DATE_SUB(NOW(), INTERVAL 2 MONTH)'); $rows = q('SELECT `phototrack-temp`.`resource-id` FROM `phototrack-temp` LEFT OUTER JOIN phototrack_photo_use ON (`phototrack-temp`.`resource-id` = phototrack_photo_use.`resource-id`) WHERE phototrack_photo_use.id IS NULL limit ' . /*$batch_size*/1000); - if (DBA::isResult($ms_item_ids)) { + if (DBA::isResult($rows)) { foreach ($rows as $row) { Logger::debug('phototrack: remove photo ' . $row['resource-id']); q('DELETE FROM photo WHERE `resource-id` = "' . 
$row['resource-id'] . '"'); From 0db41cdbd9f378cd3931c3ba97c76a0d7237273a Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 26 Apr 2021 20:38:48 +0200 Subject: [PATCH 52/96] fixed another obvious mistake --- phototrack/phototrack.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index 4437443b..712c0fc6 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -258,9 +258,9 @@ function phototrack_tidy() { Logger::debug('phototrack: remove photo ' . $row['resource-id']); q('DELETE FROM photo WHERE `resource-id` = "' . $row['resource-id'] . '"'); } + Logger::info('phototrack_tidy: deleted ' . count($rows) . ' photos'); } q('DROP TABLE `phototrack-temp`'); - Logger::info('phototrack_tidy: deleted ' . count($rows) . ' photos'); $rows = q('SELECT id FROM phototrack_photo_use WHERE checked < DATE_SUB(NOW(), INTERVAL 14 DAY)'); foreach ($rows as $row) { q('DELETE FROM phototrack_photo_use WHERE id = ' . $row['id']); From 71404d970f5f17d46f6997dbd9b3cb4132a8a878 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 26 Apr 2021 20:48:20 +0200 Subject: [PATCH 53/96] Detect an error in mailstream --- mailstream/mailstream.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mailstream/mailstream.php b/mailstream/mailstream.php index 542a1a42..7f7244ab 100644 --- a/mailstream/mailstream.php +++ b/mailstream/mailstream.php @@ -323,6 +323,11 @@ function mailstream_subject(array $item): string ); return DI::l10n()->t("Friendica post"); } + $contact = $r[0]; + if (!DBA::isResult($rows)) { + Logger::error('mailstream_subject no contact for item id ' . $item['id'] . ' plink ' . $item['plink'] . ' contact id ' . $item['contact-id'] . ' uid ' . 
$item['uid']); + return DI::l10n()->t("Friendica post"); + } if ($contact['network'] === 'dfrn') { return DI::l10n()->t("Friendica post"); } From 8f09825d2c5311af823a7fc419ccbce7897d67a4 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 26 Apr 2021 20:58:06 +0200 Subject: [PATCH 54/96] fix another stupid mistake --- mailstream/mailstream.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mailstream/mailstream.php b/mailstream/mailstream.php index 7f7244ab..3ecdadc4 100644 --- a/mailstream/mailstream.php +++ b/mailstream/mailstream.php @@ -324,7 +324,7 @@ function mailstream_subject(array $item): string return DI::l10n()->t("Friendica post"); } $contact = $r[0]; - if (!DBA::isResult($rows)) { + if (!DBA::isResult($r)) { Logger::error('mailstream_subject no contact for item id ' . $item['id'] . ' plink ' . $item['plink'] . ' contact id ' . $item['contact-id'] . ' uid ' . $item['uid']); return DI::l10n()->t("Friendica post"); } From 7c915e621f1832e5f04ea9ea0b12a163af9350f4 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 26 Apr 2021 20:59:06 +0200 Subject: [PATCH 55/96] fix another stupid mistake --- mailstream/mailstream.php | 1 + 1 file changed, 1 insertion(+) diff --git a/mailstream/mailstream.php b/mailstream/mailstream.php index 3ecdadc4..7ebae538 100644 --- a/mailstream/mailstream.php +++ b/mailstream/mailstream.php @@ -328,6 +328,7 @@ function mailstream_subject(array $item): string Logger::error('mailstream_subject no contact for item id ' . $item['id'] . ' plink ' . $item['plink'] . ' contact id ' . $item['contact-id'] . ' uid ' . 
$item['uid']); return DI::l10n()->t("Friendica post"); } + $contact = $r[0]; if ($contact['network'] === 'dfrn') { return DI::l10n()->t("Friendica post"); } From b883a449fdc3b1977c4ee1934d8f0f681e2fa0c0 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 26 Apr 2021 21:47:22 +0200 Subject: [PATCH 56/96] error checking in retriever --- retriever/retriever.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/retriever/retriever.php b/retriever/retriever.php index 92442c96..60c9a2a2 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -241,6 +241,9 @@ function retrieve_dataurl_resource($resource) { */ function retrieve_resource($resource) { $components = parse_url($resource['url']); + if (!$components) { + Logger::warning('retrieve_resource: URL ' . $resource['url'] . ' could not be parsed'); + } if ($components['scheme'] == "data") { return retrieve_dataurl_resource($resource); } From ff8516331f0ad6ccb01fa8398111f5a0d76e8699 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sat, 19 Jun 2021 19:22:37 +0200 Subject: [PATCH 57/96] sync with submitted --- mailstream/mailstream.php | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mailstream/mailstream.php b/mailstream/mailstream.php index 7ebae538..bdad523e 100644 --- a/mailstream/mailstream.php +++ b/mailstream/mailstream.php @@ -324,11 +324,6 @@ function mailstream_subject(array $item): string return DI::l10n()->t("Friendica post"); } $contact = $r[0]; - if (!DBA::isResult($r)) { - Logger::error('mailstream_subject no contact for item id ' . $item['id'] . ' plink ' . $item['plink'] . ' contact id ' . $item['contact-id'] . ' uid ' . 
$item['uid']); - return DI::l10n()->t("Friendica post"); - } - $contact = $r[0]; if ($contact['network'] === 'dfrn') { return DI::l10n()->t("Friendica post"); } From 6e12c60296947f962a202fc23d0aaa6a1b13e98e Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 15:43:15 +0100 Subject: [PATCH 58/96] switch to new way of executing SQL --- phototrack/phototrack.php | 30 ++++++++-------- publicise/publicise.php | 72 +++++++++++++++++++-------------------- 2 files changed, 51 insertions(+), 51 deletions(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index 712c0fc6..dcf07676 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -67,10 +67,10 @@ function phototrack_module() {} function phototrack_finished_row($table, $id) { $existing = DBA::selectFirst('phototrack_row_check', ['id'], ['table' => $table, 'row-id' => $id]); if (!is_bool($existing)) { - q("UPDATE phototrack_row_check SET checked = NOW() WHERE `table` = '$table' AND `row-id` = '$id'"); + DBA:e("UPDATE phototrack_row_check SET checked = NOW() WHERE `table` = '$table' AND `row-id` = '$id'"); } else { - q("INSERT INTO phototrack_row_check (`table`, `row-id`, `checked`) VALUES ('$table', '$id', NOW())"); + DBA:e("INSERT INTO phototrack_row_check (`table`, `row-id`, `checked`) VALUES ('$table', '$id', NOW())"); } } @@ -86,17 +86,17 @@ function phototrack_photo_use($photo, $table, $field, $id) { if (strlen($photo) != 32) { return; } - $r = q("SELECT `resource-id` FROM `photo` WHERE `resource-id` = '%s' LIMIT 1", DBA::escape($photo)); + $r = DBA:e("SELECT `resource-id` FROM `photo` WHERE `resource-id` = '%s' LIMIT 1", DBA::escape($photo)); if (!count($r)) { return; } $rid = $r[0]['resource-id']; - $existing = q("SELECT id FROM phototrack_photo_use WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); + $existing = DBA:e("SELECT id FROM phototrack_photo_use WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` 
= '$field' AND `row-id` = '$id'"); if (count($existing)) { - q("UPDATE phototrack_photo_use SET checked = NOW() WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); + DBA:e("UPDATE phototrack_photo_use SET checked = NOW() WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); } else { - q("INSERT INTO phototrack_photo_use (`resource-id`, `table`, `field`, `row-id`, `checked`) VALUES ('$rid', '$table', '$field', '$id', NOW())"); + DBA:e("INSERT INTO phototrack_photo_use (`resource-id`, `table`, `field`, `row-id`, `checked`) VALUES ('$rid', '$table', '$field', '$id', NOW())"); } } @@ -197,11 +197,11 @@ function phototrack_batch_size() { function phototrack_search_table($a, $table) { $batch_size = phototrack_batch_size(); - $rows = q("SELECT `$table`.* FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) ) ORDER BY phototrack_row_check.checked LIMIT $batch_size"); + $rows = DBA:e("SELECT `$table`.* FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) ) ORDER BY phototrack_row_check.checked LIMIT $batch_size"); foreach ($rows as $row) { phototrack_check_row($a, $table, $row); } - $r = q("SELECT COUNT(*) FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) )"); + $r = DBA:e("SELECT COUNT(*) FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( 
phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) )"); $remaining = $r[0]['COUNT(*)']; Logger::info('phototrack: searched ' . count($rows) . ' rows in table ' . $table . ', ' . $remaining . ' still remaining to search'); return $remaining; @@ -250,20 +250,20 @@ function phototrack_cron($a, $b) { function phototrack_tidy() { $batch_size = phototrack_batch_size(); - q('CREATE TABLE IF NOT EXISTS `phototrack-temp` (`resource-id` char(255) not null)'); - q('INSERT INTO `phototrack-temp` SELECT DISTINCT(`resource-id`) FROM photo WHERE photo.`created` < DATE_SUB(NOW(), INTERVAL 2 MONTH)'); - $rows = q('SELECT `phototrack-temp`.`resource-id` FROM `phototrack-temp` LEFT OUTER JOIN phototrack_photo_use ON (`phototrack-temp`.`resource-id` = phototrack_photo_use.`resource-id`) WHERE phototrack_photo_use.id IS NULL limit ' . /*$batch_size*/1000); + DBA:e('CREATE TABLE IF NOT EXISTS `phototrack-temp` (`resource-id` char(255) not null)'); + DBA:e('INSERT INTO `phototrack-temp` SELECT DISTINCT(`resource-id`) FROM photo WHERE photo.`created` < DATE_SUB(NOW(), INTERVAL 2 MONTH)'); + $rows = DBA:e('SELECT `phototrack-temp`.`resource-id` FROM `phototrack-temp` LEFT OUTER JOIN phototrack_photo_use ON (`phototrack-temp`.`resource-id` = phototrack_photo_use.`resource-id`) WHERE phototrack_photo_use.id IS NULL limit ' . /*$batch_size*/1000); if (DBA::isResult($rows)) { foreach ($rows as $row) { Logger::debug('phototrack: remove photo ' . $row['resource-id']); - q('DELETE FROM photo WHERE `resource-id` = "' . $row['resource-id'] . '"'); + DBA:e('DELETE FROM photo WHERE `resource-id` = "' . $row['resource-id'] . '"'); } Logger::info('phototrack_tidy: deleted ' . count($rows) . 
' photos'); } - q('DROP TABLE `phototrack-temp`'); - $rows = q('SELECT id FROM phototrack_photo_use WHERE checked < DATE_SUB(NOW(), INTERVAL 14 DAY)'); + DBA:e('DROP TABLE `phototrack-temp`'); + $rows = DBA:e('SELECT id FROM phototrack_photo_use WHERE checked < DATE_SUB(NOW(), INTERVAL 14 DAY)'); foreach ($rows as $row) { - q('DELETE FROM phototrack_photo_use WHERE id = ' . $row['id']); + DBA:e('DELETE FROM phototrack_photo_use WHERE id = ' . $row['id']); } Logger::info('phototrack_tidy: deleted ' . count($rows) . ' phototrack_photo_use rows'); } diff --git a/publicise/publicise.php b/publicise/publicise.php index a746d8af..98af1405 100644 --- a/publicise/publicise.php +++ b/publicise/publicise.php @@ -28,11 +28,11 @@ SELECT * OR (`reason` = 'publicise') ORDER BY `contact`.`name` EOF; - return q($query, intval(local_user())); + return DBA:e($query, intval(local_user())); } function publicise_get_user($uid) { - $r = q('SELECT * FROM `user` WHERE `uid` = %d', intval($uid)); + $r = DBA:e('SELECT * FROM `user` WHERE `uid` = %d', intval($uid)); if (count($r) != 1) { Logger::warning('Publicise: unexpected number of results for uid ' . $uid); } @@ -52,7 +52,7 @@ function publicise_addon_admin(&$a,&$o) { $comments = 1; $url = $v['url']; if ($enabled) { - $r = q('SELECT * FROM `user` WHERE `uid` = %d', intval($v['uid'])); + $r = DBA:e('SELECT * FROM `user` WHERE `uid` = %d', intval($v['uid'])); $expire = $r[0]['expire']; $url = $a->get_baseurl() . '/profile/' . $v['nick']; if ($r[0]['page-flags'] == PAGE_SOAPBOX) { @@ -138,7 +138,7 @@ function publicise_create_user($owner, $contact) { 'expire' => publicise_make_int($expire), ); Logger::debug('Publicise: creating user ' . print_r($newuser, true)); - $r = q("INSERT INTO `user` (`" + $r = DBA:e("INSERT INTO `user` (`" . implode("`, `", array_keys($newuser)) . "`) VALUES (" . 
implode(", ", array_values($newuser)) @@ -147,7 +147,7 @@ function publicise_create_user($owner, $contact) { Logger::warning('Publicise: create user failed'); return; } - $r = q('SELECT * FROM `user` WHERE `guid` = "%s"', DBA::escape($guid)); + $r = DBA:e('SELECT * FROM `user` WHERE `guid` = "%s"', DBA::escape($guid)); if (count($r) != 1) { Logger::warning('Publicise: unexpected number of uids returned'); return; @@ -179,21 +179,21 @@ function publicise_create_self_contact($a, $contact, $uid) { 'avatar-date' => publicise_make_string(datetime_convert()), 'closeness' => publicise_make_int(0), ); - $existing = q("SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1", intval($uid)); + $existing = DBA:e("SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1", intval($uid)); if (count($existing)) { $newcontact = $existing[0]; Logger::debug('Publicise: self contact already exists for user ' . $uid . ' id ' . $newcontact['id']); } else { Logger::debug('Publicise: create contact ' . print_r($newcontact, true)); - q("INSERT INTO `contact` (`" + DBA:e("INSERT INTO `contact` (`" . implode("`, `", array_keys($newcontact)) . "`) VALUES (" . implode(", ", array_values($newcontact)) . ")" ); - $results = q("SELECT `id` FROM `contact` WHERE `uid` = %d AND `self` = 1", intval($uid)); + $results = DBA:e("SELECT `id` FROM `contact` WHERE `uid` = %d AND `self` = 1", intval($uid)); if (count($results) != 1) { Logger::warning('Publicise: create self contact failed, will delete uid ' . $uid); - $r = q("DELETE FROM `user` WHERE `uid` = %d", intval($uid)); + $r = DBA:e("DELETE FROM `user` WHERE `uid` = %d", intval($uid)); return; } $newcontact = $results[0]; @@ -216,7 +216,7 @@ function publicise_create_profile($contact, $uid) { 'net-publish' => publicise_make_int(1), ); Logger::debug('Publicise: create profile ' . print_r($newprofile, true)); - $r = q("INSERT INTO `profile` (`" + $r = DBA:e("INSERT INTO `profile` (`" . implode("`, `", array_keys($newprofile)) . "`) VALUES (" . 
implode(", ", array_values($newprofile)) @@ -224,7 +224,7 @@ function publicise_create_profile($contact, $uid) { if (!$r) { Logger::warning('Publicise: create profile failed'); } - $newprofile = q('SELECT `id` FROM `profile` WHERE `uid` = %d AND `is-default` = 1', intval($uid)); + $newprofile = DBA:e('SELECT `id` FROM `profile` WHERE `uid` = %d AND `is-default` = 1', intval($uid)); if (count($newprofile) != 1) { Logger::warning('Publicise: create profile produced unexpected number of results'); return; @@ -243,15 +243,15 @@ function publicise_set_up_user($a, $contact, $owner) { if (!$self_contact) { notice(sprintf(t("Failed to create self contact for user \"%s\""), $contact['name']) . EOL); Logger::warning("Publicise: unable to create self contact, deleting user " . $user['uid']); - q('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid'])); + DBA:e('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid'])); return; } $profile = publicise_create_profile($contact, $user['uid']); if (!$profile) { notice(sprintf(t("Failed to create profile for user \"%s\""), $contact['name']) . EOL); Logger::warning("Publicise: unable to create profile, deleting user $uid contact $self_contact"); - q('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid'])); - q('DELETE FROM `contact` WHERE `id` = %d', intval($self_contact)); + DBA:e('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid'])); + DBA:e('DELETE FROM `contact` WHERE `id` = %d', intval($self_contact)); return; } return $user; @@ -267,13 +267,13 @@ function publicise($a, &$contact, &$owner) { // Check if we're changing our mind about a feed we earlier depublicised Logger::info('@@@ Publicise: ' . 'SELECT * FROM `user` WHERE `account_expires_on` != "0000-00-00 00:00:00" AND `nickname` = "' . $contact['nick'] . '" AND `email` = "' . $owner['email'] . '" AND `page-flags` in (' . intval(PAGE_COMMUNITY) . ', ' . intval(PAGE_SOAPBOX) . 
')'); - $existing = q('SELECT * FROM `user` WHERE `account_expires_on` != "0000-00-00 00:00:00" AND `nickname` = "%s" AND `email` = "%s" AND `page-flags` in (%d, %d)', + $existing = DBA:e('SELECT * FROM `user` WHERE `account_expires_on` != "0000-00-00 00:00:00" AND `nickname` = "%s" AND `email` = "%s" AND `page-flags` in (%d, %d)', DBA::escape($contact['nick']), DBA::escape($owner['email']), intval(PAGE_COMMUNITY), intval(PAGE_SOAPBOX)); if (count($existing) == 1) { Logger::info('@@@ Publicise: there is existing'); $owner = $existing[0]; - q('UPDATE `user` SET `account_expires_on` = "0000-00-00 00:00:00", `account_removed` = 0, `account_expired` = 0 WHERE `uid` = %d', intval($owner['uid'])); - q('UPDATE `profile` SET `publish` = 1, `net-publish` = 1 WHERE `uid` = %d AND `is-default` = 1', intval($owner['uid'])); + DBA:e('UPDATE `user` SET `account_expires_on` = "0000-00-00 00:00:00", `account_removed` = 0, `account_expired` = 0 WHERE `uid` = %d', intval($owner['uid'])); + DBA:e('UPDATE `profile` SET `publish` = 1, `net-publish` = 1 WHERE `uid` = %d AND `is-default` = 1', intval($owner['uid'])); Logger::debug('Publicise: recycled previous user ' . $owner['uid']); } else { @@ -286,19 +286,19 @@ function publicise($a, &$contact, &$owner) { } Logger::info('Publicise: new contact user is ' . $owner['uid']); - $r = q("UPDATE `contact` SET `uid` = %d, `reason` = 'publicise', `hidden` = 1 WHERE id = %d", intval($owner['uid']), intval($contact['id'])); + $r = DBA:e("UPDATE `contact` SET `uid` = %d, `reason` = 'publicise', `hidden` = 1 WHERE id = %d", intval($owner['uid']), intval($contact['id'])); if (!$r) { Logger::warning('Publicise: update contact failed, user is probably in a bad state ' . 
$user['uid']); } $contact['uid'] = $owner['uid']; $contact['reason'] = 'publicise'; $contact['hidden'] = 1; - $r = q("UPDATE `item` SET `uid` = %d, type = 'wall', wall = 1, private = 0 WHERE `contact-id` = %d", + $r = DBA:e("UPDATE `item` SET `uid` = %d, type = 'wall', wall = 1, private = 0 WHERE `contact-id` = %d", intval($owner['uid']), intval($contact['id'])); Logger::debug('Publicise: moved items from contact ' . $contact['id'] . ' to uid ' . $owner['uid']); // Update the retriever config - $r = q("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d", + $r = DBA:e("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d", intval($owner['uid']), intval($contact['id'])); info(sprintf(t("Moved feed \"%s\" to dedicated account"), $contact['name']) . EOL); @@ -306,7 +306,7 @@ function publicise($a, &$contact, &$owner) { } function publicise_self_contact($uid) { - $r = q('SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1', intval($uid)); + $r = DBA:e('SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1', intval($uid)); if (count($r) != 1) { Logger::warning('Publicise: unexpected number of self contacts for user ' . $uid); return; @@ -330,7 +330,7 @@ function depublicise($a, $contact, $user) { // If the local_user() is subscribed to the feed, take ownership // of the feed and all its items and photos. Otherwise they will // be deleted when the account expires. - $r = q('SELECT * FROM `contact` WHERE `uid` = %d AND `url` = "%s"', + $r = DBA:e('SELECT * FROM `contact` WHERE `uid` = %d AND `url` = "%s"', intval(local_user()), DBA::escape($self_contact['url'])); if (count($r)) { // Delete the contact to the feed user and any @@ -338,32 +338,32 @@ function depublicise($a, $contact, $user) { // which will be brought back into the local_user's feed along // with the feed contact itself. 
foreach ($r as $my_contact) { - q('DELETE FROM `item` WHERE `contact-id` = %d', intval($my_contact['id'])); - q('DELETE FROM `contact` WHERE `id` = %d', intval($my_contact['id'])); + DBA:e('DELETE FROM `item` WHERE `contact-id` = %d', intval($my_contact['id'])); + DBA:e('DELETE FROM `contact` WHERE `id` = %d', intval($my_contact['id'])); } // Move the feed contact to local_user. Existing items stay // attached to the original feed contact, but must have their uid // updated. Also update the fields we scribbled over in // publicise_post_remote_hook. - q('UPDATE `contact` SET `uid` = %d, `reason` = "", hidden = 0 WHERE id = %d', + DBA:e('UPDATE `contact` SET `uid` = %d, `reason` = "", hidden = 0 WHERE id = %d', intval(local_user()), intval($contact['id'])); - q('UPDATE `item` SET `uid` = %d, `wall` = 0, `type` = "remote", `private` = 2 WHERE `contact-id` = %d', + DBA:e('UPDATE `item` SET `uid` = %d, `wall` = 0, `type` = "remote", `private` = 2 WHERE `contact-id` = %d', intval(local_user()), intval($contact['id'])); // Take ownership of any photos created by the feed user - q('UPDATE `photo` SET `uid` = %d WHERE `uid` = %d', + DBA:e('UPDATE `photo` SET `uid` = %d WHERE `uid` = %d', intval(local_user()), intval($user['uid'])); // Update the retriever config - $r = q("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d", + $r = DBA:e("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d", intval($owner['uid']), intval($contact['id'])); } // Set the account to removed and expired right now. 
It will be cleaned up by cron after 3 days, giving a chance to change your mind - q('UPDATE `user` SET `account_removed` = 1, `account_expired` = 1, `account_expires_on` = UTC_TIMESTAMP() WHERE `uid` = %d', + DBA:e('UPDATE `user` SET `account_removed` = 1, `account_expired` = 1, `account_expires_on` = UTC_TIMESTAMP() WHERE `uid` = %d', intval($user['uid'])); - q('UPDATE `profile` SET `publish` = 0, `net-publish` = 0 WHERE `uid` = %d AND `is-default` = 1', intval($user['uid'])); + DBA:e('UPDATE `profile` SET `publish` = 0, `net-publish` = 0 WHERE `uid` = %d AND `is-default` = 1', intval($user['uid'])); info(sprintf(t("Removed dedicated account for feed \"%s\""), $contact['name']) . EOL); } @@ -393,22 +393,22 @@ function publicise_addon_admin_post ($a) { } } if ($_POST['publicise-expire-' . $contact['id']] != $user['expire']) { - q('UPDATE `user` SET `expire` = %d WHERE `uid` = %d', + DBA:e('UPDATE `user` SET `expire` = %d WHERE `uid` = %d', intval($_POST['publicise-expire-' . $contact['id']]), intval($user['uid'])); } if ($_POST['publicise-comments-' . 
$contact['id']]) { if ($user['page-flags'] != PAGE_COMMUNITY) { - q('UPDATE `user` SET `page-flags` = %d WHERE `uid` = %d', + DBA:e('UPDATE `user` SET `page-flags` = %d WHERE `uid` = %d', intval(PAGE_COMMUNITY), intval($user['uid'])); - q('UPDATE `contact` SET `rel` = %d WHERE `uid` = %d AND `network` = "dfrn"', + DBA:e('UPDATE `contact` SET `rel` = %d WHERE `uid` = %d AND `network` = "dfrn"', intval(CONTACT_IS_SHARING), intval($user['uid'])); } } else { if ($user['page-flags'] != PAGE_SOAPBOX) { - q('UPDATE `user` SET `page-flags` = %d WHERE `uid` = %d', + DBA:e('UPDATE `user` SET `page-flags` = %d WHERE `uid` = %d', intval(PAGE_SOAPBOX), intval($user['uid'])); - q('UPDATE `contact` SET `rel` = %d WHERE `uid` = %d AND `network` = "dfrn"', + DBA:e('UPDATE `contact` SET `rel` = %d WHERE `uid` = %d AND `network` = "dfrn"', intval(CONTACT_IS_FOLLOWER), intval($user['uid'])); } } @@ -417,7 +417,7 @@ function publicise_addon_admin_post ($a) { } function publicise_post_remote_hook(&$a, &$item) { - $r1 = q("SELECT `uid` FROM `contact` WHERE `id` = %d AND `reason` = 'publicise'", intval($item['contact-id'])); + $r1 = DBA:e("SELECT `uid` FROM `contact` WHERE `id` = %d AND `reason` = 'publicise'", intval($item['contact-id'])); if (!$r1) { return; } From 1f181edf6f58970a898bf2d42342c66c5e880157 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 16:15:59 +0100 Subject: [PATCH 59/96] switch to new way of executing SQL --- phototrack/phototrack.php | 30 ++++++++-------- publicise/publicise.php | 72 +++++++++++++++++++-------------------- 2 files changed, 51 insertions(+), 51 deletions(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index dcf07676..f8a3f704 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -67,10 +67,10 @@ function phototrack_module() {} function phototrack_finished_row($table, $id) { $existing = DBA::selectFirst('phototrack_row_check', ['id'], ['table' => $table, 'row-id' => $id]); if 
(!is_bool($existing)) { - DBA:e("UPDATE phototrack_row_check SET checked = NOW() WHERE `table` = '$table' AND `row-id` = '$id'"); + DBA::e("UPDATE phototrack_row_check SET checked = NOW() WHERE `table` = '$table' AND `row-id` = '$id'"); } else { - DBA:e("INSERT INTO phototrack_row_check (`table`, `row-id`, `checked`) VALUES ('$table', '$id', NOW())"); + DBA::e("INSERT INTO phototrack_row_check (`table`, `row-id`, `checked`) VALUES ('$table', '$id', NOW())"); } } @@ -86,17 +86,17 @@ function phototrack_photo_use($photo, $table, $field, $id) { if (strlen($photo) != 32) { return; } - $r = DBA:e("SELECT `resource-id` FROM `photo` WHERE `resource-id` = '%s' LIMIT 1", DBA::escape($photo)); + $r = DBA::e("SELECT `resource-id` FROM `photo` WHERE `resource-id` = '%s' LIMIT 1", DBA::escape($photo)); if (!count($r)) { return; } $rid = $r[0]['resource-id']; - $existing = DBA:e("SELECT id FROM phototrack_photo_use WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); + $existing = DBA::e("SELECT id FROM phototrack_photo_use WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); if (count($existing)) { - DBA:e("UPDATE phototrack_photo_use SET checked = NOW() WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); + DBA::e("UPDATE phototrack_photo_use SET checked = NOW() WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); } else { - DBA:e("INSERT INTO phototrack_photo_use (`resource-id`, `table`, `field`, `row-id`, `checked`) VALUES ('$rid', '$table', '$field', '$id', NOW())"); + DBA::e("INSERT INTO phototrack_photo_use (`resource-id`, `table`, `field`, `row-id`, `checked`) VALUES ('$rid', '$table', '$field', '$id', NOW())"); } } @@ -197,11 +197,11 @@ function phototrack_batch_size() { function phototrack_search_table($a, $table) { $batch_size = phototrack_batch_size(); - $rows = DBA:e("SELECT `$table`.* 
FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) ) ORDER BY phototrack_row_check.checked LIMIT $batch_size"); + $rows = DBA::e("SELECT `$table`.* FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) ) ORDER BY phototrack_row_check.checked LIMIT $batch_size"); foreach ($rows as $row) { phototrack_check_row($a, $table, $row); } - $r = DBA:e("SELECT COUNT(*) FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) )"); + $r = DBA::e("SELECT COUNT(*) FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) )"); $remaining = $r[0]['COUNT(*)']; Logger::info('phototrack: searched ' . count($rows) . ' rows in table ' . $table . ', ' . $remaining . 
' still remaining to search'); return $remaining; @@ -250,20 +250,20 @@ function phototrack_cron($a, $b) { function phototrack_tidy() { $batch_size = phototrack_batch_size(); - DBA:e('CREATE TABLE IF NOT EXISTS `phototrack-temp` (`resource-id` char(255) not null)'); - DBA:e('INSERT INTO `phototrack-temp` SELECT DISTINCT(`resource-id`) FROM photo WHERE photo.`created` < DATE_SUB(NOW(), INTERVAL 2 MONTH)'); - $rows = DBA:e('SELECT `phototrack-temp`.`resource-id` FROM `phototrack-temp` LEFT OUTER JOIN phototrack_photo_use ON (`phototrack-temp`.`resource-id` = phototrack_photo_use.`resource-id`) WHERE phototrack_photo_use.id IS NULL limit ' . /*$batch_size*/1000); + DBA::e('CREATE TABLE IF NOT EXISTS `phototrack-temp` (`resource-id` char(255) not null)'); + DBA::e('INSERT INTO `phototrack-temp` SELECT DISTINCT(`resource-id`) FROM photo WHERE photo.`created` < DATE_SUB(NOW(), INTERVAL 2 MONTH)'); + $rows = DBA::e('SELECT `phototrack-temp`.`resource-id` FROM `phototrack-temp` LEFT OUTER JOIN phototrack_photo_use ON (`phototrack-temp`.`resource-id` = phototrack_photo_use.`resource-id`) WHERE phototrack_photo_use.id IS NULL limit ' . /*$batch_size*/1000); if (DBA::isResult($rows)) { foreach ($rows as $row) { Logger::debug('phototrack: remove photo ' . $row['resource-id']); - DBA:e('DELETE FROM photo WHERE `resource-id` = "' . $row['resource-id'] . '"'); + DBA::e('DELETE FROM photo WHERE `resource-id` = "' . $row['resource-id'] . '"'); } Logger::info('phototrack_tidy: deleted ' . count($rows) . ' photos'); } - DBA:e('DROP TABLE `phototrack-temp`'); - $rows = DBA:e('SELECT id FROM phototrack_photo_use WHERE checked < DATE_SUB(NOW(), INTERVAL 14 DAY)'); + DBA::e('DROP TABLE `phototrack-temp`'); + $rows = DBA::e('SELECT id FROM phototrack_photo_use WHERE checked < DATE_SUB(NOW(), INTERVAL 14 DAY)'); foreach ($rows as $row) { - DBA:e('DELETE FROM phototrack_photo_use WHERE id = ' . $row['id']); + DBA::e( 'DELETE FROM phototrack_photo_use WHERE id = ' . 
$row['id']); } Logger::info('phototrack_tidy: deleted ' . count($rows) . ' phototrack_photo_use rows'); } diff --git a/publicise/publicise.php b/publicise/publicise.php index 98af1405..a5dc2807 100644 --- a/publicise/publicise.php +++ b/publicise/publicise.php @@ -28,11 +28,11 @@ SELECT * OR (`reason` = 'publicise') ORDER BY `contact`.`name` EOF; - return DBA:e($query, intval(local_user())); + return DBA::e($query, intval(local_user())); } function publicise_get_user($uid) { - $r = DBA:e('SELECT * FROM `user` WHERE `uid` = %d', intval($uid)); + $r = DBA::e('SELECT * FROM `user` WHERE `uid` = %d', intval($uid)); if (count($r) != 1) { Logger::warning('Publicise: unexpected number of results for uid ' . $uid); } @@ -52,7 +52,7 @@ function publicise_addon_admin(&$a,&$o) { $comments = 1; $url = $v['url']; if ($enabled) { - $r = DBA:e('SELECT * FROM `user` WHERE `uid` = %d', intval($v['uid'])); + $r = DBA::e('SELECT * FROM `user` WHERE `uid` = %d', intval($v['uid'])); $expire = $r[0]['expire']; $url = $a->get_baseurl() . '/profile/' . $v['nick']; if ($r[0]['page-flags'] == PAGE_SOAPBOX) { @@ -138,7 +138,7 @@ function publicise_create_user($owner, $contact) { 'expire' => publicise_make_int($expire), ); Logger::debug('Publicise: creating user ' . print_r($newuser, true)); - $r = DBA:e("INSERT INTO `user` (`" + $r = DBA::e("INSERT INTO `user` (`" . implode("`, `", array_keys($newuser)) . "`) VALUES (" . 
implode(", ", array_values($newuser)) @@ -147,7 +147,7 @@ function publicise_create_user($owner, $contact) { Logger::warning('Publicise: create user failed'); return; } - $r = DBA:e('SELECT * FROM `user` WHERE `guid` = "%s"', DBA::escape($guid)); + $r = DBA::e('SELECT * FROM `user` WHERE `guid` = "%s"', DBA::escape($guid)); if (count($r) != 1) { Logger::warning('Publicise: unexpected number of uids returned'); return; @@ -179,21 +179,21 @@ function publicise_create_self_contact($a, $contact, $uid) { 'avatar-date' => publicise_make_string(datetime_convert()), 'closeness' => publicise_make_int(0), ); - $existing = DBA:e("SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1", intval($uid)); + $existing = DBA::e("SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1", intval($uid)); if (count($existing)) { $newcontact = $existing[0]; Logger::debug('Publicise: self contact already exists for user ' . $uid . ' id ' . $newcontact['id']); } else { Logger::debug('Publicise: create contact ' . print_r($newcontact, true)); - DBA:e("INSERT INTO `contact` (`" + DBA::e("INSERT INTO `contact` (`" . implode("`, `", array_keys($newcontact)) . "`) VALUES (" . implode(", ", array_values($newcontact)) . ")" ); - $results = DBA:e("SELECT `id` FROM `contact` WHERE `uid` = %d AND `self` = 1", intval($uid)); + $results = DBA::e("SELECT `id` FROM `contact` WHERE `uid` = %d AND `self` = 1", intval($uid)); if (count($results) != 1) { Logger::warning('Publicise: create self contact failed, will delete uid ' . $uid); - $r = DBA:e("DELETE FROM `user` WHERE `uid` = %d", intval($uid)); + $r = DBA::e("DELETE FROM `user` WHERE `uid` = %d", intval($uid)); return; } $newcontact = $results[0]; @@ -216,7 +216,7 @@ function publicise_create_profile($contact, $uid) { 'net-publish' => publicise_make_int(1), ); Logger::debug('Publicise: create profile ' . print_r($newprofile, true)); - $r = DBA:e("INSERT INTO `profile` (`" + $r = DBA::e("INSERT INTO `profile` (`" . 
implode("`, `", array_keys($newprofile)) . "`) VALUES (" . implode(", ", array_values($newprofile)) @@ -224,7 +224,7 @@ function publicise_create_profile($contact, $uid) { if (!$r) { Logger::warning('Publicise: create profile failed'); } - $newprofile = DBA:e('SELECT `id` FROM `profile` WHERE `uid` = %d AND `is-default` = 1', intval($uid)); + $newprofile = DBA::e('SELECT `id` FROM `profile` WHERE `uid` = %d AND `is-default` = 1', intval($uid)); if (count($newprofile) != 1) { Logger::warning('Publicise: create profile produced unexpected number of results'); return; @@ -243,15 +243,15 @@ function publicise_set_up_user($a, $contact, $owner) { if (!$self_contact) { notice(sprintf(t("Failed to create self contact for user \"%s\""), $contact['name']) . EOL); Logger::warning("Publicise: unable to create self contact, deleting user " . $user['uid']); - DBA:e('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid'])); + DBA::e('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid'])); return; } $profile = publicise_create_profile($contact, $user['uid']); if (!$profile) { notice(sprintf(t("Failed to create profile for user \"%s\""), $contact['name']) . EOL); Logger::warning("Publicise: unable to create profile, deleting user $uid contact $self_contact"); - DBA:e('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid'])); - DBA:e('DELETE FROM `contact` WHERE `id` = %d', intval($self_contact)); + DBA::e('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid'])); + DBA::e('DELETE FROM `contact` WHERE `id` = %d', intval($self_contact)); return; } return $user; @@ -267,13 +267,13 @@ function publicise($a, &$contact, &$owner) { // Check if we're changing our mind about a feed we earlier depublicised Logger::info('@@@ Publicise: ' . 'SELECT * FROM `user` WHERE `account_expires_on` != "0000-00-00 00:00:00" AND `nickname` = "' . $contact['nick'] . '" AND `email` = "' . $owner['email'] . '" AND `page-flags` in (' . intval(PAGE_COMMUNITY) . ', ' . intval(PAGE_SOAPBOX) . 
')'); - $existing = DBA:e('SELECT * FROM `user` WHERE `account_expires_on` != "0000-00-00 00:00:00" AND `nickname` = "%s" AND `email` = "%s" AND `page-flags` in (%d, %d)', + $existing = DBA::e('SELECT * FROM `user` WHERE `account_expires_on` != "0000-00-00 00:00:00" AND `nickname` = "%s" AND `email` = "%s" AND `page-flags` in (%d, %d)', DBA::escape($contact['nick']), DBA::escape($owner['email']), intval(PAGE_COMMUNITY), intval(PAGE_SOAPBOX)); if (count($existing) == 1) { Logger::info('@@@ Publicise: there is existing'); $owner = $existing[0]; - DBA:e('UPDATE `user` SET `account_expires_on` = "0000-00-00 00:00:00", `account_removed` = 0, `account_expired` = 0 WHERE `uid` = %d', intval($owner['uid'])); - DBA:e('UPDATE `profile` SET `publish` = 1, `net-publish` = 1 WHERE `uid` = %d AND `is-default` = 1', intval($owner['uid'])); + DBA::e('UPDATE `user` SET `account_expires_on` = "0000-00-00 00:00:00", `account_removed` = 0, `account_expired` = 0 WHERE `uid` = %d', intval($owner['uid'])); + DBA::e('UPDATE `profile` SET `publish` = 1, `net-publish` = 1 WHERE `uid` = %d AND `is-default` = 1', intval($owner['uid'])); Logger::debug('Publicise: recycled previous user ' . $owner['uid']); } else { @@ -286,19 +286,19 @@ function publicise($a, &$contact, &$owner) { } Logger::info('Publicise: new contact user is ' . $owner['uid']); - $r = DBA:e("UPDATE `contact` SET `uid` = %d, `reason` = 'publicise', `hidden` = 1 WHERE id = %d", intval($owner['uid']), intval($contact['id'])); + $r = DBA::e("UPDATE `contact` SET `uid` = %d, `reason` = 'publicise', `hidden` = 1 WHERE id = %d", intval($owner['uid']), intval($contact['id'])); if (!$r) { Logger::warning('Publicise: update contact failed, user is probably in a bad state ' . 
$user['uid']); } $contact['uid'] = $owner['uid']; $contact['reason'] = 'publicise'; $contact['hidden'] = 1; - $r = DBA:e("UPDATE `item` SET `uid` = %d, type = 'wall', wall = 1, private = 0 WHERE `contact-id` = %d", + $r = DBA::e("UPDATE `item` SET `uid` = %d, type = 'wall', wall = 1, private = 0 WHERE `contact-id` = %d", intval($owner['uid']), intval($contact['id'])); Logger::debug('Publicise: moved items from contact ' . $contact['id'] . ' to uid ' . $owner['uid']); // Update the retriever config - $r = DBA:e("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d", + $r = DBA::e("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d", intval($owner['uid']), intval($contact['id'])); info(sprintf(t("Moved feed \"%s\" to dedicated account"), $contact['name']) . EOL); @@ -306,7 +306,7 @@ function publicise($a, &$contact, &$owner) { } function publicise_self_contact($uid) { - $r = DBA:e('SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1', intval($uid)); + $r = DBA::e('SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1', intval($uid)); if (count($r) != 1) { Logger::warning('Publicise: unexpected number of self contacts for user ' . $uid); return; @@ -330,7 +330,7 @@ function depublicise($a, $contact, $user) { // If the local_user() is subscribed to the feed, take ownership // of the feed and all its items and photos. Otherwise they will // be deleted when the account expires. - $r = DBA:e('SELECT * FROM `contact` WHERE `uid` = %d AND `url` = "%s"', + $r = DBA::e('SELECT * FROM `contact` WHERE `uid` = %d AND `url` = "%s"', intval(local_user()), DBA::escape($self_contact['url'])); if (count($r)) { // Delete the contact to the feed user and any @@ -338,32 +338,32 @@ function depublicise($a, $contact, $user) { // which will be brought back into the local_user's feed along // with the feed contact itself. 
foreach ($r as $my_contact) { - DBA:e('DELETE FROM `item` WHERE `contact-id` = %d', intval($my_contact['id'])); - DBA:e('DELETE FROM `contact` WHERE `id` = %d', intval($my_contact['id'])); + DBA::e('DELETE FROM `item` WHERE `contact-id` = %d', intval($my_contact['id'])); + DBA::e('DELETE FROM `contact` WHERE `id` = %d', intval($my_contact['id'])); } // Move the feed contact to local_user. Existing items stay // attached to the original feed contact, but must have their uid // updated. Also update the fields we scribbled over in // publicise_post_remote_hook. - DBA:e('UPDATE `contact` SET `uid` = %d, `reason` = "", hidden = 0 WHERE id = %d', + DBA::e('UPDATE `contact` SET `uid` = %d, `reason` = "", hidden = 0 WHERE id = %d', intval(local_user()), intval($contact['id'])); - DBA:e('UPDATE `item` SET `uid` = %d, `wall` = 0, `type` = "remote", `private` = 2 WHERE `contact-id` = %d', + DBA::e('UPDATE `item` SET `uid` = %d, `wall` = 0, `type` = "remote", `private` = 2 WHERE `contact-id` = %d', intval(local_user()), intval($contact['id'])); // Take ownership of any photos created by the feed user - DBA:e('UPDATE `photo` SET `uid` = %d WHERE `uid` = %d', + DBA::e('UPDATE `photo` SET `uid` = %d WHERE `uid` = %d', intval(local_user()), intval($user['uid'])); // Update the retriever config - $r = DBA:e("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d", + $r = DBA::e("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d", intval($owner['uid']), intval($contact['id'])); } // Set the account to removed and expired right now. 
It will be cleaned up by cron after 3 days, giving a chance to change your mind - DBA:e('UPDATE `user` SET `account_removed` = 1, `account_expired` = 1, `account_expires_on` = UTC_TIMESTAMP() WHERE `uid` = %d', + DBA::e('UPDATE `user` SET `account_removed` = 1, `account_expired` = 1, `account_expires_on` = UTC_TIMESTAMP() WHERE `uid` = %d', intval($user['uid'])); - DBA:e('UPDATE `profile` SET `publish` = 0, `net-publish` = 0 WHERE `uid` = %d AND `is-default` = 1', intval($user['uid'])); + DBA::e('UPDATE `profile` SET `publish` = 0, `net-publish` = 0 WHERE `uid` = %d AND `is-default` = 1', intval($user['uid'])); info(sprintf(t("Removed dedicated account for feed \"%s\""), $contact['name']) . EOL); } @@ -393,22 +393,22 @@ function publicise_addon_admin_post ($a) { } } if ($_POST['publicise-expire-' . $contact['id']] != $user['expire']) { - DBA:e('UPDATE `user` SET `expire` = %d WHERE `uid` = %d', + DBA::e('UPDATE `user` SET `expire` = %d WHERE `uid` = %d', intval($_POST['publicise-expire-' . $contact['id']]), intval($user['uid'])); } if ($_POST['publicise-comments-' . 
$contact['id']]) { if ($user['page-flags'] != PAGE_COMMUNITY) { - DBA:e('UPDATE `user` SET `page-flags` = %d WHERE `uid` = %d', + DBA::e('UPDATE `user` SET `page-flags` = %d WHERE `uid` = %d', intval(PAGE_COMMUNITY), intval($user['uid'])); - DBA:e('UPDATE `contact` SET `rel` = %d WHERE `uid` = %d AND `network` = "dfrn"', + DBA::e('UPDATE `contact` SET `rel` = %d WHERE `uid` = %d AND `network` = "dfrn"', intval(CONTACT_IS_SHARING), intval($user['uid'])); } } else { if ($user['page-flags'] != PAGE_SOAPBOX) { - DBA:e('UPDATE `user` SET `page-flags` = %d WHERE `uid` = %d', + DBA::e('UPDATE `user` SET `page-flags` = %d WHERE `uid` = %d', intval(PAGE_SOAPBOX), intval($user['uid'])); - DBA:e('UPDATE `contact` SET `rel` = %d WHERE `uid` = %d AND `network` = "dfrn"', + DBA::e('UPDATE `contact` SET `rel` = %d WHERE `uid` = %d AND `network` = "dfrn"', intval(CONTACT_IS_FOLLOWER), intval($user['uid'])); } } @@ -417,7 +417,7 @@ function publicise_addon_admin_post ($a) { } function publicise_post_remote_hook(&$a, &$item) { - $r1 = DBA:e("SELECT `uid` FROM `contact` WHERE `id` = %d AND `reason` = 'publicise'", intval($item['contact-id'])); + $r1 = DBA::e("SELECT `uid` FROM `contact` WHERE `id` = %d AND `reason` = 'publicise'", intval($item['contact-id'])); if (!$r1) { return; } From 5442886f4cde77aec19039fe550e545e2c956b93 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 16:22:45 +0100 Subject: [PATCH 60/96] switch to new way of executing SQL --- retriever/retriever.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 60c9a2a2..5e7a783e 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -172,7 +172,7 @@ function retriever_retrieve_items($max_items) { */ function retriever_clean_up_completed_resources($max_items) { // TODO: figure out how to do this with DBA module - $r = q('SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM 
retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d', + $r = DBA::e('SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d', intval($max_items)); if (!$r) { $r = array(); @@ -209,10 +209,10 @@ function retriever_tidy() { DBA::delete('retriever_resource', ['completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)']); DBA::delete('retriever_resource', ['completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)']); - $r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); + $r = DBA::e("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); Logger::info('retriever_tidy: found ' . count($r) . 
' retriever_items with no retriever_resource'); foreach ($r as $rr) { - q('DELETE FROM retriever_item WHERE id = %d', intval($rr['id'])); + DBA::e('DELETE FROM retriever_item WHERE id = %d', intval($rr['id'])); } } From a5241698c8a3da2d534a2d1dfd2e996512df0d75 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 16:26:56 +0100 Subject: [PATCH 61/96] new style of http request --- retriever/retriever.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 5e7a783e..cd8aacea 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -277,7 +277,7 @@ function retrieve_resource($resource) { $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); file_put_contents($cookiejar, $rule_data['cookiedata']); } - $fetch_result = DI::httpRequest()->fetchFull($resource['url'], $redirects, '', $cookiejar); + $fetch_result = DI::httpClient()->fetchFull($resource['url'], $redirects, '', $cookiejar); if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) { $retriever_rule['data']['cookiedata'] = file_get_contents($cookiejar); DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], $retriever_rule); From 194f987afde910a1af2492b8c4e71f6fdc275a8d Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 16:39:12 +0100 Subject: [PATCH 62/96] attempt to handle one error --- retriever/retriever.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/retriever/retriever.php b/retriever/retriever.php index cd8aacea..263f5842 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -639,6 +639,9 @@ function retriever_extract($doc, $retriever) { */ function retriever_globalise_urls($doc, $resource) { $components = parse_url($resource['redirect-url']); + if (!array_key_exists('scheme', $components) || !array_key_exists('host', $components) || !array_key_exists('path', $components)) { + 
return $doc; + } $rooturl = $components['scheme'] . "://" . $components['host']; $dirurl = $rooturl . dirname($components['path']) . "/"; $params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl); From c720283533c39e42d7c19a3ad84d17a14f2623ce Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 16:44:54 +0100 Subject: [PATCH 63/96] perhaps it should be this style --- publicise/publicise.php | 421 +--------------------------------------- retriever/retriever.php | 4 +- 2 files changed, 3 insertions(+), 422 deletions(-) diff --git a/publicise/publicise.php b/publicise/publicise.php index a5dc2807..d2dbca24 100644 --- a/publicise/publicise.php +++ b/publicise/publicise.php @@ -1,423 +1,4 @@ - - */ - -use Friendica\Core\Addon; -use Friendica\Core\Logger; -use Friendica\Core\Renderer; -use Friendica\Database\DBA; - -function publicise_install() { - Addon::registerHook('post_remote', 'addon/publicise/publicise.php', 'publicise_post_remote_hook'); -} - -function publicise_uninstall() { - Addon::unregisterHook('post_remote', 'addon/publicise/publicise.php', 'publicise_post_remote_hook'); - Addon::unregisterHook('post_remote_end', 'addon/publicise/publicise.php', 'publicise_post_remote_end_hook'); -} - -function publicise_get_contacts() { - $query = <<$v) { - $enabled = ($v['reason'] === 'publicise') ? 1 : NULL; - $expire = 30; - $comments = 1; - $url = $v['url']; - if ($enabled) { - $r = DBA::e('SELECT * FROM `user` WHERE `uid` = %d', intval($v['uid'])); - $expire = $r[0]['expire']; - $url = $a->get_baseurl() . '/profile/' . $v['nick']; - if ($r[0]['page-flags'] == PAGE_SOAPBOX) { - $comments = NULL; - } - if ($r[0]['account_expired']) { - $enabled = NULL; - } - } - $contacts[$k]['enabled'] = array('publicise-enabled-' . $v['id'], NULL, $enabled); - $contacts[$k]['comments'] = array('publicise-comments-' . 
$v['id'], NULL, $comments); - $contacts[$k]['expire'] = $expire; - $contacts[$k]['url'] = $url; - } - $template = Renderer::getMarkupTemplate('admin.tpl', 'addon/publicise/'); - $o .= Renderer::replaceMacros($template, array( - '$feeds' => $contacts, - '$feed_t' => DI::l10n()->t('Feed'), - '$publicised_t' => DI::l10n()->t('Publicised'), - '$comments_t' => DI::l10n()->t('Allow Comments/Likes'), - '$expire_t' => DI::l10n()->t('Expire Articles After (Days)'), - '$submit_t' => DI::l10n()->t('Submit'))); -} - -function publicise_make_string($in) { - return "'" . DBA::escape($in) . "'"; -} - -function publicise_make_int($in) { - return intval($in) ? $in : 0; -} - -function publicise_create_user($owner, $contact) { - - $nick = $contact['nick']; - if (!$nick) { - notice(sprintf(t("Can't publicise feed \"%s\" because it doesn't have a nickname"), $contact['name']) . EOL); - return; - } - Logger::info('Publicise: create user, beginning key generation...'); - $res=openssl_pkey_new(array( - 'digest_alg' => 'sha1', - 'private_key_bits' => 4096, - 'encrypt_key' => false )); - $prvkey = ''; - openssl_pkey_export($res, $prvkey); - $pkey = openssl_pkey_get_details($res); - $pubkey = $pkey["key"]; - $sres=openssl_pkey_new(array( - 'digest_alg' => 'sha1', - 'private_key_bits' => 512, - 'encrypt_key' => false )); - $sprvkey = ''; - openssl_pkey_export($sres, $sprvkey); - $spkey = openssl_pkey_get_details($sres); - $spubkey = $spkey["key"]; - $guid = generate_user_guid(); - - $newuser = array( - 'guid' => publicise_make_string($guid), - 'username' => publicise_make_string($contact['name']), - 'password' => publicise_make_string($owner['password']), - 'nickname' => publicise_make_string($contact['nick']), - 'email' => publicise_make_string($owner['email']), - 'openid' => publicise_make_string($owner['openid']), - 'timezone' => publicise_make_string($owner['timezone']), - 'language' => publicise_make_string($owner['language']), - 'register_date' => 
publicise_make_string(datetime_convert()), - 'default-location' => publicise_make_string($owner['default-location']), - 'allow_location' => publicise_make_string($owner['allow_location']), - 'theme' => publicise_make_string($owner['theme']), - 'pubkey' => publicise_make_string($pubkey), - 'prvkey' => publicise_make_string($prvkey), - 'spubkey' => publicise_make_string($spubkey), - 'sprvkey' => publicise_make_string($sprvkey), - 'verified' => publicise_make_int($owner['verified']), - 'blocked' => publicise_make_int(0), - 'blockwall' => publicise_make_int(1), - 'hidewall' => publicise_make_int(0), - 'blocktags' => publicise_make_int(0), - 'notify-flags' => publicise_make_int($owner['notifyflags']), - 'page-flags' => publicise_make_int($comments ? PAGE_COMMUNITY : PAGE_SOAPBOX), - 'expire' => publicise_make_int($expire), - ); - Logger::debug('Publicise: creating user ' . print_r($newuser, true)); - $r = DBA::e("INSERT INTO `user` (`" - . implode("`, `", array_keys($newuser)) - . "`) VALUES (" - . implode(", ", array_values($newuser)) - . ")" ); - if (!$r) { - Logger::warning('Publicise: create user failed'); - return; - } - $r = DBA::e('SELECT * FROM `user` WHERE `guid` = "%s"', DBA::escape($guid)); - if (count($r) != 1) { - Logger::warning('Publicise: unexpected number of uids returned'); - return; - } - Logger::debug('Publicise: created user ID ' . $r[0]); - return $r[0]; -} - -function publicise_create_self_contact($a, $contact, $uid) { - $newcontact = array( - 'uid' => $uid, - 'created' => publicise_make_string(datetime_convert()), - 'self' => publicise_make_int(1), - 'name' => publicise_make_string($contact['name']), - 'nick' => publicise_make_string($contact['nick']), - 'photo' => publicise_make_string($contact['photo']), - 'thumb' => publicise_make_string($contact['thumb']), - 'micro' => publicise_make_string($contact['micro']), - 'blocked' => publicise_make_int(0), - 'pending' => publicise_make_int(0), - 'url' => publicise_make_string($a->get_baseurl() . 
'/profile/' . $contact['nick']), - 'nurl' => publicise_make_string($a->get_baseurl() . '/profile/' . $contact['nick']), - 'request' => publicise_make_string($a->get_baseurl() . '/dfrn_request/' . $contact['nick']), - 'notify' => publicise_make_string($a->get_baseurl() . '/dfrn_notify/' . $contact['nick']), - 'poll' => publicise_make_string($a->get_baseurl() . '/dfrn_poll/' . $contact['nick']), - 'confirm' => publicise_make_string($a->get_baseurl() . '/dfrn_confirm/' . $contact['nick']), - 'poco' => publicise_make_string($a->get_baseurl() . '/poco/' . $contact['nick']), - 'uri-date' => publicise_make_string(datetime_convert()), - 'avatar-date' => publicise_make_string(datetime_convert()), - 'closeness' => publicise_make_int(0), - ); - $existing = DBA::e("SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1", intval($uid)); - if (count($existing)) { - $newcontact = $existing[0]; - Logger::debug('Publicise: self contact already exists for user ' . $uid . ' id ' . $newcontact['id']); - } else { - Logger::debug('Publicise: create contact ' . print_r($newcontact, true)); - DBA::e("INSERT INTO `contact` (`" - . implode("`, `", array_keys($newcontact)) - . "`) VALUES (" - . implode(", ", array_values($newcontact)) - . ")" ); - $results = DBA::e("SELECT `id` FROM `contact` WHERE `uid` = %d AND `self` = 1", intval($uid)); - if (count($results) != 1) { - Logger::warning('Publicise: create self contact failed, will delete uid ' . $uid); - $r = DBA::e("DELETE FROM `user` WHERE `uid` = %d", intval($uid)); - return; - } - $newcontact = $results[0]; - Logger::debug('Publicise: created self contact for user ' . $uid . ' id ' . $newcontact['id']); - } - Logger::debug('Publicise: self contact for ' . $uid . ' nick ' . $contact['nick'] . ' is ' . 
$newcontact['id']); - return $newcontact['id']; -} - -function publicise_create_profile($contact, $uid) { - $newprofile = array( - 'uid' => $uid, - 'profile-name' => publicise_make_string('default'), - 'is-default' => publicise_make_int(1), - 'name' => publicise_make_string($contact['name']), - 'photo' => publicise_make_string($contact['photo']), - 'thumb' => publicise_make_string($contact['thumb']), - 'homepage' => publicise_make_string($contact['url']), - 'publish' => publicise_make_int(1), - 'net-publish' => publicise_make_int(1), - ); - Logger::debug('Publicise: create profile ' . print_r($newprofile, true)); - $r = DBA::e("INSERT INTO `profile` (`" - . implode("`, `", array_keys($newprofile)) - . "`) VALUES (" - . implode(", ", array_values($newprofile)) - . ")" ); - if (!$r) { - Logger::warning('Publicise: create profile failed'); - } - $newprofile = DBA::e('SELECT `id` FROM `profile` WHERE `uid` = %d AND `is-default` = 1', intval($uid)); - if (count($newprofile) != 1) { - Logger::warning('Publicise: create profile produced unexpected number of results'); - return; - } - Logger::debug('Publicise: created profile ' . $newprofile[0]['id']); - return $newprofile[0]['id']; -} - -function publicise_set_up_user($a, $contact, $owner) { - $user = publicise_create_user($owner, $contact); - if (!$user) { - notice(sprintf(t("Failed to create user for feed \"%s\""), $contact['name']) . EOL); - return; - } - $self_contact = publicise_create_self_contact($a, $contact, $user['uid']); - if (!$self_contact) { - notice(sprintf(t("Failed to create self contact for user \"%s\""), $contact['name']) . EOL); - Logger::warning("Publicise: unable to create self contact, deleting user " . $user['uid']); - DBA::e('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid'])); - return; - } - $profile = publicise_create_profile($contact, $user['uid']); - if (!$profile) { - notice(sprintf(t("Failed to create profile for user \"%s\""), $contact['name']) . 
EOL); - Logger::warning("Publicise: unable to create profile, deleting user $uid contact $self_contact"); - DBA::e('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid'])); - DBA::e('DELETE FROM `contact` WHERE `id` = %d', intval($self_contact)); - return; - } - return $user; -} - -function publicise($a, &$contact, &$owner) { - Logger::info('@@@ Publicise: publicise'); - if (!is_site_admin()) { - notice(t("Only admin users can publicise feeds")); - Logger::warning('Publicise: non-admin tried to publicise'); - return; - } - - // Check if we're changing our mind about a feed we earlier depublicised - Logger::info('@@@ Publicise: ' . 'SELECT * FROM `user` WHERE `account_expires_on` != "0000-00-00 00:00:00" AND `nickname` = "' . $contact['nick'] . '" AND `email` = "' . $owner['email'] . '" AND `page-flags` in (' . intval(PAGE_COMMUNITY) . ', ' . intval(PAGE_SOAPBOX) . ')'); - $existing = DBA::e('SELECT * FROM `user` WHERE `account_expires_on` != "0000-00-00 00:00:00" AND `nickname` = "%s" AND `email` = "%s" AND `page-flags` in (%d, %d)', - DBA::escape($contact['nick']), DBA::escape($owner['email']), intval(PAGE_COMMUNITY), intval(PAGE_SOAPBOX)); - if (count($existing) == 1) { - Logger::info('@@@ Publicise: there is existing'); - $owner = $existing[0]; - DBA::e('UPDATE `user` SET `account_expires_on` = "0000-00-00 00:00:00", `account_removed` = 0, `account_expired` = 0 WHERE `uid` = %d', intval($owner['uid'])); - DBA::e('UPDATE `profile` SET `publish` = 1, `net-publish` = 1 WHERE `uid` = %d AND `is-default` = 1', intval($owner['uid'])); - Logger::debug('Publicise: recycled previous user ' . $owner['uid']); - } - else { - Logger::info('@@@ Publicise: there is not existing'); - $owner = publicise_set_up_user($a, $contact, $owner); - if (!$owner) { - return; - } - Logger::debug("Publicise: created new user " . $owner['uid']); - } - Logger::info('Publicise: new contact user is ' . 
$owner['uid']); - - $r = DBA::e("UPDATE `contact` SET `uid` = %d, `reason` = 'publicise', `hidden` = 1 WHERE id = %d", intval($owner['uid']), intval($contact['id'])); - if (!$r) { - Logger::warning('Publicise: update contact failed, user is probably in a bad state ' . $user['uid']); - } - $contact['uid'] = $owner['uid']; - $contact['reason'] = 'publicise'; - $contact['hidden'] = 1; - $r = DBA::e("UPDATE `item` SET `uid` = %d, type = 'wall', wall = 1, private = 0 WHERE `contact-id` = %d", - intval($owner['uid']), intval($contact['id'])); - Logger::debug('Publicise: moved items from contact ' . $contact['id'] . ' to uid ' . $owner['uid']); - - // Update the retriever config - $r = DBA::e("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d", - intval($owner['uid']), intval($contact['id'])); - - info(sprintf(t("Moved feed \"%s\" to dedicated account"), $contact['name']) . EOL); - return true; -} - -function publicise_self_contact($uid) { - $r = DBA::e('SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1', intval($uid)); - if (count($r) != 1) { - Logger::warning('Publicise: unexpected number of self contacts for user ' . $uid); - return; - } - return $r[0]; -} - -function depublicise($a, $contact, $user) { - require_once('include/Contact.php'); - - if (!is_site_admin()) { - notice("Only admin users can depublicise feeds"); - Logger::warning('Publicise: non-admin tried to depublicise'); - return; - } - - Logger::debug('Publicise: about to depublicise contact ' . $contact['id'] . ' user ' . $user['uid']); - - $self_contact = publicise_self_contact($user['uid']); - - // If the local_user() is subscribed to the feed, take ownership - // of the feed and all its items and photos. Otherwise they will - // be deleted when the account expires. 
- $r = DBA::e('SELECT * FROM `contact` WHERE `uid` = %d AND `url` = "%s"', - intval(local_user()), DBA::escape($self_contact['url'])); - if (count($r)) { - // Delete the contact to the feed user and any - // copies of its items. These will be replaced by the originals, - // which will be brought back into the local_user's feed along - // with the feed contact itself. - foreach ($r as $my_contact) { - DBA::e('DELETE FROM `item` WHERE `contact-id` = %d', intval($my_contact['id'])); - DBA::e('DELETE FROM `contact` WHERE `id` = %d', intval($my_contact['id'])); - } - - // Move the feed contact to local_user. Existing items stay - // attached to the original feed contact, but must have their uid - // updated. Also update the fields we scribbled over in - // publicise_post_remote_hook. - DBA::e('UPDATE `contact` SET `uid` = %d, `reason` = "", hidden = 0 WHERE id = %d', - intval(local_user()), intval($contact['id'])); - DBA::e('UPDATE `item` SET `uid` = %d, `wall` = 0, `type` = "remote", `private` = 2 WHERE `contact-id` = %d', - intval(local_user()), intval($contact['id'])); - - // Take ownership of any photos created by the feed user - DBA::e('UPDATE `photo` SET `uid` = %d WHERE `uid` = %d', - intval(local_user()), intval($user['uid'])); - - // Update the retriever config - $r = DBA::e("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d", - intval($owner['uid']), intval($contact['id'])); - } - - // Set the account to removed and expired right now. It will be cleaned up by cron after 3 days, giving a chance to change your mind - DBA::e('UPDATE `user` SET `account_removed` = 1, `account_expired` = 1, `account_expires_on` = UTC_TIMESTAMP() WHERE `uid` = %d', - intval($user['uid'])); - DBA::e('UPDATE `profile` SET `publish` = 0, `net-publish` = 0 WHERE `uid` = %d AND `is-default` = 1', intval($user['uid'])); - - info(sprintf(t("Removed dedicated account for feed \"%s\""), $contact['name']) . 
EOL); -} - -function publicise_addon_admin_post ($a) { - Logger::info('@@@ publicise_addon_admin_post'); - if (!is_site_admin()) { - Logger::warning('Publicise: non-admin tried to do admin post'); - return; - } - - foreach (publicise_get_contacts() as $contact) { - Logger::info('@@@ publicise_addon_admin_post contact ' . $contact['id'] . ' ' . $contact['name']); - $user = publicise_get_user($contact['uid']); - if (!$_POST['publicise-enabled-' . $contact['id']]) { - if ($contact['reason'] === 'publicise') { - Logger::info('@@@ depublicise'); - depublicise($a, $contact, $user); - } - } - else { - if ($contact['reason'] !== 'publicise') { - Logger::info('@@@ publicise'); - if (!publicise($a, $contact, $user)) { - Logger::warning('Publicise: failed to publicise contact ' . $contact['id']); - continue; - } - } - if ($_POST['publicise-expire-' . $contact['id']] != $user['expire']) { - DBA::e('UPDATE `user` SET `expire` = %d WHERE `uid` = %d', - intval($_POST['publicise-expire-' . $contact['id']]), intval($user['uid'])); - } - if ($_POST['publicise-comments-' . 
$contact['id']]) { - if ($user['page-flags'] != PAGE_COMMUNITY) { - DBA::e('UPDATE `user` SET `page-flags` = %d WHERE `uid` = %d', - intval(PAGE_COMMUNITY), intval($user['uid'])); - DBA::e('UPDATE `contact` SET `rel` = %d WHERE `uid` = %d AND `network` = "dfrn"', - intval(CONTACT_IS_SHARING), intval($user['uid'])); - } - } - else { - if ($user['page-flags'] != PAGE_SOAPBOX) { - DBA::e('UPDATE `user` SET `page-flags` = %d WHERE `uid` = %d', - intval(PAGE_SOAPBOX), intval($user['uid'])); - DBA::e('UPDATE `contact` SET `rel` = %d WHERE `uid` = %d AND `network` = "dfrn"', - intval(CONTACT_IS_FOLLOWER), intval($user['uid'])); - } - } - } - } -} - -function publicise_post_remote_hook(&$a, &$item) { - $r1 = DBA::e("SELECT `uid` FROM `contact` WHERE `id` = %d AND `reason` = 'publicise'", intval($item['contact-id'])); +"SELECT `uid` FROM `contact` WHERE `id` = %d AND `reason` = 'publicise'", intval($item['contact-id'])); if (!$r1) { return; } diff --git a/retriever/retriever.php b/retriever/retriever.php index 263f5842..fc864129 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -172,7 +172,7 @@ function retriever_retrieve_items($max_items) { */ function retriever_clean_up_completed_resources($max_items) { // TODO: figure out how to do this with DBA module - $r = DBA::e('SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d', + $r = DBA::p('SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND 
retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d', intval($max_items)); if (!$r) { $r = array(); @@ -209,7 +209,7 @@ function retriever_tidy() { DBA::delete('retriever_resource', ['completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)']); DBA::delete('retriever_resource', ['completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)']); - $r = DBA::e("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); + $r = DBA::p("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); Logger::info('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource'); foreach ($r as $rr) { DBA::e('DELETE FROM retriever_item WHERE id = %d', intval($rr['id'])); From 7b1d39827909747c9ebda2bacdaee3f9d9ba6c43 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 16:46:22 +0100 Subject: [PATCH 64/96] stray line --- mailstream/mailstream.php | 1 - 1 file changed, 1 deletion(-) diff --git a/mailstream/mailstream.php b/mailstream/mailstream.php index bdad523e..542a1a42 100644 --- a/mailstream/mailstream.php +++ b/mailstream/mailstream.php @@ -323,7 +323,6 @@ function mailstream_subject(array $item): string ); return DI::l10n()->t("Friendica post"); } - $contact = $r[0]; if ($contact['network'] === 'dfrn') { return DI::l10n()->t("Friendica post"); } From e29060c43846e81229d15a9b539a6c50e8a9ef8a Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 16:51:40 +0100 Subject: [PATCH 65/96] also update these queries --- phototrack/phototrack.php | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index f8a3f704..1b8ad738 100644 --- 
a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -86,12 +86,12 @@ function phototrack_photo_use($photo, $table, $field, $id) { if (strlen($photo) != 32) { return; } - $r = DBA::e("SELECT `resource-id` FROM `photo` WHERE `resource-id` = '%s' LIMIT 1", DBA::escape($photo)); + $r = DBA::p("SELECT `resource-id` FROM `photo` WHERE `resource-id` = '%s' LIMIT 1", DBA::escape($photo)); if (!count($r)) { return; } $rid = $r[0]['resource-id']; - $existing = DBA::e("SELECT id FROM phototrack_photo_use WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); + $existing = DBA::p("SELECT id FROM phototrack_photo_use WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); if (count($existing)) { DBA::e("UPDATE phototrack_photo_use SET checked = NOW() WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); } @@ -197,11 +197,11 @@ function phototrack_batch_size() { function phototrack_search_table($a, $table) { $batch_size = phototrack_batch_size(); - $rows = DBA::e("SELECT `$table`.* FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) ) ORDER BY phototrack_row_check.checked LIMIT $batch_size"); + $rows = DBA::p("SELECT `$table`.* FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) ) ORDER BY phototrack_row_check.checked LIMIT $batch_size"); foreach ($rows as $row) { phototrack_check_row($a, $table, $row); } - $r = DBA::e("SELECT COUNT(*) FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( 
phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) )"); + $r = DBA::p("SELECT COUNT(*) FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) )"); $remaining = $r[0]['COUNT(*)']; Logger::info('phototrack: searched ' . count($rows) . ' rows in table ' . $table . ', ' . $remaining . ' still remaining to search'); return $remaining; @@ -252,7 +252,7 @@ function phototrack_tidy() { $batch_size = phototrack_batch_size(); DBA::e('CREATE TABLE IF NOT EXISTS `phototrack-temp` (`resource-id` char(255) not null)'); DBA::e('INSERT INTO `phototrack-temp` SELECT DISTINCT(`resource-id`) FROM photo WHERE photo.`created` < DATE_SUB(NOW(), INTERVAL 2 MONTH)'); - $rows = DBA::e('SELECT `phototrack-temp`.`resource-id` FROM `phototrack-temp` LEFT OUTER JOIN phototrack_photo_use ON (`phototrack-temp`.`resource-id` = phototrack_photo_use.`resource-id`) WHERE phototrack_photo_use.id IS NULL limit ' . /*$batch_size*/1000); + $rows = DBA::p('SELECT `phototrack-temp`.`resource-id` FROM `phototrack-temp` LEFT OUTER JOIN phototrack_photo_use ON (`phototrack-temp`.`resource-id` = phototrack_photo_use.`resource-id`) WHERE phototrack_photo_use.id IS NULL limit ' . /*$batch_size*/1000); if (DBA::isResult($rows)) { foreach ($rows as $row) { Logger::debug('phototrack: remove photo ' . $row['resource-id']); @@ -261,7 +261,7 @@ function phototrack_tidy() { Logger::info('phototrack_tidy: deleted ' . count($rows) . 
' photos'); } DBA::e('DROP TABLE `phototrack-temp`'); - $rows = DBA::e('SELECT id FROM phototrack_photo_use WHERE checked < DATE_SUB(NOW(), INTERVAL 14 DAY)'); + $rows = DBA::p('SELECT id FROM phototrack_photo_use WHERE checked < DATE_SUB(NOW(), INTERVAL 14 DAY)'); foreach ($rows as $row) { DBA::e( 'DELETE FROM phototrack_photo_use WHERE id = ' . $row['id']); } From 05c37f31568092cc1b3cbd769e4a35f2c1c7ee17 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 16:54:23 +0100 Subject: [PATCH 66/96] add anotehr check --- retriever/retriever.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/retriever/retriever.php b/retriever/retriever.php index fc864129..8083dcf7 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -210,6 +210,9 @@ function retriever_tidy() { DBA::delete('retriever_resource', ['completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)']); $r = DBA::p("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); + if (!DBA::isResult($r)) { + return; + } Logger::info('retriever_tidy: found ' . count($r) . 
' retriever_items with no retriever_resource'); foreach ($r as $rr) { DBA::e('DELETE FROM retriever_item WHERE id = %d', intval($rr['id'])); From 497a11a032f2c2d6116b0e20fff652d7ff9c8372 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 17:36:38 +0100 Subject: [PATCH 67/96] another migrated function --- phototrack/phototrack.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index 1b8ad738..ef2cb154 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -24,6 +24,7 @@ use Friendica\Core\Addon; use Friendica\Core\Logger; use Friendica\Object\Image; use Friendica\Database\DBA; +use Friendica\Util\Images; use Friendica\DI; if (!defined('PHOTOTRACK_DEFAULT_BATCH_SIZE')) { @@ -76,7 +77,7 @@ function phototrack_finished_row($table, $id) { function phototrack_photo_use($photo, $table, $field, $id) { Logger::debug('@@@ phototrack_photo_use ' . $photo); - foreach (Image::supportedTypes() as $m => $e) { + foreach (Images::supportedTypes() as $m => $e) { $photo = str_replace(".$e", '', $photo); } if (substr($photo, -2, 1) == '-') { From d9094aa032b2580f4f9c1f7bb816c065132d7a68 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 21:05:23 +0100 Subject: [PATCH 68/96] this is more correct --- retriever/retriever.php | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 8083dcf7..8755dc92 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -174,11 +174,11 @@ function retriever_clean_up_completed_resources($max_items) { // TODO: figure out how to do this with DBA module $r = DBA::p('SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND 
retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d', intval($max_items)); - if (!$r) { - $r = array(); + if (!DBA::isResult($r)) { + return; } - Logger::debug('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . count($r)); - foreach ($r as $rr) { + Logger::debug('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . DBA::numRows($r)); + while ($rr = DBA::fetch($r)) { $retriever_item = DBA::selectFirst('retriever_item', [], ['id' => intval($rr['item'])]); if (!DBA::isResult($retriever_item)) { Logger::warning('retriever_clean_up_completed_resources: no retriever item with id ' . $rr['item']); @@ -210,11 +210,11 @@ function retriever_tidy() { DBA::delete('retriever_resource', ['completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)']); $r = DBA::p("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); - if (!DBA::isResult($r)) { - return; - } - Logger::info('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource'); - foreach ($r as $rr) { + if (!DBA::isResult($r)) { + return; + } + Logger::info('retriever_tidy: found ' . DBA::numRows($r) . 
' retriever_items with no retriever_resource'); + while ($rr = DBA::fetch($r)) { DBA::e('DELETE FROM retriever_item WHERE id = %d', intval($rr['id'])); } } From 23106aaed462b85945ea2fb2f17fdde96b5f8f4c Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 21:20:20 +0100 Subject: [PATCH 69/96] this is more correcter --- phototrack/phototrack.php | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index ef2cb154..0e6db4c1 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -25,6 +25,7 @@ use Friendica\Core\Logger; use Friendica\Object\Image; use Friendica\Database\DBA; use Friendica\Util\Images; +use Friendica\Util\DateTimeFormat; use Friendica\DI; if (!defined('PHOTOTRACK_DEFAULT_BATCH_SIZE')) { @@ -68,10 +69,10 @@ function phototrack_module() {} function phototrack_finished_row($table, $id) { $existing = DBA::selectFirst('phototrack_row_check', ['id'], ['table' => $table, 'row-id' => $id]); if (!is_bool($existing)) { - DBA::e("UPDATE phototrack_row_check SET checked = NOW() WHERE `table` = '$table' AND `row-id` = '$id'"); + DBA::update('phototrack_row_check', ['checked' => DateTimeFormat::utcNow()], ['table' => $table, 'row-id' = $id]); } else { - DBA::e("INSERT INTO phototrack_row_check (`table`, `row-id`, `checked`) VALUES ('$table', '$id', NOW())"); + DBA::insert('phototrack_row_check', ['table' => $table, 'row-id' = $id, 'checked' => DateTimeFormat::utcNow()]); } } @@ -87,17 +88,17 @@ function phototrack_photo_use($photo, $table, $field, $id) { if (strlen($photo) != 32) { return; } - $r = DBA::p("SELECT `resource-id` FROM `photo` WHERE `resource-id` = '%s' LIMIT 1", DBA::escape($photo)); - if (!count($r)) { + $r = DBA::selectFirst('photo', ['resource-id'], ['resource-id' => $photo]); + if (!DBA::isResult($r)) { return; } - $rid = $r[0]['resource-id']; - $existing = DBA::p("SELECT id FROM phototrack_photo_use WHERE `resource-id` = 
'$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); - if (count($existing)) { - DBA::e("UPDATE phototrack_photo_use SET checked = NOW() WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); + $rid = $r['resource-id']; + $existing = DBA::selectFirst('phototrack_photo_use', ['id'], ['resource-id' => $rid, 'table' => $table, 'field' => $field, 'row-id' = $id]); + if (DBA::isResult($existing)) { + DBA::update('phototrack_photo_use', ['checked' => DateTimeFormat::utcNow()], ['resource-id' => $rid, 'table' => $table, 'field' => $field, 'row-id' = $id]); } else { - DBA::e("INSERT INTO phototrack_photo_use (`resource-id`, `table`, `field`, `row-id`, `checked`) VALUES ('$rid', '$table', '$field', '$id', NOW())"); + DBA::insert('phototrack_photo_use', ['resource-id' => $rid, 'table' => $table, 'field' => $field, 'row-id' = $id, 'checked' => DateTimeFormat::utcNow()]); } } @@ -199,11 +200,13 @@ function phototrack_batch_size() { function phototrack_search_table($a, $table) { $batch_size = phototrack_batch_size(); $rows = DBA::p("SELECT `$table`.* FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) ) ORDER BY phototrack_row_check.checked LIMIT $batch_size"); - foreach ($rows as $row) { - phototrack_check_row($a, $table, $row); + if (DBA::isResult($rows)) { + while ($row = DBA::fetch($rows)) { + phototrack_check_row($a, $table, $row); + } } $r = DBA::p("SELECT COUNT(*) FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) )"); - $remaining = $r[0]['COUNT(*)']; + $remaining = 
DBA::fetch($r)['COUNT(*)']; Logger::info('phototrack: searched ' . count($rows) . ' rows in table ' . $table . ', ' . $remaining . ' still remaining to search'); return $remaining; } From 2247073e47fe47d581f32582dce748a2098ca312 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 21:39:03 +0100 Subject: [PATCH 70/96] syntax errors --- phototrack/phototrack.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index 0e6db4c1..82b4bfd3 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -69,10 +69,10 @@ function phototrack_module() {} function phototrack_finished_row($table, $id) { $existing = DBA::selectFirst('phototrack_row_check', ['id'], ['table' => $table, 'row-id' => $id]); if (!is_bool($existing)) { - DBA::update('phototrack_row_check', ['checked' => DateTimeFormat::utcNow()], ['table' => $table, 'row-id' = $id]); + DBA::update('phototrack_row_check', ['checked' => DateTimeFormat::utcNow()], ['table' => $table, 'row-id' => $id]); } else { - DBA::insert('phototrack_row_check', ['table' => $table, 'row-id' = $id, 'checked' => DateTimeFormat::utcNow()]); + DBA::insert('phototrack_row_check', ['table' => $table, 'row-id' => $id, 'checked' => DateTimeFormat::utcNow()]); } } @@ -206,7 +206,7 @@ function phototrack_search_table($a, $table) { } } $r = DBA::p("SELECT COUNT(*) FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) )"); - $remaining = DBA::fetch($r)['COUNT(*)']; + $remaining = DBA::fetch($r)['count(*)']; Logger::info('phototrack: searched ' . count($rows) . ' rows in table ' . $table . ', ' . $remaining . 
' still remaining to search'); return $remaining; } From f9353dea28badab91e4fdd3d3c009ddc3a65bb71 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 22:02:01 +0100 Subject: [PATCH 71/96] syntax errors --- phototrack/phototrack.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index 82b4bfd3..e9f0a7cd 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -93,9 +93,9 @@ function phototrack_photo_use($photo, $table, $field, $id) { return; } $rid = $r['resource-id']; - $existing = DBA::selectFirst('phototrack_photo_use', ['id'], ['resource-id' => $rid, 'table' => $table, 'field' => $field, 'row-id' = $id]); + $existing = DBA::selectFirst('phototrack_photo_use', ['id'], ['resource-id' => $rid, 'table' => $table, 'field' => $field, 'row-id' => $id]); if (DBA::isResult($existing)) { - DBA::update('phototrack_photo_use', ['checked' => DateTimeFormat::utcNow()], ['resource-id' => $rid, 'table' => $table, 'field' => $field, 'row-id' = $id]); + DBA::update('phototrack_photo_use', ['checked' => DateTimeFormat::utcNow()], ['resource-id' => $rid, 'table' => $table, 'field' => $field, 'row-id' => $id]); } else { DBA::insert('phototrack_photo_use', ['resource-id' => $rid, 'table' => $table, 'field' => $field, 'row-id' = $id, 'checked' => DateTimeFormat::utcNow()]); From fff186c0b09e523cc6f973e89eece742ee24da83 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 22:02:58 +0100 Subject: [PATCH 72/96] syntax errors --- phototrack/phototrack.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index e9f0a7cd..63c764fc 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -98,7 +98,7 @@ function phototrack_photo_use($photo, $table, $field, $id) { DBA::update('phototrack_photo_use', ['checked' => DateTimeFormat::utcNow()], ['resource-id' => $rid, 'table' => $table, 'field' 
=> $field, 'row-id' => $id]); } else { - DBA::insert('phototrack_photo_use', ['resource-id' => $rid, 'table' => $table, 'field' => $field, 'row-id' = $id, 'checked' => DateTimeFormat::utcNow()]); + DBA::insert('phototrack_photo_use', ['resource-id' => $rid, 'table' => $table, 'field' => $field, 'row-id' => $id, 'checked' => DateTimeFormat::utcNow()]); } } From 3db0c551d68d7dc99a4c1c808a6d3cd9f8e293b4 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 22:05:28 +0100 Subject: [PATCH 73/96] syntax errors --- phototrack/phototrack.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index 63c764fc..8fb637b2 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -206,8 +206,8 @@ function phototrack_search_table($a, $table) { } } $r = DBA::p("SELECT COUNT(*) FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) )"); - $remaining = DBA::fetch($r)['count(*)']; - Logger::info('phototrack: searched ' . count($rows) . ' rows in table ' . $table . ', ' . $remaining . ' still remaining to search'); + $remaining = DBA::fetch($r)['count']; + Logger::info('phototrack: searched ' . DBA::numRows($rows) . ' rows in table ' . $table . ', ' . $remaining . ' still remaining to search'); return $remaining; } @@ -262,12 +262,12 @@ function phototrack_tidy() { Logger::debug('phototrack: remove photo ' . $row['resource-id']); DBA::e('DELETE FROM photo WHERE `resource-id` = "' . $row['resource-id'] . '"'); } - Logger::info('phototrack_tidy: deleted ' . count($rows) . ' photos'); + Logger::info('phototrack_tidy: deleted ' . DBA::numRows($rows) . 
' photos'); } DBA::e('DROP TABLE `phototrack-temp`'); $rows = DBA::p('SELECT id FROM phototrack_photo_use WHERE checked < DATE_SUB(NOW(), INTERVAL 14 DAY)'); foreach ($rows as $row) { DBA::e( 'DELETE FROM phototrack_photo_use WHERE id = ' . $row['id']); } - Logger::info('phototrack_tidy: deleted ' . count($rows) . ' phototrack_photo_use rows'); + Logger::info('phototrack_tidy: deleted ' . DBA::numRows($rows) . ' phototrack_photo_use rows'); } From 5e7311a5882cfbadb4ef71d0ca150533aca21298 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 22:19:00 +0100 Subject: [PATCH 74/96] improvements --- phototrack/phototrack.php | 1 + retriever/retriever.php | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index 8fb637b2..0ede2a1c 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -206,6 +206,7 @@ function phototrack_search_table($a, $table) { } } $r = DBA::p("SELECT COUNT(*) FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) )"); + Logger::info("@@@ phototrack_search_table " . print_r(DBA::fetch($r))); $remaining = DBA::fetch($r)['count']; Logger::info('phototrack: searched ' . DBA::numRows($rows) . ' rows in table ' . $table . ', ' . $remaining . ' still remaining to search'); return $remaining; diff --git a/retriever/retriever.php b/retriever/retriever.php index 8755dc92..7caa05e5 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -215,7 +215,7 @@ function retriever_tidy() { } Logger::info('retriever_tidy: found ' . DBA::numRows($r) . 
' retriever_items with no retriever_resource'); while ($rr = DBA::fetch($r)) { - DBA::e('DELETE FROM retriever_item WHERE id = %d', intval($rr['id'])); + DBA::delete('retriever_item', ['id' => intval($rr['id'])]); } } From 8b4e9c0e5d9baa9d4b6ced2eb6eaeb54c4a55efd Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Thu, 3 Mar 2022 07:37:37 +0100 Subject: [PATCH 75/96] fix sql syntax --- retriever/retriever.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 7caa05e5..047511c5 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -172,8 +172,7 @@ function retriever_retrieve_items($max_items) { */ function retriever_clean_up_completed_resources($max_items) { // TODO: figure out how to do this with DBA module - $r = DBA::p('SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d', - intval($max_items)); + $r = DBA::p("SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT $max_items"); if (!DBA::isResult($r)) { return; } From 81433d0b43c5cfbc9f9b77b54f187ac21fcf740b Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Thu, 3 Mar 2022 11:33:35 +0100 Subject: [PATCH 76/96] use new temppath function --- retriever/retriever.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retriever/retriever.php 
b/retriever/retriever.php index 047511c5..ef00bd33 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -276,7 +276,7 @@ function retrieve_resource($resource) { $redirects = 0; $cookiejar = ''; if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) { - $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); + $cookiejar = tempnam(System::getTempPath(), 'cookiejar-retriever-'); file_put_contents($cookiejar, $rule_data['cookiedata']); } $fetch_result = DI::httpClient()->fetchFull($resource['url'], $redirects, '', $cookiejar); From 09933337b4f81b29d4961d7d2af21e287f3e27d7 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sat, 7 May 2022 19:32:24 +0200 Subject: [PATCH 77/96] fix argv stuff --- retriever/retriever.php | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index ef00bd33..9dbe6170 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -830,7 +830,7 @@ function retriever_content($a) { $a->page['content'] .= "

Please log in

"; return; } - if ($a->argv[1] === 'help') { + if (isset(DI::args()->getArgv()[1]) and DI::args()->getArgv()[1] === 'help') { $feeds = DBA::selectToArray('contact', ['id', 'name', 'thumb'], ['uid' => local_user(), 'network' => 'feed']); for ($i = 0; $i < count($feeds); ++$i) { $feeds[$i]['url'] = DI::baseUrl()->get(true) . '/retriever/' . $feeds[$i]['id']; @@ -842,14 +842,15 @@ function retriever_content($a) { '$feeds' => $feeds)); return; } - if ($a->argv[1]) { - $retriever_rule = get_retriever_rule($a->argv[1], local_user(), false); + if (isset(DI::args()->getArgv()[1])) { + $arg1 = DI::args()->getArgv()[1] + $retriever_rule = get_retriever_rule($arg1, local_user(), false); if (!$retriever_rule) { - $retriever_rule = ['id' => 0, 'data' => ['enable' => 0, 'modurl' => '', 'pattern' => '', 'replace' => '', 'images' => 0, 'storecookies' => 0, 'cookiedata' => '', 'customxslt' => '', 'include' => '', 'exclude' => '']]; + $retriever_rule = ['id' => 0, 'data' => ['enable' => 0, 'modurl' => '', 'pattern' => '', 'replace' => '', 'images' => 0, 'storecookies' => 0, 'cookiedata' => '', 'customxslt' => '', 'include' => '', 'exclude' => '']]; } if (!empty($_POST["id"])) { - $retriever_rule = get_retriever_rule($a->argv[1], local_user(), true); + $retriever_rule = get_retriever_rule($arg1, local_user(), true); $retriever_rule['data'] = array(); foreach (array('modurl', 'pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) { if (empty($_POST['retriever_' . 
$setting])) { From 430211ebc2ff789bff9c9183e0c8480c150e2f6b Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sat, 7 May 2022 19:33:34 +0200 Subject: [PATCH 78/96] fix argv stuff --- retriever/retriever.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 9dbe6170..210c6542 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -843,7 +843,7 @@ function retriever_content($a) { return; } if (isset(DI::args()->getArgv()[1])) { - $arg1 = DI::args()->getArgv()[1] + $arg1 = DI::args()->getArgv()[1]; $retriever_rule = get_retriever_rule($arg1, local_user(), false); if (!$retriever_rule) { $retriever_rule = ['id' => 0, 'data' => ['enable' => 0, 'modurl' => '', 'pattern' => '', 'replace' => '', 'images' => 0, 'storecookies' => 0, 'cookiedata' => '', 'customxslt' => '', 'include' => '', 'exclude' => '']]; From 41fd46600e26dd946023e1d0b05044272a6df5b9 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 8 May 2022 14:46:06 +0100 Subject: [PATCH 79/96] correct use of fetchFull --- retriever/retriever.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 210c6542..42462bcb 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -279,7 +279,7 @@ function retrieve_resource($resource) { $cookiejar = tempnam(System::getTempPath(), 'cookiejar-retriever-'); file_put_contents($cookiejar, $rule_data['cookiedata']); } - $fetch_result = DI::httpClient()->fetchFull($resource['url'], $redirects, '', $cookiejar); + $fetch_result = DI::httpClient()->fetchFull($resource['url'], $redirects, 0, $cookiejar); if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) { $retriever_rule['data']['cookiedata'] = file_get_contents($cookiejar); DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], $retriever_rule); From 
63a8ac39f839e7bc86356a217b2ab8ee2ebbe96f Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 8 May 2022 21:41:30 +0200 Subject: [PATCH 80/96] fix comment --- retriever/retriever.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 42462bcb..1401f90d 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -925,7 +925,7 @@ function retriever_content($a) { 'retriever_cookiedata', DI::l10n()->t('Cookie Data'), $retriever_rule['data']['cookiedata'], - DI::l10n()->t("Latest cookie data for this feed. Netscape cookie file format.")), + DI::l10n()->t("Latest cookie data for this feed. Example: [{\"Name\":\"cookie-name\",\"Value\":\"cookie-value\",\"Domain\":\"example.com\",\"Path\":\"\\/path\\/\",\"Max-Age\":null,\"Expires\":1682450014,\"Secure\":true,\"Discard\":false,\"HttpOnly\":true}]")), '$customxslt' => array( 'retriever_customxslt', DI::l10n()->t('Custom XSLT'), From 70153bde618b7fde392826bb32149198f94db330 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 22 Jun 2022 17:55:36 +0100 Subject: [PATCH 81/96] Use separate album and repair dox for ces --- retriever/retriever.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 1401f90d..83c357d3 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -797,7 +797,7 @@ function retriever_transform_images(&$item, $resource) { $path = parse_url($resource['url'], PHP_URL_PATH); $parts = pathinfo($path); $filename = $parts['filename'] . (array_key_exists('extension', $parts) ? '.' . $parts['extension'] : ''); - $album = 'Wall Photos'; + $album = 'Retriever'; $scale = 0; $desc = ''; // TODO: store alt text with resource when it's requested so we can fill this in Logger::debug('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . $uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . 
$album . ' scale ' . $scale . ' desc ' . $desc); From a21413dce3a266611dd04b5bb6ba9d9e39496406 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 2 Oct 2022 20:18:48 +0200 Subject: [PATCH 82/96] Update to correct collation mode --- retriever/database.sql | 12 ++++++------ retriever/retriever.php | 14 +++++++++++++- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/retriever/database.sql b/retriever/database.sql index 68480cfd..6139fea4 100644 --- a/retriever/database.sql +++ b/retriever/database.sql @@ -6,11 +6,11 @@ CREATE TABLE IF NOT EXISTS `retriever_rule` ( PRIMARY KEY (`id`), KEY `uid` (`uid`), KEY `contact-id` (`contact-id`) -) DEFAULT CHARSET=utf8 COLLATE=utf8_bin; +) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; CREATE TABLE IF NOT EXISTS `retriever_item` ( `id` int(11) unsigned NOT NULL AUTO_INCREMENT, - `item-uri` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL, + `item-uri` varbinary(255) NOT NULL, `item-uid` int(10) unsigned NOT NULL DEFAULT '0', `contact-id` int(10) unsigned NOT NULL DEFAULT '0', `resource` int(11) NOT NULL, @@ -20,7 +20,7 @@ CREATE TABLE IF NOT EXISTS `retriever_item` ( KEY `item-uid` (`item-uid`), KEY `all` (`item-uri`, `item-uid`, `contact-id`), PRIMARY KEY (`id`) -) DEFAULT CHARSET=utf8 COLLATE=utf8_bin; +) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; CREATE TABLE IF NOT EXISTS `retriever_resource` ( `id` int(11) unsigned NOT NULL AUTO_INCREMENT, @@ -28,15 +28,15 @@ CREATE TABLE IF NOT EXISTS `retriever_resource` ( `contact-id` int(10) unsigned NOT NULL DEFAULT '0', `type` char(255) NULL DEFAULT NULL, `binary` int(1) NOT NULL DEFAULT 0, - `url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL, + `url` varbinary(800) NOT NULL, `created` timestamp NOT NULL DEFAULT now(), `completed` timestamp NULL DEFAULT NULL, `last-try` timestamp NULL DEFAULT NULL, `num-tries` int(11) NOT NULL DEFAULT 0, `data` mediumblob NULL DEFAULT NULL, `http-code` smallint(1) unsigned NULL DEFAULT NULL, - 
`redirect-url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NULL DEFAULT NULL, + `redirect-url` varbinary(800) NOT NULL, KEY `url` (`url`), KEY `completed` (`completed`), PRIMARY KEY (`id`) -) DEFAULT CHARSET=utf8 COLLATE=utf8_bin +) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; diff --git a/retriever/retriever.php b/retriever/retriever.php index 83c357d3..714a33a1 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -32,7 +32,19 @@ function retriever_install() { Addon::registerHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); - if (DI::config()->get('retriever', 'dbversion') != '0.14') { + if (DI::config()->get('retriever', 'dbversion') == '0.14') { + if (!DBA::e("ALTER TABLE `retriever_rule` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci") || + !DBA::e("ALTER TABLE `retriever_item` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci") || + !DBA::e("ALTER TABLE `retriever_item` MODIFY `item-uri` varbinary(255) NOT NULL") || + !DBA::e("ALTER TABLE `retriever_resource` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci") || + !DBA::e("ALTER TABLE `retriever_resource` MODIFY `url` varbinary(800) NOT NULL") || + !DBA::e("ALTER TABLE `retriever_resource` MODIFY `redirect-url` varbinary(800) NOT NULL")) { + Logger::warning('Unable to update database tables: ' . 
DBA::errorMessage()); + return; + } + DI::config()->set('retriever', 'dbversion', '0.15'); + } + if (DI::config()->get('retriever', 'dbversion') != '0.15') { $schema = file_get_contents(dirname(__file__).'/database.sql'); $tables = explode(';', $schema); foreach ($tables as $table) { From b3ba24921ee7e17960c22b75e30802041a8cc65c Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 2 Oct 2022 21:09:16 +0200 Subject: [PATCH 83/96] Use new hook registration calls --- retriever/retriever.php | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 714a33a1..5bd078c6 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -7,6 +7,7 @@ */ use Friendica\Core\Addon; +use Friendica\Core\Hook; use Friendica\Core\Logger; use Friendica\Core\Renderer; use Friendica\Core\System; @@ -26,11 +27,11 @@ use Friendica\DI; * @brief Installation hook for retriever plugin */ function retriever_install() { - Addon::registerHook('addon_settings', 'addon/retriever/retriever.php', 'retriever_addon_settings'); - Addon::registerHook('addon_settings_post', 'addon/retriever/retriever.php', 'retriever_addon_settings_post'); - Addon::registerHook('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook'); - Addon::registerHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); - Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); + Hook::register('addon_settings', 'addon/retriever/retriever.php', 'retriever_addon_settings'); + Hook::register('addon_settings_post', 'addon/retriever/retriever.php', 'retriever_addon_settings_post'); + Hook::register('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook'); + Hook::register('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); + Hook::register('cron', 'addon/retriever/retriever.php', 'retriever_cron'); if 
(DI::config()->get('retriever', 'dbversion') == '0.14') { if (!DBA::e("ALTER TABLE `retriever_rule` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci") || @@ -62,13 +63,13 @@ function retriever_install() { * @brief Uninstallation hook for retriever plugin */ function retriever_uninstall() { - Addon::unregisterHook('addon_settings', 'addon/retriever/retriever.php', 'retriever_addon_settings'); - Addon::unregisterHook('addon_settings_post', 'addon/retriever/retriever.php', 'retriever_addon_settings_post'); - Addon::unregisterHook('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook'); - Addon::unregisterHook('addon_settings', 'addon/retriever/retriever.php', 'retriever_addon_settings'); - Addon::unregisterHook('addon_settings_post', 'addon/retriever/retriever.php', 'retriever_addon_settings_post'); - Addon::unregisterHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); - Addon::unregisterHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); + Hook::unregister('addon_settings', 'addon/retriever/retriever.php', 'retriever_addon_settings'); + Hook::unregister('addon_settings_post', 'addon/retriever/retriever.php', 'retriever_addon_settings_post'); + Hook::unregister('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook'); + Hook::unregister('addon_settings', 'addon/retriever/retriever.php', 'retriever_addon_settings'); + Hook::unregister('addon_settings_post', 'addon/retriever/retriever.php', 'retriever_addon_settings_post'); + Hook::unregister('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); + Hook::unregister('cron', 'addon/retriever/retriever.php', 'retriever_cron'); } /** From 301d6452dcb3e64dc69922130839841e11559da6 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 2 Oct 2022 21:19:08 +0200 Subject: [PATCH 84/96] Fix length of keys --- retriever/database.sql | 4 ++-- retriever/retriever.php | 10 +++++----- 2 files 
changed, 7 insertions(+), 7 deletions(-) diff --git a/retriever/database.sql b/retriever/database.sql index 6139fea4..2cabf9ef 100644 --- a/retriever/database.sql +++ b/retriever/database.sql @@ -28,14 +28,14 @@ CREATE TABLE IF NOT EXISTS `retriever_resource` ( `contact-id` int(10) unsigned NOT NULL DEFAULT '0', `type` char(255) NULL DEFAULT NULL, `binary` int(1) NOT NULL DEFAULT 0, - `url` varbinary(800) NOT NULL, + `url` varbinary(700) NOT NULL, `created` timestamp NOT NULL DEFAULT now(), `completed` timestamp NULL DEFAULT NULL, `last-try` timestamp NULL DEFAULT NULL, `num-tries` int(11) NOT NULL DEFAULT 0, `data` mediumblob NULL DEFAULT NULL, `http-code` smallint(1) unsigned NULL DEFAULT NULL, - `redirect-url` varbinary(800) NOT NULL, + `redirect-url` varbinary(700) NOT NULL, KEY `url` (`url`), KEY `completed` (`completed`), PRIMARY KEY (`id`) diff --git a/retriever/retriever.php b/retriever/retriever.php index 5bd078c6..977ed49a 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -38,8 +38,8 @@ function retriever_install() { !DBA::e("ALTER TABLE `retriever_item` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci") || !DBA::e("ALTER TABLE `retriever_item` MODIFY `item-uri` varbinary(255) NOT NULL") || !DBA::e("ALTER TABLE `retriever_resource` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci") || - !DBA::e("ALTER TABLE `retriever_resource` MODIFY `url` varbinary(800) NOT NULL") || - !DBA::e("ALTER TABLE `retriever_resource` MODIFY `redirect-url` varbinary(800) NOT NULL")) { + !DBA::e("ALTER TABLE `retriever_resource` MODIFY `url` varbinary(700) NOT NULL") || + !DBA::e("ALTER TABLE `retriever_resource` MODIFY `redirect-url` varbinary(700) NOT NULL")) { Logger::warning('Unable to update database tables: ' . 
DBA::errorMessage()); return; } @@ -479,9 +479,9 @@ function add_retriever_resource($url, $uid, $cid, $binary = false) { return $resource; } - // 800 characters is the size of this field in the database - if (strlen($url) > 800) { - Logger::warning('add_retriever_resource: URL is longer than 800 characters'); + // 700 characters is the size of this field in the database + if (strlen($url) > 700) { + Logger::warning('add_retriever_resource: URL is longer than 700 characters'); } $resource = DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); From 472daa8672544eeccb8d59a98a122c0bb5571b99 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 2 Oct 2022 20:19:53 +0100 Subject: [PATCH 85/96] add log lines to install --- retriever/retriever.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/retriever/retriever.php b/retriever/retriever.php index 977ed49a..640bc5cc 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -27,6 +27,8 @@ use Friendica\DI; * @brief Installation hook for retriever plugin */ function retriever_install() { + Logger::debug('Install retriever'); + Hook::register('addon_settings', 'addon/retriever/retriever.php', 'retriever_addon_settings'); Hook::register('addon_settings_post', 'addon/retriever/retriever.php', 'retriever_addon_settings_post'); Hook::register('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook'); @@ -63,6 +65,8 @@ function retriever_install() { * @brief Uninstallation hook for retriever plugin */ function retriever_uninstall() { + Logger::debug('Uninstall retriever'); + Hook::unregister('addon_settings', 'addon/retriever/retriever.php', 'retriever_addon_settings'); Hook::unregister('addon_settings_post', 'addon/retriever/retriever.php', 'retriever_addon_settings_post'); Hook::unregister('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook'); From 4b1f673ec755440ab8770694a19f7f0d2721cd07 Mon Sep 17 
00:00:00 2001 From: Matthew Exon Date: Sun, 2 Oct 2022 21:29:16 +0200 Subject: [PATCH 86/96] fix order of upgrade commands --- retriever/retriever.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 640bc5cc..52f5f0b0 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -37,11 +37,11 @@ function retriever_install() { if (DI::config()->get('retriever', 'dbversion') == '0.14') { if (!DBA::e("ALTER TABLE `retriever_rule` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci") || - !DBA::e("ALTER TABLE `retriever_item` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci") || !DBA::e("ALTER TABLE `retriever_item` MODIFY `item-uri` varbinary(255) NOT NULL") || - !DBA::e("ALTER TABLE `retriever_resource` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci") || + !DBA::e("ALTER TABLE `retriever_item` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci") || !DBA::e("ALTER TABLE `retriever_resource` MODIFY `url` varbinary(700) NOT NULL") || !DBA::e("ALTER TABLE `retriever_resource` MODIFY `redirect-url` varbinary(700) NOT NULL")) { + !DBA::e("ALTER TABLE `retriever_resource` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci") || Logger::warning('Unable to update database tables: ' . 
DBA::errorMessage()); return; } From 440ff9c6d3c5425d9b6133225a9677699dd8fd64 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sat, 15 Oct 2022 18:02:43 +0200 Subject: [PATCH 87/96] add types to parameters --- retriever/retriever.php | 68 ++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 52f5f0b0..7d1229e6 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -77,19 +77,19 @@ function retriever_uninstall() { } /** - * @brief Module hook for retriever plugin - * - * TODO: figure out what this should be used for + * This is a statement rather than an actual function definition. The simple + * existence of this method is checked to figure out if the addon offers a + * module. */ function retriever_module() {} /** * @brief Admin page hook for retriever plugin * - * @param App $a App object (by ref) + * @param App $a App object (unused) * @param string $o HTML to append content to (by ref) */ -function retriever_addon_admin(&$a, &$o) { +function retriever_addon_admin(App $a, string &$o) { $template = Renderer::getMarkupTemplate('admin.tpl', 'addon/retriever/'); $downloads_per_cron = DI::config()->get('retriever', 'downloads_per_cron'); @@ -141,7 +141,7 @@ $retriever_item_count = 0; * * @param int $max_items Maximum number of items to retrieve in this call */ -function retriever_retrieve_items($max_items) { +function retriever_retrieve_items(int $max_items) { global $retriever_item_count; $retriever_schedule = array(array(1,'minute'), @@ -187,7 +187,7 @@ function retriever_retrieve_items($max_items) { * * @param int $max_items Maximum number of items to retrieve in this call */ -function retriever_clean_up_completed_resources($max_items) { +function retriever_clean_up_completed_resources(int $max_items) { // TODO: figure out how to do this with DBA module $r = DBA::p("SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM 
retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT $max_items"); if (!DBA::isResult($r)) { @@ -240,7 +240,7 @@ function retriever_tidy() { * * @param array $resource The row from the retriever_resource table */ -function retrieve_dataurl_resource($resource) { +function retrieve_dataurl_resource(array $resource) { if (!preg_match("/date:(.*);base64,(.*)/", $resource['url'], $matches)) { Logger::warning('retrieve_dataurl_resource: resource ' . $resource['id'] . ' does not match pattern'); } else { @@ -258,7 +258,7 @@ function retrieve_dataurl_resource($resource) { * * @param array $resource The row from the retriever_resource table */ -function retrieve_resource($resource) { +function retrieve_resource(array $resource) { $components = parse_url($resource['url']); if (!$components) { Logger::warning('retrieve_resource: URL ' . $resource['url'] . 
' could not be parsed'); @@ -325,7 +325,7 @@ function retrieve_resource($resource) { * @param boolean $create Whether to create a new configuration if none exists already * @return array The row from the retriever_rule database for this configuration */ -function get_retriever_rule($contact_id, $uid, $create) { +function get_retriever_rule(string $contact_id, string $uid, bool $create) { $retriever_rule = DBA::selectFirst('retriever_rule', [], ['contact-id' => intval($contact_id), 'uid' => intval($uid)]); if ($retriever_rule) { $retriever_rule['data'] = json_decode($retriever_rule['data'], true); @@ -344,7 +344,7 @@ function get_retriever_rule($contact_id, $uid, $create) { * @param array $retriever_item Row from the retriever_item table * @return array Item that was found, or undef if no item could be found */ -function retriever_get_item($retriever_item) { +function retriever_get_item(array $retriever_item) { $item = Post::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid']), 'contact-id' => intval($retriever_item['contact-id'])]); if (!DBA::isResult($item)) { Logger::warning('retriever_get_item: no item found for uri ' . $retriever_item['item-uri']); @@ -359,7 +359,7 @@ function retriever_get_item($retriever_item) { * @param int $retriever_item_id ID of the retriever item corresponding to this resource * @param array $resource The full details of the completed resource */ -function retriever_item_completed($retriever_item_id, $resource) { +function retriever_item_completed(string $retriever_item_id, array $resource) { Logger::debug('retriever_item_completed: id ' . $retriever_item_id . ' url ' . 
$resource['url']); $retriever_item = DBA::selectFirst('retriever_item', [], ['id' => intval($retriever_item_id)]); @@ -386,7 +386,7 @@ function retriever_item_completed($retriever_item_id, $resource) { * * @param array $resource The full details of the completed resource */ -function retriever_resource_completed($resource) { +function retriever_resource_completed(array $resource) { Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url']); foreach (DBA::selectToArray('retriever_item', ['id'], ['resource' => intval($resource['id'])]) as $retriever_item) { retriever_item_completed($retriever_item['id'], $resource); @@ -399,7 +399,7 @@ function retriever_resource_completed($resource) { * @param array $retriever The row from the retriever_rule table for the contact * @param int $num The number of existing items to queue for retrieval */ -function apply_retrospective($retriever, $num) { +function apply_retrospective(array $retriever, int $num) { foreach (Post::selectToArray([], ['contact-id' => intval($retriever['contact-id'])], ['order' => ['received' => true], 'limit' => $num]) as $item) { Item::update(['visible' => 0], ['id' => intval($item['id'])]); foreach (DBA::selectToArray('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]) as $retriever_item) { @@ -418,7 +418,7 @@ function apply_retrospective($retriever, $num) { * * TODO: This queries then inserts. It should use some kind of lock to avoid requesting the same resource twice. 
*/ -function retriever_on_item_insert($retriever, &$item) { +function retriever_on_item_insert(array $retriever, array &$item) { if (!$retriever || !$retriever['id']) { Logger::info('retriever_on_item_insert: No retriever supplied'); return; @@ -457,7 +457,7 @@ function retriever_on_item_insert($retriever, &$item) { * @param boolean $binary Specifies if this download should be done in binary mode * @return array The created resource */ -function add_retriever_resource($url, $uid, $cid, $binary = false) { +function add_retriever_resource(string $url, string $uid, string $cid, bool $binary = false) { Logger::debug('add_retriever_resource: url ' . $url . ' uid ' . $uid . ' contact-id ' . $cid); $scheme = parse_url($url, PHP_URL_SCHEME); @@ -505,7 +505,7 @@ function add_retriever_resource($url, $uid, $cid, $binary = false) { * @param array $resource Resource that the item needs to wait for. This must have already been stored in the database. * @return int ID of the retriever item that was created, or the existing one if present */ -function add_retriever_item($item, $resource) { +function add_retriever_item(array $item, array $resource) { Logger::debug('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); if (!array_key_exists('id', $resource) || !$resource['id']) { @@ -532,7 +532,7 @@ function add_retriever_item($item, $resource) { * @param array $resource The completed resource * @return string Character encoding, e.g. 
"utf-8" or "iso-8859-1" */ -function retriever_get_encoding($resource) { +function retriever_get_encoding(array $resource) { $matches = array(); if (preg_match('/charset=(.*)/', $resource['type'], $matches)) { return trim(array_pop($matches)); @@ -547,7 +547,7 @@ function retriever_get_encoding($resource) { * @param DOMDocument $doc Input to the XSLT template * @return DOMDocument Result of applying the template */ -function retriever_apply_xslt_text($xslt_text, $doc) { +function retriever_apply_xslt_text(string $xslt_text, DOMDocument $doc) { if (!$xslt_text) { Logger::info('retriever_apply_xslt_text: empty XSLT text'); return $doc; @@ -570,7 +570,7 @@ function retriever_apply_xslt_text($xslt_text, $doc) { * @param array &$item Item to be in which to store the new body (by ref). This may or may not be already stored in the database. * @param array $resource Newly completed resource, which should be text (HTML or XML) */ -function retriever_apply_dom_filter($retriever, &$item, $resource) { +function retriever_apply_dom_filter(array $retriever, array &$item, array $resource) { Logger::debug('retriever_apply_dom_filter: applying XSLT to uri ' . $item['uri'] . ' uid ' . $item['uid'] . ' contact ' . 
$item['contact-id']); if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) { @@ -614,7 +614,7 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { * * @param array $resource The resource containing the text content */ -function retriever_load_into_dom($resource) { +function retriever_load_into_dom(array $resource) { $encoding = retriever_get_encoding($resource); $content = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $encoding); $doc = new DOMDocument('1.0', 'UTF-8'); @@ -634,7 +634,7 @@ function retriever_load_into_dom($resource) { * @param array $retriever The retriever configuration for this contact * @return DOMDocument New DOM document containing only the desired content */ -function retriever_extract($doc, $retriever) { +function retriever_extract(DOMDocument $doc, array $retriever) { $params = array('$spec' => $retriever['data']); $extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/'); $extract_xslt = Renderer::replaceMacros($extract_template, $params); @@ -656,7 +656,7 @@ function retriever_extract($doc, $retriever) { * @param array $resource Completed resource which contains the text in the DOM document * @return DOMDocument New DOM document with global URLs */ -function retriever_globalise_urls($doc, $resource) { +function retriever_globalise_urls(DOMDocument $doc, array $resource) { $components = parse_url($resource['redirect-url']); if (!array_key_exists('scheme', $components) || !array_key_exists('host', $components) || !array_key_exists('path', $components)) { return $doc; @@ -675,7 +675,7 @@ function retriever_globalise_urls($doc, $resource) { * * @param array $item Row from the item table */ -function retriever_get_body($item) { +function retriever_get_body(array $item) { if (!array_key_exists('uri-id', $item) || !$item['uri-id']) { // item has not yet been stored in database return $item['body']; @@ -703,7 +703,7 @@ function 
retriever_get_body($item) { * @param array &$item Item in which to set the body (by ref). This may or may not be already stored in the database. * @param string $body New body content */ -function retriever_set_body(&$item, $body) { +function retriever_set_body(array &$item, string $body) { $item['body'] = $body; if (!array_key_exists('id', $item) || !$item['id']) { // item has not yet been stored in database @@ -717,7 +717,7 @@ function retriever_set_body(&$item, $body) { * * @param array &$item Item to be searched for images and updated (by ref). This may or may not be already stored in the database. */ -function retrieve_images(&$item) { +function retrieve_images(array &$item) { if (!DI::config()->get('retriever', 'allow_images')) { return; } @@ -755,7 +755,7 @@ function retrieve_images(&$item) { * * @param array &$item Row from the item table (by ref) */ -function retriever_check_item_completed(&$item) +function retriever_check_item_completed(array &$item) { $waiting = DBA::selectFirst('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'contact-id' => intval($item['contact-id']), 'finished' => 0]); Logger::debug('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] . ' '. $item['contact-id'] . ' waiting for resources'); @@ -774,7 +774,7 @@ function retriever_check_item_completed(&$item) * @param array &$item Row from the item table (by ref) * @param array $resource The resource that has just been completed */ -function retriever_apply_completed_resource_to_item($retriever, &$item, $resource) { +function retriever_apply_completed_resource_to_item(array $retriever, array &$item, array $resource) { Logger::debug('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . 
$item['plink']); if (strpos($resource['type'], 'image') !== false) { retriever_transform_images($item, $resource); @@ -800,7 +800,7 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc * * TODO: split this into two functions, one to store the image, the other to change the item body */ -function retriever_transform_images(&$item, $resource) { +function retriever_transform_images(array &$item, array $resource) { if (!$resource['data']) { Logger::info('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url']); return; @@ -842,7 +842,7 @@ function retriever_transform_images(&$item, $resource) { * * @param App $a The App object */ -function retriever_content($a) { +function retriever_content(App $a) { if (!local_user()) { $a->page['content'] .= "

Please log in

"; return; @@ -973,7 +973,7 @@ function retriever_content($a) { * @param App $a The App object * @param array $args Contact menu details to be filled in (by ref) */ -function retriever_contact_photo_menu($a, &$args) { +function retriever_contact_photo_menu(App $a, array &$args) { if (!$args) { return; } @@ -988,7 +988,7 @@ function retriever_contact_photo_menu($a, &$args) { * @param App $a The App object (by ref) * @param array $item New item, which has not yet been inserted into database (by ref) */ -function retriever_post_remote_hook(&$a, &$item) { +function retriever_post_remote_hook(App &$a, array &$item) { Logger::info('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); $retriever_rule = get_retriever_rule($item['contact-id'], $item["uid"], false); @@ -1015,7 +1015,7 @@ function retriever_post_remote_hook(&$a, &$item) { * @param App $a The App object (by ref) * @param string $s HTML string to which to append settings content (by ref) */ -function retriever_addon_settings(&$a, &$s) { +function retriever_addon_settings(App &$a, string &$s) { $all_photos = DI::config()->get(local_user(), 'retriever', 'all_photos'); $oembed = DI::config()->get(local_user(), 'retriever', 'oembed'); $template = Renderer::getMarkupTemplate('/settings.tpl', 'addon/retriever/'); @@ -1040,7 +1040,7 @@ function retriever_addon_settings(&$a, &$s) { * @param App $a The App object * @param array $post Posted content */ -function retriever_addon_settings_post($a, $post) { +function retriever_addon_settings_post(App $a, array $post) { if ($post['retriever_all_photos']) { DI::config()->set(local_user(), 'retriever', 'all_photos', $post['retriever_all_photos']); } From 00b370c6713a95a0e5667cc9454f5854821b6107 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sat, 7 Jan 2023 00:05:19 +0100 Subject: [PATCH 88/96] Add missing use statement --- retriever/retriever.php | 1 + 1 file changed, 1 insertion(+) diff --git a/retriever/retriever.php 
b/retriever/retriever.php index 7d1229e6..7b9a7bf4 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -22,6 +22,7 @@ use Friendica\Model\Item; use Friendica\Model\Post; use Friendica\Util\DateTimeFormat; use Friendica\DI; +use Friendica\App; /** * @brief Installation hook for retriever plugin From 7702b6b8ce430bdd6aa5a128b261327b7e62dc86 Mon Sep 17 00:00:00 2001 From: Michael Date: Mon, 17 Oct 2022 05:50:23 +0000 Subject: [PATCH 89/96] The priority is now a class constant --- ifttt/ifttt.php | 2 +- twitter/twitter.php | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ifttt/ifttt.php b/ifttt/ifttt.php index 6d4f402c..faaea2ca 100644 --- a/ifttt/ifttt.php +++ b/ifttt/ifttt.php @@ -180,5 +180,5 @@ function ifttt_message($uid, $item) $link = hash('ripemd128', $item['msg']); } - Post\Delayed::add($link, $post, Worker::PRIORITY_MEDIUM, Post\Delayed::PREPARED); + Post\Delayed::add($link, $post, Worker::PRIORITY_MEDIUM, Post\Delayed::UNPREPARED); } diff --git a/twitter/twitter.php b/twitter/twitter.php index c55489a0..59861a35 100644 --- a/twitter/twitter.php +++ b/twitter/twitter.php @@ -1363,7 +1363,7 @@ function twitter_fetchtimeline(int $uid): void Logger::info('Posting mirror post', ['twitter-id' => $post->id_str, 'uid' => $uid]); - Post\Delayed::add($mirrorpost['extid'], $mirrorpost, Worker::PRIORITY_MEDIUM, Post\Delayed::PREPARED); + Post\Delayed::add($mirrorpost['extid'], $mirrorpost, Worker::PRIORITY_MEDIUM, Post\Delayed::UNPREPARED); } } DI::pConfig()->set($uid, 'twitter', 'lastid', $lastid); From 6dac6a00e9ea8284a8e0b63adcc39eae0a611828 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 28 Dec 2022 19:40:50 +0100 Subject: [PATCH 90/96] replace local_user --- retriever/retriever.php | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 7b9a7bf4..d374ca80 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -10,6 
+10,7 @@ use Friendica\Core\Addon; use Friendica\Core\Hook; use Friendica\Core\Logger; use Friendica\Core\Renderer; +use Friendica\Core\Session; use Friendica\Core\System; use Friendica\Content\Text\HTML; use Friendica\Content\Text\BBCode; @@ -844,12 +845,12 @@ function retriever_transform_images(array &$item, array $resource) { * @param App $a The App object */ function retriever_content(App $a) { - if (!local_user()) { + if (!Session::getLocalUser()) { $a->page['content'] .= "

Please log in

"; return; } if (isset(DI::args()->getArgv()[1]) and DI::args()->getArgv()[1] === 'help') { - $feeds = DBA::selectToArray('contact', ['id', 'name', 'thumb'], ['uid' => local_user(), 'network' => 'feed']); + $feeds = DBA::selectToArray('contact', ['id', 'name', 'thumb'], ['uid' => Session::getLocalUser(), 'network' => 'feed']); for ($i = 0; $i < count($feeds); ++$i) { $feeds[$i]['url'] = DI::baseUrl()->get(true) . '/retriever/' . $feeds[$i]['id']; } @@ -862,13 +863,13 @@ function retriever_content(App $a) { } if (isset(DI::args()->getArgv()[1])) { $arg1 = DI::args()->getArgv()[1]; - $retriever_rule = get_retriever_rule($arg1, local_user(), false); + $retriever_rule = get_retriever_rule($arg1, Session::getLocalUser(), false); if (!$retriever_rule) { $retriever_rule = ['id' => 0, 'data' => ['enable' => 0, 'modurl' => '', 'pattern' => '', 'replace' => '', 'images' => 0, 'storecookies' => 0, 'cookiedata' => '', 'customxslt' => '', 'include' => '', 'exclude' => '']]; } if (!empty($_POST["id"])) { - $retriever_rule = get_retriever_rule($arg1, local_user(), true); + $retriever_rule = get_retriever_rule($arg1, Session::getLocalUser(), true); $retriever_rule['data'] = array(); foreach (array('modurl', 'pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) { if (empty($_POST['retriever_' . 
$setting])) { @@ -1017,8 +1018,8 @@ function retriever_post_remote_hook(App &$a, array &$item) { * @param string $s HTML string to which to append settings content (by ref) */ function retriever_addon_settings(App &$a, string &$s) { - $all_photos = DI::config()->get(local_user(), 'retriever', 'all_photos'); - $oembed = DI::config()->get(local_user(), 'retriever', 'oembed'); + $all_photos = DI::config()->get(Session::getLocalUser(), 'retriever', 'all_photos'); + $oembed = DI::config()->get(Session::getLocalUser(), 'retriever', 'oembed'); $template = Renderer::getMarkupTemplate('/settings.tpl', 'addon/retriever/'); $config = array('$submit' => DI::l10n()->t('Save Settings'), '$title' => DI::l10n()->t('Retriever Settings'), @@ -1043,15 +1044,15 @@ function retriever_addon_settings(App &$a, string &$s) { */ function retriever_addon_settings_post(App $a, array $post) { if ($post['retriever_all_photos']) { - DI::config()->set(local_user(), 'retriever', 'all_photos', $post['retriever_all_photos']); + DI::config()->set(Session::getLocalUser(), 'retriever', 'all_photos', $post['retriever_all_photos']); } else { - DI::config()->delete(local_user(), 'retriever', 'all_photos'); + DI::config()->delete(Session::getLocalUser(), 'retriever', 'all_photos'); } if ($post['retriever_oembed']) { - DI::config()->set(local_user(), 'retriever', 'oembed', $post['retriever_oembed']); + DI::config()->set(Session::getLocalUser(), 'retriever', 'oembed', $post['retriever_oembed']); } else { - DI::config()->delete(local_user(), 'retriever', 'oembed'); + DI::config()->delete(Session::getLocalUser(), 'retriever', 'oembed'); } } From be44ac9b2b8cd93008866544d5d134d052036311 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Thu, 29 Dec 2022 19:57:03 +0100 Subject: [PATCH 91/96] fix contact photo menu callback --- retriever/retriever.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index d374ca80..46916647 100644 --- 
a/retriever/retriever.php +++ b/retriever/retriever.php @@ -975,7 +975,7 @@ function retriever_content(App $a) { * @param App $a The App object * @param array $args Contact menu details to be filled in (by ref) */ -function retriever_contact_photo_menu(App $a, array &$args) { +function retriever_contact_photo_menu(array &$args) { if (!$args) { return; } From 761a9d8c209d72eacf47fa064304ac5c35cb4ca8 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Thu, 29 Dec 2022 20:02:18 +0100 Subject: [PATCH 92/96] fix contact photo menu callback really --- retriever/retriever.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 46916647..a5e2f779 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -6,6 +6,7 @@ * Author: Matthew Exon */ +use Friendica\App; use Friendica\Core\Addon; use Friendica\Core\Hook; use Friendica\Core\Logger; @@ -972,10 +973,10 @@ function retriever_content(App $a) { /** * @brief Hook that adds the retriever option to the contact menu * - * @param App $a The App object + * @param App $a The App object (by ref) * @param array $args Contact menu details to be filled in (by ref) */ -function retriever_contact_photo_menu(array &$args) { +function retriever_contact_photo_menu(App &$a, array &$args) { if (!$args) { return; } From b677b6a63f8fe21c2d5d052f9c3b211927d3993c Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sat, 7 Jan 2023 18:46:09 +0100 Subject: [PATCH 93/96] remove duplicate use directive --- retriever/retriever.php | 1 - 1 file changed, 1 deletion(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index a5e2f779..9370271c 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -24,7 +24,6 @@ use Friendica\Model\Item; use Friendica\Model\Post; use Friendica\Util\DateTimeFormat; use Friendica\DI; -use Friendica\App; /** * @brief Installation hook for retriever plugin From 7a8296f356027dd9eb047a7a92dcbbb504a0407f Mon Sep 17 
00:00:00 2001 From: Matthew Exon Date: Fri, 5 May 2023 17:36:11 +0200 Subject: [PATCH 94/96] remove App arguments --- mailstream/mailstream.php | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/mailstream/mailstream.php b/mailstream/mailstream.php index 542a1a42..1392d0d1 100644 --- a/mailstream/mailstream.php +++ b/mailstream/mailstream.php @@ -66,10 +66,9 @@ function mailstream_module() {} /** * Adds an item in "addon features" in the admin menu of the site * - * @param App $a App object (unused) * @param string $o HTML form data */ -function mailstream_addon_admin(App $a, string &$o) +function mailstream_addon_admin(string &$o) { $frommail = DI::config()->get('mailstream', 'frommail'); $template = Renderer::getMarkupTemplate('admin.tpl', 'addon/mailstream/'); @@ -110,7 +109,7 @@ function mailstream_generate_id(string $uri): string return $message_id; } -function mailstream_send_hook(App $a, array $data) +function mailstream_send_hook(array $data) { $criteria = array('uid' => $data['uid'], 'contact-id' => $data['contact-id'], 'uri' => $data['uri']); $item = Post::selectFirst([], $criteria); @@ -138,11 +137,10 @@ function mailstream_send_hook(App $a, array $data) * mailstream is enabled and the necessary data is available, forks a * workerqueue item to send the email. 
* - * @param App $a App object (unused) * @param array $item content of the item (may or may not already be stored in the item table) * @return void */ -function mailstream_post_hook(App $a, array &$item) +function mailstream_post_hook(array &$item) { mailstream_check_version(); @@ -468,7 +466,7 @@ function mailstream_convert_table_entries() 'message_id' => $ms_item_id['message-id'], 'tries' => 0); if (!$ms_item_id['message-id'] || !strlen($ms_item_id['message-id'])) { - Logger::info('mailstream_convert_table_entries: item has no message-id.', 'item' => $ms_item_id['id'], 'uri' => $ms_item_id['uri']]); + Logger::info('mailstream_convert_table_entries: item has no message-id.', ['item' => $ms_item_id['id'], 'uri' => $ms_item_id['uri']]); continue; } Logger::info('mailstream_convert_table_entries: convert item to workerqueue', $send_hook_data); @@ -480,11 +478,10 @@ function mailstream_convert_table_entries() /** * Form for configuring mailstream features for a user * - * @param App $a App object * @param array $data Hook data array * @throws \Friendica\Network\HTTPException\ServiceUnavailableException */ -function mailstream_addon_settings(App &$a, array &$data) +function mailstream_addon_settings(array &$data) { $enabled = DI::pConfig()->get(DI::userSession()->getLocalUserId(), 'mailstream', 'enabled'); $address = DI::pConfig()->get(DI::userSession()->getLocalUserId(), 'mailstream', 'address'); @@ -528,11 +525,10 @@ function mailstream_addon_settings(App &$a, array &$data) /** * Process data submitted to user's mailstream features form - * @param App $a * @param array $post POST data * @return void */ -function mailstream_addon_settings_post(App $a, array $post) +function mailstream_addon_settings_post(array $post) { if (!DI::userSession()->getLocalUserId() || empty($post['mailstream-submit'])) { return; From 260d17ed74ff12743080765a240941f19a5790bb Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Fri, 5 May 2023 18:08:51 +0200 Subject: [PATCH 95/96] use new style 
of accessing baseUrl --- mailstream/mailstream.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mailstream/mailstream.php b/mailstream/mailstream.php index 1392d0d1..f53d4fe9 100644 --- a/mailstream/mailstream.php +++ b/mailstream/mailstream.php @@ -102,7 +102,7 @@ function mailstream_addon_admin_post() */ function mailstream_generate_id(string $uri): string { - $host = DI::baseUrl()->getHostname(); + $host = DI::baseUrl()->getHost(); $resource = hash('md5', $uri); $message_id = "<" . $resource . "@" . $host . ">"; Logger::debug('mailstream: Generated message ID ' . $message_id . ' for URI ' . $uri); @@ -412,7 +412,7 @@ function mailstream_send(string $message_id, array $item, array $user): bool $template = Renderer::getMarkupTemplate('mail.tpl', 'addon/mailstream/'); $mail->AltBody = BBCode::toPlaintext($item['body']); $item['body'] = BBCode::convertForUriId($item['uri-id'], $item['body'], BBCode::CONNECTORS); - $item['url'] = DI::baseUrl()->get() . '/display/' . $item['guid']; + $item['url'] = DI::baseUrl() . '/display/' . 
$item['guid']; $mail->Body = Renderer::replaceMacros($template, [ '$upstream' => DI::l10n()->t('Upstream'), '$uri' => DI::l10n()->t('URI'), From 8516079ed1c4677c4c7fca807a895830a1c675c3 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 7 May 2023 13:37:00 +0200 Subject: [PATCH 96/96] log uid but ignore results --- mailstream/mailstream.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mailstream/mailstream.php b/mailstream/mailstream.php index f53d4fe9..5ece12a1 100644 --- a/mailstream/mailstream.php +++ b/mailstream/mailstream.php @@ -143,10 +143,11 @@ function mailstream_send_hook(array $data) function mailstream_post_hook(array &$item) { mailstream_check_version(); + Logger::debug('@@@ mailstream_post_hook', ['item-uid' => $item['uid']]); if (!DI::pConfig()->get($item['uid'], 'mailstream', 'enabled')) { - Logger::debug('mailstream: not enabled.', ['item' => $item['id'], ' uid ' => $item['uid']]); - return; + Logger::debug('mailstream: not enabled for item ' . $item['id'] . ' uid ' . $item['uid']); + // return; } if (!$item['uid']) { Logger::debug('mailstream: no uid for item ' . $item['id']);