From b994de33082d23fe40a0a894b86e58bac3a4acb6 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 7 Jul 2019 14:45:23 +0100 Subject: [PATCH 01/94] Latest version of retriever --- retriever/database.sql | 40 ++ retriever/retriever.php | 832 ++++++++++++++++++++++++++++ retriever/templates/extract.tpl | 32 ++ retriever/templates/fix-urls.tpl | 26 + retriever/templates/help.tpl | 148 +++++ retriever/templates/rule-config.tpl | 112 ++++ retriever/templates/settings.tpl | 9 + 7 files changed, 1199 insertions(+) create mode 100644 retriever/database.sql create mode 100644 retriever/retriever.php create mode 100644 retriever/templates/extract.tpl create mode 100644 retriever/templates/fix-urls.tpl create mode 100644 retriever/templates/help.tpl create mode 100644 retriever/templates/rule-config.tpl create mode 100644 retriever/templates/settings.tpl diff --git a/retriever/database.sql b/retriever/database.sql new file mode 100644 index 00000000..340e33eb --- /dev/null +++ b/retriever/database.sql @@ -0,0 +1,40 @@ +CREATE TABLE IF NOT EXISTS `retriever_rule` ( + `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `uid` int(11) NOT NULL, + `contact-id` int(11) NOT NULL, + `data` mediumtext NULL DEFAULT NULL, + PRIMARY KEY (`id`), + KEY `uid` (`uid`), + KEY `contact-id` (`contact-id`) +) DEFAULT CHARSET=utf8 COLLATE=utf8_bin; + +CREATE TABLE IF NOT EXISTS `retriever_item` ( + `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `item-uri` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL, + `item-uid` int(10) unsigned NOT NULL DEFAULT '0', + `contact-id` int(10) unsigned NOT NULL DEFAULT '0', + `resource` int(11) NOT NULL, + `finished` tinyint(1) unsigned NOT NULL DEFAULT '0', + KEY `resource` (`resource`), + KEY `finished` (`finished`), + KEY `item-uid` (`item-uid`), + KEY `all` (`item-uri`, `item-uid`, `contact-id`), + PRIMARY KEY (`id`) +) DEFAULT CHARSET=utf8 COLLATE=utf8_bin; + +CREATE TABLE IF NOT EXISTS `retriever_resource` ( + `id` int(11) unsigned NOT NULL 
AUTO_INCREMENT, + `type` char(255) NULL DEFAULT NULL, + `binary` int(1) NOT NULL DEFAULT 0, + `url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL, + `created` timestamp NOT NULL DEFAULT now(), + `completed` timestamp NULL DEFAULT NULL, + `last-try` timestamp NULL DEFAULT NULL, + `num-tries` int(11) NOT NULL DEFAULT 0, + `data` mediumblob NULL DEFAULT NULL, + `http-code` smallint(1) unsigned NULL DEFAULT NULL, + `redirect-url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NULL DEFAULT NULL, + KEY `url` (`url`), + KEY `completed` (`completed`), + PRIMARY KEY (`id`) +) DEFAULT CHARSET=utf8 COLLATE=utf8_bin diff --git a/retriever/retriever.php b/retriever/retriever.php new file mode 100644 index 00000000..78fe575f --- /dev/null +++ b/retriever/retriever.php @@ -0,0 +1,832 @@ + + * Status: Unsupported + */ + +use Friendica\Core\Addon; +use Friendica\Core\Config; +use Friendica\Core\PConfig; +use Friendica\Content\Text\HTML; +use Friendica\Content\Text\BBCode; +use Friendica\Object\Image; +use Friendica\Util\Network; +use Friendica\Core\L10n; +use Friendica\Database\DBA; + +function retriever_install() { + Addon::registerHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings'); + Addon::registerHook('plugin_settings_post', 'addon/retriever/retriever.php', 'retriever_plugin_settings_post'); + Addon::registerHook('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook'); + Addon::registerHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); + Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); + + $r = q("SELECT `id` FROM `pconfig` WHERE `cat` LIKE 'retriever_%%'"); + if (count($r) || (Config::get('retriever', 'dbversion') == '0.1')) { + $retrievers = array(); + $r = q("SELECT SUBSTRING(`cat`, 10) AS `contact`, `k`, `v` FROM `pconfig` WHERE `cat` LIKE 'retriever%%'"); + foreach 
($r as $rr) { + $retrievers[$rr['contact']][$rr['k']] = $rr['v']; + } + foreach ($retrievers as $k => $v) { + $rr = q("SELECT `uid` FROM `contact` WHERE `id` = %d", intval($k)); + $uid = $rr[0]['uid']; + $v['images'] = 'on'; + q("INSERT INTO `retriever_rule` (`uid`, `contact-id`, `data`) VALUES (%d, %d, '%s')", + intval($uid), intval($k), DBA::escape(json_encode($v))); + } + q("DELETE FROM `pconfig` WHERE `cat` LIKE 'retriever_%%'"); + Config::set('retriever', 'dbversion', '0.2'); + } + if (Config::get('retriever', 'dbversion') == '0.2') { + q("ALTER TABLE `retriever_resource` DROP COLUMN `retriever`"); + Config::set('retriever', 'dbversion', '0.3'); + } + if (Config::get('retriever', 'dbversion') == '0.3') { + q("ALTER TABLE `retriever_item` MODIFY COLUMN `item-uri` varchar(800) CHARACTER SET ascii NOT NULL"); + q("ALTER TABLE `retriever_resource` MODIFY COLUMN `url` varchar(800) CHARACTER SET ascii NOT NULL"); + Config::set('retriever', 'dbversion', '0.4'); + } + if (Config::get('retriever', 'dbversion') == '0.4') { + q("ALTER TABLE `retriever_item` ADD COLUMN `finished` tinyint(1) unsigned NOT NULL DEFAULT '0'"); + Config::set('retriever', 'dbversion', '0.5'); + } + if (Config::get('retriever', 'dbversion') == '0.5') { + q('ALTER TABLE `retriever_resource` CHANGE `created` `created` timestamp NOT NULL DEFAULT now()'); + q('ALTER TABLE `retriever_resource` CHANGE `completed` `completed` timestamp NULL DEFAULT NULL'); + q('ALTER TABLE `retriever_resource` CHANGE `last-try` `last-try` timestamp NULL DEFAULT NULL'); + q('ALTER TABLE `retriever_item` DROP KEY `all`'); + q('ALTER TABLE `retriever_item` ADD KEY `all` (`item-uri`, `item-uid`, `contact-id`)'); + Config::set('retriever', 'dbversion', '0.6'); + } + if (Config::get('retriever', 'dbversion') == '0.6') { + q('ALTER TABLE `retriever_item` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin'); + q('ALTER TABLE `retriever_item` CHANGE `item-uri` `item-uri` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT 
NULL'); + q('ALTER TABLE `retriever_resource` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin'); + q('ALTER TABLE `retriever_resource` CHANGE `url` `url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL'); + q('ALTER TABLE `retriever_rule` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin'); + Config::set('retriever', 'dbversion', '0.7'); + } + if (Config::get('retriever', 'dbversion') == '0.7') { + $r = q("SELECT `id`, `data` FROM `retriever_rule`"); + foreach ($r as $rr) { + logger('retriever_install: retriever ' . $rr['id'] . ' old config ' . $rr['data'], LOGGER_DATA); + $data = json_decode($rr['data'], true); + if ($data['pattern']) { + $matches = array(); + if (preg_match("/\/(.*)\//", $data['pattern'], $matches)) { + $data['pattern'] = $matches[1]; + } + } + if ($data['match']) { + $include = array(); + foreach (explode('|', $data['match']) as $component) { + $matches = array(); + if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[@([A-Za-z][a-z0-9]*)='([^']*)'\]/", $component, $matches)) { + $include[] = array( + 'element' => $matches[1], + 'attribute' => $matches[2], + 'value' => $matches[3]); + } + if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[contains(concat(' ',normalize-space(@class),' '),' ([^ ']+) ')]/", $component, $matches)) { + $include[] = array( + 'element' => $matches[1], + 'attribute' => $matches[2], + 'value' => $matches[3]); + } + } + $data['include'] = $include; + unset($data['match']); + } + if ($data['remove']) { + $exclude = array(); + foreach (explode('|', $data['remove']) as $component) { + $matches = array(); + if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[@([A-Za-z][a-z0-9]*)='([^']*)'\]/", $component, $matches)) { + $exclude[] = array( + 'element' => $matches[1], + 'attribute' => $matches[2], + 'value' => $matches[3]); + } + if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[contains(concat(' ',normalize-space(@class),' '),' ([^ ']+) ')]/", $component, $matches)) { + $exclude[] = array( + 'element' => $matches[1], + 'attribute' => $matches[2], + 'value' => 
$matches[3]); + } + } + $data['exclude'] = $exclude; + unset($data['remove']); + } + $r = q('UPDATE `retriever_rule` SET `data` = "%s" WHERE `id` = %d', DBA::escape(json_encode($data)), $rr['id']); + logger('retriever_install: retriever ' . $rr['id'] . ' new config ' . json_encode($data), LOGGER_DATA); + } + Config::set('retriever', 'dbversion', '0.8'); + } + if (Config::get('retriever', 'dbversion') == '0.8') { + q("ALTER TABLE `retriever_resource` ADD COLUMN `http-code` smallint(1) unsigned NULL DEFAULT NULL"); + Config::set('retriever', 'dbversion', '0.9'); + } + if (Config::get('retriever', 'dbversion') == '0.9') { + q("ALTER TABLE `retriever_item` DROP COLUMN `parent`"); + q("ALTER TABLE `retriever_resource` ADD COLUMN `redirect-url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NULL DEFAULT NULL"); + Config::set('retriever', 'dbversion', '0.10'); + } + if (Config::get('retriever', 'dbversion') == '0.10') { + q("ALTER TABLE `retriever_resource` MODIFY COLUMN `type` char(255) NULL DEFAULT NULL"); + q("ALTER TABLE `retriever_resource` MODIFY COLUMN `data` mediumblob NULL DEFAULT NULL"); + q("ALTER TABLE `retriever_rule` MODIFY COLUMN `data` mediumtext NULL DEFAULT NULL"); + Config::set('retriever', 'dbversion', '0.11'); + } + if (Config::get('retriever', 'dbversion') == '0.11') { + q("ALTER TABLE `retriever_resource` ADD INDEX `url` (`url`)"); + q("ALTER TABLE `retriever_resource` ADD INDEX `completed` (`completed`)"); + q("ALTER TABLE `retriever_item` ADD INDEX `finished` (`finished`)"); + q("ALTER TABLE `retriever_item` ADD INDEX `item-uid` (`item-uid`)"); + Config::set('retriever', 'dbversion', '0.12'); + } + if (Config::get('retriever', 'dbversion') != '0.12') { + $schema = file_get_contents(dirname(__file__).'/database.sql'); + $arr = explode(';', $schema); + foreach ($arr as $a) { + $r = q($a); + } + Config::set('retriever', 'dbversion', '0.12'); + } +} + +function retriever_uninstall() { + Addon::unregisterHook('plugin_settings', 
'addon/retriever/retriever.php', 'retriever_plugin_settings'); + Addon::unregisterHook('plugin_settings_post', 'addon/retriever/retriever.php', 'retriever_plugin_settings_post'); + Addon::unregisterHook('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook'); + Addon::unregisterHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings'); + Addon::unregisterHook('plugin_settings_post', 'addon/retriever/retriever.php', 'retriever_plugin_settings_post'); + Addon::unregisterHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); + Addon::unregisterHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); +} + +function retriever_module() {} + +function retriever_cron($a, $b) { + // 100 is a nice sane number. Maybe this should be configurable. + retriever_retrieve_items(100, $a); + retriever_tidy(); +} + +$retriever_item_count = 0; + +function retriever_retrieve_items($max_items, $a) { + global $retriever_item_count; + + $retriever_schedule = array(array(1,'minute'), + array(10,'minute'), + array(1,'hour'), + array(1,'day'), + array(2,'day'), + array(1,'week'), + array(1,'month')); + + $schedule_clauses = array(); + for ($i = 0; $i < count($retriever_schedule); $i++) { + $num = $retriever_schedule[$i][0]; + $unit = $retriever_schedule[$i][1]; + array_push($schedule_clauses, + '(`num-tries` = ' . $i . ' AND TIMESTAMPADD(' . DBA::escape($unit) . + ', ' . intval($num) . ', `last-try`) < now())'); + } + + $retrieve_items = $max_items - $retriever_item_count; + logger('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . 
$retrieve_items, LOGGER_DEBUG); + do { + $r = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d", + DBA::escape(implode(' OR ', $schedule_clauses)), /* separator first: the reversed argument order is rejected by PHP 8 */ + intval($retrieve_items)); + if (!is_array($r)) { + break; + } + if (count($r) == 0) { + break; + } + logger('retriever_retrieve_items: found ' . count($r) . ' waiting resources in database', LOGGER_DEBUG); + foreach ($r as $rr) { + retrieve_resource($rr); + $retriever_item_count++; + } + $retrieve_items = $max_items - $retriever_item_count; + } + while ($retrieve_items > 0); + + /* Look for items that are waiting even though the resource has + * completed. This usually happens because we've been asked to + * retrospectively apply a config change. It could also happen + * due to a cron job dying or something. */ + $r = q("SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d", + intval($retrieve_items)); + if (!$r) { + $r = array(); + } + logger('retriever_retrieve_items: items waiting even though resource has completed: ' . count($r), LOGGER_DEBUG); + foreach ($r as $rr) { + $resource = q("SELECT * FROM retriever_resource WHERE `id` = %d", $rr['resource']); + $retriever_item = retriever_get_retriever_item($rr['item']); + if (!$retriever_item) { + logger('retriever_retrieve_items: no retriever item with id ' . $rr['item'], LOGGER_INFO); + continue; + } + $item = retriever_get_item($retriever_item); + if (!$item) { + logger('retriever_retrieve_items: no item ' . 
$retriever_item['item-uri'], LOGGER_INFO); + continue; + } + $retriever = get_retriever($item['contact-id'], $item['uid']); + if (!$retriever) { + logger('retriever_retrieve_items: no retriever for item ' . + $retriever_item['item-uri'] . ' ' . $retriever_item['item-uid'] . ' ' . $item['contact-id'], + LOGGER_INFO); + continue; + } + retriever_apply_completed_resource_to_item($retriever, $item, $resource[0], $a); + q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", + intval($retriever_item['id'])); + retriever_check_item_completed($item); + } +} + +function retriever_tidy() { + q("DELETE FROM retriever_resource WHERE completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)"); + q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)"); + + $r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); + logger('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource'); + foreach ($r as $rr) { + q('DELETE FROM retriever_item WHERE id = %d', intval($rr['id'])); + } +} + +function retrieve_dataurl_resource($resource) { + if (!preg_match("/data:(.*);base64,(.*)/", $resource['url'], $matches)) { /* the scheme is "data:", the same prefix retrieve_resource() dispatches on */ + logger('retrieve_dataurl_resource: ' . $resource['id'] . 
' does not match pattern'); + } else { + $resource['type'] = $matches[1]; + $resource['data'] = base64url_decode($matches[2]); + } + + // Succeed or fail, there's no point retrying + q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `completed` = now(), `data` = '%s', `type` = '%s' WHERE id = %d", + DBA::escape($resource['data']), + DBA::escape($resource['type']), + intval($resource['id'])); + retriever_resource_completed($resource, get_app()); /* this function has no $a of its own */ +} + +function retrieve_resource($resource) { + if (substr($resource['url'], 0, 5) == "data:") { + return retrieve_dataurl_resource($resource); + } + + $a = get_app(); + + try { + logger('retrieve_resource: ' . ($resource['num-tries'] + 1) . + ' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], LOGGER_DEBUG); + $redirects; + $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); + $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, array('cookiejar' => $cookiejar)); + unlink($cookiejar); + $resource['data'] = $fetch_result['body']; + $resource['http-code'] = $a->get_curl_code(); + $resource['type'] = $a->get_curl_content_type(); + $resource['redirect-url'] = $fetch_result['redirect_url']; + logger('retrieve_resource: got code ' . $resource['http-code'] . + ' retrieving resource ' . $resource['id'] . + ' final url ' . $resource['redirect-url'], LOGGER_DEBUG); + } catch (Exception $e) { + logger('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . 
$e->getMessage()); + } + q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `http-code` = %d, `redirect-url` = '%s' WHERE id = %d", + intval($resource['http-code']), + DBA::escape($resource['redirect-url']), + intval($resource['id'])); + if ($resource['data']) { + q("UPDATE `retriever_resource` SET `completed` = now(), `data` = '%s', `type` = '%s' WHERE id = %d", + DBA::escape($resource['data']), + DBA::escape($resource['type']), + intval($resource['id'])); + retriever_resource_completed($resource, $a); + } +} + +function get_retriever($contact_id, $uid, $create = false) { + $r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d", + intval($contact_id), intval($uid)); + if (count($r)) { + $r[0]['data'] = json_decode($r[0]['data'], true); + return $r[0]; + } + if ($create) { + q("INSERT INTO `retriever_rule` (`uid`, `contact-id`) VALUES (%d, %d)", + intval($uid), intval($contact_id)); + $r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d", + intval($contact_id), intval($uid)); + return $r[0]; + } +} + +function retriever_get_retriever_item($id) { + $retriever_items = q("SELECT * FROM `retriever_item` WHERE id = %d", intval($id)); + if (count($retriever_items) != 1) { + logger('retriever_get_retriever_item: unable to find retriever_item ' . $id, LOGGER_INFO); + return; + } + return $retriever_items[0]; +} + +function retriever_get_item($retriever_item) { + $items = q("SELECT * FROM `item` WHERE `uri` = '%s' AND `uid` = %d AND `contact-id` = %d", + DBA::escape($retriever_item['item-uri']), + intval($retriever_item['item-uid']), + intval($retriever_item['contact-id'])); + if (count($items) != 1) { + logger('retriever_get_item: unexpected number of results ' . + count($items) . ' when searching for item ' . $retriever_item['item-uri'] . ' ' . $retriever_item['item-uid'] . ' ' . $retriever_item['contact-id'], LOGGER_INFO); /* log the keys actually queried; $uri/$uid/$cid never existed here */ + return; + } + return $items[0]; +} + +function retriever_item_completed($retriever_item_id, $resource, $a) { + logger('retriever_item_completed: id ' . 
$retriever_item_id . ' url ' . $resource['url'], LOGGER_DEBUG); + + $retriever_item = retriever_get_retriever_item($retriever_item_id); + if (!$retriever_item) { + return; + } + // Note: the retriever might be null. Doesn't matter. + $retriever = get_retriever($retriever_item['contact-id'], $retriever_item['item-uid']); + $item = retriever_get_item($retriever_item); + if (!$item) { + return; + } + + retriever_apply_completed_resource_to_item($retriever, $item, $resource, $a); + + q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", + intval($retriever_item['id'])); + retriever_check_item_completed($item); +} + +function retriever_resource_completed($resource, $a) { + logger('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url'], LOGGER_DEBUG); + $r = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource['id']); + foreach ($r as $rr) { + retriever_item_completed($rr['id'], $resource, $a); + } +} + +function apply_retrospective($a, $retriever, $num) { + $r = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d", + intval($retriever['contact-id']), intval($num)); + foreach ($r as $item) { + q('UPDATE `item` SET `visible` = 0 WHERE `id` = %d', $item['id']); + q('UPDATE `thread` SET `visible` = 0 WHERE `iid` = %d', $item['id']); + retriever_on_item_insert($a, $retriever, $item); + } +} + +function retriever_on_item_insert($a, $retriever, &$item) { + if (!$retriever || !$retriever['id']) { + logger('retriever_on_item_insert: No retriever supplied', LOGGER_INFO); + return; + } + if (empty($retriever["data"]['enable'])) { /* was (!$x) == "on": the negation bound first, so it was already a truthiness test; empty() states that and tolerates a missing key */ + return; + } + if (array_key_exists('pattern', $retriever["data"]) && $retriever["data"]['pattern']) { + $url = preg_replace('/' . $retriever["data"]['pattern'] . '/', $retriever["data"]['replace'], $item['plink']); + logger('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . 
$url, LOGGER_DATA); + } + else { + $url = $item['plink']; + } + + $resource = add_retriever_resource($a, $url); + $retriever_item_id = add_retriever_item($item, $resource); +} + +function add_retriever_resource($a, $url, $binary = false) { + logger('add_retriever_resource: ' . $url, LOGGER_DEBUG); + + $scheme = parse_url($url, PHP_URL_SCHEME); + if ($scheme == 'data') { + $fp = fopen($url, 'r'); + $meta = stream_get_meta_data($fp); + $type = $meta['mediatype']; + $data = stream_get_contents($fp); + fclose($fp); + + $url = 'md5://' . hash('md5', $url); + $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url)); + $resource = empty($r) ? null : $r[0]; /* do not index an empty or failed result set */ + if ($resource) { + logger('add_retriever_resource: Resource ' . $url . ' already requested', LOGGER_DEBUG); + return $resource; + } + + logger('retrieve_resource: got data URL type ' . $type, LOGGER_DEBUG); /* no row exists yet; the type comes from the stream metadata */ + q("INSERT INTO `retriever_resource` (`type`, `binary`, `url`, `completed`, `data`) " . + "VALUES ('%s', %d, '%s', now(), '%s')", + DBA::escape($type), + intval($binary ? 1 : 0), + DBA::escape($url), + DBA::escape($data)); + $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url)); + $resource = empty($r) ? null : $r[0]; + if ($resource) { + retriever_resource_completed($resource, $a); + } + return $resource; + } + + if (strlen($url) > 800) { + logger('add_retriever_resource: URL is longer than 800 characters', LOGGER_INFO); + } + + $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url)); + if (count($r)) { + logger('add_retriever_resource: Resource ' . $url . ' already requested', LOGGER_DEBUG); + return $r[0]; + } + + q("INSERT INTO `retriever_resource` (`binary`, `url`) " . + "VALUES (%d, '%s')", intval($binary ? 1 : 0), DBA::escape($url)); + $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url)); + return $r[0]; +} + +function add_retriever_item(&$item, $resource) { + logger('add_retriever_item: ' . $resource['url'] . ' for ' . 
$item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG); + + q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " . + "VALUES ('%s', %d, %d, %d)", + DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource["id"])); + $r = q("SELECT id FROM `retriever_item` WHERE " . + "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d ORDER BY id DESC", + DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id'])); + if (!count($r)) { + logger("add_retriever_item: couldn't create retriever item for " . + $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], + LOGGER_INFO); + return; + } + logger('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG); + return $r[0]['id']; +} + +function retriever_get_encoding($resource) { + $matches = array(); + if (preg_match('/charset=(.*)/', $resource['type'], $matches)) { + return trim(array_pop($matches)); + } + return 'utf-8'; +} + +function retriever_apply_xslt_text($xslt_text, $doc) { + if (!$xslt_text) { + logger('retriever_apply_xslt_text: empty XSLT text', LOGGER_INFO); + return $doc; + } + $xslt_doc = new DOMDocument(); + if (!$xslt_doc->loadXML($xslt_text)) { + logger('retriever_apply_xslt_text: could not load XML', LOGGER_INFO); + return $doc; + } + $xp = new XsltProcessor(); + $xp->importStylesheet($xslt_doc); + $result = $xp->transformToDoc($doc); + return $result; +} + +function retriever_apply_dom_filter($retriever, &$item, $resource) { + logger('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . 
$item['contact-id'], LOGGER_DEBUG); + + if (!$retriever['data']['include'] && !$retriever['data']['customxslt']) { + return; + } + if (!$resource['data']) { + logger('retriever_apply_dom_filter: no text to work with', LOGGER_INFO); + return; + } + + $encoding = retriever_get_encoding($resource); + $content = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $encoding); + $doc = new DOMDocument('1.0', 'UTF-8'); + if (strpos($resource['type'], 'html') !== false) { + @$doc->loadHTML($content); + } + else { + $doc->loadXML($content); + } + + $params = array('$spec' => $retriever['data']); + $extract_template = get_markup_template('extract.tpl', 'addon/retriever/'); + $extract_xslt = replace_macros($extract_template, $params); + if ($retriever['data']['include']) { + $doc = retriever_apply_xslt_text($extract_xslt, $doc); + } + if (array_key_exists('customxslt', $retriever['data']) && $retriever['data']['customxslt']) { + $doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc); + } + if (!$doc) { + logger('retriever_apply_dom_filter: failed to apply extract XSLT template', LOGGER_INFO); + return; + } + + $components = parse_url($resource['redirect-url']); + $rooturl = $components['scheme'] . "://" . $components['host']; + $dirurl = $rooturl . dirname($components['path']) . "/"; + $params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl); + $fix_urls_template = get_markup_template('fix-urls.tpl', 'addon/retriever/'); + $fix_urls_xslt = replace_macros($fix_urls_template, $params); + $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc); + if (!$doc) { + logger('retriever_apply_dom_filter: failed to apply fix urls XSLT template', LOGGER_INFO); + return; + } + + $item['body'] = HTML::toBBCode($doc->saveHTML()); + if (!strlen($item['body'])) { + logger('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', LOGGER_INFO); + return; + } + $item['body'] .= "\n\n" . L10n::t('Retrieved') . ' ' . 
date("Y-m-d") . ': [url='; + $item['body'] .= $item['plink']; + $item['body'] .= ']' . $item['plink'] . '[/url]'; + q("UPDATE `item` SET `body` = '%s' WHERE `id` = %d", + DBA::escape($item['body']), intval($item['id'])); +} + +function retrieve_images(&$item, $a) { + $matches1 = array(); + preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $item["body"], $matches1); + $matches2 = array(); + preg_match_all("/\[img\](.*?)\[\/img\]/ism", $item["body"], $matches2); + $matches = array_merge($matches1[3], $matches2[1]); + logger('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG); + foreach ($matches as $url) { + if (strpos($url, get_app()->get_baseurl()) === FALSE) { + $resource = add_retriever_resource($a, $url, true); + if (!$resource['completed']) { + add_retriever_item($item, $resource); + } + else { + retriever_transform_images($a, $item, $resource); + } + } + } +} + +function retriever_check_item_completed(&$item) +{ + $r = q('SELECT count(*) FROM retriever_item WHERE `item-uri` = "%s" ' . + 'AND `item-uid` = %d AND `contact-id` = %d AND `finished` = 0', + DBA::escape($item['uri']), intval($item['uid']), + intval($item['contact-id'])); + $waiting = $r[0]['count(*)']; + logger('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] + . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources', LOGGER_DEBUG); + $old_visible = $item['visible']; + $item['visible'] = $waiting ? 0 : 1; + if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) { + logger('retriever_check_item_completed: changing visible flag to ' . $item['visible'] . ' and invoking notifier ("edit_post", ' . $item['id'] . 
')', LOGGER_DEBUG); + q("UPDATE `item` SET `visible` = %d WHERE `id` = %d", + intval($item['visible']), + intval($item['id'])); + q("UPDATE `thread` SET `visible` = %d WHERE `iid` = %d", + intval($item['visible']), + intval($item['id'])); + } +} + +function retriever_apply_completed_resource_to_item($retriever, &$item, $resource, $a) { + logger('retriever_apply_completed_resource_to_item: retriever ' . + ($retriever ? $retriever['id'] : 'none') . + ' resource ' . $resource['url'] . ' plink ' . $item['plink'], LOGGER_DEBUG); + if (strpos($resource['type'], 'image') !== false) { + retriever_transform_images($a, $item, $resource); + } + if (!$retriever) { + return; + } + if ((strpos($resource['type'], 'html') !== false) || + (strpos($resource['type'], 'xml') !== false)) { + retriever_apply_dom_filter($retriever, $item, $resource); + if ($retriever["data"]['images'] ) { + retrieve_images($item, $a); + } + } +} + +function retriever_transform_images($a, &$item, $resource) { + if (!$resource["data"]) { + logger('retriever_transform_images: no data available for ' + . $resource['id'] . ' ' . $resource['url'], LOGGER_INFO); + return; + } + + try { + $photo = Image::storePhoto($a, $item['uid'], $resource['data'], $resource['url']); + } catch (Exception $e) { + logger('retriever_transform_images caught exception ' . $e->getMessage()); + return; + } + foreach ($photo as $k => $v) + { + logger('@@@ photo key ' . $k); + } + $new_url = $photo['full']; + logger('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . + $new_url . ' in item ' . 
$item['plink'], LOGGER_DEBUG); + $transformed = str_replace($resource["url"], $new_url, $item['body']); + if ($transformed === $item['body']) { + return; + } + + $item['body'] = $transformed; + q("UPDATE `item` SET `body` = '%s' WHERE `plink` = '%s' AND `uid` = %d AND `contact-id` = %d", + DBA::escape($item['body']), + DBA::escape($item['plink']), + intval($item['uid']), + intval($item['contact-id'])); +} + +function retriever_content($a) { + if (!local_user()) { + $a->page['content'] .= "

Please log in

"; + return; + } + if ($a->argv[1] === 'help') { + $feeds = q("SELECT `id`, `name`, `thumb` FROM contact WHERE `uid` = %d AND `network` = 'feed'", + local_user()); + foreach ($feeds as $k=>$v) { + $feeds[$k]['url'] = $a->get_baseurl() . '/retriever/' . $v['id']; + } + $template = get_markup_template('/help.tpl', 'addon/retriever/'); + $a->page['content'] .= replace_macros($template, array( + '$config' => $a->get_baseurl() . '/settings/addon', + '$feeds' => $feeds)); + return; + } + if ($a->argv[1]) { + $retriever = get_retriever($a->argv[1], local_user(), false); + + if (x($_POST["id"])) { + $retriever = get_retriever($a->argv[1], local_user(), true); + $retriever["data"] = array(); + foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt') as $setting) { + if (x($_POST['retriever_' . $setting])) { + $retriever["data"][$setting] = $_POST['retriever_' . $setting]; + } + } + foreach ($_POST as $k=>$v) { + if (preg_match("/retriever-(include|exclude)-(\d+)-(element|attribute|value)/", $k, $matches)) { + $retriever['data'][$matches[1]][intval($matches[2])][$matches[3]] = $v; + } + } + // You've gotta have an element, even if it's just "*" + foreach ($retriever['data']['include'] as $k=>$clause) { + if (!$clause['element']) { + unset($retriever['data']['include'][$k]); + } + } + foreach ($retriever['data']['exclude'] as $k=>$clause) { + if (!$clause['element']) { + unset($retriever['data']['exclude'][$k]); + } + } + q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d", + DBA::escape(json_encode($retriever["data"])), intval($retriever["id"])); + $a->page['content'] .= "

Settings Updated"; + if (x($_POST["retriever_retrospective"])) { + apply_retrospective($a, $retriever, $_POST["retriever_retrospective"]); + $a->page['content'] .= " and retrospectively applied to " . intval($_POST["retriever_retrospective"]) . " posts"; + } + $a->page['content'] .= ".

"; + } + + $template = get_markup_template('/rule-config.tpl', 'addon/retriever/'); + $a->page['content'] .= replace_macros($template, array( + '$enable' => array( + 'retriever_enable', + L10n::t('Enabled'), + $retriever['data']['enable']), + '$pattern' => array( + 'retriever_pattern', + L10n::t('URL Pattern'), + $retriever["data"]['pattern'], + L10n::t('Regular expression matching part of the URL to replace')), + '$replace' => array( + 'retriever_replace', + L10n::t('URL Replace'), + $retriever["data"]['replace'], + L10n::t('Text to replace matching part of above regular expression')), + '$images' => array( + 'retriever_images', + L10n::t('Download Images'), + $retriever['data']['images']), + '$retrospective' => array( + 'retriever_retrospective', + L10n::t('Retrospectively Apply'), + '0', + L10n::t('Reapply the rules to this number of posts')), + '$customxslt' => array( + 'retriever_customxslt', + L10n::t('Custom XSLT'), + $retriever['data']['customxslt'], + L10n::t("When standard rules aren't enough, apply custom XSLT to the article")), + '$title' => L10n::t('Retrieve Feed Content'), + '$help' => $a->get_baseurl() . '/retriever/help', + '$help_t' => L10n::t('Get Help'), + '$submit_t' => L10n::t('Submit'), + '$submit' => L10n::t('Save Settings'), + '$id' => ($retriever["id"] ? $retriever["id"] : "create"), + '$tag_t' => L10n::t('Tag'), + '$attribute_t' => L10n::t('Attribute'), + '$value_t' => L10n::t('Value'), + '$add_t' => L10n::t('Add'), + '$remove_t' => L10n::t('Remove'), + '$include_t' => L10n::t('Include'), + '$include' => $retriever['data']['include'], + '$exclude_t' => L10n::t('Exclude'), + '$exclude' => $retriever["data"]['exclude'])); + return; + } +} + +function retriever_contact_photo_menu($a, &$args) { + if (!$args) { + return; + } + if ($args["contact"]["network"] == "feed") { + $args["menu"][ 'retriever' ] = array(L10n::t('Retriever'), $a->get_baseurl() . '/retriever/' . 
$args["contact"]['id']); + } +} + +function retriever_post_remote_hook(&$a, &$item) { + logger('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG); + + $retriever = get_retriever($item['contact-id'], $item["uid"], false); + if ($retriever) { + retriever_on_item_insert($a, $retriever, $item); + } + else { + if (PConfig::get($item["uid"], 'retriever', 'oembed')) { + // Convert to HTML and back to take advantage of bbcode's resolution of oembeds. + $body = HTML::toBBCode(BBCode::convert($item['body'])); + if ($body) { + $item['body'] = $body; + } + } + if (PConfig::get($item["uid"], 'retriever', 'all_photos')) { + retrieve_images($item, $a); + } + } + retriever_check_item_completed($item); +} + +function retriever_plugin_settings(&$a,&$s) { + $all_photos = PConfig::get(local_user(), 'retriever', 'all_photos'); + $oembed = PConfig::get(local_user(), 'retriever', 'oembed'); + $template = get_markup_template('/settings.tpl', 'addon/retriever/'); + $s .= replace_macros($template, array( + '$allphotos' => array( + 'retriever_all_photos', + L10n::t('All Photos'), + $all_photos, + L10n::t('Check this to retrieve photos for all posts')), + '$oembed' => array( + 'retriever_oembed', + L10n::t('Resolve OEmbed'), + $oembed, + L10n::t('Check this to attempt to retrieve embedded content for all posts - useful e.g. for Facebook posts')), + '$submit' => L10n::t('Save Settings'), + '$title' => L10n::t('Retriever Settings'), + '$help' => $a->get_baseurl() . 
'/retriever/help')); +} + +function retriever_plugin_settings_post($a,$post) { + if ($_POST['retriever_all_photos']) { + PConfig::set(local_user(), 'retriever', 'all_photos', $_POST['retriever_all_photos']); + } + else { + PConfig::del(local_user(), 'retriever', 'all_photos'); + } + if ($_POST['retriever_oembed']) { + PConfig::set(local_user(), 'retriever', 'oembed', $_POST['retriever_oembed']); + } + else { + PConfig::del(local_user(), 'retriever', 'oembed'); + } +} diff --git a/retriever/templates/extract.tpl b/retriever/templates/extract.tpl new file mode 100644 index 00000000..f24a860d --- /dev/null +++ b/retriever/templates/extract.tpl @@ -0,0 +1,32 @@ + + + + + + +{{function clause_xpath}} +{{if !$clause.attribute}} +{{$clause.element}}{{elseif $clause.attribute == 'class'}} +{{$clause.element}}[contains(concat(' ', normalize-space(@class), ' '), '{{$clause.value}}')]{{else}} +{{$clause.element}}[@{{$clause.attribute}}='{{$clause.value}}']{{/if}} +{{/function}} + +{{foreach $spec.include as $clause}} + + + + + +{{/foreach}} + +{{foreach $spec.exclude as $clause}} + +{{/foreach}} + + + + + + + + diff --git a/retriever/templates/fix-urls.tpl b/retriever/templates/fix-urls.tpl new file mode 100644 index 00000000..248d4770 --- /dev/null +++ b/retriever/templates/fix-urls.tpl @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/retriever/templates/help.tpl b/retriever/templates/help.tpl new file mode 100644 index 00000000..10b421d0 --- /dev/null +++ b/retriever/templates/help.tpl @@ -0,0 +1,148 @@ +

Retriever Plugin Help

+

+This plugin replaces the short excerpts you normally get in RSS feeds +with the full content of the article from the source website. You +specify which part of the page you're interested in with a set of +rules. When each item arrives, the plugin downloads the full page +from the website, extracts content using the rules, and replaces the +original article. +

+

+There are a few reasons you may want to do this. The source website +might be slow or overloaded. The source website might be +untrustworthy, in which case using Friendica to scrub the HTML is a +good idea. You might be on a LAN that blacklists certain websites. +It also works neatly with the mailstream plugin, allowing you to read +a news stream comfortably without needing continuous Internet +connectivity. +

+

+However, setting up retriever can be quite tricky since it depends on +the internal design of the website. That design exists to make life +easy for the website's developers, not for you. You'll need to have +some familiarity with HTML, and be willing to adapt when the website +suddenly changes everything without notice. +

+

Configuring Retriever for a feed

+

+To set up retriever for an RSS feed, go to the "Contacts" page and +find your feed. Then click on the drop-down menu on the contact. +Select "Retriever" to get to the retriever configuration. +

+

+The "Include" configuration section specifies parts of the page to +include in the article. Each row has three components: a tag, an attribute, and a value. +

+ +

+A simple case is when the article is wrapped in a "div" element: +

+
+    ...
+    <div class="ArticleWrapper">
+      <h2>Man Bites Dog</h2>
+      <img src="mbd.jpg">
+      <p>
+        Residents of the sleepy community of Nowheresville were
+        shocked yesterday by the sight of creepy local weirdo Jim
+        McOddman assaulting innocent local dog Snufflekins with his
+        false teeth.
+      </p>
+      ...
+    </div>
+    ...
+
+

+You then specify the tag "div", attribute "class", and value +"ArticleWrapper". Everything else in the page, such as navigation +panels and menus and footers and so on, will be discarded. If there +is more than one section of the page you want to include, specify each +one on a separate row. If the matching section contains some sections +you want to remove, specify those in the "Exclude" section in the same +way. +

+

+Once you've got a configuration that you think will work, you can try +it out on some existing articles. Type a number into the +"Retrospectively Apply" box and click "Submit". After a while +(exactly how long depends on your system's cron configuration) the new +articles should be available. +

+

Techniques

+

+You can leave the attribute and value blank to include all the +corresponding elements with the specified tag name. You can also use +a tag name of just an asterisk ("*"), which will match any element type with the +specified attribute regardless of the tag. +

+

+Note that the "class" attribute is a special case. Many web page +templates will put multiple different classes in the same element, +separated by spaces. If you specify an attribute of "class" it will +match an element if any of its classes matches the specified value. +For example: +

+
+    <div class="article breaking-news">
+
+

+In this case you can specify a value of "article", or "breaking-news". +You can also specify "article breaking-news", but that won't match if +the website suddenly changes to "breaking-news article", so that's not +recommended. +

+

+One useful trick you can try is using the website's "print" pages. +Many news sites have print versions of all their articles. These are +usually drastically simplified compared to the live website page. +Sometimes this is a good way to get the whole article when it's +normally split across multiple pages. +

+

+Hopefully the URL for the print page is a predictable variant of the +normal article URL. For example, an article URL like: +

+
+    http://www.newssite.com/article-8636.html
+
+

+...might have a print version at: +

+
+    http://www.newssite.com/print/article-8636.html
+
+

+To change the URL used to retrieve the page, use the "URL Pattern" and +"URL Replace" fields. The pattern is a regular expression matching +part of the URL to replace. In this case, you might use a pattern of +"/article" and a replace string of "/print/article". A common pattern +is simply a dollar sign ("$"), used to add the replace string to the end of the URL. +

+

Background Processing

+

+Note that retrieving and processing the articles can take some time, +so it's done in the background. Incoming articles will be marked as +invisible while they're in the process of being downloaded. If a URL +fails, the plugin will keep trying at progressively longer intervals +for up to a month, in case the website is temporarily overloaded or +the network is down. +

+

Retrieving Images

+

+Retriever can also optionally download images and store them in the +local Friendica instance. Just check the "Download Images" box. You +can also download images in every item from your network, whether it's +an RSS feed or not. Go to the "Settings" page and +click "Plugin settings". Then check the "All +Photos" box in the "Retriever Settings" section and click "Save Settings". +

+

Configure Feeds:

+
+{{foreach $feeds as $feed}} +{{include file='contact_template.tpl' contact=$feed}} +{{/foreach}} +
diff --git a/retriever/templates/rule-config.tpl b/retriever/templates/rule-config.tpl new file mode 100644 index 00000000..228d0326 --- /dev/null +++ b/retriever/templates/rule-config.tpl @@ -0,0 +1,112 @@ +
+ +

{{$title}}

+

{{$help_t}}

+
+ +{{include file="field_checkbox.tpl" field=$enable}} +{{include file="field_input.tpl" field=$pattern}} +{{include file="field_input.tpl" field=$replace}} +{{include file="field_checkbox.tpl" field=$images}} +{{include file="field_input.tpl" field=$retrospective}} +

{{$include_t}}:

+
+ + + + + +{{if $include}} + {{foreach $include as $k=>$m}} + + + + + + + {{/foreach}} +{{else}} + + + + + + +{{/if}} + +
{{$tag_t}}{{$attribute_t}}{{$value_t}}
+ +
+

{{$exclude_t}}:

+
+ + + + + +{{if $exclude}} + {{foreach $exclude as $k=>$r}} + + + + + + + {{/foreach}} +{{else}} + + + + + + +{{/if}} + +
TagAttributeValue
+ +
+{{include file="field_textarea.tpl" field=$customxslt}} + +
+
diff --git a/retriever/templates/settings.tpl b/retriever/templates/settings.tpl new file mode 100644 index 00000000..8bfe8db0 --- /dev/null +++ b/retriever/templates/settings.tpl @@ -0,0 +1,9 @@ +
+

{{$title}}

+

+ Get Help +

+{{include file="field_checkbox.tpl" field=$allphotos}} +{{include file="field_checkbox.tpl" field=$oembed}} + +
From f453c15259e5fecf151a987ac84c58126653e793 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sat, 20 Jul 2019 10:44:38 +0100 Subject: [PATCH 02/94] Fixes for retriever --- retriever/retriever.php | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 78fe575f..5f2b855a 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -512,7 +512,8 @@ function retriever_apply_xslt_text($xslt_text, $doc) { function retriever_apply_dom_filter($retriever, &$item, $resource) { logger('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'], LOGGER_DEBUG); - if (!$retriever['data']['include'] && !$retriever['data']['customxslt']) { + if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) { + logger('retriever_apply_dom_filter: no include and no customxslt', LOGGER_INFO); return; } if (!$resource['data']) { @@ -564,8 +565,8 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { $item['body'] .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . ': [url='; $item['body'] .= $item['plink']; $item['body'] .= ']' . $item['plink'] . '[/url]'; - q("UPDATE `item` SET `body` = '%s' WHERE `id` = %d", - DBA::escape($item['body']), intval($item['id'])); + DBA::update('item', ['body' => $item['body']], ['id' => $item['id']]); + DBA::update('item-content', ['body' => $item['body']], ['uri' => $item['uri']]); } function retrieve_images(&$item, $a) { @@ -642,9 +643,9 @@ function retriever_transform_images($a, &$item, $resource) { logger('retriever_transform_images caught exception ' . $e->getMessage()); return; } - foreach ($photo as $k => $v) - { - logger('@@@ photo key ' . $k); + if (!array_key_exists('full', $photo)) { + logger('retriever_transform_images: no replacement URL for image ' . 
$resource['url']); + return; } $new_url = $photo['full']; logger('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . From ae3fa6cea2d8e2a480958b68deaf2323d45d24ac Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sat, 20 Jul 2019 14:37:57 +0100 Subject: [PATCH 03/94] more fixes --- retriever/retriever.php | 119 ++++++++++++++++++++-------------------- 1 file changed, 60 insertions(+), 59 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 5f2b855a..18351f1e 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -10,6 +10,7 @@ use Friendica\Core\Addon; use Friendica\Core\Config; use Friendica\Core\PConfig; +use Friendica\Core\Logger; use Friendica\Content\Text\HTML; use Friendica\Content\Text\BBCode; use Friendica\Object\Image; @@ -73,7 +74,7 @@ function retriever_install() { if (Config::get('retriever', 'dbversion') == '0.7') { $r = q("SELECT `id`, `data` FROM `retriever_rule`"); foreach ($r as $rr) { - logger('retriever_install: retriever ' . $rr['id'] . ' old config ' . $rr['data'], LOGGER_DATA); + Logger::log('retriever_install: retriever ' . $rr['id'] . ' old config ' . $rr['data'], Logger::DATA); $data = json_decode($rr['data'], true); if ($data['pattern']) { $matches = array(); @@ -122,7 +123,7 @@ function retriever_install() { unset($data['remove']); } $r = q('UPDATE `retriever_rule` SET `data` = "%s" WHERE `id` = %d', DBA::escape(json_encode($data)), $rr['id']); - logger('retriever_install: retriever ' . $rr['id'] . ' new config ' . json_encode($data), LOGGER_DATA); + Logger::log('retriever_install: retriever ' . $rr['id'] . ' new config ' . json_encode($data), Logger::DATA); } Config::set('retriever', 'dbversion', '0.8'); } @@ -199,7 +200,7 @@ function retriever_retrieve_items($max_items, $a) { } $retrieve_items = $max_items - $retriever_item_count; - logger('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . 
$retrieve_items, LOGGER_DEBUG); + Logger::log('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items, Logger::DEBUG); do { $r = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d", DBA::escape(implode($schedule_clauses, ' OR ')), @@ -210,7 +211,7 @@ function retriever_retrieve_items($max_items, $a) { if (count($r) == 0) { break; } - logger('retriever_retrieve_items: found ' . count($r) . ' waiting resources in database', LOGGER_DEBUG); + Logger::log('retriever_retrieve_items: found ' . count($r) . ' waiting resources in database', Logger::DEBUG); foreach ($r as $rr) { retrieve_resource($rr); $retriever_item_count++; @@ -228,24 +229,24 @@ function retriever_retrieve_items($max_items, $a) { if (!$r) { $r = array(); } - logger('retriever_retrieve_items: items waiting even though resource has completed: ' . count($r), LOGGER_DEBUG); + Logger::log('retriever_retrieve_items: items waiting even though resource has completed: ' . count($r), Logger::DEBUG); foreach ($r as $rr) { $resource = q("SELECT * FROM retriever_resource WHERE `id` = %d", $rr['resource']); $retriever_item = retriever_get_retriever_item($rr['item']); if (!$retriever_item) { - logger('retriever_retrieve_items: no retriever item with id ' . $rr['item'], LOGGER_INFO); + Logger::log('retriever_retrieve_items: no retriever item with id ' . $rr['item'], Logger::INFO); continue; } $item = retriever_get_item($retriever_item); if (!$item) { - logger('retriever_retrieve_items: no item ' . $retriever_item['item-uri'], LOGGER_INFO); + Logger::log('retriever_retrieve_items: no item ' . $retriever_item['item-uri'], Logger::INFO); continue; } $retriever = get_retriever($item['contact-id'], $item['uid']); if (!$retriever) { - logger('retriever_retrieve_items: no retriever for item ' . + Logger::log('retriever_retrieve_items: no retriever for item ' . 
$retriever_item['item-uri'] . ' ' . $retriever_item['uid'] . ' ' . $item['contact-id'], - LOGGER_INFO); + Logger::INFO); continue; } retriever_apply_completed_resource_to_item($retriever, $item, $resource[0], $a); @@ -260,7 +261,7 @@ function retriever_tidy() { q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)"); $r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); - logger('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource'); + Logger::log('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource'); foreach ($r as $rr) { q('DELETE FROM retriever_item WHERE id = %d', intval($rr['id'])); } @@ -268,7 +269,7 @@ function retriever_tidy() { function retrieve_dataurl_resource($resource) { if (!preg_match("/date:(.*);base64,(.*)/", $resource['url'], $matches)) { - logger('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern'); + Logger::log('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern'); } else { $resource['type'] = $matches[1]; $resource['data'] = base64url_decode($matches[2]); @@ -290,21 +291,21 @@ function retrieve_resource($resource) { $a = get_app(); try { - logger('retrieve_resource: ' . ($resource['num-tries'] + 1) . - ' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], LOGGER_DEBUG); + Logger::log('retrieve_resource: ' . ($resource['num-tries'] + 1) . + ' attempt at resource ' . $resource['id'] . ' ' . 
$resource['url'], Logger::DEBUG); $redirects; $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, array('cookiejar' => $cookiejar)); unlink($cookiejar); - $resource['data'] = $fetch_result['body']; - $resource['http-code'] = $a->get_curl_code(); - $resource['type'] = $a->get_curl_content_type(); - $resource['redirect-url'] = $fetch_result['redirect_url']; - logger('retrieve_resource: got code ' . $resource['http-code'] . + $resource['data'] = $fetch_result->getBody(); + $resource['http-code'] = $fetch_result->getReturnCode(); + $resource['type'] = $fetch_result->getContentType(); + $resource['redirect-url'] = $fetch_result->getRedirectUrl(); + Logger::log('retrieve_resource: got code ' . $resource['http-code'] . ' retrieving resource ' . $resource['id'] . - ' final url ' . $resource['redirect-url'], LOGGER_DEBUG); + ' final url ' . $resource['redirect-url'], Logger::DEBUG); } catch (Exception $e) { - logger('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage()); + Logger::log('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage()); } q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `http-code` = %d, `redirect-url` = '%s' WHERE id = %d", intval($resource['http-code']), @@ -338,7 +339,7 @@ function get_retriever($contact_id, $uid, $create = false) { function retriever_get_retriever_item($id) { $retriever_items = q("SELECT * FROM `retriever_item` WHERE id = %d", intval($id)); if (count($retriever_items) != 1) { - logger('retriever_get_retriever_item: unable to find retriever_item ' . $id, LOGGER_INFO); + Logger::log('retriever_get_retriever_item: unable to find retriever_item ' . 
$id, Logger::INFO); return; } return $retriever_items[0]; @@ -350,15 +351,15 @@ function retriever_get_item($retriever_item) { intval($retriever_item['item-uid']), intval($retriever_item['contact-id'])); if (count($items) != 1) { - logger('retriever_get_item: unexpected number of results ' . - count($items) . " when searching for item $uri $uid $cid", LOGGER_INFO); + Logger::log('retriever_get_item: unexpected number of results ' . + count($items) . " when searching for item $uri $uid $cid", Logger::INFO); return; } return $items[0]; } function retriever_item_completed($retriever_item_id, $resource, $a) { - logger('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url'], LOGGER_DEBUG); + Logger::log('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url'], Logger::DEBUG); $retriever_item = retriever_get_retriever_item($retriever_item_id); if (!$retriever_item) { @@ -379,7 +380,7 @@ function retriever_item_completed($retriever_item_id, $resource, $a) { } function retriever_resource_completed($resource, $a) { - logger('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url'], LOGGER_DEBUG); + Logger::log('retriever_resource_completed: id ' . $resource['id'] . ' url ' . 
$resource['url'], Logger::DEBUG); $r = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource['id']); foreach ($r as $rr) { retriever_item_completed($rr['id'], $resource, $a); @@ -398,7 +399,7 @@ function apply_retrospective($a, $retriever, $num) { function retriever_on_item_insert($a, $retriever, &$item) { if (!$retriever || !$retriever['id']) { - logger('retriever_on_item_insert: No retriever supplied', LOGGER_INFO); + Logger::log('retriever_on_item_insert: No retriever supplied', Logger::INFO); return; } if (!$retriever["data"]['enable'] == "on") { @@ -406,7 +407,7 @@ function retriever_on_item_insert($a, $retriever, &$item) { } if (array_key_exists('pattern', $retriever["data"]) && $retriever["data"]['pattern']) { $url = preg_replace('/' . $retriever["data"]['pattern'] . '/', $retriever["data"]['replace'], $item['plink']); - logger('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url, LOGGER_DATA); + Logger::log('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url, Logger::DATA); } else { $url = $item['plink']; @@ -417,7 +418,7 @@ function retriever_on_item_insert($a, $retriever, &$item) { } function add_retriever_resource($a, $url, $binary = false) { - logger('add_retriever_resource: ' . $url, LOGGER_DEBUG); + Logger::log('add_retriever_resource: ' . $url, Logger::DEBUG); $scheme = parse_url($url, PHP_URL_SCHEME); if ($scheme == 'data') { @@ -431,11 +432,11 @@ function add_retriever_resource($a, $url, $binary = false) { $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url)); $resource = $r[0]; if (count($r)) { - logger('add_retriever_resource: Resource ' . $url . ' already requested', LOGGER_DEBUG); + Logger::log('add_retriever_resource: Resource ' . $url . ' already requested', Logger::DEBUG); return $resource; } - logger('retrieve_resource: got data URL type ' . $resource['type'], LOGGER_DEBUG); + Logger::log('retrieve_resource: got data URL type ' . 
$resource['type'], Logger::DEBUG); q("INSERT INTO `retriever_resource` (`type`, `binary`, `url`, `completed`, `data`) " . "VALUES ('%s', %d, '%s', now(), '%s')", DBA::escape($type), @@ -451,12 +452,12 @@ function add_retriever_resource($a, $url, $binary = false) { } if (strlen($url) > 800) { - logger('add_retriever_resource: URL is longer than 800 characters', LOGGER_INFO); + Logger::log('add_retriever_resource: URL is longer than 800 characters', Logger::INFO); } $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url)); if (count($r)) { - logger('add_retriever_resource: Resource ' . $url . ' already requested', LOGGER_DEBUG); + Logger::log('add_retriever_resource: Resource ' . $url . ' already requested', Logger::DEBUG); return $r[0]; } @@ -467,7 +468,7 @@ function add_retriever_resource($a, $url, $binary = false) { } function add_retriever_item(&$item, $resource) { - logger('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG); + Logger::log('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG); q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " . "VALUES ('%s', %d, %d, %d)", @@ -476,12 +477,12 @@ function add_retriever_item(&$item, $resource) { "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d ORDER BY id DESC", DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id'])); if (!count($r)) { - logger("add_retriever_item: couldn't create retriever item for " . + Logger::log("add_retriever_item: couldn't create retriever item for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], - LOGGER_INFO); + Logger::INFO); return; } - logger('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . 
$item['contact-id'], LOGGER_DEBUG); + Logger::log('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG); return $r[0]['id']; } @@ -495,12 +496,12 @@ function retriever_get_encoding($resource) { function retriever_apply_xslt_text($xslt_text, $doc) { if (!$xslt_text) { - logger('retriever_apply_xslt_text: empty XSLT text', LOGGER_INFO); + Logger::log('retriever_apply_xslt_text: empty XSLT text', Logger::INFO); return $doc; } $xslt_doc = new DOMDocument(); if (!$xslt_doc->loadXML($xslt_text)) { - logger('retriever_apply_xslt_text: could not load XML', LOGGER_INFO); + Logger::log('retriever_apply_xslt_text: could not load XML', Logger::INFO); return $doc; } $xp = new XsltProcessor(); @@ -510,14 +511,14 @@ function retriever_apply_xslt_text($xslt_text, $doc) { } function retriever_apply_dom_filter($retriever, &$item, $resource) { - logger('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'], LOGGER_DEBUG); + Logger::log('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . 
$item['contact-id'], Logger::DEBUG); if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) { - logger('retriever_apply_dom_filter: no include and no customxslt', LOGGER_INFO); + Logger::log('retriever_apply_dom_filter: no include and no customxslt', Logger::INFO); return; } if (!$resource['data']) { - logger('retriever_apply_dom_filter: no text to work with', LOGGER_INFO); + Logger::log('retriever_apply_dom_filter: no text to work with', Logger::INFO); return; } @@ -541,7 +542,7 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { $doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc); } if (!$doc) { - logger('retriever_apply_dom_filter: failed to apply extract XSLT template', LOGGER_INFO); + Logger::log('retriever_apply_dom_filter: failed to apply extract XSLT template', Logger::INFO); return; } @@ -553,13 +554,13 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { $fix_urls_xslt = replace_macros($fix_urls_template, $params); $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc); if (!$doc) { - logger('retriever_apply_dom_filter: failed to apply fix urls XSLT template', LOGGER_INFO); + Logger::log('retriever_apply_dom_filter: failed to apply fix urls XSLT template', Logger::INFO); return; } $item['body'] = HTML::toBBCode($doc->saveHTML()); if (!strlen($item['body'])) { - logger('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', LOGGER_INFO); + Logger::log('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', Logger::INFO); return; } $item['body'] .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . 
': [url='; @@ -575,9 +576,9 @@ function retrieve_images(&$item, $a) { $matches2 = array(); preg_match_all("/\[img\](.*?)\[\/img\]/ism", $item["body"], $matches2); $matches = array_merge($matches1[3], $matches2[1]); - logger('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG); + Logger::log('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG); foreach ($matches as $url) { - if (strpos($url, get_app()->get_baseurl()) === FALSE) { + if (strpos($url, get_app()->getBaseUrl()) === FALSE) { $resource = add_retriever_resource($a, $url, true); if (!$resource['completed']) { add_retriever_item($item, $resource); @@ -596,12 +597,12 @@ function retriever_check_item_completed(&$item) DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id'])); $waiting = $r[0]['count(*)']; - logger('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] - . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources', LOGGER_DEBUG); + Logger::log('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] + . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources', Logger::DEBUG); $old_visible = $item['visible']; $item['visible'] = $waiting ? 0 : 1; if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) { - logger('retriever_check_item_completed: changing visible flag to ' . $item['visible'] . ' and invoking notifier ("edit_post", ' . $item['id'] . ')', LOGGER_DEBUG); + Logger::log('retriever_check_item_completed: changing visible flag to ' . $item['visible'] . ' and invoking notifier ("edit_post", ' . $item['id'] . 
')', Logger::DEBUG); q("UPDATE `item` SET `visible` = %d WHERE `id` = %d", intval($item['visible']), intval($item['id'])); @@ -612,9 +613,9 @@ function retriever_check_item_completed(&$item) } function retriever_apply_completed_resource_to_item($retriever, &$item, $resource, $a) { - logger('retriever_apply_completed_resource_to_item: retriever ' . + Logger::log('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . - ' resource ' . $resource['url'] . ' plink ' . $item['plink'], LOGGER_DEBUG); + ' resource ' . $resource['url'] . ' plink ' . $item['plink'], Logger::DEBUG); if (strpos($resource['type'], 'image') !== false) { retriever_transform_images($a, $item, $resource); } @@ -632,24 +633,24 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc function retriever_transform_images($a, &$item, $resource) { if (!$resource["data"]) { - logger('retriever_transform_images: no data available for ' - . $resource['id'] . ' ' . $resource['url'], LOGGER_INFO); + Logger::log('retriever_transform_images: no data available for ' + . $resource['id'] . ' ' . $resource['url'], Logger::INFO); return; } try { $photo = Image::storePhoto($a, $item['uid'], $resource['data'], $resource['url']); } catch (Exception $e) { - logger('retriever_transform_images caught exception ' . $e->getMessage()); + Logger::log('retriever_transform_images caught exception ' . $e->getMessage()); return; } if (!array_key_exists('full', $photo)) { - logger('retriever_transform_images: no replacement URL for image ' . $resource['url']); + Logger::log('retriever_transform_images: no replacement URL for image ' . $resource['url']); return; } $new_url = $photo['full']; - logger('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . - $new_url . ' in item ' . $item['plink'], LOGGER_DEBUG); + Logger::log('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . + $new_url . ' in item ' . 
$item['plink'], Logger::DEBUG); $transformed = str_replace($resource["url"], $new_url, $item['body']); if ($transformed === $item['body']) { return; @@ -672,7 +673,7 @@ function retriever_content($a) { $feeds = q("SELECT `id`, `name`, `thumb` FROM contact WHERE `uid` = %d AND `network` = 'feed'", local_user()); foreach ($feeds as $k=>$v) { - $feeds[$k]['url'] = $a->get_baseurl() . '/retriever/' . $v['id']; + $feeds[$k]['url'] = $a->getBaseUrl() . '/retriever/' . $v['id']; } $template = get_markup_template('/help.tpl', 'addon/retriever/'); $a->page['content'] .= replace_macros($template, array( @@ -776,7 +777,7 @@ function retriever_contact_photo_menu($a, &$args) { } function retriever_post_remote_hook(&$a, &$item) { - logger('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG); + Logger::log('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG); $retriever = get_retriever($item['contact-id'], $item["uid"], false); if ($retriever) { From 738d1ab5880698a82d0d330ffb785d12b74e6541 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sat, 20 Jul 2019 14:45:10 +0100 Subject: [PATCH 04/94] more fixes --- retriever/retriever.php | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 18351f1e..3072a743 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -11,6 +11,7 @@ use Friendica\Core\Addon; use Friendica\Core\Config; use Friendica\Core\PConfig; use Friendica\Core\Logger; +use Friendica\Core\Renderer; use Friendica\Content\Text\HTML; use Friendica\Content\Text\BBCode; use Friendica\Object\Image; @@ -533,8 +534,8 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { } $params = array('$spec' => $retriever['data']); - $extract_template = get_markup_template('extract.tpl', 'addon/retriever/'); - $extract_xslt = 
replace_macros($extract_template, $params); + $extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/'); + $extract_xslt = Renderer::replaceMacros($extract_template, $params); if ($retriever['data']['include']) { $doc = retriever_apply_xslt_text($extract_xslt, $doc); } @@ -550,8 +551,8 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { $rooturl = $components['scheme'] . "://" . $components['host']; $dirurl = $rooturl . dirname($components['path']) . "/"; $params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl); - $fix_urls_template = get_markup_template('fix-urls.tpl', 'addon/retriever/'); - $fix_urls_xslt = replace_macros($fix_urls_template, $params); + $fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/'); + $fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params); $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc); if (!$doc) { Logger::log('retriever_apply_dom_filter: failed to apply fix urls XSLT template', Logger::INFO); @@ -675,8 +676,8 @@ function retriever_content($a) { foreach ($feeds as $k=>$v) { $feeds[$k]['url'] = $a->getBaseUrl() . '/retriever/' . $v['id']; } - $template = get_markup_template('/help.tpl', 'addon/retriever/'); - $a->page['content'] .= replace_macros($template, array( + $template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/'); + $a->page['content'] .= Renderer::replaceMacros($template, array( '$config' => $a->get_baseurl() . '/settings/addon', '$feeds' => $feeds)); return; @@ -718,8 +719,8 @@ function retriever_content($a) { $a->page['content'] .= ".

"; } - $template = get_markup_template('/rule-config.tpl', 'addon/retriever/'); - $a->page['content'] .= replace_macros($template, array( + $template = Renderer::getMarkupTemplate('/rule-config.tpl', 'addon/retriever/'); + $a->page['content'] .= Renderer::replaceMacros($template, array( '$enable' => array( 'retriever_enable', L10n::t('Enabled'), @@ -801,8 +802,8 @@ function retriever_post_remote_hook(&$a, &$item) { function retriever_plugin_settings(&$a,&$s) { $all_photos = PConfig::get(local_user(), 'retriever', 'all_photos'); $oembed = PConfig::get(local_user(), 'retriever', 'oembed'); - $template = get_markup_template('/settings.tpl', 'addon/retriever/'); - $s .= replace_macros($template, array( + $template = Renderer::getMarkupTemplate('/settings.tpl', 'addon/retriever/'); + $s .= Renderer::replaceMacros($template, array( '$allphotos' => array( 'retriever_all_photos', L10n::t('All Photos'), From c9f4ad7405370a7655f77dc1c0dac3392cca9b77 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 21 Jul 2019 18:27:14 +0100 Subject: [PATCH 05/94] now working retriever --- retriever/retriever.php | 147 +++++++--------------------------------- 1 file changed, 23 insertions(+), 124 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 3072a743..97f29694 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -18,6 +18,7 @@ use Friendica\Object\Image; use Friendica\Util\Network; use Friendica\Core\L10n; use Friendica\Database\DBA; +use Friendica\Model\ItemURI; function retriever_install() { Addon::registerHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings'); @@ -27,116 +28,6 @@ function retriever_install() { Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); $r = q("SELECT `id` FROM `pconfig` WHERE `cat` LIKE 'retriever_%%'"); - if (count($r) || (Config::get('retriever', 'dbversion') == '0.1')) { - $retrievers = array(); - $r = q("SELECT SUBSTRING(`cat`, 10) AS 
`contact`, `k`, `v` FROM `pconfig` WHERE `cat` LIKE 'retriever%%'"); - foreach ($r as $rr) { - $retrievers[$rr['contact']][$rr['k']] = $rr['v']; - } - foreach ($retrievers as $k => $v) { - $rr = q("SELECT `uid` FROM `contact` WHERE `id` = %d", intval($k)); - $uid = $rr[0]['uid']; - $v['images'] = 'on'; - q("INSERT INTO `retriever_rule` (`uid`, `contact-id`, `data`) VALUES (%d, %d, '%s')", - intval($uid), intval($k), DBA::escape(json_encode($v))); - } - q("DELETE FROM `pconfig` WHERE `cat` LIKE 'retriever_%%'"); - Config::set('retriever', 'dbversion', '0.2'); - } - if (Config::get('retriever', 'dbversion') == '0.2') { - q("ALTER TABLE `retriever_resource` DROP COLUMN `retriever`"); - Config::set('retriever', 'dbversion', '0.3'); - } - if (Config::get('retriever', 'dbversion') == '0.3') { - q("ALTER TABLE `retriever_item` MODIFY COLUMN `item-uri` varchar(800) CHARACTER SET ascii NOT NULL"); - q("ALTER TABLE `retriever_resource` MODIFY COLUMN `url` varchar(800) CHARACTER SET ascii NOT NULL"); - Config::set('retriever', 'dbversion', '0.4'); - } - if (Config::get('retriever', 'dbversion') == '0.4') { - q("ALTER TABLE `retriever_item` ADD COLUMN `finished` tinyint(1) unsigned NOT NULL DEFAULT '0'"); - Config::set('retriever', 'dbversion', '0.5'); - } - if (Config::get('retriever', 'dbversion') == '0.5') { - q('ALTER TABLE `retriever_resource` CHANGE `created` `created` timestamp NOT NULL DEFAULT now()'); - q('ALTER TABLE `retriever_resource` CHANGE `completed` `completed` timestamp NULL DEFAULT NULL'); - q('ALTER TABLE `retriever_resource` CHANGE `last-try` `last-try` timestamp NULL DEFAULT NULL'); - q('ALTER TABLE `retriever_item` DROP KEY `all`'); - q('ALTER TABLE `retriever_item` ADD KEY `all` (`item-uri`, `item-uid`, `contact-id`)'); - Config::set('retriever', 'dbversion', '0.6'); - } - if (Config::get('retriever', 'dbversion') == '0.6') { - q('ALTER TABLE `retriever_item` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin'); - q('ALTER TABLE `retriever_item` CHANGE 
`item-uri` `item-uri` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL'); - q('ALTER TABLE `retriever_resource` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin'); - q('ALTER TABLE `retriever_resource` CHANGE `url` `url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL'); - q('ALTER TABLE `retriever_rule` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin'); - Config::set('retriever', 'dbversion', '0.7'); - } - if (Config::get('retriever', 'dbversion') == '0.7') { - $r = q("SELECT `id`, `data` FROM `retriever_rule`"); - foreach ($r as $rr) { - Logger::log('retriever_install: retriever ' . $rr['id'] . ' old config ' . $rr['data'], Logger::DATA); - $data = json_decode($rr['data'], true); - if ($data['pattern']) { - $matches = array(); - if (preg_match("/\/(.*)\//", $data['pattern'], $matches)) { - $data['pattern'] = $matches[1]; - } - } - if ($data['match']) { - $include = array(); - foreach (explode('|', $data['match']) as $component) { - $matches = array(); - if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[@([A-Za-z][a-z0-9]*)='([^']*)'\]/", $component, $matches)) { - $include[] = array( - 'element' => $matches[1], - 'attribute' => $matches[2], - 'value' => $matches[3]); - } - if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[contains(concat(' ',normalize-space(@class),' '),' ([^ ']+) ')]/", $component, $matches)) { - $include[] = array( - 'element' => $matches[1], - 'attribute' => $matches[2], - 'value' => $matches[3]); - } - } - $data['include'] = $include; - unset($data['match']); - } - if ($data['remove']) { - $exclude = array(); - foreach (explode('|', $data['remove']) as $component) { - $matches = array(); - if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[@([A-Za-z][a-z0-9]*)='([^']*)'\]/", $component, $matches)) { - $exclude[] = array( - 'element' => $matches[1], - 'attribute' => $matches[2], - 'value' => $matches[3]); - } - if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[contains(concat(' ',normalize-space(@class),' '),' ([^ ']+) ')]/", $component, $matches)) { - 
$exclude[] = array( - 'element' => $matches[1], - 'attribute' => $matches[2], - 'value' => $matches[3]); - } - } - $data['exclude'] = $exclude; - unset($data['remove']); - } - $r = q('UPDATE `retriever_rule` SET `data` = "%s" WHERE `id` = %d', DBA::escape(json_encode($data)), $rr['id']); - Logger::log('retriever_install: retriever ' . $rr['id'] . ' new config ' . json_encode($data), Logger::DATA); - } - Config::set('retriever', 'dbversion', '0.8'); - } - if (Config::get('retriever', 'dbversion') == '0.8') { - q("ALTER TABLE `retriever_resource` ADD COLUMN `http-code` smallint(1) unsigned NULL DEFAULT NULL"); - Config::set('retriever', 'dbversion', '0.9'); - } - if (Config::get('retriever', 'dbversion') == '0.9') { - q("ALTER TABLE `retriever_item` DROP COLUMN `parent`"); - q("ALTER TABLE `retriever_resource` ADD COLUMN `redirect-url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NULL DEFAULT NULL"); - Config::set('retriever', 'dbversion', '0.10'); - } if (Config::get('retriever', 'dbversion') == '0.10') { q("ALTER TABLE `retriever_resource` MODIFY COLUMN `type` char(255) NULL DEFAULT NULL"); q("ALTER TABLE `retriever_resource` MODIFY COLUMN `data` mediumblob NULL DEFAULT NULL"); @@ -347,6 +238,7 @@ function retriever_get_retriever_item($id) { } function retriever_get_item($retriever_item) { + // @@@ Need to replace this with Item::selectFirst $items = q("SELECT * FROM `item` WHERE `uri` = '%s' AND `uid` = %d AND `contact-id` = %d", DBA::escape($retriever_item['item-uri']), intval($retriever_item['item-uid']), @@ -537,9 +429,11 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { $extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/'); $extract_xslt = Renderer::replaceMacros($extract_template, $params); if ($retriever['data']['include']) { + Logger::log('retriever_apply_dom_filter: applying include/exclude template \"' . $extract_xslt . 
'\"', Logger::DEBUG); $doc = retriever_apply_xslt_text($extract_xslt, $doc); } if (array_key_exists('customxslt', $retriever['data']) && $retriever['data']['customxslt']) { + Logger::log('retriever_apply_dom_filter: applying custom XSLT \"' . $retriever['data']['customxslt'] . '\"', Logger::DEBUG); $doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc); } if (!$doc) { @@ -559,16 +453,21 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { return; } - $item['body'] = HTML::toBBCode($doc->saveHTML()); - if (!strlen($item['body'])) { + $body = HTML::toBBCode($doc->saveHTML()); + if (!strlen($body)) { Logger::log('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', Logger::INFO); return; } - $item['body'] .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . ': [url='; - $item['body'] .= $item['plink']; - $item['body'] .= ']' . $item['plink'] . '[/url]'; - DBA::update('item', ['body' => $item['body']], ['id' => $item['id']]); - DBA::update('item-content', ['body' => $item['body']], ['uri' => $item['uri']]); + $body .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . ': [url='; + $body .= $item['plink']; + $body .= ']' . $item['plink'] . '[/url]'; + + $uri_id = ItemURI::getIdByURI($item['uri']); + //@@@ remove this + $item['body'] = $body; + Logger::log('retriever_apply_dom_filter: XSLT result \"' . $body . '\"', Logger::DATA); + DBA::update('item', ['body' => $body], ['id' => $item['id']]); + DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); } function retrieve_images(&$item, $a) { @@ -678,18 +577,18 @@ function retriever_content($a) { } $template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/'); $a->page['content'] .= Renderer::replaceMacros($template, array( - '$config' => $a->get_baseurl() . '/settings/addon', + '$config' => $a->getBaseUrl() . 
'/settings/addon', '$feeds' => $feeds)); return; } if ($a->argv[1]) { $retriever = get_retriever($a->argv[1], local_user(), false); - if (x($_POST["id"])) { + if (!empty($_POST["id"])) { $retriever = get_retriever($a->argv[1], local_user(), true); $retriever["data"] = array(); foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt') as $setting) { - if (x($_POST['retriever_' . $setting])) { + if (!empty($_POST['retriever_' . $setting])) { $retriever["data"][$setting] = $_POST['retriever_' . $setting]; } } @@ -712,7 +611,7 @@ function retriever_content($a) { q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d", DBA::escape(json_encode($retriever["data"])), intval($retriever["id"])); $a->page['content'] .= "

Settings Updated"; - if (x($_POST["retriever_retrospective"])) { + if (!empty($_POST["retriever_retrospective"])) { apply_retrospective($a, $retriever, $_POST["retriever_retrospective"]); $a->page['content'] .= " and retrospectively applied to " . $_POST["apply"] . " posts"; } @@ -750,7 +649,7 @@ function retriever_content($a) { $retriever['data']['customxslt'], L10n::t("When standard rules aren't enough, apply custom XSLT to the article")), '$title' => L10n::t('Retrieve Feed Content'), - '$help' => $a->get_baseurl() . '/retriever/help', + '$help' => $a->getBaseUrl() . '/retriever/help', '$help_t' => L10n::t('Get Help'), '$submit_t' => L10n::t('Submit'), '$submit' => L10n::t('Save Settings'), @@ -773,7 +672,7 @@ function retriever_contact_photo_menu($a, &$args) { return; } if ($args["contact"]["network"] == "feed") { - $args["menu"][ 'retriever' ] = array(L10n::t('Retriever'), $a->get_baseurl() . '/retriever/' . $args["contact"]['id']); + $args["menu"][ 'retriever' ] = array(L10n::t('Retriever'), $a->getBaseUrl() . '/retriever/' . $args["contact"]['id']); } } @@ -816,7 +715,7 @@ function retriever_plugin_settings(&$a,&$s) { L10n::t('Check this to attempt to retrieve embedded content for all posts - useful e.g. for Facebook posts')), '$submit' => L10n::t('Save Settings'), '$title' => L10n::t('Retriever Settings'), - '$help' => $a->get_baseurl() . '/retriever/help')); + '$help' => $a->getBaseUrl() . 
'/retriever/help')); } function retriever_plugin_settings_post($a,$post) { From 8b6a9c017ad13496a4adcf926fff0bc3cc9907ba Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 21 Jul 2019 18:27:55 +0100 Subject: [PATCH 06/94] beginnings of persistent cookiejar support --- retriever/retriever.php | 5 +++++ retriever/templates/rule-config.tpl | 1 + 2 files changed, 6 insertions(+) diff --git a/retriever/retriever.php b/retriever/retriever.php index 97f29694..78a79a0e 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -643,6 +643,11 @@ function retriever_content($a) { L10n::t('Retrospectively Apply'), '0', L10n::t('Reapply the rules to this number of posts')), + '$cookies' => array( + 'retriever_cookies', + L10n::t('Cookies'), + $retriever['data']['cookies'], + L10n::t("Persistent cookies for this feed. Netscape cookie file format.")), '$customxslt' => array( 'retriever_customxslt', L10n::t('Custom XSLT'), diff --git a/retriever/templates/rule-config.tpl b/retriever/templates/rule-config.tpl index 228d0326..847d9c3f 100644 --- a/retriever/templates/rule-config.tpl +++ b/retriever/templates/rule-config.tpl @@ -106,6 +106,7 @@ function retriever_remove_row(id, number) +{{include file="field_textarea.tpl" field=$cookies}} {{include file="field_textarea.tpl" field=$customxslt}} From eb61f8f09acb85f84d00669e92516c824303d309 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 21 Jul 2019 19:32:30 +0100 Subject: [PATCH 07/94] More preparation for persistent cookies --- retriever/retriever.php | 17 +++++++++++------ retriever/templates/rule-config.tpl | 19 ++++++++++++++++++- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 78a79a0e..bb3460a1 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -587,7 +587,7 @@ function retriever_content($a) { if (!empty($_POST["id"])) { $retriever = get_retriever($a->argv[1], local_user(), true); $retriever["data"] = array(); - 
foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt') as $setting) { + foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) { if (!empty($_POST['retriever_' . $setting])) { $retriever["data"][$setting] = $_POST['retriever_' . $setting]; } @@ -643,11 +643,16 @@ function retriever_content($a) { L10n::t('Retrospectively Apply'), '0', L10n::t('Reapply the rules to this number of posts')), - '$cookies' => array( - 'retriever_cookies', - L10n::t('Cookies'), - $retriever['data']['cookies'], - L10n::t("Persistent cookies for this feed. Netscape cookie file format.")), + 'storecookies' => array( + 'retriever_storecookies', + L10n::t('Store cookies'), + $retriever['data']['storecookies'], + L10n::t("Preserve cookie data across fetches.")), + '$cookiedata' => array( + 'retriever_cookiedata', + L10n::t('Cookie Data'), + $retriever['data']['cookiedata'], + L10n::t("Latest cookie data for this feed. Netscape cookie file format.")), '$customxslt' => array( 'retriever_customxslt', L10n::t('Custom XSLT'), diff --git a/retriever/templates/rule-config.tpl b/retriever/templates/rule-config.tpl index 847d9c3f..9061d1ff 100644 --- a/retriever/templates/rule-config.tpl +++ b/retriever/templates/rule-config.tpl @@ -40,6 +40,22 @@ function retriever_remove_row(id, number) var row = document.getElementById(id + '-' + number); tbody.removeChild(row); } + +function retriever_toggle_cookiedata_block() +{ + var div = document.querySelector("#id_retriever_cookiedata").parentNode; + if (document.querySelector("#id_retriever_storecookies").checked) { + div.style.display = "block"; + } + else { + div.style.display = "none"; + } +} + +document.addEventListener('DOMContentLoaded', function() { + retriever_toggle_cookiedata_block(); + document.querySelector("#id_retriever_storecookies").addEventListener('change', retriever_toggle_cookiedata_block, false); +}, false);

{{$title}}

{{$help_t}}

@@ -106,8 +122,9 @@ function retriever_remove_row(id, number) -{{include file="field_textarea.tpl" field=$cookies}} {{include file="field_textarea.tpl" field=$customxslt}} +{{include file="field_checkbox.tpl" field=$storecookies}} +{{include file="field_textarea.tpl" field=$cookiedata}} From df7ea6c3755fca9d1db13151f7608f116c50e6fa Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 24 Jul 2019 06:48:23 +0100 Subject: [PATCH 08/94] tentative database work --- retriever/database.sql | 1 + retriever/retriever.php | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/retriever/database.sql b/retriever/database.sql index 340e33eb..2a0db966 100644 --- a/retriever/database.sql +++ b/retriever/database.sql @@ -24,6 +24,7 @@ CREATE TABLE IF NOT EXISTS `retriever_item` ( CREATE TABLE IF NOT EXISTS `retriever_resource` ( `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `contact-id` int(10) unsigned NOT NULL DEFAULT '0', `type` char(255) NULL DEFAULT NULL, `binary` int(1) NOT NULL DEFAULT 0, `url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL, diff --git a/retriever/retriever.php b/retriever/retriever.php index bb3460a1..adf9681e 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -41,6 +41,10 @@ function retriever_install() { q("ALTER TABLE `retriever_item` ADD INDEX `item-uid` (`item-uid`)"); Config::set('retriever', 'dbversion', '0.12'); } + /* if (Config::get('retriever', 'dbversion') == '0.12') { */ + /* q("ALTER TABLE `retriever_resource` ADD COLUMN `contact-id` int(10) unsigned NULL AFTER `id`"); */ + /* Config::set('retriever', 'dbversion', '0.13'); */ + /* } */ if (Config::get('retriever', 'dbversion') != '0.12') { $schema = file_get_contents(dirname(__file__).'/database.sql'); $arr = explode(';', $schema); From 034ed5fcd665bb3c784a247136cbef56c08e4955 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Fri, 26 Jul 2019 05:49:53 +0100 Subject: [PATCH 09/94] fix --- retriever/retriever.php | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index adf9681e..65471be9 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -189,9 +189,9 @@ function retrieve_resource($resource) { try { Logger::log('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], Logger::DEBUG); - $redirects; + $redirects = 0; $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); - $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, array('cookiejar' => $cookiejar)); + $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar); unlink($cookiejar); $resource['data'] = $fetch_result->getBody(); $resource['http-code'] = $fetch_result->getReturnCode(); From 10f7be958b59ce1a958adc69de4ce1f186384779 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 15 Sep 2019 09:26:25 +0100 Subject: [PATCH 10/94] fixed a bug and commented on another --- retriever/retriever.php | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/retriever/retriever.php b/retriever/retriever.php index 65471be9..c70f906e 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -367,6 +367,15 @@ function add_retriever_resource($a, $url, $binary = false) { function add_retriever_item(&$item, $resource) { Logger::log('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG); + $r = q("SELECT COUNT(*) FROM `retriever_item` WHERE " . + "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d", + DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id'])); + if ($r[0]['COUNT(*)'] > 0) { + Logger::log("add_retriever_item: retriever item already present for " . + $item['uri'] . ' ' . $item['uid'] . ' ' . 
$item['contact-id'], + Logger::INFO); + return; + } q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " . "VALUES ('%s', %d, %d, %d)", DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource["id"])); @@ -536,6 +545,7 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc } function retriever_transform_images($a, &$item, $resource) { + return; //@@@ not working if (!$resource["data"]) { Logger::log('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url'], Logger::INFO); From 61e925630dce4eee5e6616f05a01a3df24535122 Mon Sep 17 00:00:00 2001 From: Administrator Date: Sun, 22 Sep 2019 11:47:30 +0200 Subject: [PATCH 11/94] this is working OK --- retriever/database.sql | 1 + retriever/retriever.php | 387 +++++++++++++++++++++++++--------------- 2 files changed, 247 insertions(+), 141 deletions(-) diff --git a/retriever/database.sql b/retriever/database.sql index 2a0db966..a29135e7 100644 --- a/retriever/database.sql +++ b/retriever/database.sql @@ -24,6 +24,7 @@ CREATE TABLE IF NOT EXISTS `retriever_item` ( CREATE TABLE IF NOT EXISTS `retriever_resource` ( `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `item-uid` int(10) unsigned NOT NULL DEFAULT '0', `contact-id` int(10) unsigned NOT NULL DEFAULT '0', `type` char(255) NULL DEFAULT NULL, `binary` int(1) NOT NULL DEFAULT 0, diff --git a/retriever/retriever.php b/retriever/retriever.php index c70f906e..5644952a 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -12,13 +12,16 @@ use Friendica\Core\Config; use Friendica\Core\PConfig; use Friendica\Core\Logger; use Friendica\Core\Renderer; +use Friendica\Core\System; use Friendica\Content\Text\HTML; use Friendica\Content\Text\BBCode; +use Friendica\Model\Photo; use Friendica\Object\Image; use Friendica\Util\Network; use Friendica\Core\L10n; use Friendica\Database\DBA; use Friendica\Model\ItemURI; +use 
Friendica\Model\Item; function retriever_install() { Addon::registerHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings'); @@ -41,17 +44,18 @@ function retriever_install() { q("ALTER TABLE `retriever_item` ADD INDEX `item-uid` (`item-uid`)"); Config::set('retriever', 'dbversion', '0.12'); } - /* if (Config::get('retriever', 'dbversion') == '0.12') { */ - /* q("ALTER TABLE `retriever_resource` ADD COLUMN `contact-id` int(10) unsigned NULL AFTER `id`"); */ - /* Config::set('retriever', 'dbversion', '0.13'); */ - /* } */ - if (Config::get('retriever', 'dbversion') != '0.12') { + if (Config::get('retriever', 'dbversion') == '0.12') { + q("ALTER TABLE `retriever_resource` ADD COLUMN `contact-id` int(10) unsigned NOT NULL DEFAULT '0' AFTER `id`"); + q("ALTER TABLE `retriever_resource` ADD COLUMN `item-uid` int(10) unsigned NOT NULL DEFAULT '0' AFTER `id`"); + Config::set('retriever', 'dbversion', '0.13'); + } + if (Config::get('retriever', 'dbversion') != '0.13') { $schema = file_get_contents(dirname(__file__).'/database.sql'); $arr = explode(';', $schema); foreach ($arr as $a) { $r = q($a); } - Config::set('retriever', 'dbversion', '0.12'); + Config::set('retriever', 'dbversion', '0.13'); } } @@ -68,7 +72,11 @@ function retriever_uninstall() { function retriever_module() {} function retriever_cron($a, $b) { - // 100 is a nice sane number. Maybe this should be configurable. + // 100 is a nice sane number. Maybe this should be configurable. 
@@@ + + // Do this first, otherwise it can interfere with retreiver_retrieve_items + retriever_clean_up_completed_resources(100, $a); + retriever_retrieve_items(100, $a); retriever_tidy(); } @@ -76,6 +84,7 @@ function retriever_cron($a, $b) { $retriever_item_count = 0; function retriever_retrieve_items($max_items, $a) { + Logger::log('@@@ retriever_retrieve_items', Logger::INFO); global $retriever_item_count; $retriever_schedule = array(array(1,'minute'), @@ -98,56 +107,61 @@ function retriever_retrieve_items($max_items, $a) { $retrieve_items = $max_items - $retriever_item_count; Logger::log('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items, Logger::DEBUG); do { - $r = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d", + Logger::log('@@@ retriever_retrieve_items loop max ' . $max_items . ' count ' . $retriever_item_count, Logger::INFO); + Logger::log("@@@ SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR " . implode($schedule_clauses, ' OR ') . ") ORDER BY `last-try` ASC LIMIT " . $retrieve_items, Logger::INFO); + $retriever_resources = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d", DBA::escape(implode($schedule_clauses, ' OR ')), intval($retrieve_items)); - if (!is_array($r)) { + if (!is_array($retriever_resources)) { break; } - if (count($r) == 0) { + if (count($retriever_resources) == 0) { break; } - Logger::log('retriever_retrieve_items: found ' . count($r) . ' waiting resources in database', Logger::DEBUG); - foreach ($r as $rr) { - retrieve_resource($rr); + Logger::log('retriever_retrieve_items: found ' . count($retriever_resources) . 
' waiting resources in database', Logger::DEBUG); + foreach ($retriever_resources as $retriever_resource) { + Logger::log('@@@ need to get the retriever config here cid ' . $retriever_resource['contact-id'] . ' uid ' . $retriever_resource['item-uid'], Logger::INFO); + retrieve_resource($retriever_resource); $retriever_item_count++; } $retrieve_items = $max_items - $retriever_item_count; } while ($retrieve_items > 0); + // @@@ todo: when items add further items (i.e. images), do the new images go round this loop again? + Logger::log('@@@ retriever_retrieve_items: finished retrieving items', Logger::INFO); +} - /* Look for items that are waiting even though the resource has - * completed. This usually happens because we've been asked to - * retrospectively apply a config change. It could also happen - * due to a cron job dying or something. */ +/* Look for items that are waiting even though the resource has + * completed. This usually happens because we've been asked to + * retrospectively apply a config change. It could also happen due to + * a cron job dying or something. */ +function retriever_clean_up_completed_resources($max_items, $a) { $r = q("SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d", - intval($retrieve_items)); + intval($max_items)); if (!$r) { $r = array(); } - Logger::log('retriever_retrieve_items: items waiting even though resource has completed: ' . count($r), Logger::DEBUG); + Logger::log('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . 
count($r), Logger::DEBUG); foreach ($r as $rr) { $resource = q("SELECT * FROM retriever_resource WHERE `id` = %d", $rr['resource']); $retriever_item = retriever_get_retriever_item($rr['item']); - if (!$retriever_item) { - Logger::log('retriever_retrieve_items: no retriever item with id ' . $rr['item'], Logger::INFO); + if (!DBA::isResult($retriever_item)) { + Logger::log('retriever_clean_up_completed_resources: no retriever item with id ' . $rr['item'], Logger::WARNING); continue; } $item = retriever_get_item($retriever_item); if (!$item) { - Logger::log('retriever_retrieve_items: no item ' . $retriever_item['item-uri'], Logger::INFO); + Logger::log('retriever_clean_up_completed_resources: no item ' . $retriever_item['item-uri'], Logger::WARNING); continue; } - $retriever = get_retriever($item['contact-id'], $item['uid']); - if (!$retriever) { - Logger::log('retriever_retrieve_items: no retriever for item ' . - $retriever_item['item-uri'] . ' ' . $retriever_item['uid'] . ' ' . $item['contact-id'], - Logger::INFO); + $retriever_rule = get_retriever_rule($retriever_item['contact-id'], $item['uid']); + if (!$retriever_rule) { + Logger::log('retriever_clean_up_completed_resources: no retriever for uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['uid'] . ' ' . 
$retriever_item['contact-id'], Logger::WARNING); continue; } - retriever_apply_completed_resource_to_item($retriever, $item, $resource[0], $a); - q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", - intval($retriever_item['id'])); + Logger::log('@@@ retriever_clean_up_completed_resources: about to retriever_apply_completed_resource_to_item', Logger::INFO); + retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource[0], $a); + q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", intval($retriever_item['id'])); retriever_check_item_completed($item); } } @@ -157,7 +171,7 @@ function retriever_tidy() { q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)"); $r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); - Logger::log('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource'); + Logger::log('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource', Logger::INFO); foreach ($r as $rr) { q('DELETE FROM retriever_item WHERE id = %d', intval($rr['id'])); } @@ -165,7 +179,7 @@ function retriever_tidy() { function retrieve_dataurl_resource($resource) { if (!preg_match("/date:(.*);base64,(.*)/", $resource['url'], $matches)) { - Logger::log('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern'); + Logger::log('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern', Logger::INFO); } else { $resource['type'] = $matches[1]; $resource['data'] = base64url_decode($matches[2]); @@ -180,28 +194,36 @@ function retrieve_dataurl_resource($resource) { } function retrieve_resource($resource) { + Logger::log('@@@ retrieve_resource: url ' . $resource['url'] . ' uid ' . $resource['item-uid'] . ' cid ' . 
$resource['contact-id'], Logger::INFO); + if (substr($resource['url'], 0, 5) == "data:") { return retrieve_dataurl_resource($resource); } $a = get_app(); + $retriever_rule = get_retriever_rule($resource['contact-id'], $resource['item-uid']); + try { - Logger::log('retrieve_resource: ' . ($resource['num-tries'] + 1) . - ' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], Logger::DEBUG); + Logger::log('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], Logger::DEBUG); $redirects = 0; $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); + if ($retriever_rule['storecookies']) { + file_put_contents($cookiejar, $retriever_rule['cookiedata']); + } $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar); + if ($retriever_rule['storecookies']) { + $retriever_rule['cookiedata'] = file_get_contents($cookiejar); + //@@@ do the store here + } unlink($cookiejar); $resource['data'] = $fetch_result->getBody(); $resource['http-code'] = $fetch_result->getReturnCode(); $resource['type'] = $fetch_result->getContentType(); $resource['redirect-url'] = $fetch_result->getRedirectUrl(); - Logger::log('retrieve_resource: got code ' . $resource['http-code'] . - ' retrieving resource ' . $resource['id'] . - ' final url ' . $resource['redirect-url'], Logger::DEBUG); + Logger::log('retrieve_resource: got code ' . $resource['http-code'] . ' retrieving resource ' . $resource['id'] . ' final url ' . $resource['redirect-url'], Logger::DEBUG); } catch (Exception $e) { - Logger::log('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage()); + Logger::log('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . 
$e->getMessage(), Logger::INFO); } q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `http-code` = %d, `redirect-url` = '%s' WHERE id = %d", intval($resource['http-code']), @@ -214,13 +236,17 @@ function retrieve_resource($resource) { intval($resource['id'])); retriever_resource_completed($resource, $a); } + Logger::log('@@@ retrieve_resource finished: ' . $resource['url'], Logger::INFO); } -function get_retriever($contact_id, $uid, $create = false) { +function get_retriever_rule($contact_id, $uid, $create = false) { + Logger::log('@@@ get_retriever_rule ' . "SELECT * FROM `retriever_rule` WHERE `contact-id` = " . intval($contact_id) . " AND `uid` = " . intval($uid), Logger::INFO); $r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d", intval($contact_id), intval($uid)); + Logger::log('@@@ get_retriever_rule count is ' . count($r), Logger::INFO); if (count($r)) { $r[0]['data'] = json_decode($r[0]['data'], true); + Logger::log('@@@ get_retriever_rule returning an actual thing', Logger::INFO); return $r[0]; } if ($create) { @@ -233,43 +259,62 @@ function get_retriever($contact_id, $uid, $create = false) { } function retriever_get_retriever_item($id) { - $retriever_items = q("SELECT * FROM `retriever_item` WHERE id = %d", intval($id)); - if (count($retriever_items) != 1) { - Logger::log('retriever_get_retriever_item: unable to find retriever_item ' . 
$id, Logger::INFO); - return; + return DBA::selectFirst('retriever_item', [], ['id' => intval($id)]); +} + +function retriever_class_of_item($item) { //@@@ + if (!$item) { + return 'false'; } - return $retriever_items[0]; + if (array_key_exists('finished', $item)) { + Logger::log('@@@ oh no this is a bad thing', Logger::INFO); + return 'retriever_item'; + } + if (array_key_exists('moderated', $item)) { + return 'friendica_item'; + } + return 'unknown'; +} + +function mat_test($item) { //@@@ + return 'mat_test'; } function retriever_get_item($retriever_item) { - // @@@ Need to replace this with Item::selectFirst - $items = q("SELECT * FROM `item` WHERE `uri` = '%s' AND `uid` = %d AND `contact-id` = %d", - DBA::escape($retriever_item['item-uri']), - intval($retriever_item['item-uid']), - intval($retriever_item['contact-id'])); - if (count($items) != 1) { - Logger::log('retriever_get_item: unexpected number of results ' . - count($items) . " when searching for item $uri $uid $cid", Logger::INFO); - return; + // @@@ add contact id as a search term + Logger::log('@@@ retriever_get_item uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['item-uid'] . ' cid ' . $retriever_item['contact-id'], Logger::INFO); + try {//@@@ not necessary + $item = Item::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid'])]); + Logger::log('@@@ 1 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); + if (!DBA::isResult($item)) { + Logger::log('retriever_get_item: no item found for uri ' . $retriever_item['item-uri'], Logger::INFO); + return; + } + Logger::log('@@@ retriever_get_item: yay item found for uri ' . $retriever_item['item-uri'] . ' guid ' . $item['guid'] . ' plink ' . $item['plink'], Logger::INFO); + return $item; + } catch (Exception $e) { + Logger::log('retriever_get_item: exception ' . 
$e->getMessage(), Logger::INFO); } - return $items[0]; } function retriever_item_completed($retriever_item_id, $resource, $a) { Logger::log('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url'], Logger::DEBUG); $retriever_item = retriever_get_retriever_item($retriever_item_id); - if (!$retriever_item) { + if (!DBA::isResult($retriever_item)) { + Logger::log('retriever_item_completed: no retriever item with id ' . $retriever_item_id, Logger::INFO); + return; + } + $item = retriever_get_item($retriever_item); + Logger::log('@@@ 2 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); + if (!$item) { + Logger::log('retriever_item_completed: no item ' . $retriever_item['item-uri'], Logger::INFO); return; } // Note: the retriever might be null. Doesn't matter. - $retriever = get_retriever($retriever_item['contact-id'], $retriever_item['item-uid']); - $item = retriever_get_item($retriever_item); - if (!$item) { - return; - } + $retriever_rule = get_retriever_rule($retriever_item['contact-id'], $retriever_item['item-uid']); - retriever_apply_completed_resource_to_item($retriever, $item, $resource, $a); + retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a); q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", intval($retriever_item['id'])); @@ -288,18 +333,24 @@ function apply_retrospective($a, $retriever, $num) { $r = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d", intval($retriever['contact-id']), intval($num)); foreach ($r as $item) { + Logger::log('@@@ 3 item class is ' . retriever_class_of_item($item) . ' ' . 
mat_test($item)); //@@@ already know this is wrong q('UPDATE `item` SET `visible` = 0 WHERE `id` = %d', $item['id']); q('UPDATE `thread` SET `visible` = 0 WHERE `iid` = %d', $item['id']); retriever_on_item_insert($a, $retriever, $item); } } +//@@@ make this trigger a retriever immediately somehow +//@@@ need a lock to say something is doing something function retriever_on_item_insert($a, $retriever, &$item) { + Logger::log('@@@ 4 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); + Logger::log('@@@ retriever_on_item_insert start ' . $item['plink'], Logger::INFO); if (!$retriever || !$retriever['id']) { Logger::log('retriever_on_item_insert: No retriever supplied', Logger::INFO); return; } if (!$retriever["data"]['enable'] == "on") { + Logger::log('@@@ retriever_on_item_insert: Disabled', Logger::INFO); return; } if (array_key_exists('pattern', $retriever["data"]) && $retriever["data"]['pattern']) { @@ -310,12 +361,13 @@ function retriever_on_item_insert($a, $retriever, &$item) { $url = $item['plink']; } - $resource = add_retriever_resource($a, $url); + Logger::log('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id'], Logger::DEBUG); + $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id']); $retriever_item_id = add_retriever_item($item, $resource); } -function add_retriever_resource($a, $url, $binary = false) { - Logger::log('add_retriever_resource: ' . $url, Logger::DEBUG); +function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { + Logger::log('add_retriever_resource: url ' . $url . ' uid ' . $uid . ' contact-id ' . $cid, Logger::DEBUG); $scheme = parse_url($url, PHP_URL_SCHEME); if ($scheme == 'data') { @@ -326,7 +378,7 @@ function add_retriever_resource($a, $url, $binary = false) { fclose($fp); $url = 'md5://' . 
hash('md5', $url); - $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url)); + $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid)); $resource = $r[0]; if (count($r)) { Logger::log('add_retriever_resource: Resource ' . $url . ' already requested', Logger::DEBUG); @@ -334,8 +386,10 @@ function add_retriever_resource($a, $url, $binary = false) { } Logger::log('retrieve_resource: got data URL type ' . $resource['type'], Logger::DEBUG); - q("INSERT INTO `retriever_resource` (`type`, `binary`, `url`, `completed`, `data`) " . - "VALUES ('%s', %d, '%s', now(), '%s')", + q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `type`, `binary`, `url`, `completed`, `data`) " . + "VALUES (%d, %d, '%s', %d, '%s', now(), '%s')", + intval($uid), + intval($cid), DBA::escape($type), intval($binary ? 1 : 0), DBA::escape($url), @@ -349,31 +403,30 @@ function add_retriever_resource($a, $url, $binary = false) { } if (strlen($url) > 800) { - Logger::log('add_retriever_resource: URL is longer than 800 characters', Logger::INFO); + Logger::log('add_retriever_resource: URL is longer than 800 characters', Logger::WARNING); } - $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url)); + $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid)); if (count($r)) { - Logger::log('add_retriever_resource: Resource ' . $url . ' already requested', Logger::DEBUG); + Logger::log('add_retriever_resource: Resource ' . $url . ' uid ' . $uid . ' cid ' . $cid . ' already requested', Logger::DEBUG); return $r[0]; } - q("INSERT INTO `retriever_resource` (`binary`, `url`) " . - "VALUES (%d, '%s')", intval($binary ? 1 : 0), DBA::escape($url)); + q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `binary`, `url`) " . 
+ "VALUES (%d, %d, %d, '%s')", intval($uid), intval($cid), intval($binary ? 1 : 0), DBA::escape($url)); $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url)); return $r[0]; } function add_retriever_item(&$item, $resource) { + Logger::log('@@@ 5 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG); Logger::log('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG); $r = q("SELECT COUNT(*) FROM `retriever_item` WHERE " . "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d", DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id'])); if ($r[0]['COUNT(*)'] > 0) { - Logger::log("add_retriever_item: retriever item already present for " . - $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], - Logger::INFO); + Logger::log("add_retriever_item: retriever item already present for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::INFO); return; } q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " . @@ -383,9 +436,7 @@ function add_retriever_item(&$item, $resource) { "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d ORDER BY id DESC", DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id'])); if (!count($r)) { - Logger::log("add_retriever_item: couldn't create retriever item for " . - $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], - Logger::INFO); + Logger::log("add_retriever_item: couldn't create retriever item for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::INFO); return; } Logger::log('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . 
$item['contact-id'], Logger::DEBUG); @@ -416,7 +467,9 @@ function retriever_apply_xslt_text($xslt_text, $doc) { return $result; } +//@@@ is that an item or a resource_item? I really want an item here so I can update it function retriever_apply_dom_filter($retriever, &$item, $resource) { + Logger::log('@@@ 6 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG); Logger::log('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'], Logger::DEBUG); if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) { @@ -454,18 +507,23 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { return; } + Logger::log('@@@ retriever_apply_dom_filter: 1', Logger::INFO); $components = parse_url($resource['redirect-url']); $rooturl = $components['scheme'] . "://" . $components['host']; $dirurl = $rooturl . dirname($components['path']) . "/"; + Logger::log('@@@ retriever_apply_dom_filter: 2', Logger::INFO); $params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl); $fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/'); $fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params); + Logger::log('@@@ retriever_apply_dom_filter: 3', Logger::INFO); $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc); + Logger::log('@@@ retriever_apply_dom_filter: 4', Logger::INFO); if (!$doc) { Logger::log('retriever_apply_dom_filter: failed to apply fix urls XSLT template', Logger::INFO); return; } + Logger::log('@@@ retriever_apply_dom_filter: 5', Logger::INFO); $body = HTML::toBBCode($doc->saveHTML()); if (!strlen($body)) { Logger::log('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', Logger::INFO); @@ -475,47 +533,66 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { $body .= $item['plink']; $body .= ']' . 
$item['plink'] . '[/url]'; - $uri_id = ItemURI::getIdByURI($item['uri']); - //@@@ remove this - $item['body'] = $body; + Logger::log('@@@ retriever_apply_dom_filter: 6', Logger::INFO); + $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? + Logger::log('@@@ retriever_apply_dom_filter: item id is ' . $item['id'] . ' uri id is ' . $uri_id, Logger::INFO); Logger::log('retriever_apply_dom_filter: XSLT result \"' . $body . '\"', Logger::DATA); - DBA::update('item', ['body' => $body], ['id' => $item['id']]); - DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); + DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that? + //@@@ probably Item::updateContent } function retrieve_images(&$item, $a) { + $blah_item_class = retriever_class_of_item($item) . ' ' . mat_test($item); + Logger::log('@@@ 7 item class is ' . $blah_item_class, Logger::DEBUG); + + $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? + + $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]); + $body = $content['body']; + if (!strlen($body)) { + Logger::log('retrieve_images: no body for uri-id ' . $uri_id, Logger::WARNING); + return; + } + + Logger::log('@@@ retrieve_images start looking in body "' . $body . '"', Logger::INFO); $matches1 = array(); - preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $item["body"], $matches1); + preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $body, $matches1); $matches2 = array(); - preg_match_all("/\[img\](.*?)\[\/img\]/ism", $item["body"], $matches2); + preg_match_all("/\[img\](.*?)\[\/img\]/ism", $body, $matches2); $matches = array_merge($matches1[3], $matches2[1]); Logger::log('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . 
$item['contact-id'], Logger::DEBUG); foreach ($matches as $url) { + Logger::log('@@@ retrieve_images: url ' . $url, Logger::DEBUG); if (strpos($url, get_app()->getBaseUrl()) === FALSE) { - $resource = add_retriever_resource($a, $url, true); + Logger::log('@@@ retrieve_images: it is from somewhere else', Logger::DEBUG); + Logger::log('@@@ retrieve_images: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id'], Logger::DEBUG); + $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id'], true); if (!$resource['completed']) { + Logger::log('@@@ retrieve_images: do not have it yet, get it later', Logger::DEBUG); add_retriever_item($item, $resource); } else { + Logger::log('@@@ retrieve_images: got it already, transform', Logger::DEBUG); retriever_transform_images($a, $item, $resource); } } } + Logger::log('@@@ retrieve_images end', Logger::INFO); } function retriever_check_item_completed(&$item) { + Logger::log('@@@ 9 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG); $r = q('SELECT count(*) FROM retriever_item WHERE `item-uri` = "%s" ' . 'AND `item-uid` = %d AND `contact-id` = %d AND `finished` = 0', DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id'])); $waiting = $r[0]['count(*)']; - Logger::log('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] - . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources', Logger::DEBUG); + Logger::log('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources', Logger::DEBUG); $old_visible = $item['visible']; $item['visible'] = $waiting ? 0 : 1; if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) { - Logger::log('retriever_check_item_completed: changing visible flag to ' . $item['visible'] . ' and invoking notifier ("edit_post", ' . $item['id'] . 
')', Logger::DEBUG); + Logger::log('retriever_check_item_completed: changing visible flag to ' . $item['visible'], Logger::DEBUG); q("UPDATE `item` SET `visible` = %d WHERE `id` = %d", intval($item['visible']), intval($item['id'])); @@ -526,10 +603,10 @@ function retriever_check_item_completed(&$item) } function retriever_apply_completed_resource_to_item($retriever, &$item, $resource, $a) { - Logger::log('retriever_apply_completed_resource_to_item: retriever ' . - ($retriever ? $retriever['id'] : 'none') . - ' resource ' . $resource['url'] . ' plink ' . $item['plink'], Logger::DEBUG); + Logger::log('@@@ 10 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG); + Logger::log('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . $item['plink'], Logger::DEBUG); if (strpos($resource['type'], 'image') !== false) { + Logger::log('@@@ retriever_apply_completed_resource_to_item this is an image must transform', Logger::INFO); retriever_transform_images($a, $item, $resource); } if (!$retriever) { @@ -544,38 +621,61 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc } } +//@@@ todo: change all Logger::log to Logger::info etc +//@@@ todo: what is this reference for? document if needed delete if not function retriever_transform_images($a, &$item, $resource) { - return; //@@@ not working + Logger::log('@@@ 11 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG); + Logger::log('@@@ retriever_transform_images', Logger::INFO); if (!$resource["data"]) { - Logger::log('retriever_transform_images: no data available for ' - . $resource['id'] . ' ' . $resource['url'], Logger::INFO); + Logger::log('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . 
$resource['url'], Logger::INFO); return; } - try { - $photo = Image::storePhoto($a, $item['uid'], $resource['data'], $resource['url']); + $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? + + try { //@@@ probably can get rid of this try/catch + $data = $resource['data']; + $type = $resource['type']; + $uid = $item['uid']; + $cid = $item['contact-id']; + $rid = Photo::newResource(); + $path = parse_url($resource['url'], PHP_URL_PATH); + $parts = pathinfo($path); + $filename = $parts['filename'] . (array_key_exists('extension', $parts) ? '.' . $parts['extension'] : ''); + Logger::log('@@@ retriever_transform_images url ' . $resource['url'] . ' path ' . $path . ' filename ' . $parts['filename'], Logger::INFO); + $album = 'Wall Photos'; + $scale = 0; + $desc = ''; // TODO: store alt text with resource when it's requested so we can fill this in + Logger::log('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . $uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . $album . ' scale ' . $scale . ' desc ' . $desc, Logger::DEBUG); + Logger::log('@@@ retriever_transform_images before new Image', Logger::INFO); + $image = new Image($data, $type); + Logger::log('@@@ retriever_transform_images after new Image', Logger::INFO); + Logger::log('@@@ retriever_transform_images before Photo::store', Logger::INFO); + $photo = Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc); + Logger::log('@@@ retriever_transform_images after Photo::store', Logger::INFO); + $new_url = System::baseUrl() . '/photo/' . $rid . '-0.' . $image->getExt(); + Logger::log('@@@ retriever_transform_images new url ' . $new_url . ' rid ' . $rid . ' ext ' . $image->getExt(), Logger::INFO); + if (!strlen($new_url)) { + Logger::log('retriever_transform_images: no replacement URL for image ' . 
$resource['url'], Logger::WARNING); + return; + } + + $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]); + $body = $content['body']; + Logger::log('@@@ retriever_transform_images: found body for uri id ' . $uri_id . ': ' . $body, Logger::INFO); + + Logger::log('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . $item['uri'], Logger::DEBUG); + Logger::log('@@@ retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in body ' . $body, Logger::DEBUG); + $body = str_replace($resource["url"], $new_url, $body); + + Logger::log('@@@ retriever_transform_images: result \"' . $body . '\"', Logger::INFO); + DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that? + //@@@ probably Item::updateContent + //@@ actually no, Item::update } catch (Exception $e) { - Logger::log('retriever_transform_images caught exception ' . $e->getMessage()); + Logger::log('retriever_transform_images caught exception ' . $e->getMessage(), Logger::INFO); return; } - if (!array_key_exists('full', $photo)) { - Logger::log('retriever_transform_images: no replacement URL for image ' . $resource['url']); - return; - } - $new_url = $photo['full']; - Logger::log('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . - $new_url . ' in item ' . 
$item['plink'], Logger::DEBUG); - $transformed = str_replace($resource["url"], $new_url, $item['body']); - if ($transformed === $item['body']) { - return; - } - - $item['body'] = $transformed; - q("UPDATE `item` SET `body` = '%s' WHERE `plink` = '%s' AND `uid` = %d AND `contact-id` = %d", - DBA::escape($item['body']), - DBA::escape($item['plink']), - intval($item['uid']), - intval($item['contact-id'])); } function retriever_content($a) { @@ -596,37 +696,37 @@ function retriever_content($a) { return; } if ($a->argv[1]) { - $retriever = get_retriever($a->argv[1], local_user(), false); + $retriever_rule = get_retriever_rule($a->argv[1], local_user(), false); if (!empty($_POST["id"])) { - $retriever = get_retriever($a->argv[1], local_user(), true); - $retriever["data"] = array(); + $retriever_rule = get_retriever_rule($a->argv[1], local_user(), true); + $retriever_rule["data"] = array(); foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) { if (!empty($_POST['retriever_' . $setting])) { - $retriever["data"][$setting] = $_POST['retriever_' . $setting]; + $retriever_rule["data"][$setting] = $_POST['retriever_' . 
$setting]; } } foreach ($_POST as $k=>$v) { if (preg_match("/retriever-(include|exclude)-(\d+)-(element|attribute|value)/", $k, $matches)) { - $retriever['data'][$matches[1]][intval($matches[2])][$matches[3]] = $v; + $retriever_rule['data'][$matches[1]][intval($matches[2])][$matches[3]] = $v; } } // You've gotta have an element, even if it's just "*" - foreach ($retriever['data']['include'] as $k=>$clause) { + foreach ($retriever_rule['data']['include'] as $k=>$clause) { if (!$clause['element']) { - unset($retriever['data']['include'][$k]); + unset($retriever_rule['data']['include'][$k]); } } - foreach ($retriever['data']['exclude'] as $k=>$clause) { + foreach ($retriever_rule['data']['exclude'] as $k=>$clause) { if (!$clause['element']) { - unset($retriever['data']['exclude'][$k]); + unset($retriever_rule['data']['exclude'][$k]); } } q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d", - DBA::escape(json_encode($retriever["data"])), intval($retriever["id"])); + DBA::escape(json_encode($retriever_rule["data"])), intval($retriever_rule["id"])); $a->page['content'] .= "

Settings Updated"; if (!empty($_POST["retriever_retrospective"])) { - apply_retrospective($a, $retriever, $_POST["retriever_retrospective"]); + apply_retrospective($a, $retriever_rule, $_POST["retriever_retrospective"]); $a->page['content'] .= " and retrospectively applied to " . $_POST["apply"] . " posts"; } $a->page['content'] .= ".

"; @@ -637,21 +737,21 @@ function retriever_content($a) { '$enable' => array( 'retriever_enable', L10n::t('Enabled'), - $retriever['data']['enable']), + $retriever_rule['data']['enable']), '$pattern' => array( 'retriever_pattern', L10n::t('URL Pattern'), - $retriever["data"]['pattern'], + $retriever_rule["data"]['pattern'], L10n::t('Regular expression matching part of the URL to replace')), '$replace' => array( 'retriever_replace', L10n::t('URL Replace'), - $retriever["data"]['replace'], + $retriever_rule["data"]['replace'], L10n::t('Text to replace matching part of above regular expression')), '$images' => array( 'retriever_images', L10n::t('Download Images'), - $retriever['data']['images']), + $retriever_rule['data']['images']), '$retrospective' => array( 'retriever_retrospective', L10n::t('Retrospectively Apply'), @@ -660,33 +760,33 @@ function retriever_content($a) { 'storecookies' => array( 'retriever_storecookies', L10n::t('Store cookies'), - $retriever['data']['storecookies'], + $retriever_rule['data']['storecookies'], L10n::t("Preserve cookie data across fetches.")), '$cookiedata' => array( 'retriever_cookiedata', L10n::t('Cookie Data'), - $retriever['data']['cookiedata'], + $retriever_rule['data']['cookiedata'], L10n::t("Latest cookie data for this feed. Netscape cookie file format.")), '$customxslt' => array( 'retriever_customxslt', L10n::t('Custom XSLT'), - $retriever['data']['customxslt'], + $retriever_rule['data']['customxslt'], L10n::t("When standard rules aren't enough, apply custom XSLT to the article")), '$title' => L10n::t('Retrieve Feed Content'), '$help' => $a->getBaseUrl() . '/retriever/help', '$help_t' => L10n::t('Get Help'), '$submit_t' => L10n::t('Submit'), '$submit' => L10n::t('Save Settings'), - '$id' => ($retriever["id"] ? $retriever["id"] : "create"), + '$id' => ($retriever_rule["id"] ? 
$retriever_rule["id"] : "create"), '$tag_t' => L10n::t('Tag'), '$attribute_t' => L10n::t('Attribute'), '$value_t' => L10n::t('Value'), '$add_t' => L10n::t('Add'), '$remove_t' => L10n::t('Remove'), '$include_t' => L10n::t('Include'), - '$include' => $retriever['data']['include'], + '$include' => $retriever_rule['data']['include'], '$exclude_t' => L10n::t('Exclude'), - '$exclude' => $retriever["data"]['exclude'])); + '$exclude' => $retriever_rule["data"]['exclude'])); return; } } @@ -701,18 +801,23 @@ function retriever_contact_photo_menu($a, &$args) { } function retriever_post_remote_hook(&$a, &$item) { + Logger::log('@@@ 12 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); Logger::log('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG); - $retriever = get_retriever($item['contact-id'], $item["uid"], false); - if ($retriever) { - retriever_on_item_insert($a, $retriever, $item); + $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? + $retriever_rule = get_retriever_rule($item['contact-id'], $item["uid"], false); + if ($retriever_rule) { + retriever_on_item_insert($a, $retriever_rule, $item); } else { if (PConfig::get($item["uid"], 'retriever', 'oembed')) { // Convert to HTML and back to take advantage of bbcode's resolution of oembeds. - $body = HTML::toBBCode(BBCode::convert($item['body'])); + $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]); + $body = HTML::toBBCode(BBCode::convert($content['body'])); + Logger::log('@@@ retriever_post_remote_hook item uri-id ' . $uri_id . ' body "' . $item['body'] . '" item content body "' . $body . '"', Logger::DEBUG); if ($body) { $item['body'] = $body; + DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that? 
} } if (PConfig::get($item["uid"], 'retriever', 'all_photos')) { From ef6709d861bc9ba710bef507d7d298d39a58fd6c Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 22 Sep 2019 17:05:23 +0200 Subject: [PATCH 12/94] Improvement --- retriever/retriever.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/retriever/retriever.php b/retriever/retriever.php index 5644952a..704bff34 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -650,6 +650,10 @@ function retriever_transform_images($a, &$item, $resource) { Logger::log('@@@ retriever_transform_images before new Image', Logger::INFO); $image = new Image($data, $type); Logger::log('@@@ retriever_transform_images after new Image', Logger::INFO); + if (!$image->isValid()) { + Logger::log('retriever_transform_images: invalid image found at URL ' . $resource['url'] ' for item ' . $item['id'], Logger::WARNING); + return; + } Logger::log('@@@ retriever_transform_images before Photo::store', Logger::INFO); $photo = Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc); Logger::log('@@@ retriever_transform_images after Photo::store', Logger::INFO); From 615992810a8cfff2e7213f8bd7d2d9dcc259831a Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 22 Sep 2019 19:55:07 +0200 Subject: [PATCH 13/94] Change logging functions --- retriever/retriever.php | 210 ++++++++++++++++++++-------------------- 1 file changed, 105 insertions(+), 105 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 704bff34..ac6b321a 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -84,7 +84,7 @@ function retriever_cron($a, $b) { $retriever_item_count = 0; function retriever_retrieve_items($max_items, $a) { - Logger::log('@@@ retriever_retrieve_items', Logger::INFO); + Logger::info('@@@ retriever_retrieve_items'); global $retriever_item_count; $retriever_schedule = array(array(1,'minute'), @@ -105,10 +105,10 @@ function 
retriever_retrieve_items($max_items, $a) { } $retrieve_items = $max_items - $retriever_item_count; - Logger::log('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items, Logger::DEBUG); + Logger::debug('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items); do { - Logger::log('@@@ retriever_retrieve_items loop max ' . $max_items . ' count ' . $retriever_item_count, Logger::INFO); - Logger::log("@@@ SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR " . implode($schedule_clauses, ' OR ') . ") ORDER BY `last-try` ASC LIMIT " . $retrieve_items, Logger::INFO); + Logger::info('@@@ retriever_retrieve_items loop max ' . $max_items . ' count ' . $retriever_item_count); + Logger::info("@@@ SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR " . implode($schedule_clauses, ' OR ') . ") ORDER BY `last-try` ASC LIMIT " . $retrieve_items); $retriever_resources = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d", DBA::escape(implode($schedule_clauses, ' OR ')), intval($retrieve_items)); @@ -118,9 +118,9 @@ function retriever_retrieve_items($max_items, $a) { if (count($retriever_resources) == 0) { break; } - Logger::log('retriever_retrieve_items: found ' . count($retriever_resources) . ' waiting resources in database', Logger::DEBUG); + Logger::debug('retriever_retrieve_items: found ' . count($retriever_resources) . ' waiting resources in database'); foreach ($retriever_resources as $retriever_resource) { - Logger::log('@@@ need to get the retriever config here cid ' . $retriever_resource['contact-id'] . ' uid ' . $retriever_resource['item-uid'], Logger::INFO); + Logger::info('@@@ need to get the retriever config here cid ' . 
$retriever_resource['contact-id'] . ' uid ' . $retriever_resource['item-uid']); retrieve_resource($retriever_resource); $retriever_item_count++; } @@ -128,7 +128,7 @@ function retriever_retrieve_items($max_items, $a) { } while ($retrieve_items > 0); // @@@ todo: when items add further items (i.e. images), do the new images go round this loop again? - Logger::log('@@@ retriever_retrieve_items: finished retrieving items', Logger::INFO); + Logger::info('@@@ retriever_retrieve_items: finished retrieving items'); } /* Look for items that are waiting even though the resource has @@ -141,25 +141,25 @@ function retriever_clean_up_completed_resources($max_items, $a) { if (!$r) { $r = array(); } - Logger::log('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . count($r), Logger::DEBUG); + Logger::debug('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . count($r)); foreach ($r as $rr) { $resource = q("SELECT * FROM retriever_resource WHERE `id` = %d", $rr['resource']); $retriever_item = retriever_get_retriever_item($rr['item']); if (!DBA::isResult($retriever_item)) { - Logger::log('retriever_clean_up_completed_resources: no retriever item with id ' . $rr['item'], Logger::WARNING); + Logger::warning('retriever_clean_up_completed_resources: no retriever item with id ' . $rr['item']); continue; } $item = retriever_get_item($retriever_item); if (!$item) { - Logger::log('retriever_clean_up_completed_resources: no item ' . $retriever_item['item-uri'], Logger::WARNING); + Logger::warning('retriever_clean_up_completed_resources: no item ' . $retriever_item['item-uri']); continue; } $retriever_rule = get_retriever_rule($retriever_item['contact-id'], $item['uid']); if (!$retriever_rule) { - Logger::log('retriever_clean_up_completed_resources: no retriever for uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['uid'] . ' ' . 
$retriever_item['contact-id'], Logger::WARNING); + Logger::warning('retriever_clean_up_completed_resources: no retriever for uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['uid'] . ' ' . $retriever_item['contact-id']); continue; } - Logger::log('@@@ retriever_clean_up_completed_resources: about to retriever_apply_completed_resource_to_item', Logger::INFO); + Logger::info('@@@ retriever_clean_up_completed_resources: about to retriever_apply_completed_resource_to_item'); retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource[0], $a); q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", intval($retriever_item['id'])); retriever_check_item_completed($item); @@ -171,7 +171,7 @@ function retriever_tidy() { q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)"); $r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); - Logger::log('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource', Logger::INFO); + Logger::info('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource'); foreach ($r as $rr) { q('DELETE FROM retriever_item WHERE id = %d', intval($rr['id'])); } @@ -179,7 +179,7 @@ function retriever_tidy() { function retrieve_dataurl_resource($resource) { if (!preg_match("/date:(.*);base64,(.*)/", $resource['url'], $matches)) { - Logger::log('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern', Logger::INFO); + Logger::info('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern'); } else { $resource['type'] = $matches[1]; $resource['data'] = base64url_decode($matches[2]); @@ -194,7 +194,7 @@ function retrieve_dataurl_resource($resource) { } function retrieve_resource($resource) { - Logger::log('@@@ retrieve_resource: url ' . $resource['url'] . 
' uid ' . $resource['item-uid'] . ' cid ' . $resource['contact-id'], Logger::INFO); + Logger::info('@@@ retrieve_resource: url ' . $resource['url'] . ' uid ' . $resource['item-uid'] . ' cid ' . $resource['contact-id']); if (substr($resource['url'], 0, 5) == "data:") { return retrieve_dataurl_resource($resource); @@ -205,14 +205,14 @@ function retrieve_resource($resource) { $retriever_rule = get_retriever_rule($resource['contact-id'], $resource['item-uid']); try { - Logger::log('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], Logger::DEBUG); + Logger::debug('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url']); $redirects = 0; $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); - if ($retriever_rule['storecookies']) { + if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) { file_put_contents($cookiejar, $retriever_rule['cookiedata']); } $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar); - if ($retriever_rule['storecookies']) { + if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) { $retriever_rule['cookiedata'] = file_get_contents($cookiejar); //@@@ do the store here } @@ -221,9 +221,9 @@ function retrieve_resource($resource) { $resource['http-code'] = $fetch_result->getReturnCode(); $resource['type'] = $fetch_result->getContentType(); $resource['redirect-url'] = $fetch_result->getRedirectUrl(); - Logger::log('retrieve_resource: got code ' . $resource['http-code'] . ' retrieving resource ' . $resource['id'] . ' final url ' . $resource['redirect-url'], Logger::DEBUG); + Logger::debug('retrieve_resource: got code ' . $resource['http-code'] . ' retrieving resource ' . $resource['id'] . ' final url ' . 
$resource['redirect-url']); } catch (Exception $e) { - Logger::log('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage(), Logger::INFO); + Logger::info('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage()); } q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `http-code` = %d, `redirect-url` = '%s' WHERE id = %d", intval($resource['http-code']), @@ -236,17 +236,17 @@ function retrieve_resource($resource) { intval($resource['id'])); retriever_resource_completed($resource, $a); } - Logger::log('@@@ retrieve_resource finished: ' . $resource['url'], Logger::INFO); + Logger::info('@@@ retrieve_resource finished: ' . $resource['url']); } function get_retriever_rule($contact_id, $uid, $create = false) { - Logger::log('@@@ get_retriever_rule ' . "SELECT * FROM `retriever_rule` WHERE `contact-id` = " . intval($contact_id) . " AND `uid` = " . intval($uid), Logger::INFO); + Logger::info('@@@ get_retriever_rule ' . "SELECT * FROM `retriever_rule` WHERE `contact-id` = " . intval($contact_id) . " AND `uid` = " . intval($uid)); $r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d", intval($contact_id), intval($uid)); - Logger::log('@@@ get_retriever_rule count is ' . count($r), Logger::INFO); + Logger::info('@@@ get_retriever_rule count is ' . 
count($r)); if (count($r)) { $r[0]['data'] = json_decode($r[0]['data'], true); - Logger::log('@@@ get_retriever_rule returning an actual thing', Logger::INFO); + Logger::info('@@@ get_retriever_rule returning an actual thing'); return $r[0]; } if ($create) { @@ -267,7 +267,7 @@ function retriever_class_of_item($item) { //@@@ return 'false'; } if (array_key_exists('finished', $item)) { - Logger::log('@@@ oh no this is a bad thing', Logger::INFO); + Logger::info('@@@ oh no this is a bad thing'); return 'retriever_item'; } if (array_key_exists('moderated', $item)) { @@ -282,33 +282,33 @@ function mat_test($item) { //@@@ function retriever_get_item($retriever_item) { // @@@ add contact id as a search term - Logger::log('@@@ retriever_get_item uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['item-uid'] . ' cid ' . $retriever_item['contact-id'], Logger::INFO); + Logger::info('@@@ retriever_get_item uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['item-uid'] . ' cid ' . $retriever_item['contact-id']); try {//@@@ not necessary $item = Item::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid'])]); Logger::log('@@@ 1 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); if (!DBA::isResult($item)) { - Logger::log('retriever_get_item: no item found for uri ' . $retriever_item['item-uri'], Logger::INFO); + Logger::log('retriever_get_item: no item found for uri ' . $retriever_item['item-uri']); return; } - Logger::log('@@@ retriever_get_item: yay item found for uri ' . $retriever_item['item-uri'] . ' guid ' . $item['guid'] . ' plink ' . $item['plink'], Logger::INFO); + Logger::info('@@@ retriever_get_item: yay item found for uri ' . $retriever_item['item-uri'] . ' guid ' . $item['guid'] . ' plink ' . $item['plink']); return $item; } catch (Exception $e) { - Logger::log('retriever_get_item: exception ' . 
$e->getMessage(), Logger::INFO); + Logger::info('retriever_get_item: exception ' . $e->getMessage()); } } function retriever_item_completed($retriever_item_id, $resource, $a) { - Logger::log('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url'], Logger::DEBUG); + Logger::debug('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url']); $retriever_item = retriever_get_retriever_item($retriever_item_id); if (!DBA::isResult($retriever_item)) { - Logger::log('retriever_item_completed: no retriever item with id ' . $retriever_item_id, Logger::INFO); + Logger::info('retriever_item_completed: no retriever item with id ' . $retriever_item_id); return; } $item = retriever_get_item($retriever_item); Logger::log('@@@ 2 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); if (!$item) { - Logger::log('retriever_item_completed: no item ' . $retriever_item['item-uri'], Logger::INFO); + Logger::log('retriever_item_completed: no item ' . $retriever_item['item-uri']); return; } // Note: the retriever might be null. Doesn't matter. @@ -322,7 +322,7 @@ function retriever_item_completed($retriever_item_id, $resource, $a) { } function retriever_resource_completed($resource, $a) { - Logger::log('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url'], Logger::DEBUG); + Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url']); $r = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource['id']); foreach ($r as $rr) { retriever_item_completed($rr['id'], $resource, $a); @@ -343,31 +343,31 @@ function apply_retrospective($a, $retriever, $num) { //@@@ make this trigger a retriever immediately somehow //@@@ need a lock to say something is doing something function retriever_on_item_insert($a, $retriever, &$item) { - Logger::log('@@@ 4 item class is ' . retriever_class_of_item($item) . ' ' . 
mat_test($item)); - Logger::log('@@@ retriever_on_item_insert start ' . $item['plink'], Logger::INFO); + Logger::info('@@@ 4 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); + Logger::info('@@@ retriever_on_item_insert start ' . $item['plink']); if (!$retriever || !$retriever['id']) { - Logger::log('retriever_on_item_insert: No retriever supplied', Logger::INFO); + Logger::info('retriever_on_item_insert: No retriever supplied'); return; } if (!$retriever["data"]['enable'] == "on") { - Logger::log('@@@ retriever_on_item_insert: Disabled', Logger::INFO); + Logger::info('@@@ retriever_on_item_insert: Disabled'); return; } if (array_key_exists('pattern', $retriever["data"]) && $retriever["data"]['pattern']) { $url = preg_replace('/' . $retriever["data"]['pattern'] . '/', $retriever["data"]['replace'], $item['plink']); - Logger::log('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url, Logger::DATA); + Logger::debug('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url); } else { $url = $item['plink']; } - Logger::log('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id'], Logger::DEBUG); + Logger::debug('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id']); $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id']); $retriever_item_id = add_retriever_item($item, $resource); } function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { - Logger::log('add_retriever_resource: url ' . $url . ' uid ' . $uid . ' contact-id ' . $cid, Logger::DEBUG); + Logger::debug('add_retriever_resource: url ' . $url . ' uid ' . $uid . ' contact-id ' . 
$cid); $scheme = parse_url($url, PHP_URL_SCHEME); if ($scheme == 'data') { @@ -381,11 +381,11 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid)); $resource = $r[0]; if (count($r)) { - Logger::log('add_retriever_resource: Resource ' . $url . ' already requested', Logger::DEBUG); + Logger::debug('add_retriever_resource: Resource ' . $url . ' already requested'); return $resource; } - Logger::log('retrieve_resource: got data URL type ' . $resource['type'], Logger::DEBUG); + Logger::debug('retrieve_resource: got data URL type ' . $resource['type']); q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `type`, `binary`, `url`, `completed`, `data`) " . "VALUES (%d, %d, '%s', %d, '%s', now(), '%s')", intval($uid), @@ -403,12 +403,12 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { } if (strlen($url) > 800) { - Logger::log('add_retriever_resource: URL is longer than 800 characters', Logger::WARNING); + Logger::warning('add_retriever_resource: URL is longer than 800 characters'); } $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid)); if (count($r)) { - Logger::log('add_retriever_resource: Resource ' . $url . ' uid ' . $uid . ' cid ' . $cid . ' already requested', Logger::DEBUG); + Logger::debug('add_retriever_resource: Resource ' . $url . ' uid ' . $uid . ' cid ' . $cid . ' already requested'); return $r[0]; } @@ -419,14 +419,14 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { } function add_retriever_item(&$item, $resource) { - Logger::log('@@@ 5 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG); - Logger::log('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . 
' ' . $item['contact-id'], Logger::DEBUG); + Logger::debug('@@@ 5 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); + Logger::debug('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); $r = q("SELECT COUNT(*) FROM `retriever_item` WHERE " . "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d", DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id'])); if ($r[0]['COUNT(*)'] > 0) { - Logger::log("add_retriever_item: retriever item already present for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::INFO); + Logger::info("add_retriever_item: retriever item already present for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); return; } q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " . @@ -436,10 +436,10 @@ function add_retriever_item(&$item, $resource) { "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d ORDER BY id DESC", DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id'])); if (!count($r)) { - Logger::log("add_retriever_item: couldn't create retriever item for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::INFO); + Logger::info("add_retriever_item: couldn't create retriever item for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); return; } - Logger::log('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG); + Logger::debug('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . 
$item['contact-id']); return $r[0]['id']; } @@ -453,12 +453,12 @@ function retriever_get_encoding($resource) { function retriever_apply_xslt_text($xslt_text, $doc) { if (!$xslt_text) { - Logger::log('retriever_apply_xslt_text: empty XSLT text', Logger::INFO); + Logger::info('retriever_apply_xslt_text: empty XSLT text'); return $doc; } $xslt_doc = new DOMDocument(); if (!$xslt_doc->loadXML($xslt_text)) { - Logger::log('retriever_apply_xslt_text: could not load XML', Logger::INFO); + Logger::info('retriever_apply_xslt_text: could not load XML'); return $doc; } $xp = new XsltProcessor(); @@ -469,15 +469,15 @@ function retriever_apply_xslt_text($xslt_text, $doc) { //@@@ is that an item or a resource_item? I really want an item here so I can update it function retriever_apply_dom_filter($retriever, &$item, $resource) { - Logger::log('@@@ 6 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG); - Logger::log('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'], Logger::DEBUG); + Logger::debug('@@@ 6 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); + Logger::debug('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . 
$item['contact-id']); if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) { - Logger::log('retriever_apply_dom_filter: no include and no customxslt', Logger::INFO); + Logger::info('retriever_apply_dom_filter: no include and no customxslt'); return; } if (!$resource['data']) { - Logger::log('retriever_apply_dom_filter: no text to work with', Logger::INFO); + Logger::info('retriever_apply_dom_filter: no text to work with'); return; } @@ -495,104 +495,104 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { $extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/'); $extract_xslt = Renderer::replaceMacros($extract_template, $params); if ($retriever['data']['include']) { - Logger::log('retriever_apply_dom_filter: applying include/exclude template \"' . $extract_xslt . '\"', Logger::DEBUG); + Logger::debug('retriever_apply_dom_filter: applying include/exclude template \"' . $extract_xslt . '\"'); $doc = retriever_apply_xslt_text($extract_xslt, $doc); } if (array_key_exists('customxslt', $retriever['data']) && $retriever['data']['customxslt']) { - Logger::log('retriever_apply_dom_filter: applying custom XSLT \"' . $retriever['data']['customxslt'] . '\"', Logger::DEBUG); + Logger::debug('retriever_apply_dom_filter: applying custom XSLT \"' . $retriever['data']['customxslt'] . '\"'); $doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc); } if (!$doc) { - Logger::log('retriever_apply_dom_filter: failed to apply extract XSLT template', Logger::INFO); + Logger::info('retriever_apply_dom_filter: failed to apply extract XSLT template'); return; } - Logger::log('@@@ retriever_apply_dom_filter: 1', Logger::INFO); + Logger::info('@@@ retriever_apply_dom_filter: 1'); $components = parse_url($resource['redirect-url']); $rooturl = $components['scheme'] . "://" . $components['host']; $dirurl = $rooturl . dirname($components['path']) . 
"/"; - Logger::log('@@@ retriever_apply_dom_filter: 2', Logger::INFO); + Logger::info('@@@ retriever_apply_dom_filter: 2'); $params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl); $fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/'); $fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params); - Logger::log('@@@ retriever_apply_dom_filter: 3', Logger::INFO); + Logger::info('@@@ retriever_apply_dom_filter: 3'); $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc); - Logger::log('@@@ retriever_apply_dom_filter: 4', Logger::INFO); + Logger::info('@@@ retriever_apply_dom_filter: 4'); if (!$doc) { - Logger::log('retriever_apply_dom_filter: failed to apply fix urls XSLT template', Logger::INFO); + Logger::info('retriever_apply_dom_filter: failed to apply fix urls XSLT template'); return; } - Logger::log('@@@ retriever_apply_dom_filter: 5', Logger::INFO); + Logger::info('@@@ retriever_apply_dom_filter: 5'); $body = HTML::toBBCode($doc->saveHTML()); if (!strlen($body)) { - Logger::log('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', Logger::INFO); + Logger::info('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty'); return; } $body .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . ': [url='; $body .= $item['plink']; $body .= ']' . $item['plink'] . '[/url]'; - Logger::log('@@@ retriever_apply_dom_filter: 6', Logger::INFO); + Logger::info('@@@ retriever_apply_dom_filter: 6'); $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? - Logger::log('@@@ retriever_apply_dom_filter: item id is ' . $item['id'] . ' uri id is ' . $uri_id, Logger::INFO); - Logger::log('retriever_apply_dom_filter: XSLT result \"' . $body . '\"', Logger::DATA); + Logger::info('@@@ retriever_apply_dom_filter: item id is ' . $item['id'] . ' uri id is ' . 
$uri_id); + Logger::debug('retriever_apply_dom_filter: XSLT result \"' . $body . '\"'); DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that? //@@@ probably Item::updateContent } function retrieve_images(&$item, $a) { $blah_item_class = retriever_class_of_item($item) . ' ' . mat_test($item); - Logger::log('@@@ 7 item class is ' . $blah_item_class, Logger::DEBUG); + Logger::debug('@@@ 7 item class is ' . $blah_item_class); $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]); $body = $content['body']; if (!strlen($body)) { - Logger::log('retrieve_images: no body for uri-id ' . $uri_id, Logger::WARNING); + Logger::warning('retrieve_images: no body for uri-id ' . $uri_id); return; } - Logger::log('@@@ retrieve_images start looking in body "' . $body . '"', Logger::INFO); + Logger::info('@@@ retrieve_images start looking in body "' . $body . '"'); $matches1 = array(); preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $body, $matches1); $matches2 = array(); preg_match_all("/\[img\](.*?)\[\/img\]/ism", $body, $matches2); $matches = array_merge($matches1[3], $matches2[1]); - Logger::log('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG); + Logger::debug('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); foreach ($matches as $url) { - Logger::log('@@@ retrieve_images: url ' . $url, Logger::DEBUG); + Logger::debug('@@@ retrieve_images: url ' . $url); if (strpos($url, get_app()->getBaseUrl()) === FALSE) { - Logger::log('@@@ retrieve_images: it is from somewhere else', Logger::DEBUG); - Logger::log('@@@ retrieve_images: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . 
$item['contact-id'], Logger::DEBUG); + Logger::debug('@@@ retrieve_images: it is from somewhere else'); + Logger::debug('@@@ retrieve_images: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id']); $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id'], true); if (!$resource['completed']) { - Logger::log('@@@ retrieve_images: do not have it yet, get it later', Logger::DEBUG); + Logger::debug('@@@ retrieve_images: do not have it yet, get it later'); add_retriever_item($item, $resource); } else { - Logger::log('@@@ retrieve_images: got it already, transform', Logger::DEBUG); + Logger::debug('@@@ retrieve_images: got it already, transform'); retriever_transform_images($a, $item, $resource); } } } - Logger::log('@@@ retrieve_images end', Logger::INFO); + Logger::info('@@@ retrieve_images end'); } function retriever_check_item_completed(&$item) { - Logger::log('@@@ 9 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG); + Logger::debug('@@@ 9 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); $r = q('SELECT count(*) FROM retriever_item WHERE `item-uri` = "%s" ' . 'AND `item-uid` = %d AND `contact-id` = %d AND `finished` = 0', DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id'])); $waiting = $r[0]['count(*)']; - Logger::log('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources', Logger::DEBUG); + Logger::debug('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources'); $old_visible = $item['visible']; $item['visible'] = $waiting ? 0 : 1; if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) { - Logger::log('retriever_check_item_completed: changing visible flag to ' . 
$item['visible'], Logger::DEBUG); + Logger::debug('retriever_check_item_completed: changing visible flag to ' . $item['visible']); q("UPDATE `item` SET `visible` = %d WHERE `id` = %d", intval($item['visible']), intval($item['id'])); @@ -603,10 +603,10 @@ function retriever_check_item_completed(&$item) } function retriever_apply_completed_resource_to_item($retriever, &$item, $resource, $a) { - Logger::log('@@@ 10 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG); - Logger::log('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . $item['plink'], Logger::DEBUG); + Logger::debug('@@@ 10 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); + Logger::debug('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . $item['plink']); if (strpos($resource['type'], 'image') !== false) { - Logger::log('@@@ retriever_apply_completed_resource_to_item this is an image must transform', Logger::INFO); + Logger::info('@@@ retriever_apply_completed_resource_to_item this is an image must transform'); retriever_transform_images($a, $item, $resource); } if (!$retriever) { @@ -621,13 +621,13 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc } } -//@@@ todo: change all Logger::log to Logger::info etc +//@@@ todo: change all Logger::info t etc //@@@ todo: what is this reference for? document if needed delete if not function retriever_transform_images($a, &$item, $resource) { - Logger::log('@@@ 11 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG); - Logger::log('@@@ retriever_transform_images', Logger::INFO); + Logger::debug('@@@ 11 item class is ' . retriever_class_of_item($item) . ' ' . 
mat_test($item)); + Logger::info('@@@ retriever_transform_images'); if (!$resource["data"]) { - Logger::log('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url'], Logger::INFO); + Logger::info('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url']); return; } @@ -642,42 +642,42 @@ function retriever_transform_images($a, &$item, $resource) { $path = parse_url($resource['url'], PHP_URL_PATH); $parts = pathinfo($path); $filename = $parts['filename'] . (array_key_exists('extension', $parts) ? '.' . $parts['extension'] : ''); - Logger::log('@@@ retriever_transform_images url ' . $resource['url'] . ' path ' . $path . ' filename ' . $parts['filename'], Logger::INFO); + Logger::info('@@@ retriever_transform_images url ' . $resource['url'] . ' path ' . $path . ' filename ' . $parts['filename']); $album = 'Wall Photos'; $scale = 0; $desc = ''; // TODO: store alt text with resource when it's requested so we can fill this in - Logger::log('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . $uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . $album . ' scale ' . $scale . ' desc ' . $desc, Logger::DEBUG); - Logger::log('@@@ retriever_transform_images before new Image', Logger::INFO); + Logger::debug('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . $uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . $album . ' scale ' . $scale . ' desc ' . $desc); + Logger::info('@@@ retriever_transform_images before new Image'); $image = new Image($data, $type); - Logger::log('@@@ retriever_transform_images after new Image', Logger::INFO); + Logger::info('@@@ retriever_transform_images after new Image'); if (!$image->isValid()) { - Logger::log('retriever_transform_images: invalid image found at URL ' . $resource['url'] ' for item ' . 
$item['id'], Logger::WARNING); + Logger::warning('retriever_transform_images: invalid image found at URL ' . $resource['url'] . ' for item ' . $item['id']); return; } - Logger::log('@@@ retriever_transform_images before Photo::store', Logger::INFO); + Logger::info('@@@ retriever_transform_images before Photo::store'); $photo = Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc); - Logger::log('@@@ retriever_transform_images after Photo::store', Logger::INFO); + Logger::info('@@@ retriever_transform_images after Photo::store'); $new_url = System::baseUrl() . '/photo/' . $rid . '-0.' . $image->getExt(); - Logger::log('@@@ retriever_transform_images new url ' . $new_url . ' rid ' . $rid . ' ext ' . $image->getExt(), Logger::INFO); + Logger::info('@@@ retriever_transform_images new url ' . $new_url . ' rid ' . $rid . ' ext ' . $image->getExt()); if (!strlen($new_url)) { - Logger::log('retriever_transform_images: no replacement URL for image ' . $resource['url'], Logger::WARNING); + Logger::warning('retriever_transform_images: no replacement URL for image ' . $resource['url']); return; } $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]); $body = $content['body']; - Logger::log('@@@ retriever_transform_images: found body for uri id ' . $uri_id . ': ' . $body, Logger::INFO); + Logger::info('@@@ retriever_transform_images: found body for uri id ' . $uri_id . ': ' . $body); - Logger::log('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . $item['uri'], Logger::DEBUG); - Logger::log('@@@ retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in body ' . $body, Logger::DEBUG); + Logger::debug('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . $item['uri']); + Logger::debug('@@@ retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in body ' . 
$body); $body = str_replace($resource["url"], $new_url, $body); - Logger::log('@@@ retriever_transform_images: result \"' . $body . '\"', Logger::INFO); + Logger::info('@@@ retriever_transform_images: result \"' . $body . '\"'); DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that? //@@@ probably Item::updateContent //@@ actually no, Item::update } catch (Exception $e) { - Logger::log('retriever_transform_images caught exception ' . $e->getMessage(), Logger::INFO); + Logger::info('retriever_transform_images caught exception ' . $e->getMessage()); return; } } @@ -805,8 +805,8 @@ function retriever_contact_photo_menu($a, &$args) { } function retriever_post_remote_hook(&$a, &$item) { - Logger::log('@@@ 12 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); - Logger::log('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG); + Logger::info('@@@ 12 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); + Logger::info('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? $retriever_rule = get_retriever_rule($item['contact-id'], $item["uid"], false); @@ -818,7 +818,7 @@ function retriever_post_remote_hook(&$a, &$item) { // Convert to HTML and back to take advantage of bbcode's resolution of oembeds. $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]); $body = HTML::toBBCode(BBCode::convert($content['body'])); - Logger::log('@@@ retriever_post_remote_hook item uri-id ' . $uri_id . ' body "' . $item['body'] . '" item content body "' . $body . '"', Logger::DEBUG); + Logger::debug('@@@ retriever_post_remote_hook item uri-id ' . $uri_id . ' body "' . $item['body'] . '" item content body "' . $body . 
'"'); if ($body) { $item['body'] = $body; DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that? From c5fb49455226f776ca16a99e9655d1fce0683b69 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Fri, 27 Sep 2019 22:05:00 +0200 Subject: [PATCH 14/94] retriever stuff --- retriever/retriever.php | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index ac6b321a..56852e45 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -30,7 +30,6 @@ function retriever_install() { Addon::registerHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); - $r = q("SELECT `id` FROM `pconfig` WHERE `cat` LIKE 'retriever_%%'"); if (Config::get('retriever', 'dbversion') == '0.10') { q("ALTER TABLE `retriever_resource` MODIFY COLUMN `type` char(255) NULL DEFAULT NULL"); q("ALTER TABLE `retriever_resource` MODIFY COLUMN `data` mediumblob NULL DEFAULT NULL"); @@ -537,8 +536,7 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? Logger::info('@@@ retriever_apply_dom_filter: item id is ' . $item['id'] . ' uri id is ' . $uri_id); Logger::debug('retriever_apply_dom_filter: XSLT result \"' . $body . '\"'); - DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that? - //@@@ probably Item::updateContent + Item::update(['body' => $body], ['uri-id' => $uri_id]); } function retrieve_images(&$item, $a) { @@ -673,9 +671,7 @@ function retriever_transform_images($a, &$item, $resource) { $body = str_replace($resource["url"], $new_url, $body); Logger::info('@@@ retriever_transform_images: result \"' . $body . 
'\"'); - DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that? - //@@@ probably Item::updateContent - //@@ actually no, Item::update + Item::update(['body' => $body], ['uri-id' => $uri_id]); } catch (Exception $e) { Logger::info('retriever_transform_images caught exception ' . $e->getMessage()); return; @@ -821,7 +817,7 @@ function retriever_post_remote_hook(&$a, &$item) { Logger::debug('@@@ retriever_post_remote_hook item uri-id ' . $uri_id . ' body "' . $item['body'] . '" item content body "' . $body . '"'); if ($body) { $item['body'] = $body; - DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that? + Item::update(['body' => $body], ['uri-id' => $uri_id]); } } if (PConfig::get($item["uid"], 'retriever', 'all_photos')) { From 4dc51d8f0557fbc2bdd94b0c2985036fa03a6ed3 Mon Sep 17 00:00:00 2001 From: Administrator Date: Fri, 27 Sep 2019 21:02:52 +0200 Subject: [PATCH 15/94] Fix retriever database problems --- retriever/database.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/retriever/database.sql b/retriever/database.sql index a29135e7..68480cfd 100644 --- a/retriever/database.sql +++ b/retriever/database.sql @@ -36,7 +36,7 @@ CREATE TABLE IF NOT EXISTS `retriever_resource` ( `data` mediumblob NULL DEFAULT NULL, `http-code` smallint(1) unsigned NULL DEFAULT NULL, `redirect-url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NULL DEFAULT NULL, - KEY `retriever_resource` ADD INDEX `url` (`url`), - KEY `retriever_resource` ADD INDEX `completed` (`completed`), + KEY `url` (`url`), + KEY `completed` (`completed`), PRIMARY KEY (`id`) ) DEFAULT CHARSET=utf8 COLLATE=utf8_bin From 0963f0da4a2975d8731093cdcd384fca81ae7880 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Fri, 27 Sep 2019 22:05:22 +0200 Subject: [PATCH 16/94] more retriever stuff --- retriever/retriever.php | 34 +++++++++++++++++++++++++--------- 1 file 
changed, 25 insertions(+), 9 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 56852e45..3b557d80 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -4,7 +4,6 @@ * Description: Follow the permalink of RSS/Atom feed items and replace the summary with the full content. * Version: 1.1 * Author: Matthew Exon - * Status: Unsupported */ use Friendica\Core\Addon; @@ -53,6 +52,7 @@ function retriever_install() { $arr = explode(';', $schema); foreach ($arr as $a) { $r = q($a); + //@@@ check for errors } Config::set('retriever', 'dbversion', '0.13'); } @@ -332,7 +332,6 @@ function apply_retrospective($a, $retriever, $num) { $r = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d", intval($retriever['contact-id']), intval($num)); foreach ($r as $item) { - Logger::log('@@@ 3 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); //@@@ already know this is wrong q('UPDATE `item` SET `visible` = 0 WHERE `id` = %d', $item['id']); q('UPDATE `thread` SET `visible` = 0 WHERE `iid` = %d', $item['id']); retriever_on_item_insert($a, $retriever, $item); @@ -343,7 +342,10 @@ function apply_retrospective($a, $retriever, $num) { //@@@ need a lock to say something is doing something function retriever_on_item_insert($a, $retriever, &$item) { Logger::info('@@@ 4 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); - Logger::info('@@@ retriever_on_item_insert start ' . $item['plink']); + foreach ($item as $key => $value) { + Logger::info("@@@ $key => $value"); + } + Logger::info('@@@ retriever_on_item_insert start ' . ' plink ' . 
$item['plink']); if (!$retriever || !$retriever['id']) { Logger::info('retriever_on_item_insert: No retriever supplied'); return; @@ -352,15 +354,29 @@ function retriever_on_item_insert($a, $retriever, &$item) { Logger::info('@@@ retriever_on_item_insert: Disabled'); return; } - if (array_key_exists('pattern', $retriever["data"]) && $retriever["data"]['pattern']) { - $url = preg_replace('/' . $retriever["data"]['pattern'] . '/', $retriever["data"]['replace'], $item['plink']); - Logger::debug('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url); - } - else { + if (array_key_exists('plink', $item)) { $url = $item['plink']; } + else { + if (!array_key_exists('uri_id', $item)) { + Logger::warning('retriever_on_item_insert: item ' . ' has no plink and no uri-id'); + // @@@ find an identifier and put it in warning + Logger::warning('@@@ retriever_on_item_insert: item has: ' . print_r($item, true)); + foreach ($item as $key => $value) { + Logger::warning("@@@ $key => $value"); + } + return; + } + $content = DBA::selectFirst('item-content', [], ['uri-id' => $item['uri_id']]); + $url = $content['plink']; + } - Logger::debug('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id']); + if (array_key_exists('pattern', $retriever["data"]) && $retriever["data"]['pattern']) { + $url = preg_replace('/' . $retriever["data"]['pattern'] . '/', $retriever["data"]['replace'], $url); + Logger::debug('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url); + } + + Logger::debug('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id'] . ' url ' . 
$url); $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id']); $retriever_item_id = add_retriever_item($item, $resource); } From 04e57e4334d97764b67393ecfe75c48fc75afb8f Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 29 Sep 2019 17:01:46 +0200 Subject: [PATCH 17/94] Fix bugs in retriever retrospective stuff --- retriever/retriever.php | 100 ++++++++++++++++---------------- retriever/templates/extract.tpl | 18 ++---- 2 files changed, 55 insertions(+), 63 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 3b557d80..4d701276 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -30,16 +30,16 @@ function retriever_install() { Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); if (Config::get('retriever', 'dbversion') == '0.10') { - q("ALTER TABLE `retriever_resource` MODIFY COLUMN `type` char(255) NULL DEFAULT NULL"); - q("ALTER TABLE `retriever_resource` MODIFY COLUMN `data` mediumblob NULL DEFAULT NULL"); - q("ALTER TABLE `retriever_rule` MODIFY COLUMN `data` mediumtext NULL DEFAULT NULL"); + q('ALTER TABLE `retriever_resource` MODIFY COLUMN `type` char(255) NULL DEFAULT NULL'); + q('ALTER TABLE `retriever_resource` MODIFY COLUMN `data` mediumblob NULL DEFAULT NULL'); + q('ALTER TABLE `retriever_rule` MODIFY COLUMN `data` mediumtext NULL DEFAULT NULL'); Config::set('retriever', 'dbversion', '0.11'); } if (Config::get('retriever', 'dbversion') == '0.11') { - q("ALTER TABLE `retriever_resource` ADD INDEX `url` (`url`)"); - q("ALTER TABLE `retriever_resource` ADD INDEX `completed` (`completed`)"); - q("ALTER TABLE `retriever_item` ADD INDEX `finished` (`finished`)"); - q("ALTER TABLE `retriever_item` ADD INDEX `item-uid` (`item-uid`)"); + q('ALTER TABLE `retriever_resource` ADD INDEX `url` (`url`)'); + q('ALTER TABLE `retriever_resource` ADD INDEX `completed` (`completed`)'); + q('ALTER TABLE `retriever_item` ADD INDEX `finished` (`finished`)'); + q('ALTER TABLE 
`retriever_item` ADD INDEX `item-uid` (`item-uid`)'); Config::set('retriever', 'dbversion', '0.12'); } if (Config::get('retriever', 'dbversion') == '0.12') { @@ -206,16 +206,19 @@ function retrieve_resource($resource) { try { Logger::debug('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url']); $redirects = 0; - $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); + $cookiejar = ''; if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) { + $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); file_put_contents($cookiejar, $retriever_rule['cookiedata']); } $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar); if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) { $retriever_rule['cookiedata'] = file_get_contents($cookiejar); - //@@@ do the store here + Logger::debug('@@@ retriever_resource update cookie ' . json_encode($retriever_rule['data'] . ' id ' . $retriever_rule['id'])); + q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d", + DBA::escape(json_encode($retriever_rule['data'])), intval($retriever_rule["id"])); + unlink($cookiejar); } - unlink($cookiejar); $resource['data'] = $fetch_result->getBody(); $resource['http-code'] = $fetch_result->getReturnCode(); $resource['type'] = $fetch_result->getContentType(); @@ -323,17 +326,25 @@ function retriever_item_completed($retriever_item_id, $resource, $a) { function retriever_resource_completed($resource, $a) { Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . 
$resource['url']); $r = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource['id']); - foreach ($r as $rr) { - retriever_item_completed($rr['id'], $resource, $a); + foreach (DBA::select('retriever_item', ['id'], ['resource' => intval($resource['id'])]) as $retriever_item) { + Logger::debug('@@@ retriever_resource_completed got item id ' . $retriever_item['id']); + retriever_item_completed($retriever_item['id'], $resource, $a); } } function apply_retrospective($a, $retriever, $num) { + Logger::info('@@@ apply_retrospective'); $r = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d", intval($retriever['contact-id']), intval($num)); foreach ($r as $item) { + Logger::info('@@@ apply_retrospective item ' . $item['id']); q('UPDATE `item` SET `visible` = 0 WHERE `id` = %d', $item['id']); q('UPDATE `thread` SET `visible` = 0 WHERE `iid` = %d', $item['id']); + foreach (DBA::select('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]) as $retriever_item) { + Logger::info('@@@ about to delete retriever_item id ' . $retriever_item['id'] . ' uri ' . $item['uri'] . ' uid ' . $item['uid'] . ' contact ' . $item['contact-id']); + DBA::delete('retriever_resource', ['id' => $retriever_item['resource']]); + DBA::delete('retriever_item', ['id' => $retriever_item['id']]); + } retriever_on_item_insert($a, $retriever, $item); } } @@ -341,39 +352,31 @@ function apply_retrospective($a, $retriever, $num) { //@@@ make this trigger a retriever immediately somehow //@@@ need a lock to say something is doing something function retriever_on_item_insert($a, $retriever, &$item) { - Logger::info('@@@ 4 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); - foreach ($item as $key => $value) { - Logger::info("@@@ $key => $value"); - } - Logger::info('@@@ retriever_on_item_insert start ' . ' plink ' . 
$item['plink']); + Logger::info('@@@ retriever_on_item_insert start plink ' . $item['plink'] . ' id ' . $item['id']); if (!$retriever || !$retriever['id']) { Logger::info('retriever_on_item_insert: No retriever supplied'); return; } - if (!$retriever["data"]['enable'] == "on") { + if (!$retriever['data']['enable'] == "on") { Logger::info('@@@ retriever_on_item_insert: Disabled'); return; } - if (array_key_exists('plink', $item)) { + if (array_key_exists('plink', $item) && strlen($item['plink'])) { $url = $item['plink']; } else { - if (!array_key_exists('uri_id', $item)) { - Logger::warning('retriever_on_item_insert: item ' . ' has no plink and no uri-id'); - // @@@ find an identifier and put it in warning - Logger::warning('@@@ retriever_on_item_insert: item has: ' . print_r($item, true)); - foreach ($item as $key => $value) { - Logger::warning("@@@ $key => $value"); - } + if (!array_key_exists('uri-id', $item)) { + Logger::warning('retriever_on_item_insert: item ' . $item['id'] . ' has no plink and no uri-id'); return; } - $content = DBA::selectFirst('item-content', [], ['uri-id' => $item['uri_id']]); + $content = DBA::selectFirst('item-content', [], ['uri-id' => $item['uri-id']]); $url = $content['plink']; } - if (array_key_exists('pattern', $retriever["data"]) && $retriever["data"]['pattern']) { - $url = preg_replace('/' . $retriever["data"]['pattern'] . '/', $retriever["data"]['replace'], $url); - Logger::debug('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url); + if (array_key_exists('pattern', $retriever['data']) && $retriever['data']['pattern']) { + $orig_url = $url; + $url = preg_replace('/' . $retriever['data']['pattern'] . '/', $retriever['data']['replace'], $orig_url); + Logger::debug('retriever_on_item_insert: Changed ' . $orig_url . ' to ' . $url); } Logger::debug('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id'] . ' url ' . 
$url); @@ -476,15 +479,14 @@ function retriever_apply_xslt_text($xslt_text, $doc) { Logger::info('retriever_apply_xslt_text: could not load XML'); return $doc; } + Logger::debug('@@@ retriever_apply_xslt_text: ' . $xslt_text); $xp = new XsltProcessor(); $xp->importStylesheet($xslt_doc); $result = $xp->transformToDoc($doc); return $result; } -//@@@ is that an item or a resource_item? I really want an item here so I can update it function retriever_apply_dom_filter($retriever, &$item, $resource) { - Logger::debug('@@@ 6 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); Logger::debug('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id']); if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) { @@ -496,6 +498,7 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { return; } + //@@@ break this bit into separate function $encoding = retriever_get_encoding($resource); $content = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $encoding); $doc = new DOMDocument('1.0', 'UTF-8'); @@ -522,23 +525,19 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { return; } - Logger::info('@@@ retriever_apply_dom_filter: 1'); + //@@@ break this bit into separate function $components = parse_url($resource['redirect-url']); $rooturl = $components['scheme'] . "://" . $components['host']; $dirurl = $rooturl . dirname($components['path']) . 
"/"; - Logger::info('@@@ retriever_apply_dom_filter: 2'); $params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl); $fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/'); $fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params); - Logger::info('@@@ retriever_apply_dom_filter: 3'); $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc); - Logger::info('@@@ retriever_apply_dom_filter: 4'); if (!$doc) { Logger::info('retriever_apply_dom_filter: failed to apply fix urls XSLT template'); return; } - Logger::info('@@@ retriever_apply_dom_filter: 5'); $body = HTML::toBBCode($doc->saveHTML()); if (!strlen($body)) { Logger::info('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty'); @@ -548,9 +547,7 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { $body .= $item['plink']; $body .= ']' . $item['plink'] . '[/url]'; - Logger::info('@@@ retriever_apply_dom_filter: 6'); - $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? - Logger::info('@@@ retriever_apply_dom_filter: item id is ' . $item['id'] . ' uri id is ' . $uri_id); + $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? Consider using item['id'] instead Logger::debug('retriever_apply_dom_filter: XSLT result \"' . $body . 
'\"'); Item::update(['body' => $body], ['uri-id' => $uri_id]); } @@ -629,7 +626,7 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc if ((strpos($resource['type'], 'html') !== false) || (strpos($resource['type'], 'xml') !== false)) { retriever_apply_dom_filter($retriever, $item, $resource); - if ($retriever["data"]['images'] ) { + if ($retriever['data']['images'] ) { retrieve_images($item, $a); } } @@ -640,7 +637,7 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc function retriever_transform_images($a, &$item, $resource) { Logger::debug('@@@ 11 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); Logger::info('@@@ retriever_transform_images'); - if (!$resource["data"]) { + if (!$resource['data']) { Logger::info('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url']); return; } @@ -716,10 +713,13 @@ function retriever_content($a) { if (!empty($_POST["id"])) { $retriever_rule = get_retriever_rule($a->argv[1], local_user(), true); - $retriever_rule["data"] = array(); + $retriever_rule['data'] = array(); foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) { - if (!empty($_POST['retriever_' . $setting])) { - $retriever_rule["data"][$setting] = $_POST['retriever_' . $setting]; + if (empty($_POST['retriever_' . $setting])) { + $retriever_rule['data'][$setting] = NULL; + } + else { + $retriever_rule['data'][$setting] = $_POST['retriever_' . $setting]; } } foreach ($_POST as $k=>$v) { @@ -739,11 +739,11 @@ function retriever_content($a) { } } q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d", - DBA::escape(json_encode($retriever_rule["data"])), intval($retriever_rule["id"])); + DBA::escape(json_encode($retriever_rule['data'])), intval($retriever_rule["id"])); $a->page['content'] .= "

Settings Updated"; if (!empty($_POST["retriever_retrospective"])) { apply_retrospective($a, $retriever_rule, $_POST["retriever_retrospective"]); - $a->page['content'] .= " and retrospectively applied to " . $_POST["apply"] . " posts"; + $a->page['content'] .= " and retrospectively applied to " . $_POST["retriever_retrospective"] . " posts"; } $a->page['content'] .= ".

"; } @@ -757,12 +757,12 @@ function retriever_content($a) { '$pattern' => array( 'retriever_pattern', L10n::t('URL Pattern'), - $retriever_rule["data"]['pattern'], + $retriever_rule['data']['pattern'], L10n::t('Regular expression matching part of the URL to replace')), '$replace' => array( 'retriever_replace', L10n::t('URL Replace'), - $retriever_rule["data"]['replace'], + $retriever_rule['data']['replace'], L10n::t('Text to replace matching part of above regular expression')), '$images' => array( 'retriever_images', @@ -802,7 +802,7 @@ function retriever_content($a) { '$include_t' => L10n::t('Include'), '$include' => $retriever_rule['data']['include'], '$exclude_t' => L10n::t('Exclude'), - '$exclude' => $retriever_rule["data"]['exclude'])); + '$exclude' => $retriever_rule['data']['exclude'])); return; } } diff --git a/retriever/templates/extract.tpl b/retriever/templates/extract.tpl index f24a860d..ca67f683 100644 --- a/retriever/templates/extract.tpl +++ b/retriever/templates/extract.tpl @@ -3,25 +3,17 @@ - -{{function clause_xpath}} -{{if !$clause.attribute}} -{{$clause.element}}{{elseif $clause.attribute == 'class'}} -{{$clause.element}}[contains(concat(' ', normalize-space(@class), ' '), '{{$clause.value}}')]{{else}} -{{$clause.element}}[@{{$clause.attribute}}='{{$clause.value}}']{{/if}} -{{/function}} - +{{function clause_xpath}}{{if !$clause.attribute}}{{$clause.element}}{{elseif $clause.attribute == 'class'}}{{$clause.element}}[contains(concat(' ', normalize-space(@class), ' '), '{{$clause.value}}')]{{else}}{{$clause.element}}[@{{$clause.attribute}}='{{$clause.value}}']{{/if}}{{/function}} {{foreach $spec.include as $clause}} + - -{{/foreach}} - + {{/foreach}} {{foreach $spec.exclude as $clause}} - -{{/foreach}} + + {{/foreach}} From 0c9db8383af59932929bd4cafabd429fe0bff635 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 29 Sep 2019 17:04:34 +0200 Subject: [PATCH 18/94] fakerei2 --- retriever/retriever.php | 11 +++++++---- 1 file changed, 7 
insertions(+), 4 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 4d701276..294fba67 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -51,8 +51,10 @@ function retriever_install() { $schema = file_get_contents(dirname(__file__).'/database.sql'); $arr = explode(';', $schema); foreach ($arr as $a) { - $r = q($a); - //@@@ check for errors + if (!DBA::e($a)) { + Logger::warning('Unable to create database table: ' . DBA::errorMessage()); + return; + } } Config::set('retriever', 'dbversion', '0.13'); } @@ -142,7 +144,8 @@ function retriever_clean_up_completed_resources($max_items, $a) { } Logger::debug('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . count($r)); foreach ($r as $rr) { - $resource = q("SELECT * FROM retriever_resource WHERE `id` = %d", $rr['resource']); + $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]); + Logger::info('@@@ retriever_clean_up_completed_resources did alternate thing resource type ' . $resource['type']); $retriever_item = retriever_get_retriever_item($rr['item']); if (!DBA::isResult($retriever_item)) { Logger::warning('retriever_clean_up_completed_resources: no retriever item with id ' . 
$rr['item']); @@ -159,7 +162,7 @@ function retriever_clean_up_completed_resources($max_items, $a) { continue; } Logger::info('@@@ retriever_clean_up_completed_resources: about to retriever_apply_completed_resource_to_item'); - retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource[0], $a); + retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a); q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", intval($retriever_item['id'])); retriever_check_item_completed($item); } From 043c5157073333127b3a9966c507cfdcb09161da Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 29 Sep 2019 17:09:11 +0200 Subject: [PATCH 19/94] more dba stuff --- retriever/retriever.php | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 294fba67..bbe138b9 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -144,8 +144,6 @@ function retriever_clean_up_completed_resources($max_items, $a) { } Logger::debug('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . count($r)); foreach ($r as $rr) { - $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]); - Logger::info('@@@ retriever_clean_up_completed_resources did alternate thing resource type ' . $resource['type']); $retriever_item = retriever_get_retriever_item($rr['item']); if (!DBA::isResult($retriever_item)) { Logger::warning('retriever_clean_up_completed_resources: no retriever item with id ' . $rr['item']); @@ -161,7 +159,7 @@ function retriever_clean_up_completed_resources($max_items, $a) { Logger::warning('retriever_clean_up_completed_resources: no retriever for uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['uid'] . ' ' . 
$retriever_item['contact-id']); continue; } - Logger::info('@@@ retriever_clean_up_completed_resources: about to retriever_apply_completed_resource_to_item'); + $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]); retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a); q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", intval($retriever_item['id'])); retriever_check_item_completed($item); From 3906813dcfc9682f3f73d82a04fca2b1b9c3e4f9 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 29 Sep 2019 20:59:14 +0200 Subject: [PATCH 20/94] fixed image regex --- retriever/retriever.php | 70 +++++++++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 20 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index bbe138b9..20ab1ee8 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -47,7 +47,10 @@ function retriever_install() { q("ALTER TABLE `retriever_resource` ADD COLUMN `item-uid` int(10) unsigned NOT NULL DEFAULT '0' AFTER `id`"); Config::set('retriever', 'dbversion', '0.13'); } - if (Config::get('retriever', 'dbversion') != '0.13') { + if (Config::get('retriever', 'dbversion') == '0.13') { + Config::set('retriever', 'downloads_per_cron', '100'); + } + if (Config::get('retriever', 'dbversion') != '0.14') { $schema = file_get_contents(dirname(__file__).'/database.sql'); $arr = explode(';', $schema); foreach ($arr as $a) { @@ -56,7 +59,8 @@ function retriever_install() { return; } } - Config::set('retriever', 'dbversion', '0.13'); + Config::set('retriever', 'downloads_per_cron', '100'); + Config::set('retriever', 'dbversion', '0.14'); } } @@ -72,20 +76,37 @@ function retriever_uninstall() { function retriever_module() {} +function retriever_addon_admin(&$a, &$o) { + $downloads_per_cron = Config::get('retriever', 'downloads_per_cron'); + $template = Renderer::getMarkupTemplate('admin.tpl', 'addon/retriever/'); + $config = 
['downloads_per_cron', + L10n::t('Downloads per Cron'), + $downloads_per_cron, + L10n::t('Maximum number of downloads to attempt during each run of the cron job.')]; + $o .= Renderer::replaceMacros($template, [ + '$downloads_per_cron' => $config, + '$submit' => L10n::t('Save Settings')]); +} + +function retriever_addon_admin_post ($a) { + if (!empty($_POST['downloads_per_cron'])) { + Config::set('retriever', 'downloads_per_cron', $_POST['downloads_per_cron']); + } +} + function retriever_cron($a, $b) { - // 100 is a nice sane number. Maybe this should be configurable. @@@ + $downloads_per_cron = Config::get('retriever', 'downloads_per_cron'); - // Do this first, otherwise it can interfere with retreiver_retrieve_items - retriever_clean_up_completed_resources(100, $a); + // Do this first, otherwise it can interfere with retriever_retrieve_items + retriever_clean_up_completed_resources($downloads_per_cron, $a); - retriever_retrieve_items(100, $a); + retriever_retrieve_items($downloads_per_cron, $a); retriever_tidy(); } $retriever_item_count = 0; function retriever_retrieve_items($max_items, $a) { - Logger::info('@@@ retriever_retrieve_items'); global $retriever_item_count; $retriever_schedule = array(array(1,'minute'), @@ -108,8 +129,7 @@ function retriever_retrieve_items($max_items, $a) { $retrieve_items = $max_items - $retriever_item_count; Logger::debug('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items); do { - Logger::info('@@@ retriever_retrieve_items loop max ' . $max_items . ' count ' . $retriever_item_count); - Logger::info("@@@ SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR " . implode($schedule_clauses, ' OR ') . ") ORDER BY `last-try` ASC LIMIT " . 
$retrieve_items); + // TODO: figure out how to do this with DBA module $retriever_resources = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d", DBA::escape(implode($schedule_clauses, ' OR ')), intval($retrieve_items)); @@ -121,7 +141,6 @@ function retriever_retrieve_items($max_items, $a) { } Logger::debug('retriever_retrieve_items: found ' . count($retriever_resources) . ' waiting resources in database'); foreach ($retriever_resources as $retriever_resource) { - Logger::info('@@@ need to get the retriever config here cid ' . $retriever_resource['contact-id'] . ' uid ' . $retriever_resource['item-uid']); retrieve_resource($retriever_resource); $retriever_item_count++; } @@ -129,7 +148,7 @@ function retriever_retrieve_items($max_items, $a) { } while ($retrieve_items > 0); // @@@ todo: when items add further items (i.e. images), do the new images go round this loop again? - Logger::info('@@@ retriever_retrieve_items: finished retrieving items'); + Logger::debug('retriever_retrieve_items: finished retrieving items'); } /* Look for items that are waiting even though the resource has @@ -137,7 +156,8 @@ function retriever_retrieve_items($max_items, $a) { * retrospectively apply a config change. It could also happen due to * a cron job dying or something. 
*/ function retriever_clean_up_completed_resources($max_items, $a) { - $r = q("SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d", + // TODO: figure out how to do this with DBA module + $r = q('SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d', intval($max_items)); if (!$r) { $r = array(); @@ -161,6 +181,7 @@ function retriever_clean_up_completed_resources($max_items, $a) { } $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]); retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a); + //@@@ next one to do q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", intval($retriever_item['id'])); retriever_check_item_completed($item); } @@ -208,8 +229,10 @@ function retrieve_resource($resource) { Logger::debug('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url']); $redirects = 0; $cookiejar = ''; + Logger::debug('@@@ retrieve_resource storecookies ' . $retriever_rule['storecookies']); if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) { $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); + Logger::debug('@@@ retrieve_resource cookie file ' . $cookiejar . ' content ' . 
$retriever_rule['cookiedata']); file_put_contents($cookiejar, $retriever_rule['cookiedata']); } $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar); @@ -218,7 +241,7 @@ function retrieve_resource($resource) { Logger::debug('@@@ retriever_resource update cookie ' . json_encode($retriever_rule['data'] . ' id ' . $retriever_rule['id'])); q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d", DBA::escape(json_encode($retriever_rule['data'])), intval($retriever_rule["id"])); - unlink($cookiejar); + /* unlink($cookiejar); */ //@@@ } $resource['data'] = $fetch_result->getBody(); $resource['http-code'] = $fetch_result->getReturnCode(); @@ -350,8 +373,8 @@ function apply_retrospective($a, $retriever, $num) { } } -//@@@ make this trigger a retriever immediately somehow -//@@@ need a lock to say something is doing something +// TODO: Currently this waits until the next cron before actually downloading. Should do it immediately. +// TODO: This queries then inserts. It should use some kind of lock to avoid requesting the same resource twice. function retriever_on_item_insert($a, $retriever, &$item) { Logger::info('@@@ retriever_on_item_insert start plink ' . $item['plink'] . ' id ' . $item['id']); if (!$retriever || !$retriever['id']) { @@ -397,6 +420,7 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { fclose($fp); $url = 'md5://' . hash('md5', $url); + //@@@ fix this $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid)); $resource = $r[0]; if (count($r)) { @@ -405,6 +429,7 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { } Logger::debug('retrieve_resource: got data URL type ' . $resource['type']); + //@@@ fix this q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `type`, `binary`, `url`, `completed`, `data`) " . 
"VALUES (%d, %d, '%s', %d, '%s', now(), '%s')", intval($uid), @@ -425,6 +450,7 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { Logger::warning('add_retriever_resource: URL is longer than 800 characters'); } + //@@@ fix this $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid)); if (count($r)) { Logger::debug('add_retriever_resource: Resource ' . $url . ' uid ' . $uid . ' cid ' . $cid . ' already requested'); @@ -554,24 +580,29 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { } function retrieve_images(&$item, $a) { + // Note that $item doesn't necessarily contain all the fields you would expect, in particular 'id' $blah_item_class = retriever_class_of_item($item) . ' ' . mat_test($item); Logger::debug('@@@ 7 item class is ' . $blah_item_class); + Logger::debug('@@@ retrieve_images start item '. $item['id'] . ' uri ' . $item['uri'] . ' uri id ' . $item['uri-id'] . ' plink ' . $item['plink'] . ' guid ' . $item['guid']); $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? - $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]); + $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $uri_id]); $body = $content['body']; if (!strlen($body)) { Logger::warning('retrieve_images: no body for uri-id ' . $uri_id); return; } - Logger::info('@@@ retrieve_images start looking in body "' . $body . '"'); + Logger::info('@@@ retrieve_images looking in body "' . $body . '"'); + // I suspect that matches1 and matches2 are not used any more? 
$matches1 = array(); preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $body, $matches1); $matches2 = array(); preg_match_all("/\[img\](.*?)\[\/img\]/ism", $body, $matches2); - $matches = array_merge($matches1[3], $matches2[1]); + $matches3 = array(); + preg_match_all("/\[img\=([^\]]*)\]([^[]*)\[\/img\]/ism", $body, $matches3); + $matches = array_merge($matches1[3], $matches2[1], $matches3[1]); Logger::debug('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); foreach ($matches as $url) { Logger::debug('@@@ retrieve_images: url ' . $url); @@ -615,7 +646,6 @@ function retriever_check_item_completed(&$item) } function retriever_apply_completed_resource_to_item($retriever, &$item, $resource, $a) { - Logger::debug('@@@ 10 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); Logger::debug('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . $item['plink']); if (strpos($resource['type'], 'image') !== false) { Logger::info('@@@ retriever_apply_completed_resource_to_item this is an image must transform'); @@ -676,7 +706,7 @@ function retriever_transform_images($a, &$item, $resource) { return; } - $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]); + $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $uri_id]); $body = $content['body']; Logger::info('@@@ retriever_transform_images: found body for uri id ' . $uri_id . ': ' . 
$body); From dbd00503aaaaef8d4c3100ae1d6af51601c54857 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 29 Sep 2019 22:05:49 +0200 Subject: [PATCH 21/94] Stuff in retriever --- retriever/retriever.php | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 20ab1ee8..f43504e6 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -147,14 +147,10 @@ function retriever_retrieve_items($max_items, $a) { $retrieve_items = $max_items - $retriever_item_count; } while ($retrieve_items > 0); - // @@@ todo: when items add further items (i.e. images), do the new images go round this loop again? Logger::debug('retriever_retrieve_items: finished retrieving items'); } -/* Look for items that are waiting even though the resource has - * completed. This usually happens because we've been asked to - * retrospectively apply a config change. It could also happen due to - * a cron job dying or something. */ +// Look for items that are waiting even though the resource has completed. This shouldn't happen, but is worth cleaning up if it does. 
function retriever_clean_up_completed_resources($max_items, $a) { // TODO: figure out how to do this with DBA module $r = q('SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d', @@ -181,13 +177,14 @@ function retriever_clean_up_completed_resources($max_items, $a) { } $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]); retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a); - //@@@ next one to do - q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", intval($retriever_item['id'])); + Logger::info('@@@ retriever_clean_up_completed_resources tried to update id ' . $retriver_item['id'] . ' to finished, better check that it really worked!'); + DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished']); retriever_check_item_completed($item); } } function retriever_tidy() { + // TODO: figure out how to do this with DBA module q("DELETE FROM retriever_resource WHERE completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)"); q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)"); @@ -581,8 +578,6 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { function retrieve_images(&$item, $a) { // Note that $item doesn't necessarily contain all the fields you would expect, in particular 'id' - $blah_item_class = retriever_class_of_item($item) . ' ' . mat_test($item); - Logger::debug('@@@ 7 item class is ' . $blah_item_class); Logger::debug('@@@ retrieve_images start item '. $item['id'] . ' uri ' . $item['uri'] . ' uri id ' . $item['uri-id'] . 
' plink ' . $item['plink'] . ' guid ' . $item['guid']); $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? @@ -595,27 +590,21 @@ function retrieve_images(&$item, $a) { } Logger::info('@@@ retrieve_images looking in body "' . $body . '"'); - // I suspect that matches1 and matches2 are not used any more? - $matches1 = array(); - preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $body, $matches1); - $matches2 = array(); - preg_match_all("/\[img\](.*?)\[\/img\]/ism", $body, $matches2); - $matches3 = array(); - preg_match_all("/\[img\=([^\]]*)\]([^[]*)\[\/img\]/ism", $body, $matches3); + // I suspect that the first two are not used any more? + preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $item["body"], $matches1); + preg_match_all("/\[img\](.*?)\[\/img\]/ism", $item["body"], $matches2); + preg_match_all("/\[img\=([^\]]*)\]([^[]*)\[\/img\]/ism", $item["body"], $matches3); $matches = array_merge($matches1[3], $matches2[1], $matches3[1]); Logger::debug('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); foreach ($matches as $url) { Logger::debug('@@@ retrieve_images: url ' . $url); if (strpos($url, get_app()->getBaseUrl()) === FALSE) { - Logger::debug('@@@ retrieve_images: it is from somewhere else'); Logger::debug('@@@ retrieve_images: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . 
$item['contact-id']); $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id'], true); if (!$resource['completed']) { - Logger::debug('@@@ retrieve_images: do not have it yet, get it later'); add_retriever_item($item, $resource); } else { - Logger::debug('@@@ retrieve_images: got it already, transform'); retriever_transform_images($a, $item, $resource); } } From be68a4aa3c53b08731b92d66b64979b322be2a34 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 30 Sep 2019 08:25:00 +0200 Subject: [PATCH 22/94] update version number --- retriever/retriever.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index f43504e6..fcd45b46 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -1,8 +1,8 @@ */ From 2ba05cc80cb81f8d625ec44b4e0167f62afaa2cb Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 30 Sep 2019 08:25:16 +0200 Subject: [PATCH 23/94] configurable number of requests --- retriever/templates/admin.tpl | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 retriever/templates/admin.tpl diff --git a/retriever/templates/admin.tpl b/retriever/templates/admin.tpl new file mode 100644 index 00000000..b5a35961 --- /dev/null +++ b/retriever/templates/admin.tpl @@ -0,0 +1,8 @@ +{{* + * AUTOMATICALLY GENERATED TEMPLATE + * DO NOT EDIT THIS FILE, CHANGES WILL BE OVERWRITTEN + * + *}} +{{include file="field_input.tpl" field=$downloads_per_cron}} +
+ From 42314b667063d2dca23376d138ef7cb27874c3ce Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 30 Sep 2019 20:52:05 +0200 Subject: [PATCH 24/94] Add phototrack and publicise --- phototrack/database.sql | 23 ++ phototrack/phototrack.php | 258 ++++++++++++++++++++ publicise/publicise.php | 431 ++++++++++++++++++++++++++++++++++ publicise/templates/admin.tpl | 39 +++ 4 files changed, 751 insertions(+) create mode 100644 phototrack/database.sql create mode 100644 phototrack/phototrack.php create mode 100644 publicise/publicise.php create mode 100644 publicise/templates/admin.tpl diff --git a/phototrack/database.sql b/phototrack/database.sql new file mode 100644 index 00000000..f1b58f6b --- /dev/null +++ b/phototrack/database.sql @@ -0,0 +1,23 @@ +CREATE TABLE IF NOT EXISTS `phototrack_photo_use` ( + `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `resource-id` char(64) NOT NULL, + `table` char(64) NOT NULL, + `field` char(64) NOT NULL, + `row-id` int(11) NOT NULL, + `checked` timestamp NOT NULL DEFAULT now(), + PRIMARY KEY (`id`), + INDEX `resource-id` (`resource-id`), + INDEX `row` (`table`,`field`,`row-id`) +) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; + +CREATE TABLE IF NOT EXISTS `phototrack_row_check` ( + `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `table` char(64) NOT NULL, + `row-id` int(11) NOT NULL, + `checked` timestamp NOT NULL DEFAULT now(), + PRIMARY KEY (`id`), + INDEX `row` (`table`,`row-id`), + INDEX `checked` (`checked`) +) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; + +SELECT TRUE diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php new file mode 100644 index 00000000..8b909f5d --- /dev/null +++ b/phototrack/phototrack.php @@ -0,0 +1,258 @@ + + */ + +/* + * List of tables and the fields that are checked: + * + * contact: photo thumb micro about + * fcontact: photo + * fsuggest: photo + * gcontact: photo about + * item: body + * item-content: body + * mail: from-photo + * notify: photo + * profile: photo 
thumb about + */ + +use Friendica\Core\Addon; +use Friendica\Core\Config; +use Friendica\Core\Logger; +use Friendica\Object\Image; +use Friendica\Database\DBA; + +if (!defined('PHOTOTRACK_DEFAULT_BATCH_SIZE')) { + define('PHOTOTRACK_DEFAULT_BATCH_SIZE', 1000); +} +// Time in *minutes* between searching for photo uses +if (!defined('PHOTOTRACK_DEFAULT_SEARCH_INTERVAL')) { + define('PHOTOTRACK_DEFAULT_SEARCH_INTERVAL', 10); +} + +function phototrack_install() { + global $db; + + Addon::registerHook('post_local_end', 'addon/phototrack/phototrack.php', 'phototrack_post_local_end'); + Addon::registerHook('post_remote_end', 'addon/phototrack/phototrack.php', 'phototrack_post_remote_end'); + Addon::registerHook('notifier_end', 'addon/phototrack/phototrack.php', 'phototrack_notifier_end'); + Addon::registerHook('cron', 'addon/phototrack/phototrack.php', 'phototrack_cron'); + + if (Config::get('phototrack', 'dbversion') != '0.1') { + $schema = file_get_contents(dirname(__file__).'/database.sql'); + $arr = explode(';', $schema); + foreach ($arr as $a) { + if (!DBA::e($a)) { + Logger::warning('Unable to create database table: ' . 
DBA::errorMessage()); + return; + } + } + Config::set('phototrack', 'dbversion', '0.1'); + } +} + +function phototrack_uninstall() { + Addon::unregisterHook('post_local_end', 'addon/phototrack/phototrack.php', 'phototrack_post_local_end'); + Addon::unregisterHook('post_remote_end', 'addon/phototrack/phototrack.php', 'phototrack_post_remote_end'); + Addon::unregisterHook('notifier_end', 'addon/phototrack/phototrack.php', 'phototrack_notifier_end'); + Addon::unregisterHook('cron', 'addon/phototrack/phototrack.php', 'phototrack_cron'); +} + +function phototrack_module() {} + +function phototrack_finished_row($table, $id) { + $existing = DBA::selectFirst('phototrack_row_check', ['id'], ['table' => $table, 'row-id' => $id]); + if (!is_bool($existing)) { + q("UPDATE phototrack_row_check SET checked = NOW() WHERE `table` = '$table' AND `row-id` = '$id'"); + } + else { + q("INSERT INTO phototrack_row_check (`table`, `row-id`, `checked`) VALUES ('$table', '$id', NOW())"); + } +} + +function phototrack_photo_use($photo, $table, $field, $id) { + Logger::debug('@@@ phototrack_photo_use ' . 
$photo); + foreach (Image::supportedTypes() as $m => $e) { + $photo = str_replace(".$e", '', $photo); + } + if (substr($photo, -2, 1) == '-') { + $resolution = intval(substr($photo,-1,1)); + $photo = substr($photo,0,-2); + } + if (strlen($photo) != 32) { + return; + } + $r = q("SELECT `resource-id` FROM `photo` WHERE `resource-id` = '%s' LIMIT 1", DBA::escape($photo)); + if (!count($r)) { + return; + } + $rid = $r[0]['resource-id']; + $existing = q("SELECT id FROM phototrack_photo_use WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); + if (count($existing)) { + q("UPDATE phototrack_photo_use SET checked = NOW() WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); + } + else { + q("INSERT INTO phototrack_photo_use (`resource-id`, `table`, `field`, `row-id`, `checked`) VALUES ('$rid', '$table', '$field', '$id', NOW())"); + } +} + +function phototrack_check_field_url($a, $table, $field, $id, $url) { + Logger::info('@@@ phototrack_check_field_url table ' . $table . ' field ' . $field . ' id ' . $id . ' url ' . $url); + $baseurl = $a->getBaseURL(); + if (strpos($url, $baseurl) !== FALSE) { + $url = substr($url, strlen($baseurl)); + Logger::info('@@@ phototrack_check_field_url funny url stuff ' . $url . ' base ' . $baseurl); + } + if (strpos($url, '/photo/') !== FALSE) { + $rid = substr($url, strlen('/photo/')); + Logger::info('@@@ phototrack_check_field_url rid ' . 
$rid); + phototrack_photo_use($rid, $table, $field, $id); + } +} + +function phototrack_check_field_bbcode($a, $table, $field, $id, $value) { + $baseurl = $a->getBaseURL(); + $matches = array(); + preg_match_all("/\[img(\=([0-9]*)x([0-9]*))?\](.*?)\[\/img\]/ism", $value, $matches); + foreach ($matches[4] as $url) { + phototrack_check_field_url($a, $table, $field, $id, $url); + } +} + +function phototrack_post_local_end(&$a, &$item) { + phototrack_check_row($a, 'item', $item); + phototrack_check_row($a, 'item-content', $item); +} + +function phototrack_post_remote_end(&$a, &$item) { + phototrack_check_row($a, 'item', $item); + phototrack_check_row($a, 'item-content', $item); +} + +function phototrack_notifier_end($item) { + $a = get_app(); +} + +function phototrack_check_row($a, $table, $row) { + switch ($table) { + case 'item': + $fields = array( + 'body' => 'bbcode'); + break; + case 'item-content': + $fields = array( + 'body' => 'bbcode'); + break; + case 'contact': + $fields = array( + 'photo' => 'url', + 'thumb' => 'url', + 'micro' => 'url', + 'about' => 'bbcode'); + break; + case 'fcontact': + $fields = array( + 'photo' => 'url'); + break; + case 'fsuggest': + $fields = array( + 'photo' => 'url'); + break; + case 'gcontact': + $fields = array( + 'photo' => 'url', + 'about' => 'bbcode'); + break; + default: $fields = array(); break; + } + foreach ($fields as $field => $type) { + switch ($type) { + case 'bbcode': phototrack_check_field_bbcode($a, $table, $field, $row['id'], $row[$field]); break; + case 'url': phototrack_check_field_url($a, $table, $field, $row['id'], $row[$field]); break; + } + } + phototrack_finished_row($table, $row['id']); +} + +function phototrack_batch_size() { + $batch_size = Config::get('phototrack', 'batch_size'); + if ($batch_size > 0) { + return $batch_size; + } + return PHOTOTRACK_DEFAULT_BATCH_SIZE; +} + +function phototrack_search_table($a, $table) { + $batch_size = phototrack_batch_size(); + $rows = q("SELECT `$table`.* FROM 
`$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) ) ORDER BY phototrack_row_check.checked LIMIT $batch_size"); + foreach ($rows as $row) { + phototrack_check_row($a, $table, $row); + } + $r = q("SELECT COUNT(*) FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) )"); + $remaining = $r[0]['COUNT(*)']; + Logger::info('phototrack: searched ' . count($rows) . ' rows in table ' . $table . ', ' . $remaining . ' still remaining to search'); + return $remaining; +} + +function phototrack_cron_time() { + $prev_remaining = Config::get('phototrack', 'remaining_items'); + if ($prev_remaining > 10 * phototrack_batch_size()) { + Logger::debug('phototrack: more than ' . (10 * phototrack_batch_size()) . 
' items remaining'); + return true; + } + $last = Config::get('phototrack', 'last_search'); + $search_interval = intval(Config::get('phototrack', 'search_interval')); + if (!$search_interval) { + $search_interval = PHOTOTRACK_DEFAULT_SEARCH_INTERVAL; + } + if ($last) { + $next = $last + ($search_interval * 60); + if ($next > time()) { + Logger::debug('phototrack: search interval not reached'); + return false; + } + } + return true; +} + +function phototrack_cron($a, $b) { + if (!phototrack_cron_time()) { + return; + } + Config::set('phototrack', 'last_search', time()); + + $remaining = 0; + $remaining += phototrack_search_table($a, 'item'); + $remaining += phototrack_search_table($a, 'item-content'); + $remaining += phototrack_search_table($a, 'contact'); + $remaining += phototrack_search_table($a, 'fcontact'); + $remaining += phototrack_search_table($a, 'fsuggest'); + $remaining += phototrack_search_table($a, 'gcontact'); + + Config::set('phototrack', 'remaining_items', $remaining); + if ($remaining === 0) { + phototrack_tidy(); + } +} + +function phototrack_tidy() { + $batch_size = phototrack_batch_size(); + q('CREATE TABLE IF NOT EXISTS `phototrack-temp` (`resource-id` char(255) not null)'); + q('INSERT INTO `phototrack-temp` SELECT DISTINCT(`resource-id`) FROM photo WHERE photo.`created` < DATE_SUB(NOW(), INTERVAL 2 MONTH)'); + $rows = q('SELECT `phototrack-temp`.`resource-id` FROM `phototrack-temp` LEFT OUTER JOIN phototrack_photo_use ON (`phototrack-temp`.`resource-id` = phototrack_photo_use.`resource-id`) WHERE phototrack_photo_use.id IS NULL limit ' . /*$batch_size*/1000); + foreach ($rows as $row) { + Logger::debug('phototrack: remove photo ' . $row['resource-id']); + q('DELETE FROM photo WHERE `resource-id` = "' . $row['resource-id'] . '"'); + } + q('DROP TABLE `phototrack-temp`'); + Logger::info('phototrack_tidy: deleted ' . count($rows) . 
' photos'); + $rows = q('SELECT id FROM phototrack_photo_use WHERE checked < DATE_SUB(NOW(), INTERVAL 14 DAY)'); + foreach ($rows as $row) { + q('DELETE FROM phototrack_photo_use WHERE id = ' . $row['id']); + } + Logger::info('phototrack_tidy: deleted ' . count($rows) . ' phototrack_photo_use rows'); +} diff --git a/publicise/publicise.php b/publicise/publicise.php new file mode 100644 index 00000000..d27eefd4 --- /dev/null +++ b/publicise/publicise.php @@ -0,0 +1,431 @@ + + */ + +use Friendica\Core\Addon; +use Friendica\Core\Logger; +use Friendica\Core\Renderer; +use Friendica\Core\L10n; +use Friendica\Database\DBA; + +function publicise_install() { + Addon::registerHook('post_remote', 'addon/publicise/publicise.php', 'publicise_post_remote_hook'); +} + +function publicise_uninstall() { + Addon::unregisterHook('post_remote', 'addon/publicise/publicise.php', 'publicise_post_remote_hook'); + Addon::unregisterHook('post_remote_end', 'addon/publicise/publicise.php', 'publicise_post_remote_end_hook'); +} + +function publicise_get_contacts() { + $query = <<$v) { + $enabled = ($v['reason'] === 'publicise') ? 1 : NULL; + $expire = 30; + $comments = 1; + $url = $v['url']; + if ($enabled) { + $r = q('SELECT * FROM `user` WHERE `uid` = %d', intval($v['uid'])); + $expire = $r[0]['expire']; + $url = $a->get_baseurl() . '/profile/' . $v['nick']; + if ($r[0]['page-flags'] == PAGE_SOAPBOX) { + $comments = NULL; + } + if ($r[0]['account_expired']) { + $enabled = NULL; + } + } + $contacts[$k]['enabled'] = array('publicise-enabled-' . $v['id'], NULL, $enabled); + $contacts[$k]['comments'] = array('publicise-comments-' . 
$v['id'], NULL, $comments); + $contacts[$k]['expire'] = $expire; + $contacts[$k]['url'] = $url; + } + $template = Renderer::getMarkupTemplate('admin.tpl', 'addon/publicise/'); + $o .= Renderer::replaceMacros($template, array( + '$feeds' => $contacts, + '$feed_t' => L10n::t('Feed'), + '$publicised_t' => L10n::t('Publicised'), + '$comments_t' => L10n::t('Allow Comments/Likes'), + '$expire_t' => L10n::t('Expire Articles After (Days)'), + '$submit_t' => L10n::t('Submit'))); +} + +function publicise_make_string($in) { + return "'" . DBA::escape($in) . "'"; +} + +function publicise_make_int($in) { + return intval($in) ? $in : 0; +} + +function publicise_create_user($owner, $contact) { + + $nick = $contact['nick']; + if (!$nick) { + notice(sprintf(t("Can't publicise feed \"%s\" because it doesn't have a nickname"), $contact['name']) . EOL); + return; + } + Logger::info('Publicise: create user, beginning key generation...'); + $res=openssl_pkey_new(array( + 'digest_alg' => 'sha1', + 'private_key_bits' => 4096, + 'encrypt_key' => false )); + $prvkey = ''; + openssl_pkey_export($res, $prvkey); + $pkey = openssl_pkey_get_details($res); + $pubkey = $pkey["key"]; + $sres=openssl_pkey_new(array( + 'digest_alg' => 'sha1', + 'private_key_bits' => 512, + 'encrypt_key' => false )); + $sprvkey = ''; + openssl_pkey_export($sres, $sprvkey); + $spkey = openssl_pkey_get_details($sres); + $spubkey = $spkey["key"]; + $guid = generate_user_guid(); + + $newuser = array( + 'guid' => publicise_make_string($guid), + 'username' => publicise_make_string($contact['name']), + 'password' => publicise_make_string($owner['password']), + 'nickname' => publicise_make_string($contact['nick']), + 'email' => publicise_make_string($owner['email']), + 'openid' => publicise_make_string($owner['openid']), + 'timezone' => publicise_make_string($owner['timezone']), + 'language' => publicise_make_string($owner['language']), + 'register_date' => publicise_make_string(datetime_convert()), + 'default-location' => 
publicise_make_string($owner['default-location']), + 'allow_location' => publicise_make_string($owner['allow_location']), + 'theme' => publicise_make_string($owner['theme']), + 'pubkey' => publicise_make_string($pubkey), + 'prvkey' => publicise_make_string($prvkey), + 'spubkey' => publicise_make_string($spubkey), + 'sprvkey' => publicise_make_string($sprvkey), + 'verified' => publicise_make_int($owner['verified']), + 'blocked' => publicise_make_int(0), + 'blockwall' => publicise_make_int(1), + 'hidewall' => publicise_make_int(0), + 'blocktags' => publicise_make_int(0), + 'notify-flags' => publicise_make_int($owner['notifyflags']), + 'page-flags' => publicise_make_int($comments ? PAGE_COMMUNITY : PAGE_SOAPBOX), + 'expire' => publicise_make_int($expire), + ); + Logger::debug('Publicise: creating user ' . print_r($newuser, true)); + $r = q("INSERT INTO `user` (`" + . implode("`, `", array_keys($newuser)) + . "`) VALUES (" + . implode(", ", array_values($newuser)) + . ")" ); + if (!$r) { + Logger::warning('Publicise: create user failed'); + return; + } + $r = q('SELECT * FROM `user` WHERE `guid` = "%s"', DBA::escape($guid)); + if (count($r) != 1) { + Logger::warning('Publicise: unexpected number of uids returned'); + return; + } + Logger::debug('Publicise: created user ID ' . $r[0]); + return $r[0]; +} + +function publicise_create_self_contact($a, $contact, $uid) { + $newcontact = array( + 'uid' => $uid, + 'created' => publicise_make_string(datetime_convert()), + 'self' => publicise_make_int(1), + 'name' => publicise_make_string($contact['name']), + 'nick' => publicise_make_string($contact['nick']), + 'photo' => publicise_make_string($contact['photo']), + 'thumb' => publicise_make_string($contact['thumb']), + 'micro' => publicise_make_string($contact['micro']), + 'blocked' => publicise_make_int(0), + 'pending' => publicise_make_int(0), + 'url' => publicise_make_string($a->get_baseurl() . '/profile/' . 
$contact['nick']), + 'nurl' => publicise_make_string($a->get_baseurl() . '/profile/' . $contact['nick']), + 'request' => publicise_make_string($a->get_baseurl() . '/dfrn_request/' . $contact['nick']), + 'notify' => publicise_make_string($a->get_baseurl() . '/dfrn_notify/' . $contact['nick']), + 'poll' => publicise_make_string($a->get_baseurl() . '/dfrn_poll/' . $contact['nick']), + 'confirm' => publicise_make_string($a->get_baseurl() . '/dfrn_confirm/' . $contact['nick']), + 'poco' => publicise_make_string($a->get_baseurl() . '/poco/' . $contact['nick']), + 'uri-date' => publicise_make_string(datetime_convert()), + 'avatar-date' => publicise_make_string(datetime_convert()), + 'closeness' => publicise_make_int(0), + ); + $existing = q("SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1", intval($uid)); + if (count($existing)) { + $newcontact = $existing[0]; + Logger::debug('Publicise: self contact already exists for user ' . $uid . ' id ' . $newcontact['id']); + } else { + Logger::debug('Publicise: create contact ' . print_r($newcontact, true)); + q("INSERT INTO `contact` (`" + . implode("`, `", array_keys($newcontact)) + . "`) VALUES (" + . implode(", ", array_values($newcontact)) + . ")" ); + $results = q("SELECT `id` FROM `contact` WHERE `uid` = %d AND `self` = 1", intval($uid)); + if (count($results) != 1) { + Logger::warning('Publicise: create self contact failed, will delete uid ' . $uid); + $r = q("DELETE FROM `user` WHERE `uid` = %d", intval($uid)); + return; + } + $newcontact = $results[0]; + Logger::debug('Publicise: created self contact for user ' . $uid . ' id ' . $newcontact['id']); + } + Logger::debug('Publicise: self contact for ' . $uid . ' nick ' . $contact['nick'] . ' is ' . 
$newcontact['id']); + return $newcontact['id']; +} + +function publicise_create_profile($contact, $uid) { + $newprofile = array( + 'uid' => $uid, + 'profile-name' => publicise_make_string('default'), + 'is-default' => publicise_make_int(1), + 'name' => publicise_make_string($contact['name']), + 'photo' => publicise_make_string($contact['photo']), + 'thumb' => publicise_make_string($contact['thumb']), + 'homepage' => publicise_make_string($contact['url']), + 'publish' => publicise_make_int(1), + 'net-publish' => publicise_make_int(1), + ); + Logger::debug('Publicise: create profile ' . print_r($newprofile, true)); + $r = q("INSERT INTO `profile` (`" + . implode("`, `", array_keys($newprofile)) + . "`) VALUES (" + . implode(", ", array_values($newprofile)) + . ")" ); + if (!$r) { + Logger::warning('Publicise: create profile failed'); + } + $newprofile = q('SELECT `id` FROM `profile` WHERE `uid` = %d AND `is-default` = 1', intval($uid)); + if (count($newprofile) != 1) { + Logger::warning('Publicise: create profile produced unexpected number of results'); + return; + } + Logger::debug('Publicise: created profile ' . $newprofile[0]['id']); + return $newprofile[0]['id']; +} + +function publicise_set_up_user($a, $contact, $owner) { + $user = publicise_create_user($owner, $contact); + if (!$user) { + notice(sprintf(t("Failed to create user for feed \"%s\""), $contact['name']) . EOL); + return; + } + $self_contact = publicise_create_self_contact($a, $contact, $user['uid']); + if (!$self_contact) { + notice(sprintf(t("Failed to create self contact for user \"%s\""), $contact['name']) . EOL); + Logger::warning("Publicise: unable to create self contact, deleting user " . $user['uid']); + q('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid'])); + return; + } + $profile = publicise_create_profile($contact, $user['uid']); + if (!$profile) { + notice(sprintf(t("Failed to create profile for user \"%s\""), $contact['name']) . 
EOL); + Logger::warning("Publicise: unable to create profile, deleting user $uid contact $self_contact"); + q('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid'])); + q('DELETE FROM `contact` WHERE `id` = %d', intval($self_contact)); + return; + } + return $user; +} + +function publicise($a, &$contact, &$owner) { + Logger::info('@@@ Publicise: publicise'); + if (!is_site_admin()) { + notice(t("Only admin users can publicise feeds")); + Logger::warning('Publicise: non-admin tried to publicise'); + return; + } + + // Check if we're changing our mind about a feed we earlier depublicised + Logger::info('@@@ Publicise: ' . 'SELECT * FROM `user` WHERE `account_expires_on` != "0000-00-00 00:00:00" AND `nickname` = "' . $contact['nick'] . '" AND `email` = "' . $owner['email'] . '" AND `page-flags` in (' . intval(PAGE_COMMUNITY) . ', ' . intval(PAGE_SOAPBOX) . ')'); + $existing = q('SELECT * FROM `user` WHERE `account_expires_on` != "0000-00-00 00:00:00" AND `nickname` = "%s" AND `email` = "%s" AND `page-flags` in (%d, %d)', + DBA::escape($contact['nick']), DBA::escape($owner['email']), intval(PAGE_COMMUNITY), intval(PAGE_SOAPBOX)); + if (count($existing) == 1) { + Logger::info('@@@ Publicise: there is existing'); + $owner = $existing[0]; + q('UPDATE `user` SET `account_expires_on` = "0000-00-00 00:00:00", `account_removed` = 0, `account_expired` = 0 WHERE `uid` = %d', intval($owner['uid'])); + q('UPDATE `profile` SET `publish` = 1, `net-publish` = 1 WHERE `uid` = %d AND `is-default` = 1', intval($owner['uid'])); + Logger::debug('Publicise: recycled previous user ' . $owner['uid']); + } + else { + Logger::info('@@@ Publicise: there is not existing'); + $owner = publicise_set_up_user($a, $contact, $owner); + if (!$owner) { + return; + } + Logger::debug("Publicise: created new user " . $owner['uid']); + } + Logger::info('Publicise: new contact user is ' . 
$owner['uid']); + + $r = q("UPDATE `contact` SET `uid` = %d, `reason` = 'publicise', `hidden` = 1 WHERE id = %d", intval($owner['uid']), intval($contact['id'])); + if (!$r) { + Logger::warning('Publicise: update contact failed, user is probably in a bad state ' . $user['uid']); + } + $contact['uid'] = $owner['uid']; + $contact['reason'] = 'publicise'; + $contact['hidden'] = 1; + $r = q("UPDATE `item` SET `uid` = %d, type = 'wall', wall = 1, private = 0 WHERE `contact-id` = %d", + intval($owner['uid']), intval($contact['id'])); + Logger::debug('Publicise: moved items from contact ' . $contact['id'] . ' to uid ' . $owner['uid']); + + // Update the retriever config + $r = q("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d", + intval($owner['uid']), intval($contact['id'])); + + info(sprintf(t("Moved feed \"%s\" to dedicated account"), $contact['name']) . EOL); + return true; +} + +function publicise_self_contact($uid) { + $r = q('SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1', intval($uid)); + if (count($r) != 1) { + Logger::warning('Publicise: unexpected number of self contacts for user ' . $uid); + return; + } + return $r[0]; +} + +function depublicise($a, $contact, $user) { + require_once('include/Contact.php'); + + if (!is_site_admin()) { + notice("Only admin users can depublicise feeds"); + Logger::warning('Publicise: non-admin tried to depublicise'); + return; + } + + Logger::debug('Publicise: about to depublicise contact ' . $contact['id'] . ' user ' . $user['uid']); + + $self_contact = publicise_self_contact($user['uid']); + + // If the local_user() is subscribed to the feed, take ownership + // of the feed and all its items and photos. Otherwise they will + // be deleted when the account expires. + $r = q('SELECT * FROM `contact` WHERE `uid` = %d AND `url` = "%s"', + intval(local_user()), DBA::escape($self_contact['url'])); + if (count($r)) { + // Delete the contact to the feed user and any + // copies of its items. 
These will be replaced by the originals, + // which will be brought back into the local_user's feed along + // with the feed contact itself. + foreach ($r as $my_contact) { + q('DELETE FROM `item` WHERE `contact-id` = %d', intval($my_contact['id'])); + q('DELETE FROM `contact` WHERE `id` = %d', intval($my_contact['id'])); + } + + // Move the feed contact to local_user. Existing items stay + // attached to the original feed contact, but must have their uid + // updated. Also update the fields we scribbled over in + // publicise_post_remote_hook. + q('UPDATE `contact` SET `uid` = %d, `reason` = "", hidden = 0 WHERE id = %d', + intval(local_user()), intval($contact['id'])); + q('UPDATE `item` SET `uid` = %d, `wall` = 0, `type` = "remote", `private` = 2 WHERE `contact-id` = %d', + intval(local_user()), intval($contact['id'])); + + // Take ownership of any photos created by the feed user + q('UPDATE `photo` SET `uid` = %d WHERE `uid` = %d', + intval(local_user()), intval($user['uid'])); + + // Update the retriever config + $r = q("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d", + intval($owner['uid']), intval($contact['id'])); + } + + // Set the account to removed and expired right now. It will be cleaned up by cron after 3 days, giving a chance to change your mind + q('UPDATE `user` SET `account_removed` = 1, `account_expired` = 1, `account_expires_on` = UTC_TIMESTAMP() WHERE `uid` = %d', + intval($user['uid'])); + q('UPDATE `profile` SET `publish` = 0, `net-publish` = 0 WHERE `uid` = %d AND `is-default` = 1', intval($user['uid'])); + + info(sprintf(t("Removed dedicated account for feed \"%s\""), $contact['name']) . EOL); +} + +function publicise_addon_admin_post ($a) { + Logger::info('@@@ publicise_addon_admin_post'); + if (!is_site_admin()) { + Logger::warning('Publicise: non-admin tried to do admin post'); + return; + } + + foreach (publicise_get_contacts() as $contact) { + Logger::info('@@@ publicise_addon_admin_post contact ' . $contact['id'] . 
' ' . $contact['name']); + $user = publicise_get_user($contact['uid']); + if (!$_POST['publicise-enabled-' . $contact['id']]) { + if ($contact['reason'] === 'publicise') { + Logger::info('@@@ depublicise'); + depublicise($a, $contact, $user); + } + } + else { + if ($contact['reason'] !== 'publicise') { + Logger::info('@@@ publicise'); + if (!publicise($a, $contact, $user)) { + Logger::warning('Publicise: failed to publicise contact ' . $contact['id']); + continue; + } + } + if ($_POST['publicise-expire-' . $contact['id']] != $user['expire']) { + q('UPDATE `user` SET `expire` = %d WHERE `uid` = %d', + intval($_POST['publicise-expire-' . $contact['id']]), intval($user['uid'])); + } + if ($_POST['publicise-comments-' . $contact['id']]) { + if ($user['page-flags'] != PAGE_COMMUNITY) { + q('UPDATE `user` SET `page-flags` = %d WHERE `uid` = %d', + intval(PAGE_COMMUNITY), intval($user['uid'])); + q('UPDATE `contact` SET `rel` = %d WHERE `uid` = %d AND `network` = "dfrn"', + intval(CONTACT_IS_SHARING), intval($user['uid'])); + } + } + else { + if ($user['page-flags'] != PAGE_SOAPBOX) { + q('UPDATE `user` SET `page-flags` = %d WHERE `uid` = %d', + intval(PAGE_SOAPBOX), intval($user['uid'])); + q('UPDATE `contact` SET `rel` = %d WHERE `uid` = %d AND `network` = "dfrn"', + intval(CONTACT_IS_FOLLOWER), intval($user['uid'])); + } + } + } + } +} + +function publicise_post_remote_hook(&$a, &$item) { + $r1 = q("SELECT `uid` FROM `contact` WHERE `id` = %d AND `reason` = 'publicise'", intval($item['contact-id'])); + if (!$r1) { + return; + } + + Logger::debug('Publicise: moving to wall: ' . $item['uid'] . ' ' . $item['contact-id'] . ' ' . 
$item['uri']); + $item['type'] = 'wall'; + $item['wall'] = 1; + $item['private'] = 0; +} + diff --git a/publicise/templates/admin.tpl b/publicise/templates/admin.tpl new file mode 100644 index 00000000..b10c3546 --- /dev/null +++ b/publicise/templates/admin.tpl @@ -0,0 +1,39 @@ +{{* + * AUTOMATICALLY GENERATED TEMPLATE + * DO NOT EDIT THIS FILE, CHANGES WILL BE OVERWRITTEN + * + *}} +
+ + + + + + + + + + +{{foreach $feeds as $f}} + + + + + + +{{/foreach}} + +
{{$feed_t}}{{$publicised_t}}{{$comments_t}}{{$expire_t}}
+ + + {{$f.name}} + + +{{include file="field_yesno.tpl" field=$f.enabled}} + +{{include file="field_yesno.tpl" field=$f.comments}} + + +
+ +
From dbfc24d51fc531530b7f4c446d1fee833d0ac998 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 30 Sep 2019 20:52:51 +0200 Subject: [PATCH 25/94] retriever tweaks --- retriever/retriever.php | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index fcd45b46..f495578b 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -308,9 +308,8 @@ function retriever_get_item($retriever_item) { Logger::info('@@@ retriever_get_item uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['item-uid'] . ' cid ' . $retriever_item['contact-id']); try {//@@@ not necessary $item = Item::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid'])]); - Logger::log('@@@ 1 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); if (!DBA::isResult($item)) { - Logger::log('retriever_get_item: no item found for uri ' . $retriever_item['item-uri']); + Logger::warning('retriever_get_item: no item found for uri ' . $retriever_item['item-uri']); return; } Logger::info('@@@ retriever_get_item: yay item found for uri ' . $retriever_item['item-uri'] . ' guid ' . $item['guid'] . ' plink ' . $item['plink']); @@ -329,9 +328,9 @@ function retriever_item_completed($retriever_item_id, $resource, $a) { return; } $item = retriever_get_item($retriever_item); - Logger::log('@@@ 2 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); + Logger::info('@@@ 2 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); if (!$item) { - Logger::log('retriever_item_completed: no item ' . $retriever_item['item-uri']); + Logger::warning('retriever_item_completed: no item ' . $retriever_item['item-uri']); return; } // Note: the retriever might be null. Doesn't matter. @@ -348,21 +347,17 @@ function retriever_resource_completed($resource, $a) { Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . 
$resource['url']); $r = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource['id']); foreach (DBA::select('retriever_item', ['id'], ['resource' => intval($resource['id'])]) as $retriever_item) { - Logger::debug('@@@ retriever_resource_completed got item id ' . $retriever_item['id']); retriever_item_completed($retriever_item['id'], $resource, $a); } } function apply_retrospective($a, $retriever, $num) { - Logger::info('@@@ apply_retrospective'); $r = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d", intval($retriever['contact-id']), intval($num)); foreach ($r as $item) { - Logger::info('@@@ apply_retrospective item ' . $item['id']); q('UPDATE `item` SET `visible` = 0 WHERE `id` = %d', $item['id']); q('UPDATE `thread` SET `visible` = 0 WHERE `iid` = %d', $item['id']); foreach (DBA::select('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]) as $retriever_item) { - Logger::info('@@@ about to delete retriever_item id ' . $retriever_item['id'] . ' uri ' . $item['uri'] . ' uid ' . $item['uid'] . ' contact ' . 
$item['contact-id']); DBA::delete('retriever_resource', ['id' => $retriever_item['resource']]); DBA::delete('retriever_item', ['id' => $retriever_item['id']]); } @@ -378,7 +373,7 @@ function retriever_on_item_insert($a, $retriever, &$item) { Logger::info('retriever_on_item_insert: No retriever supplied'); return; } - if (!$retriever['data']['enable'] == "on") { + if (!array_key_exists('enable', $retriever['data']) || !$retriever['data']['enable'] == "on") { Logger::info('@@@ retriever_on_item_insert: Disabled'); return; } From 2d8e13d53d388e9f98718e48199d44f8716fe265 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Oct 2019 07:19:59 +0200 Subject: [PATCH 26/94] extensive refactoring --- retriever/retriever.php | 305 +++++++++++++--------------- retriever/templates/rule-config.tpl | 32 ++- 2 files changed, 164 insertions(+), 173 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index f495578b..6ace5e98 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -177,14 +177,14 @@ function retriever_clean_up_completed_resources($max_items, $a) { } $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]); retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a); - Logger::info('@@@ retriever_clean_up_completed_resources tried to update id ' . $retriver_item['id'] . ' to finished, better check that it really worked!'); + Logger::info('@@@ retriever_clean_up_completed_resources tried to update id ' . $retriever_item['id'] . 
' to finished, better check that it really worked!'); DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished']); retriever_check_item_completed($item); } } function retriever_tidy() { - // TODO: figure out how to do this with DBA module + // TODO: figure out how to do this with DBA module @@@ it is possible q("DELETE FROM retriever_resource WHERE completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)"); q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)"); @@ -212,8 +212,6 @@ function retrieve_dataurl_resource($resource) { } function retrieve_resource($resource) { - Logger::info('@@@ retrieve_resource: url ' . $resource['url'] . ' uid ' . $resource['item-uid'] . ' cid ' . $resource['contact-id']); - if (substr($resource['url'], 0, 5) == "data:") { return retrieve_dataurl_resource($resource); } @@ -221,24 +219,22 @@ function retrieve_resource($resource) { $a = get_app(); $retriever_rule = get_retriever_rule($resource['contact-id'], $resource['item-uid']); + $rule_data = $retriever_rule['data']; try { Logger::debug('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url']); $redirects = 0; $cookiejar = ''; - Logger::debug('@@@ retrieve_resource storecookies ' . $retriever_rule['storecookies']); - if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) { + if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) { $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); - Logger::debug('@@@ retrieve_resource cookie file ' . $cookiejar . ' content ' . 
$retriever_rule['cookiedata']); - file_put_contents($cookiejar, $retriever_rule['cookiedata']); + file_put_contents($cookiejar, $rule_data['cookiedata']); } $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar); - if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) { - $retriever_rule['cookiedata'] = file_get_contents($cookiejar); - Logger::debug('@@@ retriever_resource update cookie ' . json_encode($retriever_rule['data'] . ' id ' . $retriever_rule['id'])); - q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d", - DBA::escape(json_encode($retriever_rule['data'])), intval($retriever_rule["id"])); - /* unlink($cookiejar); */ //@@@ + if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) { + $retriever_rule['data']['cookiedata'] = file_get_contents($cookiejar); + DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])]); + //@@@ check the update worked + unlink($cookiejar); } $resource['data'] = $fetch_result->getBody(); $resource['http-code'] = $fetch_result->getReturnCode(); @@ -248,36 +244,33 @@ function retrieve_resource($resource) { } catch (Exception $e) { Logger::info('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . 
$e->getMessage()); } + // TODO: figure out how to do this with DBA module q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `http-code` = %d, `redirect-url` = '%s' WHERE id = %d", intval($resource['http-code']), DBA::escape($resource['redirect-url']), intval($resource['id'])); if ($resource['data']) { + // TODO: figure out how to do this with DBA module q("UPDATE `retriever_resource` SET `completed` = now(), `data` = '%s', `type` = '%s' WHERE id = %d", DBA::escape($resource['data']), DBA::escape($resource['type']), intval($resource['id'])); retriever_resource_completed($resource, $a); } - Logger::info('@@@ retrieve_resource finished: ' . $resource['url']); } function get_retriever_rule($contact_id, $uid, $create = false) { - Logger::info('@@@ get_retriever_rule ' . "SELECT * FROM `retriever_rule` WHERE `contact-id` = " . intval($contact_id) . " AND `uid` = " . intval($uid)); - $r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d", - intval($contact_id), intval($uid)); - Logger::info('@@@ get_retriever_rule count is ' . 
count($r)); - if (count($r)) { - $r[0]['data'] = json_decode($r[0]['data'], true); + $retriever_rule = DBA::selectFirst('retriever_rule', [], ['contact-id' => intval($contact_id), 'uid' => intval($uid)]); + //@@@ check that this worked + if ($retriever_rule) { + $retriever_rule['data'] = json_decode($retriever_rule['data'], true); Logger::info('@@@ get_retriever_rule returning an actual thing'); - return $r[0]; + return $retriever_rule; } if ($create) { - q("INSERT INTO `retriever_rule` (`uid`, `contact-id`) VALUES (%d, %d)", - intval($uid), intval($contact_id)); - $r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d", - intval($contact_id), intval($uid)); - return $r[0]; + DBA::insert('retriever_rule', ['uid' => intval($uid), 'contact-id' => intval($contact_id)]); + //@@@ check that this worked + return DBA::selectFirst('retriever_rule', [], ['contact-id' => intval($contact_id), 'uid' => intval($uid)]); } } @@ -285,38 +278,13 @@ function retriever_get_retriever_item($id) { return DBA::selectFirst('retriever_item', [], ['id' => intval($id)]); } -function retriever_class_of_item($item) { //@@@ - if (!$item) { - return 'false'; - } - if (array_key_exists('finished', $item)) { - Logger::info('@@@ oh no this is a bad thing'); - return 'retriever_item'; - } - if (array_key_exists('moderated', $item)) { - return 'friendica_item'; - } - return 'unknown'; -} - -function mat_test($item) { //@@@ - return 'mat_test'; -} - function retriever_get_item($retriever_item) { - // @@@ add contact id as a search term - Logger::info('@@@ retriever_get_item uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['item-uid'] . ' cid ' . $retriever_item['contact-id']); - try {//@@@ not necessary - $item = Item::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid'])]); - if (!DBA::isResult($item)) { - Logger::warning('retriever_get_item: no item found for uri ' . 
$retriever_item['item-uri']); - return; - } - Logger::info('@@@ retriever_get_item: yay item found for uri ' . $retriever_item['item-uri'] . ' guid ' . $item['guid'] . ' plink ' . $item['plink']); - return $item; - } catch (Exception $e) { - Logger::info('retriever_get_item: exception ' . $e->getMessage()); + $item = Item::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid']), 'contact-id' => intval($retriever_item['contact-id'])]); + if (!DBA::isResult($item)) { + Logger::warning('retriever_get_item: no item found for uri ' . $retriever_item['item-uri']); + return; } + return $item; } function retriever_item_completed($retriever_item_id, $resource, $a) { @@ -328,7 +296,6 @@ function retriever_item_completed($retriever_item_id, $resource, $a) { return; } $item = retriever_get_item($retriever_item); - Logger::info('@@@ 2 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); if (!$item) { Logger::warning('retriever_item_completed: no item ' . $retriever_item['item-uri']); return; @@ -338,25 +305,23 @@ function retriever_item_completed($retriever_item_id, $resource, $a) { retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a); - q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", - intval($retriever_item['id'])); + DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished']); retriever_check_item_completed($item); } function retriever_resource_completed($resource, $a) { Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . 
$resource['url']); - $r = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource['id']); foreach (DBA::select('retriever_item', ['id'], ['resource' => intval($resource['id'])]) as $retriever_item) { retriever_item_completed($retriever_item['id'], $resource, $a); } } function apply_retrospective($a, $retriever, $num) { - $r = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d", - intval($retriever['contact-id']), intval($num)); - foreach ($r as $item) { - q('UPDATE `item` SET `visible` = 0 WHERE `id` = %d', $item['id']); - q('UPDATE `thread` SET `visible` = 0 WHERE `iid` = %d', $item['id']); + Logger::debug('@@@ apply_retrospective'); + foreach (Item::select([], ['contact-id' => intval($retriever['contact-id'])], ['order' => ['received' => true], 'limit' => $num]) as $item) { + Logger::debug('@@@ apply_retrospective got item id ' . $item['id'] . ' uri ' . $item['uri']); + Item::update(['visible' => 0], ['id' => intval($item['id'])]); + //@@@ check that this works foreach (DBA::select('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]) as $retriever_item) { DBA::delete('retriever_resource', ['id' => $retriever_item['resource']]); DBA::delete('retriever_item', ['id' => $retriever_item['id']]); @@ -368,13 +333,11 @@ function apply_retrospective($a, $retriever, $num) { // TODO: Currently this waits until the next cron before actually downloading. Should do it immediately. // TODO: This queries then inserts. It should use some kind of lock to avoid requesting the same resource twice. function retriever_on_item_insert($a, $retriever, &$item) { - Logger::info('@@@ retriever_on_item_insert start plink ' . $item['plink'] . ' id ' . 
$item['id']); if (!$retriever || !$retriever['id']) { Logger::info('retriever_on_item_insert: No retriever supplied'); return; } if (!array_key_exists('enable', $retriever['data']) || !$retriever['data']['enable'] == "on") { - Logger::info('@@@ retriever_on_item_insert: Disabled'); return; } if (array_key_exists('plink', $item) && strlen($item['plink'])) { @@ -389,13 +352,12 @@ function retriever_on_item_insert($a, $retriever, &$item) { $url = $content['plink']; } - if (array_key_exists('pattern', $retriever['data']) && $retriever['data']['pattern']) { + if ($retriever['data']['modurl']) { $orig_url = $url; $url = preg_replace('/' . $retriever['data']['pattern'] . '/', $retriever['data']['replace'], $orig_url); Logger::debug('retriever_on_item_insert: Changed ' . $orig_url . ' to ' . $url); } - Logger::debug('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id'] . ' url ' . $url); $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id']); $retriever_item_id = add_retriever_item($item, $resource); } @@ -412,16 +374,15 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { fclose($fp); $url = 'md5://' . hash('md5', $url); - //@@@ fix this - $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid)); - $resource = $r[0]; - if (count($r)) { + if (DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)])) { + //@@@ test that this really happens - it should sometimes Logger::debug('add_retriever_resource: Resource ' . $url . ' already requested'); return $resource; } Logger::debug('retrieve_resource: got data URL type ' . 
$resource['type']); - //@@@ fix this + // TODO: figure out how to do this with DBA module + // @@@ DBA::update('workerqueue', ['executed' => DateTimeFormat::utcNow()], ['pid' => $mypid, 'done' => false]); q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `type`, `binary`, `url`, `completed`, `data`) " . "VALUES (%d, %d, '%s', %d, '%s', now(), '%s')", intval($uid), @@ -430,6 +391,7 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { intval($binary ? 1 : 0), DBA::escape($url), DBA::escape($data)); + //@@@ fix this $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url)); $resource = $r[0]; if (count($r)) { @@ -449,16 +411,18 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { return $r[0]; } + //@@@ fix this q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `binary`, `url`) " . "VALUES (%d, %d, %d, '%s')", intval($uid), intval($cid), intval($binary ? 1 : 0), DBA::escape($url)); + //@@@ fix this $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url)); return $r[0]; } function add_retriever_item(&$item, $resource) { - Logger::debug('@@@ 5 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); Logger::debug('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); + //@@@ can use selectFirst $r = q("SELECT COUNT(*) FROM `retriever_item` WHERE " . "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d", DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id'])); @@ -466,9 +430,11 @@ function add_retriever_item(&$item, $resource) { Logger::info("add_retriever_item: retriever item already present for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); return; } + //@@@ fix this q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " . 
"VALUES ('%s', %d, %d, %d)", DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource["id"])); + //@@@ fix this $r = q("SELECT id FROM `retriever_item` WHERE " . "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d ORDER BY id DESC", DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id'])); @@ -505,8 +471,10 @@ function retriever_apply_xslt_text($xslt_text, $doc) { return $result; } +//@@@ I think this is supposed to update the $item, but it doesn't function retriever_apply_dom_filter($retriever, &$item, $resource) { - Logger::debug('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id']); + //@@@ check if id and uri-id are there //@@@ uri-id definitely is not + Logger::debug('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'] . ' uri-id ' . $item['uri-id']); if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) { Logger::info('retriever_apply_dom_filter: no include and no customxslt'); @@ -517,41 +485,15 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { return; } - //@@@ break this bit into separate function - $encoding = retriever_get_encoding($resource); - $content = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $encoding); - $doc = new DOMDocument('1.0', 'UTF-8'); - if (strpos($resource['type'], 'html') !== false) { - @$doc->loadHTML($content); - } - else { - $doc->loadXML($content); - } + $doc = retriever_load_into_dom($resource); - $params = array('$spec' => $retriever['data']); - $extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/'); - $extract_xslt = Renderer::replaceMacros($extract_template, $params); - if ($retriever['data']['include']) { - Logger::debug('retriever_apply_dom_filter: applying 
include/exclude template \"' . $extract_xslt . '\"'); - $doc = retriever_apply_xslt_text($extract_xslt, $doc); - } - if (array_key_exists('customxslt', $retriever['data']) && $retriever['data']['customxslt']) { - Logger::debug('retriever_apply_dom_filter: applying custom XSLT \"' . $retriever['data']['customxslt'] . '\"'); - $doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc); - } + $doc = retriever_extract($doc, $retriever); if (!$doc) { Logger::info('retriever_apply_dom_filter: failed to apply extract XSLT template'); return; } - //@@@ break this bit into separate function - $components = parse_url($resource['redirect-url']); - $rooturl = $components['scheme'] . "://" . $components['host']; - $dirurl = $rooturl . dirname($components['path']) . "/"; - $params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl); - $fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/'); - $fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params); - $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc); + $doc = retriever_globalise_urls($doc, $resource); if (!$doc) { Logger::info('retriever_apply_dom_filter: failed to apply fix urls XSLT template'); return; @@ -571,10 +513,56 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { Item::update(['body' => $body], ['uri-id' => $uri_id]); } +function retriever_load_into_dom($resource) { + Logger::info('@@@ retriever_load_into_dom start'); + $encoding = retriever_get_encoding($resource); + $content = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $encoding); + $doc = new DOMDocument('1.0', 'UTF-8'); + if (strpos($resource['type'], 'html') !== false) { + @$doc->loadHTML($content); + } + else { + $doc->loadXML($content); + } + Logger::info('@@@ retriever_load_into_dom end'); + return $doc; +} + +function retriever_extract($doc, $retriever) { + Logger::info('@@@ retriever_extract start'); + $params = array('$spec' => $retriever['data']); + 
$extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/'); + $extract_xslt = Renderer::replaceMacros($extract_template, $params); + if ($retriever['data']['include']) { + Logger::debug('retriever_apply_dom_filter: applying include/exclude template \"' . $extract_xslt . '\"'); + $doc = retriever_apply_xslt_text($extract_xslt, $doc); + } + if (array_key_exists('customxslt', $retriever['data']) && $retriever['data']['customxslt']) { + Logger::debug('retriever_extract: applying custom XSLT \"' . $retriever['data']['customxslt'] . '\"'); + $doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc); + } + Logger::info('@@@ retriever_extract end'); + return $doc; +} + +function retriever_globalise_urls($doc, $resource) { + Logger::info('@@@ retriever_globalise_urls start'); + $components = parse_url($resource['redirect-url']); + $rooturl = $components['scheme'] . "://" . $components['host']; + $dirurl = $rooturl . dirname($components['path']) . "/"; + $params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl); + $fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/'); + $fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params); + $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc); + Logger::info('@@@ retriever_globalise_urls end'); + return $doc; +} + function retrieve_images(&$item, $a) { // Note that $item doesn't necessarily contain all the fields you would expect, in particular 'id' + //@@@ doe sit contain uri-id? //@@@ it definitely does not - Logger::debug('@@@ retrieve_images start item '. $item['id'] . ' uri ' . $item['uri'] . ' uri id ' . $item['uri-id'] . ' plink ' . $item['plink'] . ' guid ' . $item['guid']); + Logger::debug('@@@ retrieve_images start item id '. (array_key_exists('id', $item) ? $item['id'] : 'undef') . ' uri ' . $item['uri'] . ' uri id ' . $item['uri-id'] . ' plink ' . $item['plink'] . ' guid ' . 
$item['guid']); $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $uri_id]); @@ -584,7 +572,6 @@ function retrieve_images(&$item, $a) { return; } - Logger::info('@@@ retrieve_images looking in body "' . $body . '"'); // I suspect that the first two are not used any more? preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $item["body"], $matches1); preg_match_all("/\[img\](.*?)\[\/img\]/ism", $item["body"], $matches2); @@ -592,9 +579,7 @@ function retrieve_images(&$item, $a) { $matches = array_merge($matches1[3], $matches2[1], $matches3[1]); Logger::debug('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); foreach ($matches as $url) { - Logger::debug('@@@ retrieve_images: url ' . $url); if (strpos($url, get_app()->getBaseUrl()) === FALSE) { - Logger::debug('@@@ retrieve_images: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id']); $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id'], true); if (!$resource['completed']) { add_retriever_item($item, $resource); @@ -604,12 +589,11 @@ function retrieve_images(&$item, $a) { } } } - Logger::info('@@@ retrieve_images end'); } function retriever_check_item_completed(&$item) { - Logger::debug('@@@ 9 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); + // TODO: figure out how to do this with DBA module $r = q('SELECT count(*) FROM retriever_item WHERE `item-uri` = "%s" ' . 'AND `item-uid` = %d AND `contact-id` = %d AND `finished` = 0', DBA::escape($item['uri']), intval($item['uid']), @@ -620,12 +604,7 @@ function retriever_check_item_completed(&$item) $item['visible'] = $waiting ? 
0 : 1; if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) { Logger::debug('retriever_check_item_completed: changing visible flag to ' . $item['visible']); - q("UPDATE `item` SET `visible` = %d WHERE `id` = %d", - intval($item['visible']), - intval($item['id'])); - q("UPDATE `thread` SET `visible` = %d WHERE `iid` = %d", - intval($item['visible']), - intval($item['id'])); + Item::update(['visible' => 0], ['id' => intval($item['id'])]); } } @@ -647,11 +626,8 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc } } -//@@@ todo: change all Logger::info t etc //@@@ todo: what is this reference for? document if needed delete if not function retriever_transform_images($a, &$item, $resource) { - Logger::debug('@@@ 11 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); - Logger::info('@@@ retriever_transform_images'); if (!$resource['data']) { Logger::info('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url']); return; @@ -659,51 +635,37 @@ function retriever_transform_images($a, &$item, $resource) { $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? - try { //@@@ probably can get rid of this try/catch - $data = $resource['data']; - $type = $resource['type']; - $uid = $item['uid']; - $cid = $item['contact-id']; - $rid = Photo::newResource(); - $path = parse_url($resource['url'], PHP_URL_PATH); - $parts = pathinfo($path); - $filename = $parts['filename'] . (array_key_exists('extension', $parts) ? '.' . $parts['extension'] : ''); - Logger::info('@@@ retriever_transform_images url ' . $resource['url'] . ' path ' . $path . ' filename ' . $parts['filename']); - $album = 'Wall Photos'; - $scale = 0; - $desc = ''; // TODO: store alt text with resource when it's requested so we can fill this in - Logger::debug('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . 
$uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . $album . ' scale ' . $scale . ' desc ' . $desc); - Logger::info('@@@ retriever_transform_images before new Image'); - $image = new Image($data, $type); - Logger::info('@@@ retriever_transform_images after new Image'); - if (!$image->isValid()) { - Logger::warning('retriever_transform_images: invalid image found at URL ' . $resource['url'] . ' for item ' . $item['id']); - return; - } - Logger::info('@@@ retriever_transform_images before Photo::store'); - $photo = Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc); - Logger::info('@@@ retriever_transform_images after Photo::store'); - $new_url = System::baseUrl() . '/photo/' . $rid . '-0.' . $image->getExt(); - Logger::info('@@@ retriever_transform_images new url ' . $new_url . ' rid ' . $rid . ' ext ' . $image->getExt()); - if (!strlen($new_url)) { - Logger::warning('retriever_transform_images: no replacement URL for image ' . $resource['url']); - return; - } - - $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $uri_id]); - $body = $content['body']; - Logger::info('@@@ retriever_transform_images: found body for uri id ' . $uri_id . ': ' . $body); - - Logger::debug('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . $item['uri']); - Logger::debug('@@@ retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in body ' . $body); - $body = str_replace($resource["url"], $new_url, $body); - - Logger::info('@@@ retriever_transform_images: result \"' . $body . '\"'); - Item::update(['body' => $body], ['uri-id' => $uri_id]); - } catch (Exception $e) { - Logger::info('retriever_transform_images caught exception ' . 
$e->getMessage()); + $data = $resource['data']; + $type = $resource['type']; + $uid = $item['uid']; + $cid = $item['contact-id']; + $rid = Photo::newResource(); + $path = parse_url($resource['url'], PHP_URL_PATH); + $parts = pathinfo($path); + $filename = $parts['filename'] . (array_key_exists('extension', $parts) ? '.' . $parts['extension'] : ''); + $album = 'Wall Photos'; + $scale = 0; + $desc = ''; // TODO: store alt text with resource when it's requested so we can fill this in + Logger::debug('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . $uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . $album . ' scale ' . $scale . ' desc ' . $desc); + $image = new Image($data, $type); + if (!$image->isValid()) { + Logger::warning('retriever_transform_images: invalid image found at URL ' . $resource['url'] . ' for item ' . $item['id']); return; } + $photo = Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc); + $new_url = System::baseUrl() . '/photo/' . $rid . '-0.' . $image->getExt(); + if (!strlen($new_url)) { + Logger::warning('retriever_transform_images: no replacement URL for image ' . $resource['url']); + return; + } + + $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $uri_id]); + $body = $content['body']; + + Logger::debug('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . 
$item['uri']); + $body = str_replace($resource["url"], $new_url, $body); + + Item::update(['body' => $body], ['uri-id' => $uri_id]); } function retriever_content($a) { @@ -712,6 +674,7 @@ function retriever_content($a) { return; } if ($a->argv[1] === 'help') { + //@@@ fix me $feeds = q("SELECT `id`, `name`, `thumb` FROM contact WHERE `uid` = %d AND `network` = 'feed'", local_user()); foreach ($feeds as $k=>$v) { @@ -729,7 +692,7 @@ function retriever_content($a) { if (!empty($_POST["id"])) { $retriever_rule = get_retriever_rule($a->argv[1], local_user(), true); $retriever_rule['data'] = array(); - foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) { + foreach (array('modurl', 'pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) { if (empty($_POST['retriever_' . $setting])) { $retriever_rule['data'][$setting] = NULL; } @@ -753,6 +716,7 @@ function retriever_content($a) { unset($retriever_rule['data']['exclude'][$k]); } } + //@@@ fix me q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d", DBA::escape(json_encode($retriever_rule['data'])), intval($retriever_rule["id"])); $a->page['content'] .= "

Settings Updated"; @@ -769,6 +733,11 @@ function retriever_content($a) { 'retriever_enable', L10n::t('Enabled'), $retriever_rule['data']['enable']), + '$modurl' => array( + 'retriever_modurl', + L10n::t('Modify URL'), + $retriever_rule['data']['modurl'], + L10n::t("Modify each article's URL with regular expressions before retrieving.")), '$pattern' => array( 'retriever_pattern', L10n::t('URL Pattern'), @@ -832,7 +801,8 @@ function retriever_contact_photo_menu($a, &$args) { } function retriever_post_remote_hook(&$a, &$item) { - Logger::info('@@@ 12 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); + // Note that $item doesn't necessarily contain all the fields you would expect, in particular 'id' + Logger::info('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? @@ -845,14 +815,13 @@ function retriever_post_remote_hook(&$a, &$item) { // Convert to HTML and back to take advantage of bbcode's resolution of oembeds. $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]); $body = HTML::toBBCode(BBCode::convert($content['body'])); - Logger::debug('@@@ retriever_post_remote_hook item uri-id ' . $uri_id . ' body "' . $item['body'] . '" item content body "' . $body . 
'"'); if ($body) { $item['body'] = $body; Item::update(['body' => $body], ['uri-id' => $uri_id]); } } if (PConfig::get($item["uid"], 'retriever', 'all_photos')) { - retrieve_images($item, $a); + retrieve_images($item, $a); //@@@ backwards } } retriever_check_item_completed($item); diff --git a/retriever/templates/rule-config.tpl b/retriever/templates/rule-config.tpl index 9061d1ff..171054de 100644 --- a/retriever/templates/rule-config.tpl +++ b/retriever/templates/rule-config.tpl @@ -41,6 +41,25 @@ function retriever_remove_row(id, number) tbody.removeChild(row); } +function retriever_toggle_url_block() +{ + var pattern = document.querySelector("#id_retriever_pattern").parentNode; + if (document.querySelector("#id_retriever_modurl").checked) { + pattern.style.display = "block"; + } + else { + pattern.style.display = "none"; + } + + var replace = document.querySelector("#id_retriever_replace").parentNode; + if (document.querySelector("#id_retriever_modurl").checked) { + replace.style.display = "block"; + } + else { + replace.style.display = "none"; + } +} + function retriever_toggle_cookiedata_block() { var div = document.querySelector("#id_retriever_cookiedata").parentNode; @@ -53,6 +72,8 @@ function retriever_toggle_cookiedata_block() } document.addEventListener('DOMContentLoaded', function() { + retriever_toggle_url_block(); + document.querySelector("#id_retriever_modurl").addEventListener('change', retriever_toggle_url_block, false); retriever_toggle_cookiedata_block(); document.querySelector("#id_retriever_storecookies").addEventListener('change', retriever_toggle_cookiedata_block, false); }, false); @@ -62,10 +83,6 @@ document.addEventListener('DOMContentLoaded', function() {

{{include file="field_checkbox.tpl" field=$enable}} -{{include file="field_input.tpl" field=$pattern}} -{{include file="field_input.tpl" field=$replace}} -{{include file="field_checkbox.tpl" field=$images}} -{{include file="field_input.tpl" field=$retrospective}}

{{$include_t}}:

@@ -98,7 +115,7 @@ document.addEventListener('DOMContentLoaded', function() {
- + {{if $exclude}} @@ -122,9 +139,14 @@ document.addEventListener('DOMContentLoaded', function() {
TagAttributeValue
{{$tag_t}}{{$attribute_t}}{{$value_t}}
+{{include file="field_checkbox.tpl" field=$modurl}} +{{include file="field_input.tpl" field=$pattern}} +{{include file="field_input.tpl" field=$replace}} +{{include file="field_checkbox.tpl" field=$images}} {{include file="field_textarea.tpl" field=$customxslt}} {{include file="field_checkbox.tpl" field=$storecookies}} {{include file="field_textarea.tpl" field=$cookiedata}} +{{include file="field_input.tpl" field=$retrospective}} From b1b9fd6af8290200f68a3692425ea94eecec80e5 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Tue, 8 Oct 2019 07:29:59 +0200 Subject: [PATCH 27/94] Now retriever works again --- retriever/retriever.php | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 6ace5e98..988bbc43 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -127,8 +127,9 @@ function retriever_retrieve_items($max_items, $a) { } $retrieve_items = $max_items - $retriever_item_count; - Logger::debug('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items); do { + //@@@ check this looks sane after moving inside the loop + Logger::debug('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items); // TODO: figure out how to do this with DBA module $retriever_resources = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d", DBA::escape(implode($schedule_clauses, ' OR ')), @@ -178,7 +179,7 @@ function retriever_clean_up_completed_resources($max_items, $a) { $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]); retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a); Logger::info('@@@ retriever_clean_up_completed_resources tried to update id ' . 
$retriever_item['id'] . ' to finished, better check that it really worked!'); - DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished']); + DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished' => 0]); retriever_check_item_completed($item); } } @@ -232,7 +233,7 @@ function retrieve_resource($resource) { $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar); if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) { $retriever_rule['data']['cookiedata'] = file_get_contents($cookiejar); - DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])]); + DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], $retriever_rule); //@@@ check the update worked unlink($cookiejar); } @@ -305,24 +306,22 @@ function retriever_item_completed($retriever_item_id, $resource, $a) { retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a); - DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished']); + DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished' => 0]); retriever_check_item_completed($item); } function retriever_resource_completed($resource, $a) { Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . 
$resource['url']); - foreach (DBA::select('retriever_item', ['id'], ['resource' => intval($resource['id'])]) as $retriever_item) { - retriever_item_completed($retriever_item['id'], $resource, $a); + foreach (DBA::selectToArray('retriever_item', ['id'], ['resource' => intval($resource['id'])]) as $retriever_item) { + retriever_item_completed($retriever_item['id'], $resource, $a); //@@@ args in wrong order } } function apply_retrospective($a, $retriever, $num) { - Logger::debug('@@@ apply_retrospective'); - foreach (Item::select([], ['contact-id' => intval($retriever['contact-id'])], ['order' => ['received' => true], 'limit' => $num]) as $item) { - Logger::debug('@@@ apply_retrospective got item id ' . $item['id'] . ' uri ' . $item['uri']); + foreach (Item::selectToArray([], ['contact-id' => intval($retriever['contact-id'])], ['order' => ['received' => true], 'limit' => $num]) as $item) { Item::update(['visible' => 0], ['id' => intval($item['id'])]); //@@@ check that this works - foreach (DBA::select('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]) as $retriever_item) { + foreach (DBA::selectToArray('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]) as $retriever_item) { DBA::delete('retriever_resource', ['id' => $retriever_item['resource']]); DBA::delete('retriever_item', ['id' => $retriever_item['id']]); } @@ -593,7 +592,7 @@ function retrieve_images(&$item, $a) { function retriever_check_item_completed(&$item) { - // TODO: figure out how to do this with DBA module + // TODO: figure out how to do this with DBA module //@@@ selectFirst works $r = q('SELECT count(*) FROM retriever_item WHERE `item-uri` = "%s" ' . 'AND `item-uid` = %d AND `contact-id` = %d AND `finished` = 0', DBA::escape($item['uri']), intval($item['uid']), @@ -604,7 +603,7 @@ function retriever_check_item_completed(&$item) $item['visible'] = $waiting ? 
0 : 1; if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) { Logger::debug('retriever_check_item_completed: changing visible flag to ' . $item['visible']); - Item::update(['visible' => 0], ['id' => intval($item['id'])]); + Item::update(['visible' => $item['visible']], ['id' => intval($item['id'])]); } } @@ -615,6 +614,8 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc retriever_transform_images($a, $item, $resource); } if (!$retriever) { + //@@@ log line here: how normal is this? + Logger::info('@@@ retriever_apply_completed_resource_to_item no retriever'); return; } if ((strpos($resource['type'], 'html') !== false) || @@ -674,12 +675,11 @@ function retriever_content($a) { return; } if ($a->argv[1] === 'help') { - //@@@ fix me - $feeds = q("SELECT `id`, `name`, `thumb` FROM contact WHERE `uid` = %d AND `network` = 'feed'", - local_user()); - foreach ($feeds as $k=>$v) { - $feeds[$k]['url'] = $a->getBaseUrl() . '/retriever/' . $v['id']; + $feeds = DBA::selectToArray('contact', ['id', 'name', 'thumb'], ['uid' => local_user(), 'network' => 'feed']); + for ($i = 0; $i < count($feeds); ++$i) { + $feeds[$i]['url'] = $a->getBaseUrl() . '/retriever/' . $feeds[$i]['id']; } + //@@@ this is broken $template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/'); $a->page['content'] .= Renderer::replaceMacros($template, array( '$config' => $a->getBaseUrl() . 
'/settings/addon', From e8ab4f3adb86aabea46acfe89c3ae6b0c2aafcbd Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Tue, 8 Oct 2019 18:55:34 +0200 Subject: [PATCH 28/94] maybe broken again --- retriever/retriever.php | 45 +++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 988bbc43..128fc80e 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -262,10 +262,8 @@ function retrieve_resource($resource) { function get_retriever_rule($contact_id, $uid, $create = false) { $retriever_rule = DBA::selectFirst('retriever_rule', [], ['contact-id' => intval($contact_id), 'uid' => intval($uid)]); - //@@@ check that this worked if ($retriever_rule) { $retriever_rule['data'] = json_decode($retriever_rule['data'], true); - Logger::info('@@@ get_retriever_rule returning an actual thing'); return $retriever_rule; } if ($create) { @@ -288,7 +286,7 @@ function retriever_get_item($retriever_item) { return $item; } -function retriever_item_completed($retriever_item_id, $resource, $a) { +function retriever_item_completed($a, $retriever_item_id, $resource) { Logger::debug('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url']); $retriever_item = retriever_get_retriever_item($retriever_item_id); @@ -313,7 +311,7 @@ function retriever_item_completed($retriever_item_id, $resource, $a) { function retriever_resource_completed($resource, $a) { Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . 
$resource['url']); foreach (DBA::selectToArray('retriever_item', ['id'], ['resource' => intval($resource['id'])]) as $retriever_item) { - retriever_item_completed($retriever_item['id'], $resource, $a); //@@@ args in wrong order + retriever_item_completed($a, $retriever_item['id'], $resource); } } @@ -358,6 +356,7 @@ function retriever_on_item_insert($a, $retriever, &$item) { } $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id']); + Logger::debug('@@@ check this makes sense: ' . $resource['id'] . ' url ' . $resource['url']); $retriever_item_id = add_retriever_item($item, $resource); } @@ -374,7 +373,6 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { $url = 'md5://' . hash('md5', $url); if (DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)])) { - //@@@ test that this really happens - it should sometimes Logger::debug('add_retriever_resource: Resource ' . $url . ' already requested'); return $resource; } @@ -390,10 +388,7 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { intval($binary ? 
1 : 0), DBA::escape($url), DBA::escape($data)); - //@@@ fix this - $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url)); - $resource = $r[0]; - if (count($r)) { + if (DBA::selectFirst('retriever_resource', [], ['url' => $url])) { retriever_resource_completed($resource, $a); } return $resource; @@ -403,19 +398,15 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { Logger::warning('add_retriever_resource: URL is longer than 800 characters'); } - //@@@ fix this - $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid)); - if (count($r)) { + if (DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)])) { Logger::debug('add_retriever_resource: Resource ' . $url . ' uid ' . $uid . ' cid ' . $cid . ' already requested'); return $r[0]; } - //@@@ fix this - q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `binary`, `url`) " . - "VALUES (%d, %d, %d, '%s')", intval($uid), intval($cid), intval($binary ? 1 : 0), DBA::escape($url)); - //@@@ fix this - $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url)); - return $r[0]; + DBA::insert('retriever_rule', ['item-uid' => intval($uid), 'contact-id' => intval($cid), 'binary' => ($binary ? 1 : 0), 'url' => $url]); + Logge::debug('@@@ add_retriever_resource inserting resource ' . $url . ' uid ' . $uid . ' cid ' . 
$cid); + //@@@ check the insert worked + return DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); } function add_retriever_item(&$item, $resource) { @@ -557,7 +548,7 @@ function retriever_globalise_urls($doc, $resource) { return $doc; } -function retrieve_images(&$item, $a) { +function retrieve_images($a, &$item) { // Note that $item doesn't necessarily contain all the fields you would expect, in particular 'id' //@@@ doe sit contain uri-id? //@@@ it definitely does not @@ -580,6 +571,7 @@ function retrieve_images(&$item, $a) { foreach ($matches as $url) { if (strpos($url, get_app()->getBaseUrl()) === FALSE) { $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id'], true); + Logger::debug('@@@ check this makes sense 2: ' . $resource['id'] . ' url ' . $resource['url']); if (!$resource['completed']) { add_retriever_item($item, $resource); } @@ -592,6 +584,8 @@ function retrieve_images(&$item, $a) { function retriever_check_item_completed(&$item) { + $waiting = DBA::selectFirst('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'contact-id' => intval($item['contact-id']), 'finished' => 0]); + Logger::debug('@@@ waiting is ' . $waiting); // TODO: figure out how to do this with DBA module //@@@ selectFirst works $r = q('SELECT count(*) FROM retriever_item WHERE `item-uri` = "%s" ' . 'AND `item-uid` = %d AND `contact-id` = %d AND `finished` = 0', @@ -610,19 +604,17 @@ function retriever_check_item_completed(&$item) function retriever_apply_completed_resource_to_item($retriever, &$item, $resource, $a) { Logger::debug('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . 
$item['plink']); if (strpos($resource['type'], 'image') !== false) { - Logger::info('@@@ retriever_apply_completed_resource_to_item this is an image must transform'); retriever_transform_images($a, $item, $resource); } if (!$retriever) { - //@@@ log line here: how normal is this? - Logger::info('@@@ retriever_apply_completed_resource_to_item no retriever'); + Logger::warning('retriever_apply_completed_resource_to_item: no retriever'); return; } if ((strpos($resource['type'], 'html') !== false) || (strpos($resource['type'], 'xml') !== false)) { retriever_apply_dom_filter($retriever, $item, $resource); if ($retriever['data']['images'] ) { - retrieve_images($item, $a); + retrieve_images($a, $item); } } } @@ -716,9 +708,8 @@ function retriever_content($a) { unset($retriever_rule['data']['exclude'][$k]); } } - //@@@ fix me - q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d", - DBA::escape(json_encode($retriever_rule['data'])), intval($retriever_rule["id"])); + //@@@ check that this works + DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], ['data' => '']); $a->page['content'] .= "

Settings Updated"; if (!empty($_POST["retriever_retrospective"])) { apply_retrospective($a, $retriever_rule, $_POST["retriever_retrospective"]); @@ -821,7 +812,7 @@ function retriever_post_remote_hook(&$a, &$item) { } } if (PConfig::get($item["uid"], 'retriever', 'all_photos')) { - retrieve_images($item, $a); //@@@ backwards + retrieve_images($a, $item); } } retriever_check_item_completed($item); From c8cbd41161dd9f988159961f069976613539581a Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 9 Oct 2019 20:54:39 +0200 Subject: [PATCH 29/94] working much better --- retriever/retriever.php | 160 +++++++++++++++++------------------ retriever/templates/help.tpl | 15 +++- 2 files changed, 92 insertions(+), 83 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 128fc80e..b8af7d3d 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -128,8 +128,7 @@ function retriever_retrieve_items($max_items, $a) { $retrieve_items = $max_items - $retriever_item_count; do { - //@@@ check this looks sane after moving inside the loop - Logger::debug('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items); + Logger::debug('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . intval($retriever_item_count) . ', retrieve ' . 
$retrieve_items); // TODO: figure out how to do this with DBA module $retriever_resources = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d", DBA::escape(implode($schedule_clauses, ' OR ')), @@ -185,9 +184,11 @@ function retriever_clean_up_completed_resources($max_items, $a) { } function retriever_tidy() { - // TODO: figure out how to do this with DBA module @@@ it is possible - q("DELETE FROM retriever_resource WHERE completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)"); - q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)"); + DBA::delete('retriever_resource', ['completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)']); + DBA::delete('retriever_resource', ['completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)']); + // @@@ check that this worked + /* q("DELETE FROM retriever_resource WHERE completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)"); */ + /* q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)"); */ $r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); Logger::info('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource'); @@ -286,7 +287,7 @@ function retriever_get_item($retriever_item) { return $item; } -function retriever_item_completed($a, $retriever_item_id, $resource) { +function retriever_item_completed($retriever_item_id, $resource) { Logger::debug('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url']); $retriever_item = retriever_get_retriever_item($retriever_item_id); @@ -302,20 +303,20 @@ function retriever_item_completed($a, $retriever_item_id, $resource) { // Note: the retriever might be null. Doesn't matter. 
$retriever_rule = get_retriever_rule($retriever_item['contact-id'], $retriever_item['item-uid']); - retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a); + retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource); DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished' => 0]); retriever_check_item_completed($item); } -function retriever_resource_completed($resource, $a) { +function retriever_resource_completed($resource) { Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url']); foreach (DBA::selectToArray('retriever_item', ['id'], ['resource' => intval($resource['id'])]) as $retriever_item) { - retriever_item_completed($a, $retriever_item['id'], $resource); + retriever_item_completed($retriever_item['id'], $resource); } } -function apply_retrospective($a, $retriever, $num) { +function apply_retrospective($retriever, $num) { foreach (Item::selectToArray([], ['contact-id' => intval($retriever['contact-id'])], ['order' => ['received' => true], 'limit' => $num]) as $item) { Item::update(['visible' => 0], ['id' => intval($item['id'])]); //@@@ check that this works @@ -323,13 +324,15 @@ function apply_retrospective($a, $retriever, $num) { DBA::delete('retriever_resource', ['id' => $retriever_item['resource']]); DBA::delete('retriever_item', ['id' => $retriever_item['id']]); } - retriever_on_item_insert($a, $retriever, $item); + retriever_on_item_insert($retriever, $item); } } // TODO: Currently this waits until the next cron before actually downloading. Should do it immediately. +//@@@ I think the above statement is wrong. Check! // TODO: This queries then inserts. It should use some kind of lock to avoid requesting the same resource twice. 
-function retriever_on_item_insert($a, $retriever, &$item) { +function retriever_on_item_insert($retriever, &$item) { + Logger::debug('@@@ retriever_on_item_insert start'); if (!$retriever || !$retriever['id']) { Logger::info('retriever_on_item_insert: No retriever supplied'); return; @@ -349,18 +352,19 @@ function retriever_on_item_insert($a, $retriever, &$item) { $url = $content['plink']; } - if ($retriever['data']['modurl']) { + if (array_key_exists('modurl', $retriever['data']) && $retriever['data']['modurl']) { $orig_url = $url; $url = preg_replace('/' . $retriever['data']['pattern'] . '/', $retriever['data']['replace'], $orig_url); Logger::debug('retriever_on_item_insert: Changed ' . $orig_url . ' to ' . $url); } - $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id']); + $resource = add_retriever_resource($url, $item['uid'], $item['contact-id']); Logger::debug('@@@ check this makes sense: ' . $resource['id'] . ' url ' . $resource['url']); + Logger::debug('@@@ it does not make sense ' . print_r($resource, true)); $retriever_item_id = add_retriever_item($item, $resource); } -function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { +function add_retriever_resource($url, $uid, $cid, $binary = false) { Logger::debug('add_retriever_resource: url ' . $url . ' uid ' . $uid . ' contact-id ' . $cid); $scheme = parse_url($url, PHP_URL_SCHEME); @@ -378,18 +382,10 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { } Logger::debug('retrieve_resource: got data URL type ' . $resource['type']); - // TODO: figure out how to do this with DBA module - // @@@ DBA::update('workerqueue', ['executed' => DateTimeFormat::utcNow()], ['pid' => $mypid, 'done' => false]); - q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `type`, `binary`, `url`, `completed`, `data`) " . - "VALUES (%d, %d, '%s', %d, '%s', now(), '%s')", - intval($uid), - intval($cid), - DBA::escape($type), - intval($binary ? 
1 : 0), - DBA::escape($url), - DBA::escape($data)); + DBA::insert('retriever_resource', ['item-uid' => intval($uid), 'contact-id' => intval($cid), 'type' => $type, 'binary' => ($binary ? 1 : 0), 'url' => $url, 'completed' => DateTimeFormat::utcNow(), 'data' => $data]); + // @@@ check that this makes sense if (DBA::selectFirst('retriever_resource', [], ['url' => $url])) { - retriever_resource_completed($resource, $a); + retriever_resource_completed($resource); } return $resource; } @@ -403,37 +399,31 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) { return $r[0]; } - DBA::insert('retriever_rule', ['item-uid' => intval($uid), 'contact-id' => intval($cid), 'binary' => ($binary ? 1 : 0), 'url' => $url]); - Logge::debug('@@@ add_retriever_resource inserting resource ' . $url . ' uid ' . $uid . ' cid ' . $cid); - //@@@ check the insert worked + DBA::insert('retriever_resource', ['item-uid' => intval($uid), 'contact-id' => intval($cid), 'binary' => ($binary ? 1 : 0), 'url' => $url]); return DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); } function add_retriever_item(&$item, $resource) { Logger::debug('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); - //@@@ can use selectFirst - $r = q("SELECT COUNT(*) FROM `retriever_item` WHERE " . 
- "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d", - DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id'])); - if ($r[0]['COUNT(*)'] > 0) { + if (!array_key_exists('id', $resource) || !$resource['id']) { + Logger::warning('add_retriever_item: resource is empty'); + //@@@ check that this does not happen + return; + } + if (DBA::selectFirst('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'resource' => intval($resource['id'])])) { + //@@@ check that this worked Logger::info("add_retriever_item: retriever item already present for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); return; } - //@@@ fix this - q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " . - "VALUES ('%s', %d, %d, %d)", - DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource["id"])); - //@@@ fix this - $r = q("SELECT id FROM `retriever_item` WHERE " . - "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d ORDER BY id DESC", - DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id'])); - if (!count($r)) { + DBA::insert('retriever_item', ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'contact-id' => intval($item['contact-id']), 'resource' => intval($resource['id'])]); + $retriever_item = DBA::selectFirst('retriever_item', ['id'], ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'resource' => intval($resource['id'])]); + if (!$retriever_item) { Logger::info("add_retriever_item: couldn't create retriever item for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); return; } - Logger::debug('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . 
$item['contact-id']); - return $r[0]['id']; + Logger::debug('add_retriever_item: created retriever_item ' . $retriever_item['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); + return $retriever_item['id']; } function retriever_get_encoding($resource) { @@ -454,17 +444,14 @@ function retriever_apply_xslt_text($xslt_text, $doc) { Logger::info('retriever_apply_xslt_text: could not load XML'); return $doc; } - Logger::debug('@@@ retriever_apply_xslt_text: ' . $xslt_text); $xp = new XsltProcessor(); $xp->importStylesheet($xslt_doc); $result = $xp->transformToDoc($doc); return $result; } -//@@@ I think this is supposed to update the $item, but it doesn't function retriever_apply_dom_filter($retriever, &$item, $resource) { - //@@@ check if id and uri-id are there //@@@ uri-id definitely is not - Logger::debug('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'] . ' uri-id ' . $item['uri-id']); + Logger::debug('retriever_apply_dom_filter: applying XSLT to uri ' . $item['uri'] . ' uid ' . $item['uid'] . ' contact ' . $item['contact-id']); if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) { Logger::info('retriever_apply_dom_filter: no include and no customxslt'); @@ -498,13 +485,16 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { $body .= $item['plink']; $body .= ']' . $item['plink'] . '[/url]'; - $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? Consider using item['id'] instead Logger::debug('retriever_apply_dom_filter: XSLT result \"' . $body . '\"'); - Item::update(['body' => $body], ['uri-id' => $uri_id]); + $item['body'] = $body; + if (array_key_exists('id', $item) && $item['id']) { //@@@ this should be a separate function + //@@@ check that this works + Logger::debug('@@@ retriever_apply_dom_filter updating item by id ' . 
$item['id']); + Item::update(['body' => $body], ['id' => $item['id']]); + } } function retriever_load_into_dom($resource) { - Logger::info('@@@ retriever_load_into_dom start'); $encoding = retriever_get_encoding($resource); $content = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $encoding); $doc = new DOMDocument('1.0', 'UTF-8'); @@ -514,12 +504,10 @@ function retriever_load_into_dom($resource) { else { $doc->loadXML($content); } - Logger::info('@@@ retriever_load_into_dom end'); return $doc; } function retriever_extract($doc, $retriever) { - Logger::info('@@@ retriever_extract start'); $params = array('$spec' => $retriever['data']); $extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/'); $extract_xslt = Renderer::replaceMacros($extract_template, $params); @@ -531,12 +519,10 @@ function retriever_extract($doc, $retriever) { Logger::debug('retriever_extract: applying custom XSLT \"' . $retriever['data']['customxslt'] . '\"'); $doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc); } - Logger::info('@@@ retriever_extract end'); return $doc; } function retriever_globalise_urls($doc, $resource) { - Logger::info('@@@ retriever_globalise_urls start'); $components = parse_url($resource['redirect-url']); $rooturl = $components['scheme'] . "://" . $components['host']; $dirurl = $rooturl . dirname($components['path']) . "/"; @@ -544,18 +530,19 @@ function retriever_globalise_urls($doc, $resource) { $fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/'); $fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params); $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc); - Logger::info('@@@ retriever_globalise_urls end'); return $doc; } -function retrieve_images($a, &$item) { - // Note that $item doesn't necessarily contain all the fields you would expect, in particular 'id' - //@@@ doe sit contain uri-id? 
//@@@ it definitely does not +function retrieve_images(&$item) { + // Note that $item might not yet have an id or a uri-id - Logger::debug('@@@ retrieve_images start item id '. (array_key_exists('id', $item) ? $item['id'] : 'undef') . ' uri ' . $item['uri'] . ' uri id ' . $item['uri-id'] . ' plink ' . $item['plink'] . ' guid ' . $item['guid']); $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? - $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $uri_id]); + $content = DBA::selectFirst('item-content', [], ['body'], ['uri-id' => $uri_id]); + if ($content['body'] != $item['body']) { + Logger::warning('@@@ this is probably bad right 3?'); + //@@@ check for this. + } $body = $content['body']; if (!strlen($body)) { Logger::warning('retrieve_images: no body for uri-id ' . $uri_id); @@ -570,13 +557,13 @@ function retrieve_images($a, &$item) { Logger::debug('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); foreach ($matches as $url) { if (strpos($url, get_app()->getBaseUrl()) === FALSE) { - $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id'], true); + $resource = add_retriever_resource($url, $item['uid'], $item['contact-id'], true); Logger::debug('@@@ check this makes sense 2: ' . $resource['id'] . ' url ' . $resource['url']); if (!$resource['completed']) { add_retriever_item($item, $resource); } else { - retriever_transform_images($a, $item, $resource); + retriever_transform_images($item, $resource); } } } @@ -585,14 +572,7 @@ function retrieve_images($a, &$item) { function retriever_check_item_completed(&$item) { $waiting = DBA::selectFirst('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'contact-id' => intval($item['contact-id']), 'finished' => 0]); - Logger::debug('@@@ waiting is ' . 
$waiting); - // TODO: figure out how to do this with DBA module //@@@ selectFirst works - $r = q('SELECT count(*) FROM retriever_item WHERE `item-uri` = "%s" ' . - 'AND `item-uid` = %d AND `contact-id` = %d AND `finished` = 0', - DBA::escape($item['uri']), intval($item['uid']), - intval($item['contact-id'])); - $waiting = $r[0]['count(*)']; - Logger::debug('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources'); + Logger::debug('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] . ' '. $item['contact-id'] . ' waiting for resources'); $old_visible = $item['visible']; $item['visible'] = $waiting ? 0 : 1; if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) { @@ -601,10 +581,10 @@ function retriever_check_item_completed(&$item) } } -function retriever_apply_completed_resource_to_item($retriever, &$item, $resource, $a) { +function retriever_apply_completed_resource_to_item($retriever, &$item, $resource) { Logger::debug('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . $item['plink']); if (strpos($resource['type'], 'image') !== false) { - retriever_transform_images($a, $item, $resource); + retriever_transform_images($item, $resource); } if (!$retriever) { Logger::warning('retriever_apply_completed_resource_to_item: no retriever'); @@ -614,13 +594,19 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc (strpos($resource['type'], 'xml') !== false)) { retriever_apply_dom_filter($retriever, $item, $resource); if ($retriever['data']['images'] ) { - retrieve_images($a, $item); + retrieve_images($item); } } } -//@@@ todo: what is this reference for? 
document if needed delete if not -function retriever_transform_images($a, &$item, $resource) { +/** + * @brief Stores the image downloaded in the supplied resource and updates the item body by replacing the remote URL with the local URL. The body will be updated in the supplied item array. If the item has already been stored, and therefore has an ID already, the row in the database will be updated too. + * + * @param array &$item Row from the item table (by ref) + * @param array $resource Row from the resource table containing successfully downloaded image + */ +// TODO: split this into two functions, one to store the image, the other to change the item body +function retriever_transform_images(&$item, $resource) { if (!$resource['data']) { Logger::info('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url']); return; @@ -652,11 +638,16 @@ function retriever_transform_images($a, &$item, $resource) { return; } - $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $uri_id]); + $content = DBA::selectFirst('item-content', [], ['body'], ['uri-id' => $uri_id]); $body = $content['body']; + if ($body != $item['body']) { + Logger::warning('@@@ this is probably bad right 1?'); + //@@@ check for this. + } Logger::debug('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . $item['uri']); $body = str_replace($resource["url"], $new_url, $body); + $item['body'] = $body; Item::update(['body' => $body], ['uri-id' => $uri_id]); } @@ -671,7 +662,6 @@ function retriever_content($a) { for ($i = 0; $i < count($feeds); ++$i) { $feeds[$i]['url'] = $a->getBaseUrl() . '/retriever/' . $feeds[$i]['id']; } - //@@@ this is broken $template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/'); $a->page['content'] .= Renderer::replaceMacros($template, array( '$config' => $a->getBaseUrl() . 
'/settings/addon', @@ -799,20 +789,26 @@ function retriever_post_remote_hook(&$a, &$item) { $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? $retriever_rule = get_retriever_rule($item['contact-id'], $item["uid"], false); if ($retriever_rule) { - retriever_on_item_insert($a, $retriever_rule, $item); + retriever_on_item_insert($retriever_rule, $item); } else { if (PConfig::get($item["uid"], 'retriever', 'oembed')) { // Convert to HTML and back to take advantage of bbcode's resolution of oembeds. $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]); + if ($content['body'] != $item['body']) { + Logger::warning('@@@ this is probably bad right 2?'); + //@@@ check for this. + } $body = HTML::toBBCode(BBCode::convert($content['body'])); if ($body) { $item['body'] = $body; - Item::update(['body' => $body], ['uri-id' => $uri_id]); + if (array_key_exists('id', $item) && $item['id']) { + Item::update(['body' => $body], ['id' => $item['id']]); + } } } if (PConfig::get($item["uid"], 'retriever', 'all_photos')) { - retrieve_images($a, $item); + retrieve_images($item); } } retriever_check_item_completed($item); diff --git a/retriever/templates/help.tpl b/retriever/templates/help.tpl index 10b421d0..b96ec63c 100644 --- a/retriever/templates/help.tpl +++ b/retriever/templates/help.tpl @@ -143,6 +143,19 @@ Photos" box in the "Retriever Settings" section and click "Submit".

Configure Feeds:

{{foreach $feeds as $feed}} -{{include file='contact_template.tpl' contact=$feed}} +
+ +
+
+ {{$feed.name}} +
+
+
+
+ {{$feed.name}} +
+
+
+
{{/foreach}}
From 351482464b867564e3944fcd667932a5fca6629e Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 9 Oct 2019 20:56:46 +0200 Subject: [PATCH 30/94] small cleanup --- retriever/retriever.php | 3 --- 1 file changed, 3 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index b8af7d3d..029b0cff 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -332,7 +332,6 @@ function apply_retrospective($retriever, $num) { //@@@ I think the above statement is wrong. Check! // TODO: This queries then inserts. It should use some kind of lock to avoid requesting the same resource twice. function retriever_on_item_insert($retriever, &$item) { - Logger::debug('@@@ retriever_on_item_insert start'); if (!$retriever || !$retriever['id']) { Logger::info('retriever_on_item_insert: No retriever supplied'); return; @@ -359,8 +358,6 @@ function retriever_on_item_insert($retriever, &$item) { } $resource = add_retriever_resource($url, $item['uid'], $item['contact-id']); - Logger::debug('@@@ check this makes sense: ' . $resource['id'] . ' url ' . $resource['url']); - Logger::debug('@@@ it does not make sense ' . print_r($resource, true)); $retriever_item_id = add_retriever_item($item, $resource); } From d36fade822f76a7f0c20e9109b58301a83665416 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 9 Oct 2019 21:03:45 +0200 Subject: [PATCH 31/94] small addition --- retriever/retriever.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/retriever/retriever.php b/retriever/retriever.php index 029b0cff..4538e031 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -538,6 +538,7 @@ function retrieve_images(&$item) { $content = DBA::selectFirst('item-content', [], ['body'], ['uri-id' => $uri_id]); if ($content['body'] != $item['body']) { Logger::warning('@@@ this is probably bad right 3?'); + Logger::warning('@@@ content: ' . $content['body'] . ' item ' . $item['body']); //@@@ check for this. 
} $body = $content['body']; @@ -553,6 +554,9 @@ function retrieve_images(&$item) { $matches = array_merge($matches1[3], $matches2[1], $matches3[1]); Logger::debug('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); foreach ($matches as $url) { + if (!$url) { + continue; + } if (strpos($url, get_app()->getBaseUrl()) === FALSE) { $resource = add_retriever_resource($url, $item['uid'], $item['contact-id'], true); Logger::debug('@@@ check this makes sense 2: ' . $resource['id'] . ' url ' . $resource['url']); @@ -794,6 +798,7 @@ function retriever_post_remote_hook(&$a, &$item) { $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]); if ($content['body'] != $item['body']) { Logger::warning('@@@ this is probably bad right 2?'); + Logger::warning('@@@ content: ' . $content['body'] . ' item ' . $item['body']); //@@@ check for this. } $body = HTML::toBBCode(BBCode::convert($content['body'])); From 7e1a495e5fa7739e15c158c61082236ed69c9c8e Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Fri, 11 Oct 2019 18:47:32 +0200 Subject: [PATCH 32/94] I think this works --- retriever/retriever.php | 134 +++++++++++++++++++++++++--------------- 1 file changed, 84 insertions(+), 50 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 4538e031..42c4a55a 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -21,6 +21,7 @@ use Friendica\Core\L10n; use Friendica\Database\DBA; use Friendica\Model\ItemURI; use Friendica\Model\Item; +use Friendica\Util\DateTimeFormat; function retriever_install() { Addon::registerHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings'); @@ -129,7 +130,8 @@ function retriever_retrieve_items($max_items, $a) { $retrieve_items = $max_items - $retriever_item_count; do { Logger::debug('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . 
intval($retriever_item_count) . ', retrieve ' . $retrieve_items); - // TODO: figure out how to do this with DBA module + // TODO: figure out how to do this with DBA module //@@@ this is possible + $retriever_resources2 = DBA::selectToArray('retriever_resource', [], ['`completed` IS NULL AND (`last-try` IS NULL OR ' . implode($schedule_clauses, ' OR ') . ')'], ['order' => ['last-try' => 0], 'limit' => $retrieve_items]); $retriever_resources = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d", DBA::escape(implode($schedule_clauses, ' OR ')), intval($retrieve_items)); @@ -140,6 +142,7 @@ function retriever_retrieve_items($max_items, $a) { break; } Logger::debug('retriever_retrieve_items: found ' . count($retriever_resources) . ' waiting resources in database'); + Logger::debug('@@@ retriever_retrieve_items: alternative found ' . count($retriever_resources2) . ': ' . print_r($retriever_resources2, true)); foreach ($retriever_resources as $retriever_resource) { retrieve_resource($retriever_resource); $retriever_item_count++; @@ -186,9 +189,6 @@ function retriever_clean_up_completed_resources($max_items, $a) { function retriever_tidy() { DBA::delete('retriever_resource', ['completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)']); DBA::delete('retriever_resource', ['completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)']); - // @@@ check that this worked - /* q("DELETE FROM retriever_resource WHERE completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)"); */ - /* q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)"); */ $r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); Logger::info('retriever_tidy: found ' . count($r) . 
' retriever_items with no retriever_resource'); @@ -221,7 +221,15 @@ function retrieve_resource($resource) { $a = get_app(); $retriever_rule = get_retriever_rule($resource['contact-id'], $resource['item-uid']); + if (!$retriever_rule) { + Logger::warning('retrieve_resource: no rule found for contact ' . $resource['contact-id'] . ' item ' . $resource['item-uid']); + return; + } $rule_data = $retriever_rule['data']; + if (!$rule_data) { + Logger::warning('retrieve_resource: no rule data found for contact ' . $resource['contact-id'] . ' item ' . $resource['item-uid']); + return; + } try { Logger::debug('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url']); @@ -269,8 +277,8 @@ function get_retriever_rule($contact_id, $uid, $create = false) { } if ($create) { DBA::insert('retriever_rule', ['uid' => intval($uid), 'contact-id' => intval($contact_id)]); - //@@@ check that this worked - return DBA::selectFirst('retriever_rule', [], ['contact-id' => intval($contact_id), 'uid' => intval($uid)]); + $retriever_rule = DBA::selectFirst('retriever_rule', [], ['contact-id' => intval($contact_id), 'uid' => intval($uid)]); + return $retriever_rule; } } @@ -373,15 +381,16 @@ function add_retriever_resource($url, $uid, $cid, $binary = false) { fclose($fp); $url = 'md5://' . hash('md5', $url); - if (DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)])) { + $resource = DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); + if ($resource) { Logger::debug('add_retriever_resource: Resource ' . $url . ' already requested'); return $resource; } - Logger::debug('retrieve_resource: got data URL type ' . $resource['type']); DBA::insert('retriever_resource', ['item-uid' => intval($uid), 'contact-id' => intval($cid), 'type' => $type, 'binary' => ($binary ? 
1 : 0), 'url' => $url, 'completed' => DateTimeFormat::utcNow(), 'data' => $data]); // @@@ check that this makes sense - if (DBA::selectFirst('retriever_resource', [], ['url' => $url])) { + $resource = DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); + if ($resource) { retriever_resource_completed($resource); } return $resource; @@ -391,9 +400,10 @@ function add_retriever_resource($url, $uid, $cid, $binary = false) { Logger::warning('add_retriever_resource: URL is longer than 800 characters'); } - if (DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)])) { + $resource = DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); + if ($resource) { Logger::debug('add_retriever_resource: Resource ' . $url . ' uid ' . $uid . ' cid ' . $cid . ' already requested'); - return $r[0]; + return $resource; } DBA::insert('retriever_resource', ['item-uid' => intval($uid), 'contact-id' => intval($cid), 'binary' => ($binary ? 1 : 0), 'url' => $url]); @@ -530,27 +540,67 @@ function retriever_globalise_urls($doc, $resource) { return $doc; } +function retriever_get_body($item) { + if (array_key_exists('id', $item) && $item['id']) { + // item has already been stored in database + if (!array_key_exists('uri-id', $item) || !$item['uri-id']) { + Logger::warning('retriever_get_body: item uri ' . $item['uri'] . ' has id but no uri-id'); + //@@@ check never happens + return $item['body']; + } + $content = DBA::selectFirst('item-content', [], ['body'], ['uri-id' => $item['uri-id']]); + if (!$content) { + Logger::warning('retriever_get_body: item-content uri-id ' . $item['uri-id'] . ' has no content'); + //@@@ check never happens + return $item['body']; + } + if (!$content['body']) { + Logger::warning('retriever_get_body: item-content uri-id ' . $item['uri-id'] . 
' has no body'); + //@@@ check never happens + return $item['body']; + } + if ($content['body'] != $item['body']) { + Logger::warning('@@@ this is probably bad content: ' . $content['body'] . ' item ' . $item['body']); + //@@@ check for this. + } + Logger::debug('@@@ retriever_get_body uri-id ' . $item['uri-id'] . ' body: ' . $content['body']); + return $content['body']; + } + // item has not yet been stored in database + Logger::debug('@@@ retriever_get_body id ' . $item['id'] . ' body: ' . $item['body']); + return $item['body']; +} + +function retriever_set_body(&$item, $body, $allow_empty = false) { + if (!$body && !$allow_empty) { + Logger::debug('retriever_set_body: will not set empty body in item id ' . $item['id'] . ' uri ' . $item['uri']); + return; + } + $item['body'] = $body; + Logger::debug('@@@ retriever_set_body set array value to ' . $body); + if (array_key_exists('id', $item) && $item['id']) { + // item has already been stored in database + Logger::debug('@@@ retriever_set_body updating item ' . print_r($item, true) . ' to ' . $body); + Item::update(['body' => $body], ['id' => intval($item['id'])]); + } +} + +/** + * @brief @@@ + * + * @param array &$item Row from the item table (by ref) + */ function retrieve_images(&$item) { - // Note that $item might not yet have an id or a uri-id - - $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? - - $content = DBA::selectFirst('item-content', [], ['body'], ['uri-id' => $uri_id]); - if ($content['body'] != $item['body']) { - Logger::warning('@@@ this is probably bad right 3?'); - Logger::warning('@@@ content: ' . $content['body'] . ' item ' . $item['body']); - //@@@ check for this. - } - $body = $content['body']; + $body = retriever_get_body($item); if (!strlen($body)) { - Logger::warning('retrieve_images: no body for uri-id ' . $uri_id); + Logger::warning('retrieve_images: no body for item ' . 
$item['uri']); return; } // I suspect that the first two are not used any more? - preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $item["body"], $matches1); - preg_match_all("/\[img\](.*?)\[\/img\]/ism", $item["body"], $matches2); - preg_match_all("/\[img\=([^\]]*)\]([^[]*)\[\/img\]/ism", $item["body"], $matches3); + preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $body, $matches1); + preg_match_all("/\[img\](.*?)\[\/img\]/ism", $body, $matches2); + preg_match_all("/\[img\=([^\]]*)\]([^[]*)\[\/img\]/ism", $body, $matches3); $matches = array_merge($matches1[3], $matches2[1], $matches3[1]); Logger::debug('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); foreach ($matches as $url) { @@ -639,18 +689,11 @@ function retriever_transform_images(&$item, $resource) { return; } - $content = DBA::selectFirst('item-content', [], ['body'], ['uri-id' => $uri_id]); - $body = $content['body']; - if ($body != $item['body']) { - Logger::warning('@@@ this is probably bad right 1?'); - //@@@ check for this. - } + $body = retriever_get_body($item); Logger::debug('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . $item['uri']); $body = str_replace($resource["url"], $new_url, $body); - $item['body'] = $body; - - Item::update(['body' => $body], ['uri-id' => $uri_id]); + retriever_set_body($item, $body); } function retriever_content($a) { @@ -699,7 +742,6 @@ function retriever_content($a) { unset($retriever_rule['data']['exclude'][$k]); } } - //@@@ check that this works DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], ['data' => '']); $a->page['content'] .= "

Settings Updated"; if (!empty($_POST["retriever_retrospective"])) { @@ -783,7 +825,9 @@ function retriever_contact_photo_menu($a, &$args) { } function retriever_post_remote_hook(&$a, &$item) { - // Note that $item doesn't necessarily contain all the fields you would expect, in particular 'id' + // @@@ I believe this should either never have the id, or always should. This needs more investigation. + // @@@ and if it does not, does it have a content row? + Logger::debug('@@@ retriever_post_remote_hook uri ' . $item['uri'] . ' has id ' . array_key_exists('id', $item) . ' has uri-id ' . array_key_exists('uri-id', $item)); Logger::info('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); @@ -795,19 +839,9 @@ function retriever_post_remote_hook(&$a, &$item) { else { if (PConfig::get($item["uid"], 'retriever', 'oembed')) { // Convert to HTML and back to take advantage of bbcode's resolution of oembeds. - $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]); - if ($content['body'] != $item['body']) { - Logger::warning('@@@ this is probably bad right 2?'); - Logger::warning('@@@ content: ' . $content['body'] . ' item ' . $item['body']); - //@@@ check for this. 
- } - $body = HTML::toBBCode(BBCode::convert($content['body'])); - if ($body) { - $item['body'] = $body; - if (array_key_exists('id', $item) && $item['id']) { - Item::update(['body' => $body], ['id' => $item['id']]); - } - } + $body = retriever_get_body($item); + $body = HTML::toBBCode(BBCode::convert($body)); + retriever_set_body($item, $body); } if (PConfig::get($item["uid"], 'retriever', 'all_photos')) { retrieve_images($item); From 592b28c09b5ad53363ab9796fc4abf1d16490c4f Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sat, 12 Oct 2019 19:08:11 +0200 Subject: [PATCH 33/94] working much better --- retriever/retriever.php | 177 ++++++++++++++++++++-------------------- 1 file changed, 87 insertions(+), 90 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 42c4a55a..a71f302c 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -23,6 +23,9 @@ use Friendica\Model\ItemURI; use Friendica\Model\Item; use Friendica\Util\DateTimeFormat; +/** + * @brief Installation hook for retriever plugin + */ function retriever_install() { Addon::registerHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings'); Addon::registerHook('plugin_settings_post', 'addon/retriever/retriever.php', 'retriever_plugin_settings_post'); @@ -53,9 +56,9 @@ function retriever_install() { } if (Config::get('retriever', 'dbversion') != '0.14') { $schema = file_get_contents(dirname(__file__).'/database.sql'); - $arr = explode(';', $schema); - foreach ($arr as $a) { - if (!DBA::e($a)) { + $tables = explode(';', $schema); + foreach ($tables as $table) { + if (!DBA::e($table)) { Logger::warning('Unable to create database table: ' . 
DBA::errorMessage()); return; } @@ -65,6 +68,9 @@ function retriever_install() { } } +/** + * @brief Uninstallation hook for retriever plugin + */ function retriever_uninstall() { Addon::unregisterHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings'); Addon::unregisterHook('plugin_settings_post', 'addon/retriever/retriever.php', 'retriever_plugin_settings_post'); @@ -75,9 +81,17 @@ function retriever_uninstall() { Addon::unregisterHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); } +/** + * @brief Module hook for retriever plugin + * + * TODO: figure out what this should be used for + */ function retriever_module() {} -function retriever_addon_admin(&$a, &$o) { +/** + * @brief Admin page hook for retriever plugin + */ +function retriever_addon_admin() { $downloads_per_cron = Config::get('retriever', 'downloads_per_cron'); $template = Renderer::getMarkupTemplate('admin.tpl', 'addon/retriever/'); $config = ['downloads_per_cron', @@ -89,25 +103,36 @@ function retriever_addon_admin(&$a, &$o) { '$submit' => L10n::t('Save Settings')]); } -function retriever_addon_admin_post ($a) { +/** + * @brief Admin page post hook for retriever plugin + */ +function retriever_addon_admin_post () { if (!empty($_POST['downloads_per_cron'])) { Config::set('retriever', 'downloads_per_cron', $_POST['downloads_per_cron']); } } -function retriever_cron($a, $b) { +/** + * @brief Cron jobs for retriever plugin + */ +function retriever_cron() { $downloads_per_cron = Config::get('retriever', 'downloads_per_cron'); // Do this first, otherwise it can interfere with retriever_retrieve_items - retriever_clean_up_completed_resources($downloads_per_cron, $a); + retriever_clean_up_completed_resources($downloads_per_cron); - retriever_retrieve_items($downloads_per_cron, $a); + retriever_retrieve_items($downloads_per_cron); retriever_tidy(); } $retriever_item_count = 0; -function retriever_retrieve_items($max_items, $a) { +/** + * @brief Searches for items 
in the retriever_items table that should be retrieved and attempts to retrieve them + * + * @param int $max_items Maximum number of items to retrieve in this call + */ +function retriever_retrieve_items($max_items) { global $retriever_item_count; $retriever_schedule = array(array(1,'minute'), @@ -130,11 +155,7 @@ function retriever_retrieve_items($max_items, $a) { $retrieve_items = $max_items - $retriever_item_count; do { Logger::debug('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . intval($retriever_item_count) . ', retrieve ' . $retrieve_items); - // TODO: figure out how to do this with DBA module //@@@ this is possible - $retriever_resources2 = DBA::selectToArray('retriever_resource', [], ['`completed` IS NULL AND (`last-try` IS NULL OR ' . implode($schedule_clauses, ' OR ') . ')'], ['order' => ['last-try' => 0], 'limit' => $retrieve_items]); - $retriever_resources = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d", - DBA::escape(implode($schedule_clauses, ' OR ')), - intval($retrieve_items)); + $retriever_resources = DBA::selectToArray('retriever_resource', [], ['`completed` IS NULL AND (`last-try` IS NULL OR ' . implode($schedule_clauses, ' OR ') . ')'], ['order' => ['last-try' => 0], 'limit' => $retrieve_items]); if (!is_array($retriever_resources)) { break; } @@ -142,7 +163,6 @@ function retriever_retrieve_items($max_items, $a) { break; } Logger::debug('retriever_retrieve_items: found ' . count($retriever_resources) . ' waiting resources in database'); - Logger::debug('@@@ retriever_retrieve_items: alternative found ' . count($retriever_resources2) . ': ' . 
print_r($retriever_resources2, true)); foreach ($retriever_resources as $retriever_resource) { retrieve_resource($retriever_resource); $retriever_item_count++; @@ -153,8 +173,12 @@ function retriever_retrieve_items($max_items, $a) { Logger::debug('retriever_retrieve_items: finished retrieving items'); } -// Look for items that are waiting even though the resource has completed. This shouldn't happen, but is worth cleaning up if it does. -function retriever_clean_up_completed_resources($max_items, $a) { +/** + * @brief Looks for items that are waiting even though the resource has completed. This shouldn't happen, but is worth cleaning up if it does. + * + * @param int $max_items Maximum number of items to retrieve in this call + */ +function retriever_clean_up_completed_resources($max_items) { // TODO: figure out how to do this with DBA module $r = q('SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d', intval($max_items)); @@ -179,13 +203,15 @@ function retriever_clean_up_completed_resources($max_items, $a) { continue; } $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]); - retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a); - Logger::info('@@@ retriever_clean_up_completed_resources tried to update id ' . $retriever_item['id'] . 
' to finished, better check that it really worked!'); + retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource); DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished' => 0]); retriever_check_item_completed($item); } } +/** + * @brief Deletes old rows from the retriever_item and retriever_resource table that are unlikely to be needed + */ function retriever_tidy() { DBA::delete('retriever_resource', ['completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)']); DBA::delete('retriever_resource', ['completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)']); @@ -199,7 +225,7 @@ function retriever_tidy() { function retrieve_dataurl_resource($resource) { if (!preg_match("/date:(.*);base64,(.*)/", $resource['url'], $matches)) { - Logger::info('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern'); + Logger::warning('retrieve_dataurl_resource: resource ' . $resource['id'] . ' does not match pattern'); } else { $resource['type'] = $matches[1]; $resource['data'] = base64url_decode($matches[2]); @@ -210,7 +236,7 @@ function retrieve_dataurl_resource($resource) { DBA::escape($resource['data']), DBA::escape($resource['type']), intval($resource['id'])); - retriever_resource_completed($resource, $a); + retriever_resource_completed($resource); } function retrieve_resource($resource) { @@ -218,8 +244,6 @@ function retrieve_resource($resource) { return retrieve_dataurl_resource($resource); } - $a = get_app(); - $retriever_rule = get_retriever_rule($resource['contact-id'], $resource['item-uid']); if (!$retriever_rule) { Logger::warning('retrieve_resource: no rule found for contact ' . $resource['contact-id'] . ' item ' . 
$resource['item-uid']); @@ -243,7 +267,6 @@ function retrieve_resource($resource) { if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) { $retriever_rule['data']['cookiedata'] = file_get_contents($cookiejar); DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], $retriever_rule); - //@@@ check the update worked unlink($cookiejar); } $resource['data'] = $fetch_result->getBody(); @@ -254,18 +277,10 @@ function retrieve_resource($resource) { } catch (Exception $e) { Logger::info('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage()); } - // TODO: figure out how to do this with DBA module - q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `http-code` = %d, `redirect-url` = '%s' WHERE id = %d", - intval($resource['http-code']), - DBA::escape($resource['redirect-url']), - intval($resource['id'])); + DBA::update('retriever_resource', ['id' => intval($resource['id'])], ['last-try' => DateTimeFormat::utcNow(), 'num-tries' => intval($resource['num-tries']) + 1, 'http-code' => intval($resource['http-code']), 'redirect-url' => $resource['redirect-url']], ['last-try' => false]); if ($resource['data']) { - // TODO: figure out how to do this with DBA module - q("UPDATE `retriever_resource` SET `completed` = now(), `data` = '%s', `type` = '%s' WHERE id = %d", - DBA::escape($resource['data']), - DBA::escape($resource['type']), - intval($resource['id'])); - retriever_resource_completed($resource, $a); + DBA::update('retriever_resource', ['id' => intval($resource['id'])], ['completed' => DateTimeFormat::utcNow(), 'data' => $resource['data'], 'type' => $resource['type']], ['completed' => false]); + retriever_resource_completed($resource); } } @@ -327,7 +342,6 @@ function retriever_resource_completed($resource) { function apply_retrospective($retriever, $num) { foreach (Item::selectToArray([], ['contact-id' => 
intval($retriever['contact-id'])], ['order' => ['received' => true], 'limit' => $num]) as $item) { Item::update(['visible' => 0], ['id' => intval($item['id'])]); - //@@@ check that this works foreach (DBA::selectToArray('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]) as $retriever_item) { DBA::delete('retriever_resource', ['id' => $retriever_item['resource']]); DBA::delete('retriever_item', ['id' => $retriever_item['id']]); @@ -336,9 +350,14 @@ function apply_retrospective($retriever, $num) { } } -// TODO: Currently this waits until the next cron before actually downloading. Should do it immediately. -//@@@ I think the above statement is wrong. Check! -// TODO: This queries then inserts. It should use some kind of lock to avoid requesting the same resource twice. +/** + * @brief Queues an item for retrieval. It does not actually perform the retrieval. + * + * @param array $retriever Retriever rule configuration for this contact + * @param array $item Item that should be retrieved. This may or may not have been already stored in the database. + * + * TODO: This queries then inserts. It should use some kind of lock to avoid requesting the same resource twice. + */ function retriever_on_item_insert($retriever, &$item) { if (!$retriever || !$retriever['id']) { Logger::info('retriever_on_item_insert: No retriever supplied'); @@ -388,7 +407,6 @@ function add_retriever_resource($url, $uid, $cid, $binary = false) { } DBA::insert('retriever_resource', ['item-uid' => intval($uid), 'contact-id' => intval($cid), 'type' => $type, 'binary' => ($binary ? 
1 : 0), 'url' => $url, 'completed' => DateTimeFormat::utcNow(), 'data' => $data]); - // @@@ check that this makes sense $resource = DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); if ($resource) { retriever_resource_completed($resource); @@ -396,6 +414,7 @@ function add_retriever_resource($url, $uid, $cid, $binary = false) { return $resource; } + // 800 characters is the size of this field in the database if (strlen($url) > 800) { Logger::warning('add_retriever_resource: URL is longer than 800 characters'); } @@ -419,7 +438,6 @@ function add_retriever_item(&$item, $resource) { return; } if (DBA::selectFirst('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'resource' => intval($resource['id'])])) { - //@@@ check that this worked Logger::info("add_retriever_item: retriever item already present for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); return; } @@ -493,12 +511,7 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { $body .= ']' . $item['plink'] . '[/url]'; Logger::debug('retriever_apply_dom_filter: XSLT result \"' . $body . '\"'); - $item['body'] = $body; - if (array_key_exists('id', $item) && $item['id']) { //@@@ this should be a separate function - //@@@ check that this works - Logger::debug('@@@ retriever_apply_dom_filter updating item by id ' . $item['id']); - Item::update(['body' => $body], ['id' => $item['id']]); - } + retriever_set_body($item, $body); } function retriever_load_into_dom($resource) { @@ -541,34 +554,27 @@ function retriever_globalise_urls($doc, $resource) { } function retriever_get_body($item) { - if (array_key_exists('id', $item) && $item['id']) { - // item has already been stored in database - if (!array_key_exists('uri-id', $item) || !$item['uri-id']) { - Logger::warning('retriever_get_body: item uri ' . $item['uri'] . 
' has id but no uri-id'); - //@@@ check never happens - return $item['body']; - } - $content = DBA::selectFirst('item-content', [], ['body'], ['uri-id' => $item['uri-id']]); - if (!$content) { - Logger::warning('retriever_get_body: item-content uri-id ' . $item['uri-id'] . ' has no content'); - //@@@ check never happens - return $item['body']; - } - if (!$content['body']) { - Logger::warning('retriever_get_body: item-content uri-id ' . $item['uri-id'] . ' has no body'); - //@@@ check never happens - return $item['body']; - } - if ($content['body'] != $item['body']) { - Logger::warning('@@@ this is probably bad content: ' . $content['body'] . ' item ' . $item['body']); - //@@@ check for this. - } - Logger::debug('@@@ retriever_get_body uri-id ' . $item['uri-id'] . ' body: ' . $content['body']); - return $content['body']; + if (!array_key_exists('uri-id', $item) || !$item['uri-id']) { + // item has not yet been stored in database + return $item['body']; } - // item has not yet been stored in database - Logger::debug('@@@ retriever_get_body id ' . $item['id'] . ' body: ' . $item['body']); - return $item['body']; + + // item has been stored in database, body is stored in the item-content table + $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $item['uri-id']]); + if (!$content) { + Logger::warning('retriever_get_body: item-content uri-id ' . $item['uri-id'] . ' has no content'); + return $item['body']; + } + if (!$content['body']) { + Logger::warning('retriever_get_body: item-content uri-id ' . $item['uri-id'] . ' has no body'); + //@@@ check never happens + return $item['body']; + } + if ($content['body'] != $item['body']) { + Logger::warning('@@@ this is probably bad @@@ content: ' . $content['body'] . ' @@@ item: ' . $item['body']); + //@@@ check for this. 
+ } + return $content['body']; } function retriever_set_body(&$item, $body, $allow_empty = false) { @@ -577,16 +583,15 @@ function retriever_set_body(&$item, $body, $allow_empty = false) { return; } $item['body'] = $body; - Logger::debug('@@@ retriever_set_body set array value to ' . $body); - if (array_key_exists('id', $item) && $item['id']) { - // item has already been stored in database - Logger::debug('@@@ retriever_set_body updating item ' . print_r($item, true) . ' to ' . $body); - Item::update(['body' => $body], ['id' => intval($item['id'])]); + if (!array_key_exists('id', $item) || !$item['id']) { + // item has not yet been stored in database + return; } + Item::update(['body' => $body], ['id' => intval($item['id'])]); } /** - * @brief @@@ + * @brief Searches for images in the item and adds corresponding retriever_items. If the images have already been downloaded, updates the body in the supplied item array. * * @param array &$item Row from the item table (by ref) */ @@ -609,7 +614,6 @@ function retrieve_images(&$item) { } if (strpos($url, get_app()->getBaseUrl()) === FALSE) { $resource = add_retriever_resource($url, $item['uid'], $item['contact-id'], true); - Logger::debug('@@@ check this makes sense 2: ' . $resource['id'] . ' url ' . $resource['url']); if (!$resource['completed']) { add_retriever_item($item, $resource); } @@ -663,8 +667,6 @@ function retriever_transform_images(&$item, $resource) { return; } - $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? - $data = $resource['data']; $type = $resource['type']; $uid = $item['uid']; @@ -745,7 +747,7 @@ function retriever_content($a) { DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], ['data' => '']); $a->page['content'] .= "

Settings Updated"; if (!empty($_POST["retriever_retrospective"])) { - apply_retrospective($a, $retriever_rule, $_POST["retriever_retrospective"]); + apply_retrospective($retriever_rule, $_POST["retriever_retrospective"]); $a->page['content'] .= " and retrospectively applied to " . $_POST["retriever_retrospective"] . " posts"; } $a->page['content'] .= ".

"; @@ -825,13 +827,8 @@ function retriever_contact_photo_menu($a, &$args) { } function retriever_post_remote_hook(&$a, &$item) { - // @@@ I believe this should either never have the id, or always should. This needs more investigation. - // @@@ and if it does not, does it have a content row? - Logger::debug('@@@ retriever_post_remote_hook uri ' . $item['uri'] . ' has id ' . array_key_exists('id', $item) . ' has uri-id ' . array_key_exists('uri-id', $item)); - Logger::info('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); - $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? $retriever_rule = get_retriever_rule($item['contact-id'], $item["uid"], false); if ($retriever_rule) { retriever_on_item_insert($retriever_rule, $item); @@ -850,7 +847,7 @@ function retriever_post_remote_hook(&$a, &$item) { retriever_check_item_completed($item); } -function retriever_plugin_settings(&$a,&$s) { +function retriever_plugin_settings(&$a, &$s) { $all_photos = PConfig::get(local_user(), 'retriever', 'all_photos'); $oembed = PConfig::get(local_user(), 'retriever', 'oembed'); $template = Renderer::getMarkupTemplate('/settings.tpl', 'addon/retriever/'); From 9bf8602d8cadbe6ef08d282aa03b3ab057f51962 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 13 Oct 2019 10:40:24 +0200 Subject: [PATCH 34/94] Almost finished, maybe not working --- retriever/retriever.php | 1492 +++++++++++++++------------ retriever/templates/admin.tpl | 1 + retriever/templates/rule-config.tpl | 2 + retriever/templates/settings.tpl | 19 +- 4 files changed, 834 insertions(+), 680 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index a71f302c..33f9a40e 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -1,10 +1,10 @@ - */ + /** + * Name: Retriever + * Description: Follow the permalink of RSS/Atom feed items and replace the summary with the full content. 
+ * Version: 1.0 + * Author: Matthew Exon + */ use Friendica\Core\Addon; use Friendica\Core\Config; @@ -27,58 +27,37 @@ use Friendica\Util\DateTimeFormat; * @brief Installation hook for retriever plugin */ function retriever_install() { - Addon::registerHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings'); - Addon::registerHook('plugin_settings_post', 'addon/retriever/retriever.php', 'retriever_plugin_settings_post'); - Addon::registerHook('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook'); - Addon::registerHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); - Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); + Addon::registerHook('addon_settings', 'addon/retriever/retriever.php', 'retriever_addon_settings'); + Addon::registerHook('addon_settings_post', 'addon/retriever/retriever.php', 'retriever_addon_settings_post'); + Addon::registerHook('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook'); + Addon::registerHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); + Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); - if (Config::get('retriever', 'dbversion') == '0.10') { - q('ALTER TABLE `retriever_resource` MODIFY COLUMN `type` char(255) NULL DEFAULT NULL'); - q('ALTER TABLE `retriever_resource` MODIFY COLUMN `data` mediumblob NULL DEFAULT NULL'); - q('ALTER TABLE `retriever_rule` MODIFY COLUMN `data` mediumtext NULL DEFAULT NULL'); - Config::set('retriever', 'dbversion', '0.11'); - } - if (Config::get('retriever', 'dbversion') == '0.11') { - q('ALTER TABLE `retriever_resource` ADD INDEX `url` (`url`)'); - q('ALTER TABLE `retriever_resource` ADD INDEX `completed` (`completed`)'); - q('ALTER TABLE `retriever_item` ADD INDEX `finished` (`finished`)'); - q('ALTER TABLE `retriever_item` ADD INDEX `item-uid` (`item-uid`)'); - 
Config::set('retriever', 'dbversion', '0.12'); - } - if (Config::get('retriever', 'dbversion') == '0.12') { - q("ALTER TABLE `retriever_resource` ADD COLUMN `contact-id` int(10) unsigned NOT NULL DEFAULT '0' AFTER `id`"); - q("ALTER TABLE `retriever_resource` ADD COLUMN `item-uid` int(10) unsigned NOT NULL DEFAULT '0' AFTER `id`"); - Config::set('retriever', 'dbversion', '0.13'); - } - if (Config::get('retriever', 'dbversion') == '0.13') { - Config::set('retriever', 'downloads_per_cron', '100'); - } - if (Config::get('retriever', 'dbversion') != '0.14') { - $schema = file_get_contents(dirname(__file__).'/database.sql'); - $tables = explode(';', $schema); - foreach ($tables as $table) { - if (!DBA::e($table)) { - Logger::warning('Unable to create database table: ' . DBA::errorMessage()); - return; - } - } - Config::set('retriever', 'downloads_per_cron', '100'); - Config::set('retriever', 'dbversion', '0.14'); - } + if (Config::get('retriever', 'dbversion') != '0.14') { + $schema = file_get_contents(dirname(__file__).'/database.sql'); + $tables = explode(';', $schema); + foreach ($tables as $table) { + if (!DBA::e($table)) { + Logger::warning('Unable to create database table: ' . 
DBA::errorMessage()); + return; + } + } + Config::set('retriever', 'downloads_per_cron', '100'); + Config::set('retriever', 'dbversion', '0.14'); + } } /** * @brief Uninstallation hook for retriever plugin */ function retriever_uninstall() { - Addon::unregisterHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings'); - Addon::unregisterHook('plugin_settings_post', 'addon/retriever/retriever.php', 'retriever_plugin_settings_post'); - Addon::unregisterHook('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook'); - Addon::unregisterHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings'); - Addon::unregisterHook('plugin_settings_post', 'addon/retriever/retriever.php', 'retriever_plugin_settings_post'); - Addon::unregisterHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); - Addon::unregisterHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); + Addon::unregisterHook('addon_settings', 'addon/retriever/retriever.php', 'retriever_addon_settings'); + Addon::unregisterHook('addon_settings_post', 'addon/retriever/retriever.php', 'retriever_addon_settings_post'); + Addon::unregisterHook('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook'); + Addon::unregisterHook('addon_settings', 'addon/retriever/retriever.php', 'retriever_addon_settings'); + Addon::unregisterHook('addon_settings_post', 'addon/retriever/retriever.php', 'retriever_addon_settings_post'); + Addon::unregisterHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); + Addon::unregisterHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); } /** @@ -90,41 +69,55 @@ function retriever_module() {} /** * @brief Admin page hook for retriever plugin + * + * @param App $a App object (by ref) + * @param string $o HTML to append content to (by ref) */ -function retriever_addon_admin() { - $downloads_per_cron = 
Config::get('retriever', 'downloads_per_cron'); - $template = Renderer::getMarkupTemplate('admin.tpl', 'addon/retriever/'); - $config = ['downloads_per_cron', - L10n::t('Downloads per Cron'), - $downloads_per_cron, - L10n::t('Maximum number of downloads to attempt during each run of the cron job.')]; - $o .= Renderer::replaceMacros($template, [ - '$downloads_per_cron' => $config, - '$submit' => L10n::t('Save Settings')]); +function retriever_addon_admin(&$a, &$o) { + $template = Renderer::getMarkupTemplate('admin.tpl', 'addon/retriever/'); + + $downloads_per_cron = Config::get('retriever', 'downloads_per_cron'); + $downloads_per_cron_config = ['downloads_per_cron', + L10n::t('Downloads per Cron'), + $downloads_per_cron, + L10n::t('Maximum number of downloads to attempt during each run of the cron job.')]; + + $allow_images = Config::get('retriever', 'allow_images'); + $allow_images_config = ['allow_images', + L10n::t('Allow Retrieving Images'), + $allow_images, + L10n::t('Allow users to request images be downloaded as well as text.
Warning: the images are not automatically deleted and may fill up your database.')]; + + $o .= Renderer::replaceMacros($template, [ + '$downloads_per_cron' => $downloads_per_cron_config, + '$allow_images' => $allow_images_config, + '$submit' => L10n::t('Save Settings')]); } /** * @brief Admin page post hook for retriever plugin */ function retriever_addon_admin_post () { - if (!empty($_POST['downloads_per_cron'])) { - Config::set('retriever', 'downloads_per_cron', $_POST['downloads_per_cron']); - } + if (!empty($_POST['downloads_per_cron'])) { + Config::set('retriever', 'downloads_per_cron', $_POST['downloads_per_cron']); + } + Config::set('retriever', 'allow_images', $_POST['allow_images']); } /** * @brief Cron jobs for retriever plugin */ function retriever_cron() { - $downloads_per_cron = Config::get('retriever', 'downloads_per_cron'); + $downloads_per_cron = Config::get('retriever', 'downloads_per_cron'); - // Do this first, otherwise it can interfere with retriever_retrieve_items - retriever_clean_up_completed_resources($downloads_per_cron); + // Do this first, otherwise it can interfere with retriever_retrieve_items + retriever_clean_up_completed_resources($downloads_per_cron); - retriever_retrieve_items($downloads_per_cron); - retriever_tidy(); + retriever_retrieve_items($downloads_per_cron); + retriever_tidy(); } +// This global variable is used to track the number of items that have been retrieved during the course of this process $retriever_item_count = 0; /** @@ -133,44 +126,44 @@ $retriever_item_count = 0; * @param int $max_items Maximum number of items to retrieve in this call */ function retriever_retrieve_items($max_items) { - global $retriever_item_count; + global $retriever_item_count; - $retriever_schedule = array(array(1,'minute'), - array(10,'minute'), - array(1,'hour'), - array(1,'day'), - array(2,'day'), - array(1,'week'), - array(1,'month')); + $retriever_schedule = array(array(1,'minute'), + array(10,'minute'), + array(1,'hour'), + 
array(1,'day'), + array(2,'day'), + array(1,'week'), + array(1,'month')); - $schedule_clauses = array(); - for ($i = 0; $i < count($retriever_schedule); $i++) { - $num = $retriever_schedule[$i][0]; - $unit = $retriever_schedule[$i][1]; - array_push($schedule_clauses, - '(`num-tries` = ' . $i . ' AND TIMESTAMPADD(' . DBA::escape($unit) . - ', ' . intval($num) . ', `last-try`) < now())'); - } + $schedule_clauses = array(); + for ($i = 0; $i < count($retriever_schedule); $i++) { + $num = $retriever_schedule[$i][0]; + $unit = $retriever_schedule[$i][1]; + array_push($schedule_clauses, + '(`num-tries` = ' . $i . ' AND TIMESTAMPADD(' . DBA::escape($unit) . + ', ' . intval($num) . ', `last-try`) < now())'); + } - $retrieve_items = $max_items - $retriever_item_count; - do { - Logger::debug('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . intval($retriever_item_count) . ', retrieve ' . $retrieve_items); - $retriever_resources = DBA::selectToArray('retriever_resource', [], ['`completed` IS NULL AND (`last-try` IS NULL OR ' . implode($schedule_clauses, ' OR ') . ')'], ['order' => ['last-try' => 0], 'limit' => $retrieve_items]); - if (!is_array($retriever_resources)) { - break; - } - if (count($retriever_resources) == 0) { - break; - } - Logger::debug('retriever_retrieve_items: found ' . count($retriever_resources) . ' waiting resources in database'); - foreach ($retriever_resources as $retriever_resource) { - retrieve_resource($retriever_resource); - $retriever_item_count++; - } - $retrieve_items = $max_items - $retriever_item_count; - } - while ($retrieve_items > 0); - Logger::debug('retriever_retrieve_items: finished retrieving items'); + $retrieve_items = $max_items - $retriever_item_count; + do { + Logger::debug('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . intval($retriever_item_count) . ', retrieve ' . 
$retrieve_items); + $retriever_resources = DBA::selectToArray('retriever_resource', [], ['`completed` IS NULL AND (`last-try` IS NULL OR ' . implode($schedule_clauses, ' OR ') . ')'], ['order' => ['last-try' => 0], 'limit' => $retrieve_items]); + if (!is_array($retriever_resources)) { + break; + } + if (count($retriever_resources) == 0) { + break; + } + Logger::debug('retriever_retrieve_items: found ' . count($retriever_resources) . ' waiting resources in database'); + foreach ($retriever_resources as $retriever_resource) { + retrieve_resource($retriever_resource); + $retriever_item_count++; + } + $retrieve_items = $max_items - $retriever_item_count; + } + while ($retrieve_items > 0); + Logger::debug('retriever_retrieve_items: finished retrieving items'); } /** @@ -179,175 +172,221 @@ function retriever_retrieve_items($max_items) { * @param int $max_items Maximum number of items to retrieve in this call */ function retriever_clean_up_completed_resources($max_items) { - // TODO: figure out how to do this with DBA module - $r = q('SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d', - intval($max_items)); - if (!$r) { - $r = array(); - } - Logger::debug('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . count($r)); - foreach ($r as $rr) { - $retriever_item = retriever_get_retriever_item($rr['item']); - if (!DBA::isResult($retriever_item)) { - Logger::warning('retriever_clean_up_completed_resources: no retriever item with id ' . $rr['item']); - continue; - } - $item = retriever_get_item($retriever_item); - if (!$item) { - Logger::warning('retriever_clean_up_completed_resources: no item ' . 
$retriever_item['item-uri']); - continue; - } - $retriever_rule = get_retriever_rule($retriever_item['contact-id'], $item['uid']); - if (!$retriever_rule) { - Logger::warning('retriever_clean_up_completed_resources: no retriever for uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['uid'] . ' ' . $retriever_item['contact-id']); - continue; - } - $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]); - retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource); - DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished' => 0]); - retriever_check_item_completed($item); - } + // TODO: figure out how to do this with DBA module + $r = q('SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d', + intval($max_items)); + if (!$r) { + $r = array(); + } + Logger::debug('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . count($r)); + foreach ($r as $rr) { + $retriever_item = DBA::selectFirst('retriever_item', [], ['id' => intval($rr['item'])]); + if (!DBA::isResult($retriever_item)) { + Logger::warning('retriever_clean_up_completed_resources: no retriever item with id ' . $rr['item']); + continue; + } + $item = retriever_get_item($retriever_item); + if (!$item) { + Logger::warning('retriever_clean_up_completed_resources: no item ' . $retriever_item['item-uri']); + continue; + } + $retriever_rule = get_retriever_rule($retriever_item['contact-id'], $item['uid'], false); + if (!$retriever_rule) { + Logger::warning('retriever_clean_up_completed_resources: no retriever for uri ' . 
$retriever_item['item-uri'] . ' uid ' . $retriever_item['uid'] . ' ' . $retriever_item['contact-id']); + continue; + } + $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]); + retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource); + // TODO: I don't really get how the $old_fields argument to DBA::update works + DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished' => 0]); + retriever_check_item_completed($item); + } } /** * @brief Deletes old rows from the retriever_item and retriever_resource table that are unlikely to be needed */ function retriever_tidy() { - DBA::delete('retriever_resource', ['completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)']); - DBA::delete('retriever_resource', ['completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)']); + DBA::delete('retriever_resource', ['completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)']); + DBA::delete('retriever_resource', ['completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)']); - $r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); - Logger::info('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource'); - foreach ($r as $rr) { - q('DELETE FROM retriever_item WHERE id = %d', intval($rr['id'])); - } + $r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); + Logger::info('retriever_tidy: found ' . count($r) . 
' retriever_items with no retriever_resource'); + foreach ($r as $rr) { + q('DELETE FROM retriever_item WHERE id = %d', intval($rr['id'])); + } } +/** + * @brief Special case of retrieving a resource: if the URL is a data URL, do not use cURL, decode the URL directly + * + * @param array $resource The row from the retriever_resource table + */ function retrieve_dataurl_resource($resource) { - if (!preg_match("/date:(.*);base64,(.*)/", $resource['url'], $matches)) { - Logger::warning('retrieve_dataurl_resource: resource ' . $resource['id'] . ' does not match pattern'); - } else { - $resource['type'] = $matches[1]; - $resource['data'] = base64url_decode($matches[2]); - } + if (!preg_match("/date:(.*);base64,(.*)/", $resource['url'], $matches)) { + Logger::warning('retrieve_dataurl_resource: resource ' . $resource['id'] . ' does not match pattern'); + } else { + $resource['type'] = $matches[1]; + $resource['data'] = base64url_decode($matches[2]); + } - // Succeed or fail, there's no point retrying - q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `completed` = now(), `data` = '%s', `type` = '%s' WHERE id = %d", - DBA::escape($resource['data']), - DBA::escape($resource['type']), - intval($resource['id'])); - retriever_resource_completed($resource); + // Succeed or fail, there's no point retrying + DBA::update('retriever_resource', ['id' => intval($resource['id'])], ['last-try' => DateTimeFormat::utcNow(), 'num-tries' => intval($resource['num-tries']) + 1, 'completed' => DateTimeFormat::utcNow(), 'data' => $resource['data'], 'type' => $resource['type']], ['last-try' => false]); + retriever_resource_completed($resource); } +/** + * @brief Makes an attempt to retrieve the supplied resource, and updates the row in the table with the results + * + * @param array $resource The row from the retriever_resource table + */ function retrieve_resource($resource) { - if (substr($resource['url'], 0, 5) == "data:") { - return 
retrieve_dataurl_resource($resource); - } + $components = parse_url($resource['url']); + if ($components['scheme'] == "data") { + return retrieve_dataurl_resource($resource); + } + if (($components['scheme'] != "http") && ($components['scheme'] != "https")) { + Logger::warning('retrieve_resource: URL scheme not supported for ' . $resource['url']); + DBA::update('retriever_resource', ['completed' => DateTimeFormat::utcNow()], ['id' => intval($resource['id'])], ['completed' => false]); + retriever_resource_completed($resource); + return; + } - $retriever_rule = get_retriever_rule($resource['contact-id'], $resource['item-uid']); - if (!$retriever_rule) { - Logger::warning('retrieve_resource: no rule found for contact ' . $resource['contact-id'] . ' item ' . $resource['item-uid']); - return; - } - $rule_data = $retriever_rule['data']; - if (!$rule_data) { - Logger::warning('retrieve_resource: no rule data found for contact ' . $resource['contact-id'] . ' item ' . $resource['item-uid']); - return; - } + $retriever_rule = get_retriever_rule($resource['contact-id'], $resource['item-uid'], false); + if (!$retriever_rule) { + Logger::warning('retrieve_resource: no rule found for resource id ' . $resource['id'] . ' contact ' . $resource['contact-id'] . ' item ' . $resource['item-uid']); + DBA::update('retriever_resource', ['completed' => DateTimeFormat::utcNow()], ['id' => intval($resource['id'])], ['completed' => false]); + retriever_resource_completed($resource); + return; + } + $rule_data = $retriever_rule['data']; + if (!$rule_data) { + Logger::warning('retrieve_resource: no rule data found for resource id ' . $resource['id'] . ' contact ' . $resource['contact-id'] . ' item ' . $resource['item-uid']); + DBA::update('retriever_resource', ['completed' => DateTimeFormat::utcNow()], ['id' => intval($resource['id'])], ['completed' => false]); + retriever_resource_completed($resource); + return; + } - try { - Logger::debug('retrieve_resource: ' . ($resource['num-tries'] + 1) . 
' attempt at resource ' . $resource['id'] . ' ' . $resource['url']); - $redirects = 0; - $cookiejar = ''; - if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) { - $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); - file_put_contents($cookiejar, $rule_data['cookiedata']); - } - $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar); - if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) { - $retriever_rule['data']['cookiedata'] = file_get_contents($cookiejar); - DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], $retriever_rule); - unlink($cookiejar); - } - $resource['data'] = $fetch_result->getBody(); - $resource['http-code'] = $fetch_result->getReturnCode(); - $resource['type'] = $fetch_result->getContentType(); - $resource['redirect-url'] = $fetch_result->getRedirectUrl(); - Logger::debug('retrieve_resource: got code ' . $resource['http-code'] . ' retrieving resource ' . $resource['id'] . ' final url ' . $resource['redirect-url']); - } catch (Exception $e) { - Logger::info('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage()); - } - DBA::update('retriever_resource', ['id' => intval($resource['id'])], ['last-try' => DateTimeFormat::utcNow(), 'num-tries' => intval($resource['num-tries']) + 1, 'http-code' => intval($resource['http-code']), 'redirect-url' => $resource['redirect-url']], ['last-try' => false]); - if ($resource['data']) { - DBA::update('retriever_resource', ['id' => intval($resource['id'])], ['completed' => DateTimeFormat::utcNow(), 'data' => $resource['data'], 'type' => $resource['type']], ['completed' => false]); - retriever_resource_completed($resource); - } + try { + Logger::debug('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . 
$resource['url']); + $redirects = 0; + $cookiejar = ''; + if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) { + $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); + file_put_contents($cookiejar, $rule_data['cookiedata']); + } + $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar); + if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) { + $retriever_rule['data']['cookiedata'] = file_get_contents($cookiejar); + DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], $retriever_rule); + unlink($cookiejar); + } + $resource['data'] = $fetch_result->getBody(); + $resource['http-code'] = $fetch_result->getReturnCode(); + $resource['type'] = $fetch_result->getContentType(); + $resource['redirect-url'] = $fetch_result->getRedirectUrl(); + Logger::debug('retrieve_resource: got code ' . $resource['http-code'] . ' retrieving resource ' . $resource['id'] . ' final url ' . $resource['redirect-url']); + } catch (Exception $e) { + Logger::info('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . 
$e->getMessage()); + } + DBA::update('retriever_resource', ['last-try' => DateTimeFormat::utcNow(), 'num-tries' => intval($resource['num-tries']) + 1, 'http-code' => intval($resource['http-code']), 'redirect-url' => $resource['redirect-url']], ['id' => intval($resource['id'])], ['last-try' => false]); + if ($resource['data']) { + DBA::update('retriever_resource', ['completed' => DateTimeFormat::utcNow(), 'data' => $resource['data'], 'type' => $resource['type']], ['id' => intval($resource['id'])], ['completed' => false]); + retriever_resource_completed($resource); + } } -function get_retriever_rule($contact_id, $uid, $create = false) { - $retriever_rule = DBA::selectFirst('retriever_rule', [], ['contact-id' => intval($contact_id), 'uid' => intval($uid)]); - if ($retriever_rule) { - $retriever_rule['data'] = json_decode($retriever_rule['data'], true); - return $retriever_rule; - } - if ($create) { - DBA::insert('retriever_rule', ['uid' => intval($uid), 'contact-id' => intval($contact_id)]); - $retriever_rule = DBA::selectFirst('retriever_rule', [], ['contact-id' => intval($contact_id), 'uid' => intval($uid)]); - return $retriever_rule; - } -} - -function retriever_get_retriever_item($id) { - return DBA::selectFirst('retriever_item', [], ['id' => intval($id)]); +/** + * @brief Gets the retriever configuration for a particular contact. Optionally, will create a blank configuration. 
+ * + * @param int $contact_id The Contact ID of the retriever configuration + * @param int $uid The User ID of the retriever configuration + * @param boolean $create Whether to create a new configuration if none exists already + * @return array The row from the retriever_rule database for this configuration + */ +function get_retriever_rule($contact_id, $uid, $create) { + $retriever_rule = DBA::selectFirst('retriever_rule', [], ['contact-id' => intval($contact_id), 'uid' => intval($uid)]); + if ($retriever_rule) { + $retriever_rule['data'] = json_decode($retriever_rule['data'], true); + return $retriever_rule; + } + if ($create) { + DBA::insert('retriever_rule', ['uid' => intval($uid), 'contact-id' => intval($contact_id)]); + $retriever_rule = DBA::selectFirst('retriever_rule', [], ['contact-id' => intval($contact_id), 'uid' => intval($uid)]); + return $retriever_rule; + } } +/** + * @brief Looks up the item from the database that corresponds to the retriever_item + * + * @param array $retriever_item Row from the retriever_item table + * @return array Item that was found, or undef if no item could be found + */ function retriever_get_item($retriever_item) { - $item = Item::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid']), 'contact-id' => intval($retriever_item['contact-id'])]); - if (!DBA::isResult($item)) { - Logger::warning('retriever_get_item: no item found for uri ' . $retriever_item['item-uri']); - return; - } - return $item; + $item = Item::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid']), 'contact-id' => intval($retriever_item['contact-id'])]); + if (!DBA::isResult($item)) { + Logger::warning('retriever_get_item: no item found for uri ' . 
$retriever_item['item-uri']); + return; + } + return $item; } +/** + * @brief This function should be called when a resource is completed to trigger all next steps, based on the corresponding retriever item + * + * @param int $retriever_item_id ID of the retriever item corresponding to this resource + * @param array $resource The full details of the completed resource + */ function retriever_item_completed($retriever_item_id, $resource) { - Logger::debug('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url']); + Logger::debug('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url']); - $retriever_item = retriever_get_retriever_item($retriever_item_id); - if (!DBA::isResult($retriever_item)) { - Logger::info('retriever_item_completed: no retriever item with id ' . $retriever_item_id); - return; - } - $item = retriever_get_item($retriever_item); - if (!$item) { - Logger::warning('retriever_item_completed: no item ' . $retriever_item['item-uri']); - return; - } - // Note: the retriever might be null. Doesn't matter. - $retriever_rule = get_retriever_rule($retriever_item['contact-id'], $retriever_item['item-uid']); + $retriever_item = DBA::selectFirst('retriever_item', [], ['id' => intval($retriever_item_id)]); + if (!DBA::isResult($retriever_item)) { + Logger::info('retriever_item_completed: no retriever item with id ' . $retriever_item_id); + return; + } + $item = retriever_get_item($retriever_item); + if (!$item) { + Logger::warning('retriever_item_completed: no item ' . $retriever_item['item-uri']); + return; + } + // Note: the retriever might be null. Doesn't matter. 
+ $retriever_rule = get_retriever_rule($retriever_item['contact-id'], $retriever_item['item-uid'], false); - retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource); + retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource); - DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished' => 0]); - retriever_check_item_completed($item); + DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished' => 0]); + retriever_check_item_completed($item); } +/** + * @brief This function should be called when a resource is completed to trigger all next steps + * + * @param array $resource The full details of the completed resource + */ function retriever_resource_completed($resource) { - Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url']); - foreach (DBA::selectToArray('retriever_item', ['id'], ['resource' => intval($resource['id'])]) as $retriever_item) { - retriever_item_completed($retriever_item['id'], $resource); - } + Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url']); + foreach (DBA::selectToArray('retriever_item', ['id'], ['resource' => intval($resource['id'])]) as $retriever_item) { + retriever_item_completed($retriever_item['id'], $resource); + } } +/** + * @brief For a retriever config for a particular contact, remove existing artifacts for a number of completed items and queue them to be tried again. Will make the items invisible until they are again completed. The items chosen will be the most recently received. 
+ * + * @param array $retriever The row from the retriever_rule table for the contact + * @param int $num The number of existing items to queue for retrieval + */ function apply_retrospective($retriever, $num) { - foreach (Item::selectToArray([], ['contact-id' => intval($retriever['contact-id'])], ['order' => ['received' => true], 'limit' => $num]) as $item) { - Item::update(['visible' => 0], ['id' => intval($item['id'])]); - foreach (DBA::selectToArray('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]) as $retriever_item) { - DBA::delete('retriever_resource', ['id' => $retriever_item['resource']]); - DBA::delete('retriever_item', ['id' => $retriever_item['id']]); - } - retriever_on_item_insert($retriever, $item); - } + foreach (Item::selectToArray([], ['contact-id' => intval($retriever['contact-id'])], ['order' => ['received' => true], 'limit' => $num]) as $item) { + Item::update(['visible' => 0], ['id' => intval($item['id'])]); + foreach (DBA::selectToArray('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]) as $retriever_item) { + DBA::delete('retriever_resource', ['id' => $retriever_item['resource']]); + DBA::delete('retriever_item', ['id' => $retriever_item['id']]); + } + retriever_on_item_insert($retriever, $item); + } } /** @@ -359,299 +398,374 @@ function apply_retrospective($retriever, $num) { * TODO: This queries then inserts. It should use some kind of lock to avoid requesting the same resource twice. 
*/ function retriever_on_item_insert($retriever, &$item) { - if (!$retriever || !$retriever['id']) { - Logger::info('retriever_on_item_insert: No retriever supplied'); - return; - } - if (!array_key_exists('enable', $retriever['data']) || !$retriever['data']['enable'] == "on") { - return; - } - if (array_key_exists('plink', $item) && strlen($item['plink'])) { - $url = $item['plink']; - } - else { - if (!array_key_exists('uri-id', $item)) { - Logger::warning('retriever_on_item_insert: item ' . $item['id'] . ' has no plink and no uri-id'); - return; - } - $content = DBA::selectFirst('item-content', [], ['uri-id' => $item['uri-id']]); - $url = $content['plink']; - } + if (!$retriever || !$retriever['id']) { + Logger::info('retriever_on_item_insert: No retriever supplied'); + return; + } + if (!array_key_exists('enable', $retriever['data']) || !$retriever['data']['enable'] == "on") { + return; + } + if (array_key_exists('plink', $item) && strlen($item['plink'])) { + $url = $item['plink']; + } + else { + if (!array_key_exists('uri-id', $item)) { + Logger::warning('retriever_on_item_insert: item ' . $item['id'] . ' has no plink and no uri-id'); + return; + } + $content = DBA::selectFirst('item-content', [], ['uri-id' => $item['uri-id']]); + $url = $content['plink']; + } - if (array_key_exists('modurl', $retriever['data']) && $retriever['data']['modurl']) { - $orig_url = $url; - $url = preg_replace('/' . $retriever['data']['pattern'] . '/', $retriever['data']['replace'], $orig_url); - Logger::debug('retriever_on_item_insert: Changed ' . $orig_url . ' to ' . $url); - } + if (array_key_exists('modurl', $retriever['data']) && $retriever['data']['modurl']) { + $orig_url = $url; + $url = preg_replace('/' . $retriever['data']['pattern'] . '/', $retriever['data']['replace'], $orig_url); + Logger::debug('retriever_on_item_insert: Changed ' . $orig_url . ' to ' . 
$url); + } - $resource = add_retriever_resource($url, $item['uid'], $item['contact-id']); - $retriever_item_id = add_retriever_item($item, $resource); + $resource = add_retriever_resource($url, $item['uid'], $item['contact-id']); + $retriever_item_id = add_retriever_item($item, $resource); } +/** + * @brief Creates a new resource to be downloaded from the supplied URL. Unique resources are created for each URL, UID and contact ID, because different contact IDs may have different rules for how to retrieve them. If the URL is actually a data URL, the resource is completed immediately. + * + * @param string $url URL of the resource to be downloaded + * @param int $uid User ID that this resource is being downloaded fore + * @param int $cid Contact ID of the item that triggered the downloading of this resource + * @param boolean $binary Specifies if this download should be done in binary mode + * @return array The created resource + */ function add_retriever_resource($url, $uid, $cid, $binary = false) { - Logger::debug('add_retriever_resource: url ' . $url . ' uid ' . $uid . ' contact-id ' . $cid); + Logger::debug('add_retriever_resource: url ' . $url . ' uid ' . $uid . ' contact-id ' . $cid); - $scheme = parse_url($url, PHP_URL_SCHEME); - if ($scheme == 'data') { - $fp = fopen($url, 'r'); - $meta = stream_get_meta_data($fp); - $type = $meta['mediatype']; - $data = stream_get_contents($fp); - fclose($fp); + $scheme = parse_url($url, PHP_URL_SCHEME); + if ($scheme == 'data') { + $fp = fopen($url, 'r'); + $meta = stream_get_meta_data($fp); + $type = $meta['mediatype']; + $data = stream_get_contents($fp); + fclose($fp); - $url = 'md5://' . hash('md5', $url); - $resource = DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); - if ($resource) { - Logger::debug('add_retriever_resource: Resource ' . $url . ' already requested'); - return $resource; - } + $url = 'md5://' . 
hash('md5', $url); + $resource = DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); + if ($resource) { + Logger::debug('add_retriever_resource: Resource ' . $url . ' already requested'); + return $resource; + } - DBA::insert('retriever_resource', ['item-uid' => intval($uid), 'contact-id' => intval($cid), 'type' => $type, 'binary' => ($binary ? 1 : 0), 'url' => $url, 'completed' => DateTimeFormat::utcNow(), 'data' => $data]); - $resource = DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); - if ($resource) { - retriever_resource_completed($resource); - } - return $resource; - } + DBA::insert('retriever_resource', ['item-uid' => intval($uid), 'contact-id' => intval($cid), 'type' => $type, 'binary' => ($binary ? 1 : 0), 'url' => $url, 'completed' => DateTimeFormat::utcNow(), 'data' => $data]); + $resource = DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); + if ($resource) { + retriever_resource_completed($resource); + } + return $resource; + } - // 800 characters is the size of this field in the database - if (strlen($url) > 800) { - Logger::warning('add_retriever_resource: URL is longer than 800 characters'); - } + // 800 characters is the size of this field in the database + if (strlen($url) > 800) { + Logger::warning('add_retriever_resource: URL is longer than 800 characters'); + } - $resource = DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); - if ($resource) { - Logger::debug('add_retriever_resource: Resource ' . $url . ' uid ' . $uid . ' cid ' . $cid . 
' already requested'); - return $resource; - } + $resource = DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); + if ($resource) { + Logger::debug('add_retriever_resource: Resource ' . $url . ' uid ' . $uid . ' cid ' . $cid . ' already requested'); + return $resource; + } - DBA::insert('retriever_resource', ['item-uid' => intval($uid), 'contact-id' => intval($cid), 'binary' => ($binary ? 1 : 0), 'url' => $url]); - return DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); + DBA::insert('retriever_resource', ['item-uid' => intval($uid), 'contact-id' => intval($cid), 'binary' => ($binary ? 1 : 0), 'url' => $url]); + return DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); } -function add_retriever_item(&$item, $resource) { - Logger::debug('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); +/** + * @brief Adds a retriever item for the supplied resource and item, to mark that this item should wait for the resource to be completed. Does not create a retriever item if a matching one already exists. + * + * @param array $item Item that is waiting for the resource. This may or may not have been already stored in the database. + * @param array $resource Resource that the item needs to wait for. This must have already been stored in the database. + * @return int ID of the retriever item that was created, or the existing one if present + */ +function add_retriever_item($item, $resource) { + Logger::debug('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . 
$item['contact-id']); - if (!array_key_exists('id', $resource) || !$resource['id']) { - Logger::warning('add_retriever_item: resource is empty'); - //@@@ check that this does not happen - return; - } - if (DBA::selectFirst('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'resource' => intval($resource['id'])])) { - Logger::info("add_retriever_item: retriever item already present for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); - return; - } - DBA::insert('retriever_item', ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'contact-id' => intval($item['contact-id']), 'resource' => intval($resource['id'])]); - $retriever_item = DBA::selectFirst('retriever_item', ['id'], ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'resource' => intval($resource['id'])]); - if (!$retriever_item) { - Logger::info("add_retriever_item: couldn't create retriever item for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); - return; - } - Logger::debug('add_retriever_item: created retriever_item ' . $retriever_item['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); - return $retriever_item['id']; + if (!array_key_exists('id', $resource) || !$resource['id']) { + Logger::warning('add_retriever_item: resource is empty'); + return; + } + if (DBA::selectFirst('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'resource' => intval($resource['id'])])) { + Logger::info("add_retriever_item: retriever item already present for " . $item['uri'] . ' ' . $item['uid'] . ' ' . 
$item['contact-id']); + return; + } + DBA::insert('retriever_item', ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'contact-id' => intval($item['contact-id']), 'resource' => intval($resource['id'])]); + $retriever_item = DBA::selectFirst('retriever_item', ['id'], ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'resource' => intval($resource['id'])]); + if (!$retriever_item) { + Logger::info("add_retriever_item: couldn't create retriever item for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); + return; + } + Logger::debug('add_retriever_item: created retriever_item ' . $retriever_item['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); + return $retriever_item['id']; } +/** + * @brief Analyse a completed text resource (such as HTML) for the character encoding used + * + * @param array $resource The completed resource + * @return string Character encoding, e.g. "utf-8" or "iso-8859-1" + */ function retriever_get_encoding($resource) { - $matches = array(); - if (preg_match('/charset=(.*)/', $resource['type'], $matches)) { - return trim(array_pop($matches)); - } - return 'utf-8'; + $matches = array(); + if (preg_match('/charset=(.*)/', $resource['type'], $matches)) { + return trim(array_pop($matches)); + } + return 'utf-8'; } +/** + * @brief Apply the XSLT template to the DOM document + * + * @param string $xslt_text Text of the XSLT template + * @param DOMDocument $doc Input to the XSLT template + * @return DOMDocument Result of applying the template + */ function retriever_apply_xslt_text($xslt_text, $doc) { - if (!$xslt_text) { - Logger::info('retriever_apply_xslt_text: empty XSLT text'); - return $doc; - } - $xslt_doc = new DOMDocument(); - if (!$xslt_doc->loadXML($xslt_text)) { - Logger::info('retriever_apply_xslt_text: could not load XML'); - return $doc; - } - $xp = new XsltProcessor(); - $xp->importStylesheet($xslt_doc); - $result = $xp->transformToDoc($doc); - return 
$result; + if (!$xslt_text) { + Logger::info('retriever_apply_xslt_text: empty XSLT text'); + return $doc; + } + $xslt_doc = new DOMDocument(); + if (!$xslt_doc->loadXML($xslt_text)) { + Logger::info('retriever_apply_xslt_text: could not load XML'); + return $doc; + } + $xp = new XsltProcessor(); + $xp->importStylesheet($xslt_doc); + $result = $xp->transformToDoc($doc); + return $result; } +/** + * @brief Applies the retriever rules to the downloaded resource, and stores the results as the new body text of the item + * + * @param array $retriever Retriever rules as stored in the database, with the "data" element already decoded from JSON + * @param array &$item Item to be in which to store the new body (by ref). This may or may not be already stored in the database. + * @param array $resource Newly completed resource, which should be text (HTML or XML) + */ function retriever_apply_dom_filter($retriever, &$item, $resource) { - Logger::debug('retriever_apply_dom_filter: applying XSLT to uri ' . $item['uri'] . ' uid ' . $item['uid'] . ' contact ' . $item['contact-id']); + Logger::debug('retriever_apply_dom_filter: applying XSLT to uri ' . $item['uri'] . ' uid ' . $item['uid'] . ' contact ' . 
$item['contact-id']); - if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) { - Logger::info('retriever_apply_dom_filter: no include and no customxslt'); - return; - } - if (!$resource['data']) { - Logger::info('retriever_apply_dom_filter: no text to work with'); - return; - } + if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) { + Logger::info('retriever_apply_dom_filter: no include and no customxslt'); + return; + } + if (!$resource['data']) { + Logger::info('retriever_apply_dom_filter: no text to work with'); + return; + } - $doc = retriever_load_into_dom($resource); + $doc = retriever_load_into_dom($resource); - $doc = retriever_extract($doc, $retriever); - if (!$doc) { - Logger::info('retriever_apply_dom_filter: failed to apply extract XSLT template'); - return; - } + $doc = retriever_extract($doc, $retriever); + if (!$doc) { + Logger::info('retriever_apply_dom_filter: failed to apply extract XSLT template'); + return; + } - $doc = retriever_globalise_urls($doc, $resource); - if (!$doc) { - Logger::info('retriever_apply_dom_filter: failed to apply fix urls XSLT template'); - return; - } + $doc = retriever_globalise_urls($doc, $resource); + if (!$doc) { + Logger::info('retriever_apply_dom_filter: failed to apply fix urls XSLT template'); + return; + } - $body = HTML::toBBCode($doc->saveHTML()); - if (!strlen($body)) { - Logger::info('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty'); - return; - } - $body .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . ': [url='; - $body .= $item['plink']; - $body .= ']' . $item['plink'] . '[/url]'; + $body = HTML::toBBCode($doc->saveHTML()); + if (!strlen($body)) { + Logger::info('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty'); + return; + } + $body .= "\n\n" . L10n::t('Retrieved') . ' ' . 
date("Y-m-d") . ': [url='; + $body .= $item['plink']; + $body .= ']' . $item['plink'] . '[/url]'; - Logger::debug('retriever_apply_dom_filter: XSLT result \"' . $body . '\"'); - retriever_set_body($item, $body); + Logger::debug('retriever_apply_dom_filter: XSLT result \"' . $body . '\"'); + retriever_set_body($item, $body); } +/** + * @brief Converts the completed resource, which must be HTML or XML, into a DOM document + * + * @param array $resource The resource containing the text content + */ function retriever_load_into_dom($resource) { - $encoding = retriever_get_encoding($resource); - $content = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $encoding); - $doc = new DOMDocument('1.0', 'UTF-8'); - if (strpos($resource['type'], 'html') !== false) { - @$doc->loadHTML($content); - } - else { - $doc->loadXML($content); - } - return $doc; + $encoding = retriever_get_encoding($resource); + $content = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $encoding); + $doc = new DOMDocument('1.0', 'UTF-8'); + if (strpos($resource['type'], 'html') !== false) { + @$doc->loadHTML($content); + } + else { + $doc->loadXML($content); + } + return $doc; } +/** + * @brief Applies the retriever rules, including configuration for included and excluded portions, to the DOM document + * + * @param DOMDocument $doc The original DOM document downloaded from the link + * @param array $retriever The retriever configuration for this contact + * @return DOMDocument New DOM document containing only the desired content + */ function retriever_extract($doc, $retriever) { - $params = array('$spec' => $retriever['data']); - $extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/'); - $extract_xslt = Renderer::replaceMacros($extract_template, $params); - if ($retriever['data']['include']) { - Logger::debug('retriever_apply_dom_filter: applying include/exclude template \"' . $extract_xslt . 
'\"'); - $doc = retriever_apply_xslt_text($extract_xslt, $doc); - } - if (array_key_exists('customxslt', $retriever['data']) && $retriever['data']['customxslt']) { - Logger::debug('retriever_extract: applying custom XSLT \"' . $retriever['data']['customxslt'] . '\"'); - $doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc); - } - return $doc; + $params = array('$spec' => $retriever['data']); + $extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/'); + $extract_xslt = Renderer::replaceMacros($extract_template, $params); + if ($retriever['data']['include']) { + Logger::debug('retriever_apply_dom_filter: applying include/exclude template \"' . $extract_xslt . '\"'); + $doc = retriever_apply_xslt_text($extract_xslt, $doc); + } + if (array_key_exists('customxslt', $retriever['data']) && $retriever['data']['customxslt']) { + Logger::debug('retriever_extract: applying custom XSLT \"' . $retriever['data']['customxslt'] . '\"'); + $doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc); + } + return $doc; } +/** + * @brief Converts local URLs in the DOM document to global URLs + * + * @param DOMDocument $doc DOM document potentially containing links + * @param array $resource Completed resource which contains the text in the DOM document + * @return DOMDocument New DOM document with global URLs + */ function retriever_globalise_urls($doc, $resource) { - $components = parse_url($resource['redirect-url']); - $rooturl = $components['scheme'] . "://" . $components['host']; - $dirurl = $rooturl . dirname($components['path']) . "/"; - $params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl); - $fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/'); - $fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params); - $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc); - return $doc; + $components = parse_url($resource['redirect-url']); + $rooturl = $components['scheme'] . 
"://" . $components['host']; + $dirurl = $rooturl . dirname($components['path']) . "/"; + $params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl); + $fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/'); + $fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params); + $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc); + return $doc; } +/** + * @brief Returns the body text for the supplied item. If the item has already been stored in the database, this will fetch the content from the database rather than from the supplied array. + * + * @param array $item Row from the item table + */ function retriever_get_body($item) { - if (!array_key_exists('uri-id', $item) || !$item['uri-id']) { - // item has not yet been stored in database - return $item['body']; - } + if (!array_key_exists('uri-id', $item) || !$item['uri-id']) { + // item has not yet been stored in database + return $item['body']; + } - // item has been stored in database, body is stored in the item-content table - $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $item['uri-id']]); - if (!$content) { - Logger::warning('retriever_get_body: item-content uri-id ' . $item['uri-id'] . ' has no content'); - return $item['body']; - } - if (!$content['body']) { - Logger::warning('retriever_get_body: item-content uri-id ' . $item['uri-id'] . ' has no body'); - //@@@ check never happens - return $item['body']; - } - if ($content['body'] != $item['body']) { - Logger::warning('@@@ this is probably bad @@@ content: ' . $content['body'] . ' @@@ item: ' . $item['body']); - //@@@ check for this. - } - return $content['body']; + // item has been stored in database, body is stored in the item-content table + $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $item['uri-id']]); + if (!$content) { + Logger::warning('retriever_get_body: item-content uri-id ' . $item['uri-id'] . 
' has no content'); + return $item['body']; + } + if (!$content['body']) { + Logger::warning('retriever_get_body: item-content uri-id ' . $item['uri-id'] . ' has no body'); + return $item['body']; + } + if ($content['body'] != $item['body']) { + Logger::warning('@@@ this is probably bad @@@ content: ' . $content['body'] . ' @@@ item: ' . $item['body']); + } + return $content['body']; } -function retriever_set_body(&$item, $body, $allow_empty = false) { - if (!$body && !$allow_empty) { - Logger::debug('retriever_set_body: will not set empty body in item id ' . $item['id'] . ' uri ' . $item['uri']); - return; - } - $item['body'] = $body; - if (!array_key_exists('id', $item) || !$item['id']) { - // item has not yet been stored in database - return; - } - Item::update(['body' => $body], ['id' => intval($item['id'])]); +/** + * @brief Updates the item with the supplied body text. If the item has already been stored in the database, this will update the database too. + * + * @param array &$item Item in which to set the body (by ref). This may or may not be already stored in the database. + * @param string $body New body content + */ +function retriever_set_body(&$item, $body) { + $item['body'] = $body; + if (!array_key_exists('id', $item) || !$item['id']) { + // item has not yet been stored in database + return; + } + Item::update(['body' => $body], ['id' => intval($item['id'])]); } /** * @brief Searches for images in the item and adds corresponding retriever_items. If the images have already been downloaded, updates the body in the supplied item array. * - * @param array &$item Row from the item table (by ref) + * @param array &$item Item to be searched for images and updated (by ref). This may or may not be already stored in the database. */ function retrieve_images(&$item) { - $body = retriever_get_body($item); - if (!strlen($body)) { - Logger::warning('retrieve_images: no body for item ' . 
$item['uri']); - return; - } + if (!Config::get('retriever', 'allow_images')) { + return; + } - // I suspect that the first two are not used any more? - preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $body, $matches1); - preg_match_all("/\[img\](.*?)\[\/img\]/ism", $body, $matches2); - preg_match_all("/\[img\=([^\]]*)\]([^[]*)\[\/img\]/ism", $body, $matches3); - $matches = array_merge($matches1[3], $matches2[1], $matches3[1]); - Logger::debug('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); - foreach ($matches as $url) { - if (!$url) { - continue; - } - if (strpos($url, get_app()->getBaseUrl()) === FALSE) { - $resource = add_retriever_resource($url, $item['uid'], $item['contact-id'], true); - if (!$resource['completed']) { - add_retriever_item($item, $resource); - } - else { - retriever_transform_images($item, $resource); - } - } - } + $body = retriever_get_body($item); + if (!strlen($body)) { + Logger::warning('retrieve_images: no body for item ' . $item['uri']); + return; + } + + // I suspect that the first two are not used any more? + preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $body, $matches1); + preg_match_all("/\[img\](.*?)\[\/img\]/ism", $body, $matches2); + preg_match_all("/\[img\=([^\]]*)\]([^[]*)\[\/img\]/ism", $body, $matches3); + $matches = array_merge($matches1[3], $matches2[1], $matches3[1]); + Logger::debug('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . 
$item['contact-id']); + foreach ($matches as $url) { + if (!$url) { + continue; + } + if (strpos($url, System::baseUrl()) === FALSE) { + $resource = add_retriever_resource($url, $item['uid'], $item['contact-id'], true); + if (!$resource['completed']) { + add_retriever_item($item, $resource); + } + else { + retriever_transform_images($item, $resource); + } + } + } } +/** + * @brief Checks if an item has been completed, i.e. all its associated retriever_item rows have been retrieved. If so, update the item to be visible again. + * + * @param array &$item Row from the item table (by ref) + */ function retriever_check_item_completed(&$item) { - $waiting = DBA::selectFirst('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'contact-id' => intval($item['contact-id']), 'finished' => 0]); - Logger::debug('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] . ' '. $item['contact-id'] . ' waiting for resources'); - $old_visible = $item['visible']; - $item['visible'] = $waiting ? 0 : 1; - if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) { - Logger::debug('retriever_check_item_completed: changing visible flag to ' . $item['visible']); - Item::update(['visible' => $item['visible']], ['id' => intval($item['id'])]); - } + $waiting = DBA::selectFirst('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'contact-id' => intval($item['contact-id']), 'finished' => 0]); + Logger::debug('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] . ' '. $item['contact-id'] . ' waiting for resources'); + $old_visible = $item['visible']; + $item['visible'] = $waiting ? 0 : 1; + if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) { + Logger::debug('retriever_check_item_completed: changing visible flag to ' . 
$item['visible']); + Item::update(['visible' => $item['visible']], ['id' => intval($item['id'])]); + } } +/** + * @brief Updates an item with a completed resource. If the resource was text, update the body with the new content. If the resource was an image, replace remote images in the body with a local version. + * + * @param array $retriever Rule configuration for this contact + * @param array &$item Row from the item table (by ref) + * @param array $resource The resource that has just been completed + */ function retriever_apply_completed_resource_to_item($retriever, &$item, $resource) { - Logger::debug('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . $item['plink']); - if (strpos($resource['type'], 'image') !== false) { - retriever_transform_images($item, $resource); - } - if (!$retriever) { - Logger::warning('retriever_apply_completed_resource_to_item: no retriever'); - return; - } - if ((strpos($resource['type'], 'html') !== false) || - (strpos($resource['type'], 'xml') !== false)) { - retriever_apply_dom_filter($retriever, $item, $resource); - if ($retriever['data']['images'] ) { - retrieve_images($item); - } - } + Logger::debug('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . 
$item['plink']); + if (strpos($resource['type'], 'image') !== false) { + retriever_transform_images($item, $resource); + } + if (!$retriever) { + Logger::warning('retriever_apply_completed_resource_to_item: no retriever'); + return; + } + if ((strpos($resource['type'], 'html') !== false) || + (strpos($resource['type'], 'xml') !== false)) { + retriever_apply_dom_filter($retriever, $item, $resource); + if ($retriever['data']['images'] ) { + retrieve_images($item); + } + } } /** @@ -659,225 +773,255 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc * * @param array &$item Row from the item table (by ref) * @param array $resource Row from the resource table containing successfully downloaded image + * + * TODO: split this into two functions, one to store the image, the other to change the item body */ -// TODO: split this into two functions, one to store the image, the other to change the item body function retriever_transform_images(&$item, $resource) { - if (!$resource['data']) { - Logger::info('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url']); - return; - } + if (!$resource['data']) { + Logger::info('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url']); + return; + } - $data = $resource['data']; - $type = $resource['type']; - $uid = $item['uid']; - $cid = $item['contact-id']; - $rid = Photo::newResource(); - $path = parse_url($resource['url'], PHP_URL_PATH); - $parts = pathinfo($path); - $filename = $parts['filename'] . (array_key_exists('extension', $parts) ? '.' . $parts['extension'] : ''); - $album = 'Wall Photos'; - $scale = 0; - $desc = ''; // TODO: store alt text with resource when it's requested so we can fill this in - Logger::debug('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . $uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . $album . ' scale ' . $scale . 
' desc ' . $desc); - $image = new Image($data, $type); - if (!$image->isValid()) { - Logger::warning('retriever_transform_images: invalid image found at URL ' . $resource['url'] . ' for item ' . $item['id']); - return; - } - $photo = Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc); - $new_url = System::baseUrl() . '/photo/' . $rid . '-0.' . $image->getExt(); - if (!strlen($new_url)) { - Logger::warning('retriever_transform_images: no replacement URL for image ' . $resource['url']); - return; - } + $data = $resource['data']; + $type = $resource['type']; + $uid = $item['uid']; + $cid = $item['contact-id']; + $rid = Photo::newResource(); + $path = parse_url($resource['url'], PHP_URL_PATH); + $parts = pathinfo($path); + $filename = $parts['filename'] . (array_key_exists('extension', $parts) ? '.' . $parts['extension'] : ''); + $album = 'Wall Photos'; + $scale = 0; + $desc = ''; // TODO: store alt text with resource when it's requested so we can fill this in + Logger::debug('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . $uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . $album . ' scale ' . $scale . ' desc ' . $desc); + $image = new Image($data, $type); + if (!$image->isValid()) { + Logger::warning('retriever_transform_images: invalid image found at URL ' . $resource['url'] . ' for item ' . $item['id']); + return; + } + $photo = Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc); + $new_url = System::baseUrl() . '/photo/' . $rid . '-0.' . $image->getExt(); + if (!strlen($new_url)) { + Logger::warning('retriever_transform_images: no replacement URL for image ' . $resource['url']); + return; + } - $body = retriever_get_body($item); + $body = retriever_get_body($item); - Logger::debug('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . 
$item['uri']); - $body = str_replace($resource["url"], $new_url, $body); - retriever_set_body($item, $body); + Logger::debug('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . $item['uri']); + $body = str_replace($resource["url"], $new_url, $body); + retriever_set_body($item, $body); } +/** + * @brief Displays the retriever configuration page for a contact. Alternatively, if the user clicked the "help" button, display the help content. + * + * @param App $a The App object + */ function retriever_content($a) { - if (!local_user()) { - $a->page['content'] .= "

Please log in

"; - return; - } - if ($a->argv[1] === 'help') { - $feeds = DBA::selectToArray('contact', ['id', 'name', 'thumb'], ['uid' => local_user(), 'network' => 'feed']); - for ($i = 0; $i < count($feeds); ++$i) { - $feeds[$i]['url'] = $a->getBaseUrl() . '/retriever/' . $feeds[$i]['id']; - } - $template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/'); - $a->page['content'] .= Renderer::replaceMacros($template, array( - '$config' => $a->getBaseUrl() . '/settings/addon', - '$feeds' => $feeds)); - return; - } - if ($a->argv[1]) { - $retriever_rule = get_retriever_rule($a->argv[1], local_user(), false); + if (!local_user()) { + $a->page['content'] .= "

Please log in

"; + return; + } + if ($a->argv[1] === 'help') { + $feeds = DBA::selectToArray('contact', ['id', 'name', 'thumb'], ['uid' => local_user(), 'network' => 'feed']); + for ($i = 0; $i < count($feeds); ++$i) { + $feeds[$i]['url'] = System::baseUrl() . '/retriever/' . $feeds[$i]['id']; + } + $template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/'); + $a->page['content'] .= Renderer::replaceMacros($template, array( + '$config' => $a->getBaseUrl . '/settings/addon', + '$feeds' => $feeds)); + return; + } + if ($a->argv[1]) { + $retriever_rule = get_retriever_rule($a->argv[1], local_user(), false); - if (!empty($_POST["id"])) { - $retriever_rule = get_retriever_rule($a->argv[1], local_user(), true); - $retriever_rule['data'] = array(); - foreach (array('modurl', 'pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) { - if (empty($_POST['retriever_' . $setting])) { - $retriever_rule['data'][$setting] = NULL; - } - else { - $retriever_rule['data'][$setting] = $_POST['retriever_' . $setting]; - } - } - foreach ($_POST as $k=>$v) { - if (preg_match("/retriever-(include|exclude)-(\d+)-(element|attribute|value)/", $k, $matches)) { - $retriever_rule['data'][$matches[1]][intval($matches[2])][$matches[3]] = $v; - } - } - // You've gotta have an element, even if it's just "*" - foreach ($retriever_rule['data']['include'] as $k=>$clause) { - if (!$clause['element']) { - unset($retriever_rule['data']['include'][$k]); - } - } - foreach ($retriever_rule['data']['exclude'] as $k=>$clause) { - if (!$clause['element']) { - unset($retriever_rule['data']['exclude'][$k]); - } - } - DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], ['data' => '']); - $a->page['content'] .= "

Settings Updated"; - if (!empty($_POST["retriever_retrospective"])) { - apply_retrospective($retriever_rule, $_POST["retriever_retrospective"]); - $a->page['content'] .= " and retrospectively applied to " . $_POST["retriever_retrospective"] . " posts"; - } - $a->page['content'] .= ".

"; - } + if (!empty($_POST["id"])) { + $retriever_rule = get_retriever_rule($a->argv[1], local_user(), true); + $retriever_rule['data'] = array(); + foreach (array('modurl', 'pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) { + if (empty($_POST['retriever_' . $setting])) { + $retriever_rule['data'][$setting] = NULL; + } + else { + $retriever_rule['data'][$setting] = $_POST['retriever_' . $setting]; + } + } + foreach ($_POST as $k=>$v) { + if (preg_match("/retriever-(include|exclude)-(\d+)-(element|attribute|value)/", $k, $matches)) { + $retriever_rule['data'][$matches[1]][intval($matches[2])][$matches[3]] = $v; + } + } + // You've gotta have an element, even if it's just "*" + foreach ($retriever_rule['data']['include'] as $k=>$clause) { + if (!$clause['element']) { + unset($retriever_rule['data']['include'][$k]); + } + } + foreach ($retriever_rule['data']['exclude'] as $k=>$clause) { + if (!$clause['element']) { + unset($retriever_rule['data']['exclude'][$k]); + } + } + DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], ['data' => '']); + $a->page['content'] .= "

Settings Updated"; + if (!empty($_POST["retriever_retrospective"])) { + apply_retrospective($retriever_rule, $_POST["retriever_retrospective"]); + $a->page['content'] .= " and retrospectively applied to " . $_POST["retriever_retrospective"] . " posts"; + } + $a->page['content'] .= ".

"; + } - $template = Renderer::getMarkupTemplate('/rule-config.tpl', 'addon/retriever/'); - $a->page['content'] .= Renderer::replaceMacros($template, array( - '$enable' => array( - 'retriever_enable', - L10n::t('Enabled'), - $retriever_rule['data']['enable']), - '$modurl' => array( - 'retriever_modurl', - L10n::t('Modify URL'), - $retriever_rule['data']['modurl'], - L10n::t("Modify each article's URL with regular expressions before retrieving.")), - '$pattern' => array( - 'retriever_pattern', - L10n::t('URL Pattern'), - $retriever_rule['data']['pattern'], - L10n::t('Regular expression matching part of the URL to replace')), - '$replace' => array( - 'retriever_replace', - L10n::t('URL Replace'), - $retriever_rule['data']['replace'], - L10n::t('Text to replace matching part of above regular expression')), - '$images' => array( - 'retriever_images', - L10n::t('Download Images'), - $retriever_rule['data']['images']), - '$retrospective' => array( - 'retriever_retrospective', - L10n::t('Retrospectively Apply'), - '0', - L10n::t('Reapply the rules to this number of posts')), - 'storecookies' => array( - 'retriever_storecookies', - L10n::t('Store cookies'), - $retriever_rule['data']['storecookies'], - L10n::t("Preserve cookie data across fetches.")), - '$cookiedata' => array( - 'retriever_cookiedata', - L10n::t('Cookie Data'), - $retriever_rule['data']['cookiedata'], - L10n::t("Latest cookie data for this feed. Netscape cookie file format.")), - '$customxslt' => array( - 'retriever_customxslt', - L10n::t('Custom XSLT'), - $retriever_rule['data']['customxslt'], - L10n::t("When standard rules aren't enough, apply custom XSLT to the article")), - '$title' => L10n::t('Retrieve Feed Content'), - '$help' => $a->getBaseUrl() . '/retriever/help', - '$help_t' => L10n::t('Get Help'), - '$submit_t' => L10n::t('Submit'), - '$submit' => L10n::t('Save Settings'), - '$id' => ($retriever_rule["id"] ? 
$retriever_rule["id"] : "create"), - '$tag_t' => L10n::t('Tag'), - '$attribute_t' => L10n::t('Attribute'), - '$value_t' => L10n::t('Value'), - '$add_t' => L10n::t('Add'), - '$remove_t' => L10n::t('Remove'), - '$include_t' => L10n::t('Include'), - '$include' => $retriever_rule['data']['include'], - '$exclude_t' => L10n::t('Exclude'), - '$exclude' => $retriever_rule['data']['exclude'])); - return; - } + $template = Renderer::getMarkupTemplate('/rule-config.tpl', 'addon/retriever/'); + $a->page['content'] .= Renderer::replaceMacros($template, array( + '$enable' => array( + 'retriever_enable', + L10n::t('Enabled'), + $retriever_rule['data']['enable']), + '$modurl' => array( + 'retriever_modurl', + L10n::t('Modify URL'), + $retriever_rule['data']['modurl'], + L10n::t("Modify each article's URL with regular expressions before retrieving.")), + '$pattern' => array( + 'retriever_pattern', + L10n::t('URL Pattern'), + $retriever_rule['data']['pattern'], + L10n::t('Regular expression matching part of the URL to replace')), + '$replace' => array( + 'retriever_replace', + L10n::t('URL Replace'), + $retriever_rule['data']['replace'], + L10n::t('Text to replace matching part of above regular expression')), + '$allow_images' => Config::get('retriever', 'allow_images'), + '$images' => array( + 'retriever_images', + L10n::t('Download Images'), + $retriever_rule['data']['images']), + '$retrospective' => array( + 'retriever_retrospective', + L10n::t('Retrospectively Apply'), + '0', + L10n::t('Reapply the rules to this number of posts')), + 'storecookies' => array( + 'retriever_storecookies', + L10n::t('Store cookies'), + $retriever_rule['data']['storecookies'], + L10n::t("Preserve cookie data across fetches.")), + '$cookiedata' => array( + 'retriever_cookiedata', + L10n::t('Cookie Data'), + $retriever_rule['data']['cookiedata'], + L10n::t("Latest cookie data for this feed. 
Netscape cookie file format.")), + '$customxslt' => array( + 'retriever_customxslt', + L10n::t('Custom XSLT'), + $retriever_rule['data']['customxslt'], + L10n::t("When standard rules aren't enough, apply custom XSLT to the article")), + '$title' => L10n::t('Retrieve Feed Content'), + '$help' => $a->getBaseUrl . '/retriever/help', + '$help_t' => L10n::t('Get Help'), + '$submit_t' => L10n::t('Submit'), + '$submit' => L10n::t('Save Settings'), + '$id' => ($retriever_rule["id"] ? $retriever_rule["id"] : "create"), + '$tag_t' => L10n::t('Tag'), + '$attribute_t' => L10n::t('Attribute'), + '$value_t' => L10n::t('Value'), + '$add_t' => L10n::t('Add'), + '$remove_t' => L10n::t('Remove'), + '$include_t' => L10n::t('Include'), + '$include' => $retriever_rule['data']['include'], + '$exclude_t' => L10n::t('Exclude'), + '$exclude' => $retriever_rule['data']['exclude'])); + return; + } } +/** + * @brief Hook that adds the retriever option to the contact menu + * + * @param App $a The App object + * @param array $args Contact menu details to be filled in (by ref) + */ function retriever_contact_photo_menu($a, &$args) { - if (!$args) { - return; - } - if ($args["contact"]["network"] == "feed") { - $args["menu"][ 'retriever' ] = array(L10n::t('Retriever'), $a->getBaseUrl() . '/retriever/' . $args["contact"]['id']); - } + if (!$args) { + return; + } + if ($args["contact"]["network"] == "feed") { + $args["menu"]['retriever'] = array(L10n::t('Retriever'), System::baseUrl() . '/retriever/' . $args["contact"]['id']); + } } +/** + * @brief Hook for processing new incoming items + * + * @param App $a The App object (by ref) + * @param array $item New item, which has not yet been inserted into database (by ref) + */ function retriever_post_remote_hook(&$a, &$item) { - Logger::info('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); + Logger::info('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . 
$item['contact-id']); - $retriever_rule = get_retriever_rule($item['contact-id'], $item["uid"], false); - if ($retriever_rule) { - retriever_on_item_insert($retriever_rule, $item); - } - else { - if (PConfig::get($item["uid"], 'retriever', 'oembed')) { - // Convert to HTML and back to take advantage of bbcode's resolution of oembeds. - $body = retriever_get_body($item); - $body = HTML::toBBCode(BBCode::convert($body)); - retriever_set_body($item, $body); - } - if (PConfig::get($item["uid"], 'retriever', 'all_photos')) { - retrieve_images($item); - } - } - retriever_check_item_completed($item); + $retriever_rule = get_retriever_rule($item['contact-id'], $item["uid"], false); + if ($retriever_rule) { + retriever_on_item_insert($retriever_rule, $item); + } + else { + if (PConfig::get($item["uid"], 'retriever', 'oembed')) { + // Convert to HTML and back to take advantage of bbcode's resolution of oembeds. + $body = retriever_get_body($item); + $body = HTML::toBBCode(BBCode::convert($body)); + retriever_set_body($item, $body); + } + if (PConfig::get($item["uid"], 'retriever', 'all_photos')) { + retrieve_images($item); + } + } + retriever_check_item_completed($item); } -function retriever_plugin_settings(&$a, &$s) { - $all_photos = PConfig::get(local_user(), 'retriever', 'all_photos'); - $oembed = PConfig::get(local_user(), 'retriever', 'oembed'); - $template = Renderer::getMarkupTemplate('/settings.tpl', 'addon/retriever/'); - $s .= Renderer::replaceMacros($template, array( - '$allphotos' => array( - 'retriever_all_photos', - L10n::t('All Photos'), - $all_photos, - L10n::t('Check this to retrieve photos for all posts')), - '$oembed' => array( - 'retriever_oembed', - L10n::t('Resolve OEmbed'), - $oembed, - L10n::t('Check this to attempt to retrieve embedded content for all posts - useful e.g. for Facebook posts')), - '$submit' => L10n::t('Save Settings'), - '$title' => L10n::t('Retriever Settings'), - '$help' => $a->getBaseUrl() . 
'/retriever/help')); +/** + * @brief Hook for adding per-user retriever settings to the user's settings page + * + * @param App $a The App object (by ref) + * @param string $s HTML string to which to append settings content (by ref) + */ +function retriever_addon_settings(&$a, &$s) { + $all_photos = PConfig::get(local_user(), 'retriever', 'all_photos'); + $oembed = PConfig::get(local_user(), 'retriever', 'oembed'); + $template = Renderer::getMarkupTemplate('/settings.tpl', 'addon/retriever/'); + $config = array('$submit' => L10n::t('Save Settings'), + '$title' => L10n::t('Retriever Settings'), + '$help' => $a->getBaseUrl . '/retriever/help', + '$allow_images' => Config::get('retriever', 'allow_images')); + $config['$allphotos'] = array('retriever_all_photos', + L10n::t('All Photos'), + $all_photos, + L10n::t('Check this to retrieve photos for all posts')); + $config['$oembed'] = array('retriever_oembed', + L10n::t('Resolve OEmbed'), + $oembed, + L10n::t('Check this to attempt to retrieve embedded content for all posts')); + $s .= Renderer::replaceMacros($template, $config); } -function retriever_plugin_settings_post($a,$post) { - if ($_POST['retriever_all_photos']) { - PConfig::set(local_user(), 'retriever', 'all_photos', $_POST['retriever_all_photos']); - } - else { - PConfig::del(local_user(), 'retriever', 'all_photos'); - } - if ($_POST['retriever_oembed']) { - PConfig::set(local_user(), 'retriever', 'oembed', $_POST['retriever_oembed']); - } - else { - PConfig::del(local_user(), 'retriever', 'oembed'); - } +/** + * @brief Hook for processing post results from user's settings page + * + * @param App $a The App object + * @param array $post Posted content + */ +function retriever_addon_settings_post($a, $post) { + if ($post['retriever_all_photos']) { + PConfig::set(local_user(), 'retriever', 'all_photos', $post['retriever_all_photos']); + } + else { + PConfig::delete(local_user(), 'retriever', 'all_photos'); + } + if ($post['retriever_oembed']) { + 
PConfig::set(local_user(), 'retriever', 'oembed', $post['retriever_oembed']); + } + else { + PConfig::delete(local_user(), 'retriever', 'oembed'); + } } diff --git a/retriever/templates/admin.tpl b/retriever/templates/admin.tpl index b5a35961..71c8506e 100644 --- a/retriever/templates/admin.tpl +++ b/retriever/templates/admin.tpl @@ -4,5 +4,6 @@ * *}} {{include file="field_input.tpl" field=$downloads_per_cron}} +{{include file="field_checkbox.tpl" field=$allow_images}}
diff --git a/retriever/templates/rule-config.tpl b/retriever/templates/rule-config.tpl index 171054de..8c1bc130 100644 --- a/retriever/templates/rule-config.tpl +++ b/retriever/templates/rule-config.tpl @@ -142,7 +142,9 @@ document.addEventListener('DOMContentLoaded', function() { {{include file="field_checkbox.tpl" field=$modurl}} {{include file="field_input.tpl" field=$pattern}} {{include file="field_input.tpl" field=$replace}} +{{if $allow_images}} {{include file="field_checkbox.tpl" field=$images}} +{{/if}} {{include file="field_textarea.tpl" field=$customxslt}} {{include file="field_checkbox.tpl" field=$storecookies}} {{include file="field_textarea.tpl" field=$cookiedata}} diff --git a/retriever/templates/settings.tpl b/retriever/templates/settings.tpl index 8bfe8db0..3151fd72 100644 --- a/retriever/templates/settings.tpl +++ b/retriever/templates/settings.tpl @@ -1,9 +1,16 @@ -
-

{{$title}}

-

- Get Help -

+ +

{{$title}}

+
+ From 67fc2a8491945c5e9a6c14dd9064761a6fd7a48b Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 13 Oct 2019 11:27:19 +0200 Subject: [PATCH 35/94] remove help section if images not allowed --- retriever/retriever.php | 1 + retriever/templates/help.tpl | 2 ++ 2 files changed, 3 insertions(+) diff --git a/retriever/retriever.php b/retriever/retriever.php index 33f9a40e..6b71c36e 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -831,6 +831,7 @@ function retriever_content($a) { $template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/'); $a->page['content'] .= Renderer::replaceMacros($template, array( '$config' => $a->getBaseUrl . '/settings/addon', + '$allow_images' => Config::get('retriever', 'allow_images'), '$feeds' => $feeds)); return; } diff --git a/retriever/templates/help.tpl b/retriever/templates/help.tpl index b96ec63c..7298c130 100644 --- a/retriever/templates/help.tpl +++ b/retriever/templates/help.tpl @@ -131,6 +131,7 @@ fails, the plugin will keep trying at progressively longer intervals for up to a month, in case the website is temporarily overloaded or the network is down.

+{{if $allow_images}}

Retrieving Images

Retriever can also optionally download images and store them in the @@ -140,6 +141,7 @@ an RSS feed or not. Go to the "Settings" page and click "Plugin settings". Then check the "All Photos" box in the "Retriever Settings" section and click "Submit".

+{{/if}}

Configure Feeds:

{{foreach $feeds as $feed}} From 6e5e06e303f55c9b060127b6941887ad31b8858f Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 6 Jan 2020 22:12:47 +0100 Subject: [PATCH 36/94] Fix bug in phototrack --- phototrack/phototrack.php | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index 8b909f5d..e493871d 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -103,12 +103,22 @@ function phototrack_photo_use($photo, $table, $field, $id) { function phototrack_check_field_url($a, $table, $field, $id, $url) { Logger::info('@@@ phototrack_check_field_url table ' . $table . ' field ' . $field . ' id ' . $id . ' url ' . $url); $baseurl = $a->getBaseURL(); - if (strpos($url, $baseurl) !== FALSE) { + if (strpos($url, $baseurl) === FALSE) { + return; + } + else { $url = substr($url, strlen($baseurl)); Logger::info('@@@ phototrack_check_field_url funny url stuff ' . $url . ' base ' . $baseurl); } - if (strpos($url, '/photo/') !== FALSE) { - $rid = substr($url, strlen('/photo/')); + if (strpos($url, '/photo/') === FALSE) { + return; + } + else { + $url = substr($url, strlen('/photo/')); + Logger::info('@@@ phototrack_check_field_url more url stuff ' . $url); + } + if (preg_match('/([0-9a-z]{32})/', $url, $matches)) { + $rid = $matches[0]; Logger::info('@@@ phototrack_check_field_url rid ' . 
$rid); phototrack_photo_use($rid, $table, $field, $id); } From 3cdffe1fde297a77603ce028376ad4e49daeda83 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Thu, 9 Jan 2020 22:07:55 +0100 Subject: [PATCH 37/94] Update for new version --- retriever/retriever.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 6b71c36e..0b78181f 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -714,7 +714,7 @@ function retrieve_images(&$item) { if (!$url) { continue; } - if (strpos($url, System::baseUrl()) === FALSE) { + if (strpos($url, DI::baseUrl()) === FALSE) { $resource = add_retriever_resource($url, $item['uid'], $item['contact-id'], true); if (!$resource['completed']) { add_retriever_item($item, $resource); @@ -800,7 +800,7 @@ function retriever_transform_images(&$item, $resource) { return; } $photo = Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc); - $new_url = System::baseUrl() . '/photo/' . $rid . '-0.' . $image->getExt(); + $new_url = DI::baseUrl() . '/photo/' . $rid . '-0.' . $image->getExt(); if (!strlen($new_url)) { Logger::warning('retriever_transform_images: no replacement URL for image ' . $resource['url']); return; @@ -826,7 +826,7 @@ function retriever_content($a) { if ($a->argv[1] === 'help') { $feeds = DBA::selectToArray('contact', ['id', 'name', 'thumb'], ['uid' => local_user(), 'network' => 'feed']); for ($i = 0; $i < count($feeds); ++$i) { - $feeds[$i]['url'] = System::baseUrl() . '/retriever/' . $feeds[$i]['id']; + $feeds[$i]['url'] = DI::baseUrl() . '/retriever/' . 
$feeds[$i]['id']; } $template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/'); $a->page['content'] .= Renderer::replaceMacros($template, array( @@ -950,7 +950,7 @@ function retriever_contact_photo_menu($a, &$args) { return; } if ($args["contact"]["network"] == "feed") { - $args["menu"]['retriever'] = array(L10n::t('Retriever'), System::baseUrl() . '/retriever/' . $args["contact"]['id']); + $args["menu"]['retriever'] = array(L10n::t('Retriever'), DI::baseUrl() . '/retriever/' . $args["contact"]['id']); } } From 91689cd7982904dd0294a464e13512686cd30c7d Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Fri, 10 Jan 2020 07:32:39 +0100 Subject: [PATCH 38/94] Missing class --- retriever/retriever.php | 1 + 1 file changed, 1 insertion(+) diff --git a/retriever/retriever.php b/retriever/retriever.php index 0b78181f..5da7aff1 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -22,6 +22,7 @@ use Friendica\Database\DBA; use Friendica\Model\ItemURI; use Friendica\Model\Item; use Friendica\Util\DateTimeFormat; +use Friendica\DI; /** * @brief Installation hook for retriever plugin From e090a286b1f469c0bf7efbae7c1d9406bf3f7194 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Fri, 10 Jan 2020 07:34:19 +0100 Subject: [PATCH 39/94] New way of doing baseurl --- phototrack/phototrack.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index e493871d..4df89873 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -25,6 +25,7 @@ use Friendica\Core\Config; use Friendica\Core\Logger; use Friendica\Object\Image; use Friendica\Database\DBA; +use Friendica\DI; if (!defined('PHOTOTRACK_DEFAULT_BATCH_SIZE')) { define('PHOTOTRACK_DEFAULT_BATCH_SIZE', 1000); @@ -102,7 +103,7 @@ function phototrack_photo_use($photo, $table, $field, $id) { function phototrack_check_field_url($a, $table, $field, $id, $url) { Logger::info('@@@ phototrack_check_field_url table ' . 
$table . ' field ' . $field . ' id ' . $id . ' url ' . $url); - $baseurl = $a->getBaseURL(); + $baseurl = DI::baseUrl(); if (strpos($url, $baseurl) === FALSE) { return; } @@ -125,7 +126,7 @@ function phototrack_check_field_url($a, $table, $field, $id, $url) { } function phototrack_check_field_bbcode($a, $table, $field, $id, $value) { - $baseurl = $a->getBaseURL(); + $baseurl = DI::baseUrl(); $matches = array(); preg_match_all("/\[img(\=([0-9]*)x([0-9]*))?\](.*?)\[\/img\]/ism", $value, $matches); foreach ($matches[4] as $url) { From b9f048c2a85afb572e7029f09fd81f11472c74d8 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Fri, 10 Jan 2020 07:47:08 +0100 Subject: [PATCH 40/94] maybe this way works better --- phototrack/phototrack.php | 4 ++-- retriever/retriever.php | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index 4df89873..25c299ac 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -103,7 +103,7 @@ function phototrack_photo_use($photo, $table, $field, $id) { function phototrack_check_field_url($a, $table, $field, $id, $url) { Logger::info('@@@ phototrack_check_field_url table ' . $table . ' field ' . $field . ' id ' . $id . ' url ' . 
$url); - $baseurl = DI::baseUrl(); + $baseurl = DI::baseUrl()->get(true); if (strpos($url, $baseurl) === FALSE) { return; } @@ -126,7 +126,7 @@ function phototrack_check_field_url($a, $table, $field, $id, $url) { } function phototrack_check_field_bbcode($a, $table, $field, $id, $value) { - $baseurl = DI::baseUrl(); + $baseurl = DI::baseUrl()->get(true); $matches = array(); preg_match_all("/\[img(\=([0-9]*)x([0-9]*))?\](.*?)\[\/img\]/ism", $value, $matches); foreach ($matches[4] as $url) { diff --git a/retriever/retriever.php b/retriever/retriever.php index 5da7aff1..e6defdf5 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -715,7 +715,7 @@ function retrieve_images(&$item) { if (!$url) { continue; } - if (strpos($url, DI::baseUrl()) === FALSE) { + if (strpos($url, DI::baseUrl()->get(true)) === FALSE) { $resource = add_retriever_resource($url, $item['uid'], $item['contact-id'], true); if (!$resource['completed']) { add_retriever_item($item, $resource); @@ -801,7 +801,7 @@ function retriever_transform_images(&$item, $resource) { return; } $photo = Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc); - $new_url = DI::baseUrl() . '/photo/' . $rid . '-0.' . $image->getExt(); + $new_url = DI::baseUrl()->get(true) . '/photo/' . $rid . '-0.' . $image->getExt(); if (!strlen($new_url)) { Logger::warning('retriever_transform_images: no replacement URL for image ' . $resource['url']); return; @@ -827,7 +827,7 @@ function retriever_content($a) { if ($a->argv[1] === 'help') { $feeds = DBA::selectToArray('contact', ['id', 'name', 'thumb'], ['uid' => local_user(), 'network' => 'feed']); for ($i = 0; $i < count($feeds); ++$i) { - $feeds[$i]['url'] = DI::baseUrl() . '/retriever/' . $feeds[$i]['id']; + $feeds[$i]['url'] = DI::baseUrl()->get(true) . '/retriever/' . 
$feeds[$i]['id']; } $template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/'); $a->page['content'] .= Renderer::replaceMacros($template, array( @@ -951,7 +951,7 @@ function retriever_contact_photo_menu($a, &$args) { return; } if ($args["contact"]["network"] == "feed") { - $args["menu"]['retriever'] = array(L10n::t('Retriever'), DI::baseUrl() . '/retriever/' . $args["contact"]['id']); + $args["menu"]['retriever'] = array(L10n::t('Retriever'), DI::baseUrl()->get(true) . '/retriever/' . $args["contact"]['id']); } } From e7fb24e9861005ea5539b9716fc3be27ff1d4d0a Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 16 Aug 2020 13:59:34 +0200 Subject: [PATCH 41/94] Update to new module structure --- retriever/retriever.php | 123 ++++++++++++++++++++-------------------- 1 file changed, 60 insertions(+), 63 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index e6defdf5..2f62c52e 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -7,8 +7,6 @@ */ use Friendica\Core\Addon; -use Friendica\Core\Config; -use Friendica\Core\PConfig; use Friendica\Core\Logger; use Friendica\Core\Renderer; use Friendica\Core\System; @@ -17,7 +15,6 @@ use Friendica\Content\Text\BBCode; use Friendica\Model\Photo; use Friendica\Object\Image; use Friendica\Util\Network; -use Friendica\Core\L10n; use Friendica\Database\DBA; use Friendica\Model\ItemURI; use Friendica\Model\Item; @@ -34,7 +31,7 @@ function retriever_install() { Addon::registerHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); - if (Config::get('retriever', 'dbversion') != '0.14') { + if (DI::config()->get('retriever', 'dbversion') != '0.14') { $schema = file_get_contents(dirname(__file__).'/database.sql'); $tables = explode(';', $schema); foreach ($tables as $table) { @@ -43,8 +40,8 @@ function retriever_install() { return; } } - Config::set('retriever', 
'downloads_per_cron', '100'); - Config::set('retriever', 'dbversion', '0.14'); + DI::config()->set('retriever', 'downloads_per_cron', '100'); + DI::config()->set('retriever', 'dbversion', '0.14'); } } @@ -77,22 +74,22 @@ function retriever_module() {} function retriever_addon_admin(&$a, &$o) { $template = Renderer::getMarkupTemplate('admin.tpl', 'addon/retriever/'); - $downloads_per_cron = Config::get('retriever', 'downloads_per_cron'); + $downloads_per_cron = DI::config()->get('retriever', 'downloads_per_cron'); $downloads_per_cron_config = ['downloads_per_cron', - L10n::t('Downloads per Cron'), + DI::l10n()->t('Downloads per Cron'), $downloads_per_cron, - L10n::t('Maximum number of downloads to attempt during each run of the cron job.')]; + DI::l10n()->t('Maximum number of downloads to attempt during each run of the cron job.')]; - $allow_images = Config::get('retriever', 'allow_images'); + $allow_images = DI::config()->get('retriever', 'allow_images'); $allow_images_config = ['allow_images', - L10n::t('Allow Retrieving Images'), + DI::l10n()->t('Allow Retrieving Images'), $allow_images, - L10n::t('Allow users to request images be downloaded as well as text.
Warning: the images are not automatically deleted and may fill up your database.')]; + DI::l10n()->t('Allow users to request images be downloaded as well as text.
Warning: the images are not automatically deleted and may fill up your database.')]; $o .= Renderer::replaceMacros($template, [ '$downloads_per_cron' => $downloads_per_cron_config, '$allow_images' => $allow_images_config, - '$submit' => L10n::t('Save Settings')]); + '$submit' => DI::l10n()->t('Save Settings')]); } /** @@ -100,16 +97,16 @@ function retriever_addon_admin(&$a, &$o) { */ function retriever_addon_admin_post () { if (!empty($_POST['downloads_per_cron'])) { - Config::set('retriever', 'downloads_per_cron', $_POST['downloads_per_cron']); + DI::config()->set('retriever', 'downloads_per_cron', $_POST['downloads_per_cron']); } - Config::set('retriever', 'allow_images', $_POST['allow_images']); + DI::config()->set('retriever', 'allow_images', $_POST['allow_images']); } /** * @brief Cron jobs for retriever plugin */ function retriever_cron() { - $downloads_per_cron = Config::get('retriever', 'downloads_per_cron'); + $downloads_per_cron = DI::config()->get('retriever', 'downloads_per_cron'); // Do this first, otherwise it can interfere with retriever_retrieve_items retriever_clean_up_completed_resources($downloads_per_cron); @@ -581,7 +578,7 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { Logger::info('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty'); return; } - $body .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . ': [url='; + $body .= "\n\n" . DI::l10n()->t('Retrieved') . ' ' . date("Y-m-d") . ': [url='; $body .= $item['plink']; $body .= ']' . $item['plink'] . '[/url]'; @@ -695,7 +692,7 @@ function retriever_set_body(&$item, $body) { * @param array &$item Item to be searched for images and updated (by ref). This may or may not be already stored in the database. 
*/ function retrieve_images(&$item) { - if (!Config::get('retriever', 'allow_images')) { + if (!DI::config()->get('retriever', 'allow_images')) { return; } @@ -832,7 +829,7 @@ function retriever_content($a) { $template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/'); $a->page['content'] .= Renderer::replaceMacros($template, array( '$config' => $a->getBaseUrl . '/settings/addon', - '$allow_images' => Config::get('retriever', 'allow_images'), + '$allow_images' => DI::config()->get('retriever', 'allow_images'), '$feeds' => $feeds)); return; } @@ -879,62 +876,62 @@ function retriever_content($a) { $a->page['content'] .= Renderer::replaceMacros($template, array( '$enable' => array( 'retriever_enable', - L10n::t('Enabled'), + DI::l10n()->t('Enabled'), $retriever_rule['data']['enable']), '$modurl' => array( 'retriever_modurl', - L10n::t('Modify URL'), + DI::l10n()->t('Modify URL'), $retriever_rule['data']['modurl'], - L10n::t("Modify each article's URL with regular expressions before retrieving.")), + DI::l10n()->t("Modify each article's URL with regular expressions before retrieving.")), '$pattern' => array( 'retriever_pattern', - L10n::t('URL Pattern'), + DI::l10n()->t('URL Pattern'), $retriever_rule['data']['pattern'], - L10n::t('Regular expression matching part of the URL to replace')), + DI::l10n()->t('Regular expression matching part of the URL to replace')), '$replace' => array( 'retriever_replace', - L10n::t('URL Replace'), + DI::l10n()->t('URL Replace'), $retriever_rule['data']['replace'], - L10n::t('Text to replace matching part of above regular expression')), - '$allow_images' => Config::get('retriever', 'allow_images'), + DI::l10n()->t('Text to replace matching part of above regular expression')), + '$allow_images' => DI::config()->get('retriever', 'allow_images'), '$images' => array( 'retriever_images', - L10n::t('Download Images'), + DI::l10n()->t('Download Images'), $retriever_rule['data']['images']), '$retrospective' => array( 
'retriever_retrospective', - L10n::t('Retrospectively Apply'), + DI::l10n()->t('Retrospectively Apply'), '0', - L10n::t('Reapply the rules to this number of posts')), + DI::l10n()->t('Reapply the rules to this number of posts')), 'storecookies' => array( 'retriever_storecookies', - L10n::t('Store cookies'), + DI::l10n()->t('Store cookies'), $retriever_rule['data']['storecookies'], - L10n::t("Preserve cookie data across fetches.")), + DI::l10n()->t("Preserve cookie data across fetches.")), '$cookiedata' => array( 'retriever_cookiedata', - L10n::t('Cookie Data'), + DI::l10n()->t('Cookie Data'), $retriever_rule['data']['cookiedata'], - L10n::t("Latest cookie data for this feed. Netscape cookie file format.")), + DI::l10n()->t("Latest cookie data for this feed. Netscape cookie file format.")), '$customxslt' => array( 'retriever_customxslt', - L10n::t('Custom XSLT'), + DI::l10n()->t('Custom XSLT'), $retriever_rule['data']['customxslt'], - L10n::t("When standard rules aren't enough, apply custom XSLT to the article")), - '$title' => L10n::t('Retrieve Feed Content'), + DI::l10n()->t("When standard rules aren't enough, apply custom XSLT to the article")), + '$title' => DI::l10n()->t('Retrieve Feed Content'), '$help' => $a->getBaseUrl . '/retriever/help', - '$help_t' => L10n::t('Get Help'), - '$submit_t' => L10n::t('Submit'), - '$submit' => L10n::t('Save Settings'), + '$help_t' => DI::l10n()->t('Get Help'), + '$submit_t' => DI::l10n()->t('Submit'), + '$submit' => DI::l10n()->t('Save Settings'), '$id' => ($retriever_rule["id"] ? 
$retriever_rule["id"] : "create"), - '$tag_t' => L10n::t('Tag'), - '$attribute_t' => L10n::t('Attribute'), - '$value_t' => L10n::t('Value'), - '$add_t' => L10n::t('Add'), - '$remove_t' => L10n::t('Remove'), - '$include_t' => L10n::t('Include'), + '$tag_t' => DI::l10n()->t('Tag'), + '$attribute_t' => DI::l10n()->t('Attribute'), + '$value_t' => DI::l10n()->t('Value'), + '$add_t' => DI::l10n()->t('Add'), + '$remove_t' => DI::l10n()->t('Remove'), + '$include_t' => DI::l10n()->t('Include'), '$include' => $retriever_rule['data']['include'], - '$exclude_t' => L10n::t('Exclude'), + '$exclude_t' => DI::l10n()->t('Exclude'), '$exclude' => $retriever_rule['data']['exclude'])); return; } @@ -951,7 +948,7 @@ function retriever_contact_photo_menu($a, &$args) { return; } if ($args["contact"]["network"] == "feed") { - $args["menu"]['retriever'] = array(L10n::t('Retriever'), DI::baseUrl()->get(true) . '/retriever/' . $args["contact"]['id']); + $args["menu"]['retriever'] = array(DI::l10n()->t('Retriever'), DI::baseUrl()->get(true) . '/retriever/' . $args["contact"]['id']); } } @@ -969,13 +966,13 @@ function retriever_post_remote_hook(&$a, &$item) { retriever_on_item_insert($retriever_rule, $item); } else { - if (PConfig::get($item["uid"], 'retriever', 'oembed')) { + if (PDI::config()->get($item["uid"], 'retriever', 'oembed')) { // Convert to HTML and back to take advantage of bbcode's resolution of oembeds. 
$body = retriever_get_body($item); $body = HTML::toBBCode(BBCode::convert($body)); retriever_set_body($item, $body); } - if (PConfig::get($item["uid"], 'retriever', 'all_photos')) { + if (PDI::config()->get($item["uid"], 'retriever', 'all_photos')) { retrieve_images($item); } } @@ -989,21 +986,21 @@ function retriever_post_remote_hook(&$a, &$item) { * @param string $s HTML string to which to append settings content (by ref) */ function retriever_addon_settings(&$a, &$s) { - $all_photos = PConfig::get(local_user(), 'retriever', 'all_photos'); - $oembed = PConfig::get(local_user(), 'retriever', 'oembed'); + $all_photos = PDI::config()->get(local_user(), 'retriever', 'all_photos'); + $oembed = PDI::config()->get(local_user(), 'retriever', 'oembed'); $template = Renderer::getMarkupTemplate('/settings.tpl', 'addon/retriever/'); - $config = array('$submit' => L10n::t('Save Settings'), - '$title' => L10n::t('Retriever Settings'), + $config = array('$submit' => DI::l10n()->t('Save Settings'), + '$title' => DI::l10n()->t('Retriever Settings'), '$help' => $a->getBaseUrl . 
'/retriever/help', - '$allow_images' => Config::get('retriever', 'allow_images')); + '$allow_images' => DI::config()->get('retriever', 'allow_images')); $config['$allphotos'] = array('retriever_all_photos', - L10n::t('All Photos'), + DI::l10n()->t('All Photos'), $all_photos, - L10n::t('Check this to retrieve photos for all posts')); + DI::l10n()->t('Check this to retrieve photos for all posts')); $config['$oembed'] = array('retriever_oembed', - L10n::t('Resolve OEmbed'), + DI::l10n()->t('Resolve OEmbed'), $oembed, - L10n::t('Check this to attempt to retrieve embedded content for all posts')); + DI::l10n()->t('Check this to attempt to retrieve embedded content for all posts')); $s .= Renderer::replaceMacros($template, $config); } @@ -1015,15 +1012,15 @@ function retriever_addon_settings(&$a, &$s) { */ function retriever_addon_settings_post($a, $post) { if ($post['retriever_all_photos']) { - PConfig::set(local_user(), 'retriever', 'all_photos', $post['retriever_all_photos']); + PDI::config()->set(local_user(), 'retriever', 'all_photos', $post['retriever_all_photos']); } else { - PConfig::delete(local_user(), 'retriever', 'all_photos'); + PDI::config()->delete(local_user(), 'retriever', 'all_photos'); } if ($post['retriever_oembed']) { - PConfig::set(local_user(), 'retriever', 'oembed', $post['retriever_oembed']); + PDI::config()->set(local_user(), 'retriever', 'oembed', $post['retriever_oembed']); } else { - PConfig::delete(local_user(), 'retriever', 'oembed'); + PDI::config()->delete(local_user(), 'retriever', 'oembed'); } } From 12388eced883de133d3f0081fb8e330008213ccb Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 16 Aug 2020 14:00:31 +0200 Subject: [PATCH 42/94] Use new L10n thing --- publicise/publicise.php | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/publicise/publicise.php b/publicise/publicise.php index d27eefd4..a746d8af 100644 --- a/publicise/publicise.php +++ b/publicise/publicise.php @@ -9,7 +9,6 @@ use 
Friendica\Core\Addon; use Friendica\Core\Logger; use Friendica\Core\Renderer; -use Friendica\Core\L10n; use Friendica\Database\DBA; function publicise_install() { @@ -71,11 +70,11 @@ function publicise_addon_admin(&$a,&$o) { $template = Renderer::getMarkupTemplate('admin.tpl', 'addon/publicise/'); $o .= Renderer::replaceMacros($template, array( '$feeds' => $contacts, - '$feed_t' => L10n::t('Feed'), - '$publicised_t' => L10n::t('Publicised'), - '$comments_t' => L10n::t('Allow Comments/Likes'), - '$expire_t' => L10n::t('Expire Articles After (Days)'), - '$submit_t' => L10n::t('Submit'))); + '$feed_t' => DI::l10n()->t('Feed'), + '$publicised_t' => DI::l10n()->t('Publicised'), + '$comments_t' => DI::l10n()->t('Allow Comments/Likes'), + '$expire_t' => DI::l10n()->t('Expire Articles After (Days)'), + '$submit_t' => DI::l10n()->t('Submit'))); } function publicise_make_string($in) { From 542185285b61e121133f2134ec66c6ce083220bd Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 23 Aug 2020 21:15:18 +0100 Subject: [PATCH 43/94] Further updates to 2020.03 --- phototrack/phototrack.php | 17 ++++++++--------- retriever/retriever.php | 16 ++++++++-------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index 25c299ac..c67014c1 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -21,7 +21,6 @@ */ use Friendica\Core\Addon; -use Friendica\Core\Config; use Friendica\Core\Logger; use Friendica\Object\Image; use Friendica\Database\DBA; @@ -43,7 +42,7 @@ function phototrack_install() { Addon::registerHook('notifier_end', 'addon/phototrack/phototrack.php', 'phototrack_notifier_end'); Addon::registerHook('cron', 'addon/phototrack/phototrack.php', 'phototrack_cron'); - if (Config::get('phototrack', 'dbversion') != '0.1') { + if (DI::config()->get('phototrack', 'dbversion') != '0.1') { $schema = file_get_contents(dirname(__file__).'/database.sql'); $arr = explode(';', $schema); foreach ($arr as 
$a) { @@ -52,7 +51,7 @@ function phototrack_install() { return; } } - Config::set('phototrack', 'dbversion', '0.1'); + DI::config()->set('phototrack', 'dbversion', '0.1'); } } @@ -190,7 +189,7 @@ function phototrack_check_row($a, $table, $row) { } function phototrack_batch_size() { - $batch_size = Config::get('phototrack', 'batch_size'); + $batch_size = DI::config()->get('phototrack', 'batch_size'); if ($batch_size > 0) { return $batch_size; } @@ -210,13 +209,13 @@ function phototrack_search_table($a, $table) { } function phototrack_cron_time() { - $prev_remaining = Config::get('phototrack', 'remaining_items'); + $prev_remaining = DI::config()->get('phototrack', 'remaining_items'); if ($prev_remaining > 10 * phototrack_batch_size()) { Logger::debug('phototrack: more than ' . (10 * phototrack_batch_size()) . ' items remaining'); return true; } - $last = Config::get('phototrack', 'last_search'); - $search_interval = intval(Config::get('phototrack', 'search_interval')); + $last = DI::config()->get('phototrack', 'last_search'); + $search_interval = intval(DI::config()->get('phototrack', 'search_interval')); if (!$search_interval) { $search_interval = PHOTOTRACK_DEFAULT_SEARCH_INTERVAL; } @@ -234,7 +233,7 @@ function phototrack_cron($a, $b) { if (!phototrack_cron_time()) { return; } - Config::set('phototrack', 'last_search', time()); + DI::config()->set('phototrack', 'last_search', time()); $remaining = 0; $remaining += phototrack_search_table($a, 'item'); @@ -244,7 +243,7 @@ function phototrack_cron($a, $b) { $remaining += phototrack_search_table($a, 'fsuggest'); $remaining += phototrack_search_table($a, 'gcontact'); - Config::set('phototrack', 'remaining_items', $remaining); + DI::config()->set('phototrack', 'remaining_items', $remaining); if ($remaining === 0) { phototrack_tidy(); } diff --git a/retriever/retriever.php b/retriever/retriever.php index 2f62c52e..4097674f 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -966,13 +966,13 @@ function 
retriever_post_remote_hook(&$a, &$item) { retriever_on_item_insert($retriever_rule, $item); } else { - if (PDI::config()->get($item["uid"], 'retriever', 'oembed')) { + if (DI::config()->get($item["uid"], 'retriever', 'oembed')) { // Convert to HTML and back to take advantage of bbcode's resolution of oembeds. $body = retriever_get_body($item); $body = HTML::toBBCode(BBCode::convert($body)); retriever_set_body($item, $body); } - if (PDI::config()->get($item["uid"], 'retriever', 'all_photos')) { + if (DI::config()->get($item["uid"], 'retriever', 'all_photos')) { retrieve_images($item); } } @@ -986,8 +986,8 @@ function retriever_post_remote_hook(&$a, &$item) { * @param string $s HTML string to which to append settings content (by ref) */ function retriever_addon_settings(&$a, &$s) { - $all_photos = PDI::config()->get(local_user(), 'retriever', 'all_photos'); - $oembed = PDI::config()->get(local_user(), 'retriever', 'oembed'); + $all_photos = DI::config()->get(local_user(), 'retriever', 'all_photos'); + $oembed = DI::config()->get(local_user(), 'retriever', 'oembed'); $template = Renderer::getMarkupTemplate('/settings.tpl', 'addon/retriever/'); $config = array('$submit' => DI::l10n()->t('Save Settings'), '$title' => DI::l10n()->t('Retriever Settings'), @@ -1012,15 +1012,15 @@ function retriever_addon_settings(&$a, &$s) { */ function retriever_addon_settings_post($a, $post) { if ($post['retriever_all_photos']) { - PDI::config()->set(local_user(), 'retriever', 'all_photos', $post['retriever_all_photos']); + DI::config()->set(local_user(), 'retriever', 'all_photos', $post['retriever_all_photos']); } else { - PDI::config()->delete(local_user(), 'retriever', 'all_photos'); + DI::config()->delete(local_user(), 'retriever', 'all_photos'); } if ($post['retriever_oembed']) { - PDI::config()->set(local_user(), 'retriever', 'oembed', $post['retriever_oembed']); + DI::config()->set(local_user(), 'retriever', 'oembed', $post['retriever_oembed']); } else { - 
PDI::config()->delete(local_user(), 'retriever', 'oembed'); + DI::config()->delete(local_user(), 'retriever', 'oembed'); } } From 51ed5a3d5c8ae023f13d770ed3a701dd3d0e4dfe Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Tue, 13 Oct 2020 18:39:01 +0100 Subject: [PATCH 44/94] Update with base url changes and strict key requirements --- retriever/retriever.php | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 4097674f..a98611eb 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -828,7 +828,7 @@ function retriever_content($a) { } $template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/'); $a->page['content'] .= Renderer::replaceMacros($template, array( - '$config' => $a->getBaseUrl . '/settings/addon', + '$config' => DI::baseUrl()->get(true) . '/settings/addon', '$allow_images' => DI::config()->get('retriever', 'allow_images'), '$feeds' => $feeds)); return; @@ -877,27 +877,27 @@ function retriever_content($a) { '$enable' => array( 'retriever_enable', DI::l10n()->t('Enabled'), - $retriever_rule['data']['enable']), + array_key_exists('enable', $retriever_rule['data']) ? $retriever_rule['data']['enable'] : ""), '$modurl' => array( 'retriever_modurl', DI::l10n()->t('Modify URL'), - $retriever_rule['data']['modurl'], + array_key_exists('modurl', $retriever_rule['data']) ? $retriever_rule['data']['modurl'] : "", DI::l10n()->t("Modify each article's URL with regular expressions before retrieving.")), '$pattern' => array( 'retriever_pattern', DI::l10n()->t('URL Pattern'), - $retriever_rule['data']['pattern'], + array_key_exists('pattern', $retriever_rule['data']) ? $retriever_rule['data']['pattern'] : "", DI::l10n()->t('Regular expression matching part of the URL to replace')), '$replace' => array( 'retriever_replace', DI::l10n()->t('URL Replace'), - $retriever_rule['data']['replace'], + array_key_exists('replace', $retriever_rule['data']) ? 
$retriever_rule['data']['replace'] : "", DI::l10n()->t('Text to replace matching part of above regular expression')), '$allow_images' => DI::config()->get('retriever', 'allow_images'), '$images' => array( 'retriever_images', DI::l10n()->t('Download Images'), - $retriever_rule['data']['images']), + array_key_exists('images', $retriever_rule['data']) ? $retriever_rule['data']['images'] : ""), '$retrospective' => array( 'retriever_retrospective', DI::l10n()->t('Retrospectively Apply'), @@ -906,20 +906,20 @@ function retriever_content($a) { 'storecookies' => array( 'retriever_storecookies', DI::l10n()->t('Store cookies'), - $retriever_rule['data']['storecookies'], + array_key_exists('storecookies', $retriever_rule['data']) ? $retriever_rule['data']['storecookies'] : "", DI::l10n()->t("Preserve cookie data across fetches.")), '$cookiedata' => array( 'retriever_cookiedata', DI::l10n()->t('Cookie Data'), - $retriever_rule['data']['cookiedata'], + array_key_exists('cookiedata', $retriever_rule['data']) ? $retriever_rule['data']['cookiedata'] : "", DI::l10n()->t("Latest cookie data for this feed. Netscape cookie file format.")), '$customxslt' => array( 'retriever_customxslt', DI::l10n()->t('Custom XSLT'), - $retriever_rule['data']['customxslt'], + array_key_exists('customxslt', $retriever_rule['data']) ? $retriever_rule['data']['customxslt'] : "", DI::l10n()->t("When standard rules aren't enough, apply custom XSLT to the article")), '$title' => DI::l10n()->t('Retrieve Feed Content'), - '$help' => $a->getBaseUrl . '/retriever/help', + '$help' => DI::baseUrl()->get(true) . 
'/retriever/help', '$help_t' => DI::l10n()->t('Get Help'), '$submit_t' => DI::l10n()->t('Submit'), '$submit' => DI::l10n()->t('Save Settings'), @@ -991,7 +991,7 @@ function retriever_addon_settings(&$a, &$s) { $template = Renderer::getMarkupTemplate('/settings.tpl', 'addon/retriever/'); $config = array('$submit' => DI::l10n()->t('Save Settings'), '$title' => DI::l10n()->t('Retriever Settings'), - '$help' => $a->getBaseUrl . '/retriever/help', + '$help' => DI::baseUrl()->get(true) . '/retriever/help', '$allow_images' => DI::config()->get('retriever', 'allow_images')); $config['$allphotos'] = array('retriever_all_photos', DI::l10n()->t('All Photos'), From b93a203740abf3b11ed48a6338272c7eca15111e Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 21 Oct 2020 16:25:51 +0100 Subject: [PATCH 45/94] Fix page assembly --- retriever/retriever.php | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index a98611eb..7c22071c 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -835,6 +835,9 @@ function retriever_content($a) { } if ($a->argv[1]) { $retriever_rule = get_retriever_rule($a->argv[1], local_user(), false); + if (!$retriever_rule) { + $retriever_rule = ['id' => 0, 'data' => ['enable' => 0, 'modurl' => '', 'pattern' => '', 'replace' => '', 'images' => 0, 'storecookies' => 0, 'cookiedata' => '', 'customxslt' => '', 'include' => '', 'exclude' => '']]; + } if (!empty($_POST["id"])) { $retriever_rule = get_retriever_rule($a->argv[1], local_user(), true); @@ -873,31 +876,31 @@ function retriever_content($a) { } $template = Renderer::getMarkupTemplate('/rule-config.tpl', 'addon/retriever/'); - $a->page['content'] .= Renderer::replaceMacros($template, array( + DI::page()['content'] .= Renderer::replaceMacros($template, array( '$enable' => array( 'retriever_enable', DI::l10n()->t('Enabled'), - array_key_exists('enable', $retriever_rule['data']) ? 
$retriever_rule['data']['enable'] : ""), + $retriever_rule['data']['enable']), '$modurl' => array( 'retriever_modurl', DI::l10n()->t('Modify URL'), - array_key_exists('modurl', $retriever_rule['data']) ? $retriever_rule['data']['modurl'] : "", + $retriever_rule['data']['modurl'], DI::l10n()->t("Modify each article's URL with regular expressions before retrieving.")), '$pattern' => array( 'retriever_pattern', DI::l10n()->t('URL Pattern'), - array_key_exists('pattern', $retriever_rule['data']) ? $retriever_rule['data']['pattern'] : "", + $retriever_rule['data']['pattern'], DI::l10n()->t('Regular expression matching part of the URL to replace')), '$replace' => array( 'retriever_replace', DI::l10n()->t('URL Replace'), - array_key_exists('replace', $retriever_rule['data']) ? $retriever_rule['data']['replace'] : "", + $retriever_rule['data']['replace'], DI::l10n()->t('Text to replace matching part of above regular expression')), '$allow_images' => DI::config()->get('retriever', 'allow_images'), '$images' => array( 'retriever_images', DI::l10n()->t('Download Images'), - array_key_exists('images', $retriever_rule['data']) ? $retriever_rule['data']['images'] : ""), + $retriever_rule['data']['images']), '$retrospective' => array( 'retriever_retrospective', DI::l10n()->t('Retrospectively Apply'), @@ -906,17 +909,17 @@ function retriever_content($a) { 'storecookies' => array( 'retriever_storecookies', DI::l10n()->t('Store cookies'), - array_key_exists('storecookies', $retriever_rule['data']) ? $retriever_rule['data']['storecookies'] : "", + $retriever_rule['data']['storecookies'], DI::l10n()->t("Preserve cookie data across fetches.")), '$cookiedata' => array( 'retriever_cookiedata', DI::l10n()->t('Cookie Data'), - array_key_exists('cookiedata', $retriever_rule['data']) ? $retriever_rule['data']['cookiedata'] : "", + $retriever_rule['data']['cookiedata'], DI::l10n()->t("Latest cookie data for this feed. 
Netscape cookie file format.")), '$customxslt' => array( 'retriever_customxslt', DI::l10n()->t('Custom XSLT'), - array_key_exists('customxslt', $retriever_rule['data']) ? $retriever_rule['data']['customxslt'] : "", + $retriever_rule['data']['customxslt'], DI::l10n()->t("When standard rules aren't enough, apply custom XSLT to the article")), '$title' => DI::l10n()->t('Retrieve Feed Content'), '$help' => DI::baseUrl()->get(true) . '/retriever/help', From 912e24030dbdcbe162e4cdefcb152dbced27d1af Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 20 Dec 2020 20:38:42 +0000 Subject: [PATCH 46/94] Remove unneeded get_app --- phototrack/phototrack.php | 1 - 1 file changed, 1 deletion(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index c67014c1..b5f39a1e 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -144,7 +144,6 @@ function phototrack_post_remote_end(&$a, &$item) { } function phototrack_notifier_end($item) { - $a = get_app(); } function phototrack_check_row($a, $table, $row) { From 1d8df0b95bdce78678d827557de1deaa1bc32faa Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 7 Feb 2021 19:37:19 +0100 Subject: [PATCH 47/94] Replace fetchUrlFull with HTTPRequest version --- retriever/retriever.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 7c22071c..0147b0d0 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -273,7 +273,7 @@ function retrieve_resource($resource) { $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); file_put_contents($cookiejar, $rule_data['cookiedata']); } - $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar); + $fetch_result = DI::httpRequest()->fetchFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar); if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) { $retriever_rule['data']['cookiedata'] = 
file_get_contents($cookiejar); DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], $retriever_rule); From e962de942523cfd231e6caa0d2a740c0769e698e Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 8 Feb 2021 19:29:45 +0100 Subject: [PATCH 48/94] Remove binary field from httpRequest --- retriever/retriever.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 0147b0d0..c99ce5f8 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -273,7 +273,7 @@ function retrieve_resource($resource) { $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); file_put_contents($cookiejar, $rule_data['cookiedata']); } - $fetch_result = DI::httpRequest()->fetchFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar); + $fetch_result = DI::httpRequest()->fetchFull($resource['url'], $redirects, '', $cookiejar); if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) { $retriever_rule['data']['cookiedata'] = file_get_contents($cookiejar); DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], $retriever_rule); From 668590cffa0b5e638dc786d9df3872fd7224af89 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 26 Apr 2021 20:11:52 +0200 Subject: [PATCH 49/94] Adapt Item methods to Post methods --- retriever/retriever.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index c99ce5f8..92442c96 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -18,6 +18,7 @@ use Friendica\Util\Network; use Friendica\Database\DBA; use Friendica\Model\ItemURI; use Friendica\Model\Item; +use Friendica\Model\Post; use Friendica\Util\DateTimeFormat; use Friendica\DI; @@ -322,7 +323,7 @@ function get_retriever_rule($contact_id, $uid, $create) { * @return 
array Item that was found, or undef if no item could be found */ function retriever_get_item($retriever_item) { - $item = Item::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid']), 'contact-id' => intval($retriever_item['contact-id'])]); + $item = Post::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid']), 'contact-id' => intval($retriever_item['contact-id'])]); if (!DBA::isResult($item)) { Logger::warning('retriever_get_item: no item found for uri ' . $retriever_item['item-uri']); return; @@ -377,7 +378,7 @@ function retriever_resource_completed($resource) { * @param int $num The number of existing items to queue for retrieval */ function apply_retrospective($retriever, $num) { - foreach (Item::selectToArray([], ['contact-id' => intval($retriever['contact-id'])], ['order' => ['received' => true], 'limit' => $num]) as $item) { + foreach (Post::selectToArray([], ['contact-id' => intval($retriever['contact-id'])], ['order' => ['received' => true], 'limit' => $num]) as $item) { Item::update(['visible' => 0], ['id' => intval($item['id'])]); foreach (DBA::selectToArray('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]) as $retriever_item) { DBA::delete('retriever_resource', ['id' => $retriever_item['resource']]); From 68f2dd3886d32dc8b699b9f666bfa48bea84ba4e Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 26 Apr 2021 20:14:54 +0200 Subject: [PATCH 50/94] another check for empty results --- phototrack/phototrack.php | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index b5f39a1e..9f6c86d1 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -253,9 +253,11 @@ function phototrack_tidy() { q('CREATE TABLE IF NOT EXISTS `phototrack-temp` (`resource-id` char(255) not null)'); q('INSERT INTO `phototrack-temp` 
SELECT DISTINCT(`resource-id`) FROM photo WHERE photo.`created` < DATE_SUB(NOW(), INTERVAL 2 MONTH)'); $rows = q('SELECT `phototrack-temp`.`resource-id` FROM `phototrack-temp` LEFT OUTER JOIN phototrack_photo_use ON (`phototrack-temp`.`resource-id` = phototrack_photo_use.`resource-id`) WHERE phototrack_photo_use.id IS NULL limit ' . /*$batch_size*/1000); - foreach ($rows as $row) { - Logger::debug('phototrack: remove photo ' . $row['resource-id']); - q('DELETE FROM photo WHERE `resource-id` = "' . $row['resource-id'] . '"'); + if (DBA::isResult($ms_item_ids)) { + foreach ($rows as $row) { + Logger::debug('phototrack: remove photo ' . $row['resource-id']); + q('DELETE FROM photo WHERE `resource-id` = "' . $row['resource-id'] . '"'); + } } q('DROP TABLE `phototrack-temp`'); Logger::info('phototrack_tidy: deleted ' . count($rows) . ' photos'); From b9db44ba5585ae4d25f12554b638bdea9e983e5b Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 26 Apr 2021 20:38:00 +0200 Subject: [PATCH 51/94] Fix a typo --- phototrack/phototrack.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index 9f6c86d1..4437443b 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -253,7 +253,7 @@ function phototrack_tidy() { q('CREATE TABLE IF NOT EXISTS `phototrack-temp` (`resource-id` char(255) not null)'); q('INSERT INTO `phototrack-temp` SELECT DISTINCT(`resource-id`) FROM photo WHERE photo.`created` < DATE_SUB(NOW(), INTERVAL 2 MONTH)'); $rows = q('SELECT `phototrack-temp`.`resource-id` FROM `phototrack-temp` LEFT OUTER JOIN phototrack_photo_use ON (`phototrack-temp`.`resource-id` = phototrack_photo_use.`resource-id`) WHERE phototrack_photo_use.id IS NULL limit ' . /*$batch_size*/1000); - if (DBA::isResult($ms_item_ids)) { + if (DBA::isResult($rows)) { foreach ($rows as $row) { Logger::debug('phototrack: remove photo ' . $row['resource-id']); q('DELETE FROM photo WHERE `resource-id` = "' . 
$row['resource-id'] . '"'); From 6199e3af0eb6dcd7d02d7100ce07e8131d5a4873 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 26 Apr 2021 20:38:48 +0200 Subject: [PATCH 52/94] fixed another obvious mistake --- phototrack/phototrack.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index 4437443b..712c0fc6 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -258,9 +258,9 @@ function phototrack_tidy() { Logger::debug('phototrack: remove photo ' . $row['resource-id']); q('DELETE FROM photo WHERE `resource-id` = "' . $row['resource-id'] . '"'); } + Logger::info('phototrack_tidy: deleted ' . count($rows) . ' photos'); } q('DROP TABLE `phototrack-temp`'); - Logger::info('phototrack_tidy: deleted ' . count($rows) . ' photos'); $rows = q('SELECT id FROM phototrack_photo_use WHERE checked < DATE_SUB(NOW(), INTERVAL 14 DAY)'); foreach ($rows as $row) { q('DELETE FROM phototrack_photo_use WHERE id = ' . $row['id']); From 37797b5b341915727ab940e56e8b0a6bc03642fc Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 26 Apr 2021 20:48:20 +0200 Subject: [PATCH 53/94] Detect an error in mailstream --- mailstream/mailstream.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mailstream/mailstream.php b/mailstream/mailstream.php index 71b123ad..578a03dd 100644 --- a/mailstream/mailstream.php +++ b/mailstream/mailstream.php @@ -322,6 +322,11 @@ function mailstream_subject(array $item): string ); return DI::l10n()->t("Friendica post"); } + $contact = $r[0]; + if (!DBA::isResult($rows)) { + Logger::error('mailstream_subject no contact for item id ' . $item['id'] . ' plink ' . $item['plink'] . ' contact id ' . $item['contact-id'] . ' uid ' . 
$item['uid']); + return DI::l10n()->t("Friendica post"); + } if ($contact['network'] === 'dfrn') { return DI::l10n()->t("Friendica post"); } From 8477a0f012b6b8c25027a67bb88880b3e549eb11 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 26 Apr 2021 20:58:06 +0200 Subject: [PATCH 54/94] fix another stupid mistake --- mailstream/mailstream.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mailstream/mailstream.php b/mailstream/mailstream.php index 578a03dd..cec63998 100644 --- a/mailstream/mailstream.php +++ b/mailstream/mailstream.php @@ -323,7 +323,7 @@ function mailstream_subject(array $item): string return DI::l10n()->t("Friendica post"); } $contact = $r[0]; - if (!DBA::isResult($rows)) { + if (!DBA::isResult($r)) { Logger::error('mailstream_subject no contact for item id ' . $item['id'] . ' plink ' . $item['plink'] . ' contact id ' . $item['contact-id'] . ' uid ' . $item['uid']); return DI::l10n()->t("Friendica post"); } From 45e42cbd9f922f1fcaf125522ea471f1be14ae3f Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 26 Apr 2021 20:59:06 +0200 Subject: [PATCH 55/94] fix another stupid mistake --- mailstream/mailstream.php | 1 + 1 file changed, 1 insertion(+) diff --git a/mailstream/mailstream.php b/mailstream/mailstream.php index cec63998..08acb53c 100644 --- a/mailstream/mailstream.php +++ b/mailstream/mailstream.php @@ -327,6 +327,7 @@ function mailstream_subject(array $item): string Logger::error('mailstream_subject no contact for item id ' . $item['id'] . ' plink ' . $item['plink'] . ' contact id ' . $item['contact-id'] . ' uid ' . 
$item['uid']); return DI::l10n()->t("Friendica post"); } + $contact = $r[0]; if ($contact['network'] === 'dfrn') { return DI::l10n()->t("Friendica post"); } From 92aecb3a1e757081b43fe4cacadac1a65328aff0 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Mon, 26 Apr 2021 21:47:22 +0200 Subject: [PATCH 56/94] error checking in retriever --- retriever/retriever.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/retriever/retriever.php b/retriever/retriever.php index 92442c96..60c9a2a2 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -241,6 +241,9 @@ function retrieve_dataurl_resource($resource) { */ function retrieve_resource($resource) { $components = parse_url($resource['url']); + if (!$components) { + Logger::warning('retrieve_resource: URL ' . $resource['url'] . ' could not be parsed'); + } if ($components['scheme'] == "data") { return retrieve_dataurl_resource($resource); } From 775047d3d94a0ea643ac9d7a917538b490e75dd0 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sat, 19 Jun 2021 19:22:37 +0200 Subject: [PATCH 57/94] sync with submitted --- mailstream/mailstream.php | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mailstream/mailstream.php b/mailstream/mailstream.php index 08acb53c..73efa9d5 100644 --- a/mailstream/mailstream.php +++ b/mailstream/mailstream.php @@ -323,11 +323,6 @@ function mailstream_subject(array $item): string return DI::l10n()->t("Friendica post"); } $contact = $r[0]; - if (!DBA::isResult($r)) { - Logger::error('mailstream_subject no contact for item id ' . $item['id'] . ' plink ' . $item['plink'] . ' contact id ' . $item['contact-id'] . ' uid ' . 
$item['uid']); - return DI::l10n()->t("Friendica post"); - } - $contact = $r[0]; if ($contact['network'] === 'dfrn') { return DI::l10n()->t("Friendica post"); } From 86ca8891898e0c13ab7d21f187d2a3bf5f91f184 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 15:43:15 +0100 Subject: [PATCH 58/94] switch to new way of executing SQL --- phototrack/phototrack.php | 30 ++++++++-------- publicise/publicise.php | 72 +++++++++++++++++++-------------------- 2 files changed, 51 insertions(+), 51 deletions(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index 712c0fc6..dcf07676 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -67,10 +67,10 @@ function phototrack_module() {} function phototrack_finished_row($table, $id) { $existing = DBA::selectFirst('phototrack_row_check', ['id'], ['table' => $table, 'row-id' => $id]); if (!is_bool($existing)) { - q("UPDATE phototrack_row_check SET checked = NOW() WHERE `table` = '$table' AND `row-id` = '$id'"); + DBA:e("UPDATE phototrack_row_check SET checked = NOW() WHERE `table` = '$table' AND `row-id` = '$id'"); } else { - q("INSERT INTO phototrack_row_check (`table`, `row-id`, `checked`) VALUES ('$table', '$id', NOW())"); + DBA:e("INSERT INTO phototrack_row_check (`table`, `row-id`, `checked`) VALUES ('$table', '$id', NOW())"); } } @@ -86,17 +86,17 @@ function phototrack_photo_use($photo, $table, $field, $id) { if (strlen($photo) != 32) { return; } - $r = q("SELECT `resource-id` FROM `photo` WHERE `resource-id` = '%s' LIMIT 1", DBA::escape($photo)); + $r = DBA:e("SELECT `resource-id` FROM `photo` WHERE `resource-id` = '%s' LIMIT 1", DBA::escape($photo)); if (!count($r)) { return; } $rid = $r[0]['resource-id']; - $existing = q("SELECT id FROM phototrack_photo_use WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); + $existing = DBA:e("SELECT id FROM phototrack_photo_use WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` 
= '$field' AND `row-id` = '$id'"); if (count($existing)) { - q("UPDATE phototrack_photo_use SET checked = NOW() WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); + DBA:e("UPDATE phototrack_photo_use SET checked = NOW() WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); } else { - q("INSERT INTO phototrack_photo_use (`resource-id`, `table`, `field`, `row-id`, `checked`) VALUES ('$rid', '$table', '$field', '$id', NOW())"); + DBA:e("INSERT INTO phototrack_photo_use (`resource-id`, `table`, `field`, `row-id`, `checked`) VALUES ('$rid', '$table', '$field', '$id', NOW())"); } } @@ -197,11 +197,11 @@ function phototrack_batch_size() { function phototrack_search_table($a, $table) { $batch_size = phototrack_batch_size(); - $rows = q("SELECT `$table`.* FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) ) ORDER BY phototrack_row_check.checked LIMIT $batch_size"); + $rows = DBA:e("SELECT `$table`.* FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) ) ORDER BY phototrack_row_check.checked LIMIT $batch_size"); foreach ($rows as $row) { phototrack_check_row($a, $table, $row); } - $r = q("SELECT COUNT(*) FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) )"); + $r = DBA:e("SELECT COUNT(*) FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( 
phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) )"); $remaining = $r[0]['COUNT(*)']; Logger::info('phototrack: searched ' . count($rows) . ' rows in table ' . $table . ', ' . $remaining . ' still remaining to search'); return $remaining; @@ -250,20 +250,20 @@ function phototrack_cron($a, $b) { function phototrack_tidy() { $batch_size = phototrack_batch_size(); - q('CREATE TABLE IF NOT EXISTS `phototrack-temp` (`resource-id` char(255) not null)'); - q('INSERT INTO `phototrack-temp` SELECT DISTINCT(`resource-id`) FROM photo WHERE photo.`created` < DATE_SUB(NOW(), INTERVAL 2 MONTH)'); - $rows = q('SELECT `phototrack-temp`.`resource-id` FROM `phototrack-temp` LEFT OUTER JOIN phototrack_photo_use ON (`phototrack-temp`.`resource-id` = phototrack_photo_use.`resource-id`) WHERE phototrack_photo_use.id IS NULL limit ' . /*$batch_size*/1000); + DBA:e('CREATE TABLE IF NOT EXISTS `phototrack-temp` (`resource-id` char(255) not null)'); + DBA:e('INSERT INTO `phototrack-temp` SELECT DISTINCT(`resource-id`) FROM photo WHERE photo.`created` < DATE_SUB(NOW(), INTERVAL 2 MONTH)'); + $rows = DBA:e('SELECT `phototrack-temp`.`resource-id` FROM `phototrack-temp` LEFT OUTER JOIN phototrack_photo_use ON (`phototrack-temp`.`resource-id` = phototrack_photo_use.`resource-id`) WHERE phototrack_photo_use.id IS NULL limit ' . /*$batch_size*/1000); if (DBA::isResult($rows)) { foreach ($rows as $row) { Logger::debug('phototrack: remove photo ' . $row['resource-id']); - q('DELETE FROM photo WHERE `resource-id` = "' . $row['resource-id'] . '"'); + DBA:e('DELETE FROM photo WHERE `resource-id` = "' . $row['resource-id'] . '"'); } Logger::info('phototrack_tidy: deleted ' . count($rows) . 
' photos'); } - q('DROP TABLE `phototrack-temp`'); - $rows = q('SELECT id FROM phototrack_photo_use WHERE checked < DATE_SUB(NOW(), INTERVAL 14 DAY)'); + DBA:e('DROP TABLE `phototrack-temp`'); + $rows = DBA:e('SELECT id FROM phototrack_photo_use WHERE checked < DATE_SUB(NOW(), INTERVAL 14 DAY)'); foreach ($rows as $row) { - q('DELETE FROM phototrack_photo_use WHERE id = ' . $row['id']); + DBA:e('DELETE FROM phototrack_photo_use WHERE id = ' . $row['id']); } Logger::info('phototrack_tidy: deleted ' . count($rows) . ' phototrack_photo_use rows'); } diff --git a/publicise/publicise.php b/publicise/publicise.php index a746d8af..98af1405 100644 --- a/publicise/publicise.php +++ b/publicise/publicise.php @@ -28,11 +28,11 @@ SELECT * OR (`reason` = 'publicise') ORDER BY `contact`.`name` EOF; - return q($query, intval(local_user())); + return DBA:e($query, intval(local_user())); } function publicise_get_user($uid) { - $r = q('SELECT * FROM `user` WHERE `uid` = %d', intval($uid)); + $r = DBA:e('SELECT * FROM `user` WHERE `uid` = %d', intval($uid)); if (count($r) != 1) { Logger::warning('Publicise: unexpected number of results for uid ' . $uid); } @@ -52,7 +52,7 @@ function publicise_addon_admin(&$a,&$o) { $comments = 1; $url = $v['url']; if ($enabled) { - $r = q('SELECT * FROM `user` WHERE `uid` = %d', intval($v['uid'])); + $r = DBA:e('SELECT * FROM `user` WHERE `uid` = %d', intval($v['uid'])); $expire = $r[0]['expire']; $url = $a->get_baseurl() . '/profile/' . $v['nick']; if ($r[0]['page-flags'] == PAGE_SOAPBOX) { @@ -138,7 +138,7 @@ function publicise_create_user($owner, $contact) { 'expire' => publicise_make_int($expire), ); Logger::debug('Publicise: creating user ' . print_r($newuser, true)); - $r = q("INSERT INTO `user` (`" + $r = DBA:e("INSERT INTO `user` (`" . implode("`, `", array_keys($newuser)) . "`) VALUES (" . 
implode(", ", array_values($newuser)) @@ -147,7 +147,7 @@ function publicise_create_user($owner, $contact) { Logger::warning('Publicise: create user failed'); return; } - $r = q('SELECT * FROM `user` WHERE `guid` = "%s"', DBA::escape($guid)); + $r = DBA:e('SELECT * FROM `user` WHERE `guid` = "%s"', DBA::escape($guid)); if (count($r) != 1) { Logger::warning('Publicise: unexpected number of uids returned'); return; @@ -179,21 +179,21 @@ function publicise_create_self_contact($a, $contact, $uid) { 'avatar-date' => publicise_make_string(datetime_convert()), 'closeness' => publicise_make_int(0), ); - $existing = q("SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1", intval($uid)); + $existing = DBA:e("SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1", intval($uid)); if (count($existing)) { $newcontact = $existing[0]; Logger::debug('Publicise: self contact already exists for user ' . $uid . ' id ' . $newcontact['id']); } else { Logger::debug('Publicise: create contact ' . print_r($newcontact, true)); - q("INSERT INTO `contact` (`" + DBA:e("INSERT INTO `contact` (`" . implode("`, `", array_keys($newcontact)) . "`) VALUES (" . implode(", ", array_values($newcontact)) . ")" ); - $results = q("SELECT `id` FROM `contact` WHERE `uid` = %d AND `self` = 1", intval($uid)); + $results = DBA:e("SELECT `id` FROM `contact` WHERE `uid` = %d AND `self` = 1", intval($uid)); if (count($results) != 1) { Logger::warning('Publicise: create self contact failed, will delete uid ' . $uid); - $r = q("DELETE FROM `user` WHERE `uid` = %d", intval($uid)); + $r = DBA:e("DELETE FROM `user` WHERE `uid` = %d", intval($uid)); return; } $newcontact = $results[0]; @@ -216,7 +216,7 @@ function publicise_create_profile($contact, $uid) { 'net-publish' => publicise_make_int(1), ); Logger::debug('Publicise: create profile ' . print_r($newprofile, true)); - $r = q("INSERT INTO `profile` (`" + $r = DBA:e("INSERT INTO `profile` (`" . implode("`, `", array_keys($newprofile)) . "`) VALUES (" . 
implode(", ", array_values($newprofile)) @@ -224,7 +224,7 @@ function publicise_create_profile($contact, $uid) { if (!$r) { Logger::warning('Publicise: create profile failed'); } - $newprofile = q('SELECT `id` FROM `profile` WHERE `uid` = %d AND `is-default` = 1', intval($uid)); + $newprofile = DBA:e('SELECT `id` FROM `profile` WHERE `uid` = %d AND `is-default` = 1', intval($uid)); if (count($newprofile) != 1) { Logger::warning('Publicise: create profile produced unexpected number of results'); return; @@ -243,15 +243,15 @@ function publicise_set_up_user($a, $contact, $owner) { if (!$self_contact) { notice(sprintf(t("Failed to create self contact for user \"%s\""), $contact['name']) . EOL); Logger::warning("Publicise: unable to create self contact, deleting user " . $user['uid']); - q('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid'])); + DBA:e('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid'])); return; } $profile = publicise_create_profile($contact, $user['uid']); if (!$profile) { notice(sprintf(t("Failed to create profile for user \"%s\""), $contact['name']) . EOL); Logger::warning("Publicise: unable to create profile, deleting user $uid contact $self_contact"); - q('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid'])); - q('DELETE FROM `contact` WHERE `id` = %d', intval($self_contact)); + DBA:e('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid'])); + DBA:e('DELETE FROM `contact` WHERE `id` = %d', intval($self_contact)); return; } return $user; @@ -267,13 +267,13 @@ function publicise($a, &$contact, &$owner) { // Check if we're changing our mind about a feed we earlier depublicised Logger::info('@@@ Publicise: ' . 'SELECT * FROM `user` WHERE `account_expires_on` != "0000-00-00 00:00:00" AND `nickname` = "' . $contact['nick'] . '" AND `email` = "' . $owner['email'] . '" AND `page-flags` in (' . intval(PAGE_COMMUNITY) . ', ' . intval(PAGE_SOAPBOX) . 
')'); - $existing = q('SELECT * FROM `user` WHERE `account_expires_on` != "0000-00-00 00:00:00" AND `nickname` = "%s" AND `email` = "%s" AND `page-flags` in (%d, %d)', + $existing = DBA:e('SELECT * FROM `user` WHERE `account_expires_on` != "0000-00-00 00:00:00" AND `nickname` = "%s" AND `email` = "%s" AND `page-flags` in (%d, %d)', DBA::escape($contact['nick']), DBA::escape($owner['email']), intval(PAGE_COMMUNITY), intval(PAGE_SOAPBOX)); if (count($existing) == 1) { Logger::info('@@@ Publicise: there is existing'); $owner = $existing[0]; - q('UPDATE `user` SET `account_expires_on` = "0000-00-00 00:00:00", `account_removed` = 0, `account_expired` = 0 WHERE `uid` = %d', intval($owner['uid'])); - q('UPDATE `profile` SET `publish` = 1, `net-publish` = 1 WHERE `uid` = %d AND `is-default` = 1', intval($owner['uid'])); + DBA:e('UPDATE `user` SET `account_expires_on` = "0000-00-00 00:00:00", `account_removed` = 0, `account_expired` = 0 WHERE `uid` = %d', intval($owner['uid'])); + DBA:e('UPDATE `profile` SET `publish` = 1, `net-publish` = 1 WHERE `uid` = %d AND `is-default` = 1', intval($owner['uid'])); Logger::debug('Publicise: recycled previous user ' . $owner['uid']); } else { @@ -286,19 +286,19 @@ function publicise($a, &$contact, &$owner) { } Logger::info('Publicise: new contact user is ' . $owner['uid']); - $r = q("UPDATE `contact` SET `uid` = %d, `reason` = 'publicise', `hidden` = 1 WHERE id = %d", intval($owner['uid']), intval($contact['id'])); + $r = DBA:e("UPDATE `contact` SET `uid` = %d, `reason` = 'publicise', `hidden` = 1 WHERE id = %d", intval($owner['uid']), intval($contact['id'])); if (!$r) { Logger::warning('Publicise: update contact failed, user is probably in a bad state ' . 
$user['uid']); } $contact['uid'] = $owner['uid']; $contact['reason'] = 'publicise'; $contact['hidden'] = 1; - $r = q("UPDATE `item` SET `uid` = %d, type = 'wall', wall = 1, private = 0 WHERE `contact-id` = %d", + $r = DBA:e("UPDATE `item` SET `uid` = %d, type = 'wall', wall = 1, private = 0 WHERE `contact-id` = %d", intval($owner['uid']), intval($contact['id'])); Logger::debug('Publicise: moved items from contact ' . $contact['id'] . ' to uid ' . $owner['uid']); // Update the retriever config - $r = q("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d", + $r = DBA:e("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d", intval($owner['uid']), intval($contact['id'])); info(sprintf(t("Moved feed \"%s\" to dedicated account"), $contact['name']) . EOL); @@ -306,7 +306,7 @@ function publicise($a, &$contact, &$owner) { } function publicise_self_contact($uid) { - $r = q('SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1', intval($uid)); + $r = DBA:e('SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1', intval($uid)); if (count($r) != 1) { Logger::warning('Publicise: unexpected number of self contacts for user ' . $uid); return; @@ -330,7 +330,7 @@ function depublicise($a, $contact, $user) { // If the local_user() is subscribed to the feed, take ownership // of the feed and all its items and photos. Otherwise they will // be deleted when the account expires. - $r = q('SELECT * FROM `contact` WHERE `uid` = %d AND `url` = "%s"', + $r = DBA:e('SELECT * FROM `contact` WHERE `uid` = %d AND `url` = "%s"', intval(local_user()), DBA::escape($self_contact['url'])); if (count($r)) { // Delete the contact to the feed user and any @@ -338,32 +338,32 @@ function depublicise($a, $contact, $user) { // which will be brought back into the local_user's feed along // with the feed contact itself. 
foreach ($r as $my_contact) { - q('DELETE FROM `item` WHERE `contact-id` = %d', intval($my_contact['id'])); - q('DELETE FROM `contact` WHERE `id` = %d', intval($my_contact['id'])); + DBA:e('DELETE FROM `item` WHERE `contact-id` = %d', intval($my_contact['id'])); + DBA:e('DELETE FROM `contact` WHERE `id` = %d', intval($my_contact['id'])); } // Move the feed contact to local_user. Existing items stay // attached to the original feed contact, but must have their uid // updated. Also update the fields we scribbled over in // publicise_post_remote_hook. - q('UPDATE `contact` SET `uid` = %d, `reason` = "", hidden = 0 WHERE id = %d', + DBA:e('UPDATE `contact` SET `uid` = %d, `reason` = "", hidden = 0 WHERE id = %d', intval(local_user()), intval($contact['id'])); - q('UPDATE `item` SET `uid` = %d, `wall` = 0, `type` = "remote", `private` = 2 WHERE `contact-id` = %d', + DBA:e('UPDATE `item` SET `uid` = %d, `wall` = 0, `type` = "remote", `private` = 2 WHERE `contact-id` = %d', intval(local_user()), intval($contact['id'])); // Take ownership of any photos created by the feed user - q('UPDATE `photo` SET `uid` = %d WHERE `uid` = %d', + DBA:e('UPDATE `photo` SET `uid` = %d WHERE `uid` = %d', intval(local_user()), intval($user['uid'])); // Update the retriever config - $r = q("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d", + $r = DBA:e("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d", intval($owner['uid']), intval($contact['id'])); } // Set the account to removed and expired right now. 
It will be cleaned up by cron after 3 days, giving a chance to change your mind - q('UPDATE `user` SET `account_removed` = 1, `account_expired` = 1, `account_expires_on` = UTC_TIMESTAMP() WHERE `uid` = %d', + DBA:e('UPDATE `user` SET `account_removed` = 1, `account_expired` = 1, `account_expires_on` = UTC_TIMESTAMP() WHERE `uid` = %d', intval($user['uid'])); - q('UPDATE `profile` SET `publish` = 0, `net-publish` = 0 WHERE `uid` = %d AND `is-default` = 1', intval($user['uid'])); + DBA:e('UPDATE `profile` SET `publish` = 0, `net-publish` = 0 WHERE `uid` = %d AND `is-default` = 1', intval($user['uid'])); info(sprintf(t("Removed dedicated account for feed \"%s\""), $contact['name']) . EOL); } @@ -393,22 +393,22 @@ function publicise_addon_admin_post ($a) { } } if ($_POST['publicise-expire-' . $contact['id']] != $user['expire']) { - q('UPDATE `user` SET `expire` = %d WHERE `uid` = %d', + DBA:e('UPDATE `user` SET `expire` = %d WHERE `uid` = %d', intval($_POST['publicise-expire-' . $contact['id']]), intval($user['uid'])); } if ($_POST['publicise-comments-' . 
$contact['id']]) { if ($user['page-flags'] != PAGE_COMMUNITY) { - q('UPDATE `user` SET `page-flags` = %d WHERE `uid` = %d', + DBA:e('UPDATE `user` SET `page-flags` = %d WHERE `uid` = %d', intval(PAGE_COMMUNITY), intval($user['uid'])); - q('UPDATE `contact` SET `rel` = %d WHERE `uid` = %d AND `network` = "dfrn"', + DBA:e('UPDATE `contact` SET `rel` = %d WHERE `uid` = %d AND `network` = "dfrn"', intval(CONTACT_IS_SHARING), intval($user['uid'])); } } else { if ($user['page-flags'] != PAGE_SOAPBOX) { - q('UPDATE `user` SET `page-flags` = %d WHERE `uid` = %d', + DBA:e('UPDATE `user` SET `page-flags` = %d WHERE `uid` = %d', intval(PAGE_SOAPBOX), intval($user['uid'])); - q('UPDATE `contact` SET `rel` = %d WHERE `uid` = %d AND `network` = "dfrn"', + DBA:e('UPDATE `contact` SET `rel` = %d WHERE `uid` = %d AND `network` = "dfrn"', intval(CONTACT_IS_FOLLOWER), intval($user['uid'])); } } @@ -417,7 +417,7 @@ function publicise_addon_admin_post ($a) { } function publicise_post_remote_hook(&$a, &$item) { - $r1 = q("SELECT `uid` FROM `contact` WHERE `id` = %d AND `reason` = 'publicise'", intval($item['contact-id'])); + $r1 = DBA:e("SELECT `uid` FROM `contact` WHERE `id` = %d AND `reason` = 'publicise'", intval($item['contact-id'])); if (!$r1) { return; } From 56c8ad70ae0b57204d46dd3257472460ec0e2454 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 16:15:59 +0100 Subject: [PATCH 59/94] switch to new way of executing SQL --- phototrack/phototrack.php | 30 ++++++++-------- publicise/publicise.php | 72 +++++++++++++++++++-------------------- 2 files changed, 51 insertions(+), 51 deletions(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index dcf07676..f8a3f704 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -67,10 +67,10 @@ function phototrack_module() {} function phototrack_finished_row($table, $id) { $existing = DBA::selectFirst('phototrack_row_check', ['id'], ['table' => $table, 'row-id' => $id]); if 
(!is_bool($existing)) { - DBA:e("UPDATE phototrack_row_check SET checked = NOW() WHERE `table` = '$table' AND `row-id` = '$id'"); + DBA::e("UPDATE phototrack_row_check SET checked = NOW() WHERE `table` = '$table' AND `row-id` = '$id'"); } else { - DBA:e("INSERT INTO phototrack_row_check (`table`, `row-id`, `checked`) VALUES ('$table', '$id', NOW())"); + DBA::e("INSERT INTO phototrack_row_check (`table`, `row-id`, `checked`) VALUES ('$table', '$id', NOW())"); } } @@ -86,17 +86,17 @@ function phototrack_photo_use($photo, $table, $field, $id) { if (strlen($photo) != 32) { return; } - $r = DBA:e("SELECT `resource-id` FROM `photo` WHERE `resource-id` = '%s' LIMIT 1", DBA::escape($photo)); + $r = DBA::e("SELECT `resource-id` FROM `photo` WHERE `resource-id` = '%s' LIMIT 1", DBA::escape($photo)); if (!count($r)) { return; } $rid = $r[0]['resource-id']; - $existing = DBA:e("SELECT id FROM phototrack_photo_use WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); + $existing = DBA::e("SELECT id FROM phototrack_photo_use WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); if (count($existing)) { - DBA:e("UPDATE phototrack_photo_use SET checked = NOW() WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); + DBA::e("UPDATE phototrack_photo_use SET checked = NOW() WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); } else { - DBA:e("INSERT INTO phototrack_photo_use (`resource-id`, `table`, `field`, `row-id`, `checked`) VALUES ('$rid', '$table', '$field', '$id', NOW())"); + DBA::e("INSERT INTO phototrack_photo_use (`resource-id`, `table`, `field`, `row-id`, `checked`) VALUES ('$rid', '$table', '$field', '$id', NOW())"); } } @@ -197,11 +197,11 @@ function phototrack_batch_size() { function phototrack_search_table($a, $table) { $batch_size = phototrack_batch_size(); - $rows = DBA:e("SELECT `$table`.* 
FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) ) ORDER BY phototrack_row_check.checked LIMIT $batch_size"); + $rows = DBA::e("SELECT `$table`.* FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) ) ORDER BY phototrack_row_check.checked LIMIT $batch_size"); foreach ($rows as $row) { phototrack_check_row($a, $table, $row); } - $r = DBA:e("SELECT COUNT(*) FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) )"); + $r = DBA::e("SELECT COUNT(*) FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) )"); $remaining = $r[0]['COUNT(*)']; Logger::info('phototrack: searched ' . count($rows) . ' rows in table ' . $table . ', ' . $remaining . 
' still remaining to search'); return $remaining; @@ -250,20 +250,20 @@ function phototrack_cron($a, $b) { function phototrack_tidy() { $batch_size = phototrack_batch_size(); - DBA:e('CREATE TABLE IF NOT EXISTS `phototrack-temp` (`resource-id` char(255) not null)'); - DBA:e('INSERT INTO `phototrack-temp` SELECT DISTINCT(`resource-id`) FROM photo WHERE photo.`created` < DATE_SUB(NOW(), INTERVAL 2 MONTH)'); - $rows = DBA:e('SELECT `phototrack-temp`.`resource-id` FROM `phototrack-temp` LEFT OUTER JOIN phototrack_photo_use ON (`phototrack-temp`.`resource-id` = phototrack_photo_use.`resource-id`) WHERE phototrack_photo_use.id IS NULL limit ' . /*$batch_size*/1000); + DBA::e('CREATE TABLE IF NOT EXISTS `phototrack-temp` (`resource-id` char(255) not null)'); + DBA::e('INSERT INTO `phototrack-temp` SELECT DISTINCT(`resource-id`) FROM photo WHERE photo.`created` < DATE_SUB(NOW(), INTERVAL 2 MONTH)'); + $rows = DBA::e('SELECT `phototrack-temp`.`resource-id` FROM `phototrack-temp` LEFT OUTER JOIN phototrack_photo_use ON (`phototrack-temp`.`resource-id` = phototrack_photo_use.`resource-id`) WHERE phototrack_photo_use.id IS NULL limit ' . /*$batch_size*/1000); if (DBA::isResult($rows)) { foreach ($rows as $row) { Logger::debug('phototrack: remove photo ' . $row['resource-id']); - DBA:e('DELETE FROM photo WHERE `resource-id` = "' . $row['resource-id'] . '"'); + DBA::e('DELETE FROM photo WHERE `resource-id` = "' . $row['resource-id'] . '"'); } Logger::info('phototrack_tidy: deleted ' . count($rows) . ' photos'); } - DBA:e('DROP TABLE `phototrack-temp`'); - $rows = DBA:e('SELECT id FROM phototrack_photo_use WHERE checked < DATE_SUB(NOW(), INTERVAL 14 DAY)'); + DBA::e('DROP TABLE `phototrack-temp`'); + $rows = DBA::e('SELECT id FROM phototrack_photo_use WHERE checked < DATE_SUB(NOW(), INTERVAL 14 DAY)'); foreach ($rows as $row) { - DBA:e('DELETE FROM phototrack_photo_use WHERE id = ' . $row['id']); + DBA::e( 'DELETE FROM phototrack_photo_use WHERE id = ' . 
$row['id']); } Logger::info('phototrack_tidy: deleted ' . count($rows) . ' phototrack_photo_use rows'); } diff --git a/publicise/publicise.php b/publicise/publicise.php index 98af1405..a5dc2807 100644 --- a/publicise/publicise.php +++ b/publicise/publicise.php @@ -28,11 +28,11 @@ SELECT * OR (`reason` = 'publicise') ORDER BY `contact`.`name` EOF; - return DBA:e($query, intval(local_user())); + return DBA::e($query, intval(local_user())); } function publicise_get_user($uid) { - $r = DBA:e('SELECT * FROM `user` WHERE `uid` = %d', intval($uid)); + $r = DBA::e('SELECT * FROM `user` WHERE `uid` = %d', intval($uid)); if (count($r) != 1) { Logger::warning('Publicise: unexpected number of results for uid ' . $uid); } @@ -52,7 +52,7 @@ function publicise_addon_admin(&$a,&$o) { $comments = 1; $url = $v['url']; if ($enabled) { - $r = DBA:e('SELECT * FROM `user` WHERE `uid` = %d', intval($v['uid'])); + $r = DBA::e('SELECT * FROM `user` WHERE `uid` = %d', intval($v['uid'])); $expire = $r[0]['expire']; $url = $a->get_baseurl() . '/profile/' . $v['nick']; if ($r[0]['page-flags'] == PAGE_SOAPBOX) { @@ -138,7 +138,7 @@ function publicise_create_user($owner, $contact) { 'expire' => publicise_make_int($expire), ); Logger::debug('Publicise: creating user ' . print_r($newuser, true)); - $r = DBA:e("INSERT INTO `user` (`" + $r = DBA::e("INSERT INTO `user` (`" . implode("`, `", array_keys($newuser)) . "`) VALUES (" . 
implode(", ", array_values($newuser)) @@ -147,7 +147,7 @@ function publicise_create_user($owner, $contact) { Logger::warning('Publicise: create user failed'); return; } - $r = DBA:e('SELECT * FROM `user` WHERE `guid` = "%s"', DBA::escape($guid)); + $r = DBA::e('SELECT * FROM `user` WHERE `guid` = "%s"', DBA::escape($guid)); if (count($r) != 1) { Logger::warning('Publicise: unexpected number of uids returned'); return; @@ -179,21 +179,21 @@ function publicise_create_self_contact($a, $contact, $uid) { 'avatar-date' => publicise_make_string(datetime_convert()), 'closeness' => publicise_make_int(0), ); - $existing = DBA:e("SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1", intval($uid)); + $existing = DBA::e("SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1", intval($uid)); if (count($existing)) { $newcontact = $existing[0]; Logger::debug('Publicise: self contact already exists for user ' . $uid . ' id ' . $newcontact['id']); } else { Logger::debug('Publicise: create contact ' . print_r($newcontact, true)); - DBA:e("INSERT INTO `contact` (`" + DBA::e("INSERT INTO `contact` (`" . implode("`, `", array_keys($newcontact)) . "`) VALUES (" . implode(", ", array_values($newcontact)) . ")" ); - $results = DBA:e("SELECT `id` FROM `contact` WHERE `uid` = %d AND `self` = 1", intval($uid)); + $results = DBA::e("SELECT `id` FROM `contact` WHERE `uid` = %d AND `self` = 1", intval($uid)); if (count($results) != 1) { Logger::warning('Publicise: create self contact failed, will delete uid ' . $uid); - $r = DBA:e("DELETE FROM `user` WHERE `uid` = %d", intval($uid)); + $r = DBA::e("DELETE FROM `user` WHERE `uid` = %d", intval($uid)); return; } $newcontact = $results[0]; @@ -216,7 +216,7 @@ function publicise_create_profile($contact, $uid) { 'net-publish' => publicise_make_int(1), ); Logger::debug('Publicise: create profile ' . print_r($newprofile, true)); - $r = DBA:e("INSERT INTO `profile` (`" + $r = DBA::e("INSERT INTO `profile` (`" . 
implode("`, `", array_keys($newprofile)) . "`) VALUES (" . implode(", ", array_values($newprofile)) @@ -224,7 +224,7 @@ function publicise_create_profile($contact, $uid) { if (!$r) { Logger::warning('Publicise: create profile failed'); } - $newprofile = DBA:e('SELECT `id` FROM `profile` WHERE `uid` = %d AND `is-default` = 1', intval($uid)); + $newprofile = DBA::e('SELECT `id` FROM `profile` WHERE `uid` = %d AND `is-default` = 1', intval($uid)); if (count($newprofile) != 1) { Logger::warning('Publicise: create profile produced unexpected number of results'); return; @@ -243,15 +243,15 @@ function publicise_set_up_user($a, $contact, $owner) { if (!$self_contact) { notice(sprintf(t("Failed to create self contact for user \"%s\""), $contact['name']) . EOL); Logger::warning("Publicise: unable to create self contact, deleting user " . $user['uid']); - DBA:e('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid'])); + DBA::e('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid'])); return; } $profile = publicise_create_profile($contact, $user['uid']); if (!$profile) { notice(sprintf(t("Failed to create profile for user \"%s\""), $contact['name']) . EOL); Logger::warning("Publicise: unable to create profile, deleting user $uid contact $self_contact"); - DBA:e('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid'])); - DBA:e('DELETE FROM `contact` WHERE `id` = %d', intval($self_contact)); + DBA::e('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid'])); + DBA::e('DELETE FROM `contact` WHERE `id` = %d', intval($self_contact)); return; } return $user; @@ -267,13 +267,13 @@ function publicise($a, &$contact, &$owner) { // Check if we're changing our mind about a feed we earlier depublicised Logger::info('@@@ Publicise: ' . 'SELECT * FROM `user` WHERE `account_expires_on` != "0000-00-00 00:00:00" AND `nickname` = "' . $contact['nick'] . '" AND `email` = "' . $owner['email'] . '" AND `page-flags` in (' . intval(PAGE_COMMUNITY) . ', ' . intval(PAGE_SOAPBOX) . 
')'); - $existing = DBA:e('SELECT * FROM `user` WHERE `account_expires_on` != "0000-00-00 00:00:00" AND `nickname` = "%s" AND `email` = "%s" AND `page-flags` in (%d, %d)', + $existing = DBA::e('SELECT * FROM `user` WHERE `account_expires_on` != "0000-00-00 00:00:00" AND `nickname` = "%s" AND `email` = "%s" AND `page-flags` in (%d, %d)', DBA::escape($contact['nick']), DBA::escape($owner['email']), intval(PAGE_COMMUNITY), intval(PAGE_SOAPBOX)); if (count($existing) == 1) { Logger::info('@@@ Publicise: there is existing'); $owner = $existing[0]; - DBA:e('UPDATE `user` SET `account_expires_on` = "0000-00-00 00:00:00", `account_removed` = 0, `account_expired` = 0 WHERE `uid` = %d', intval($owner['uid'])); - DBA:e('UPDATE `profile` SET `publish` = 1, `net-publish` = 1 WHERE `uid` = %d AND `is-default` = 1', intval($owner['uid'])); + DBA::e('UPDATE `user` SET `account_expires_on` = "0000-00-00 00:00:00", `account_removed` = 0, `account_expired` = 0 WHERE `uid` = %d', intval($owner['uid'])); + DBA::e('UPDATE `profile` SET `publish` = 1, `net-publish` = 1 WHERE `uid` = %d AND `is-default` = 1', intval($owner['uid'])); Logger::debug('Publicise: recycled previous user ' . $owner['uid']); } else { @@ -286,19 +286,19 @@ function publicise($a, &$contact, &$owner) { } Logger::info('Publicise: new contact user is ' . $owner['uid']); - $r = DBA:e("UPDATE `contact` SET `uid` = %d, `reason` = 'publicise', `hidden` = 1 WHERE id = %d", intval($owner['uid']), intval($contact['id'])); + $r = DBA::e("UPDATE `contact` SET `uid` = %d, `reason` = 'publicise', `hidden` = 1 WHERE id = %d", intval($owner['uid']), intval($contact['id'])); if (!$r) { Logger::warning('Publicise: update contact failed, user is probably in a bad state ' . 
$user['uid']); } $contact['uid'] = $owner['uid']; $contact['reason'] = 'publicise'; $contact['hidden'] = 1; - $r = DBA:e("UPDATE `item` SET `uid` = %d, type = 'wall', wall = 1, private = 0 WHERE `contact-id` = %d", + $r = DBA::e("UPDATE `item` SET `uid` = %d, type = 'wall', wall = 1, private = 0 WHERE `contact-id` = %d", intval($owner['uid']), intval($contact['id'])); Logger::debug('Publicise: moved items from contact ' . $contact['id'] . ' to uid ' . $owner['uid']); // Update the retriever config - $r = DBA:e("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d", + $r = DBA::e("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d", intval($owner['uid']), intval($contact['id'])); info(sprintf(t("Moved feed \"%s\" to dedicated account"), $contact['name']) . EOL); @@ -306,7 +306,7 @@ function publicise($a, &$contact, &$owner) { } function publicise_self_contact($uid) { - $r = DBA:e('SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1', intval($uid)); + $r = DBA::e('SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1', intval($uid)); if (count($r) != 1) { Logger::warning('Publicise: unexpected number of self contacts for user ' . $uid); return; @@ -330,7 +330,7 @@ function depublicise($a, $contact, $user) { // If the local_user() is subscribed to the feed, take ownership // of the feed and all its items and photos. Otherwise they will // be deleted when the account expires. - $r = DBA:e('SELECT * FROM `contact` WHERE `uid` = %d AND `url` = "%s"', + $r = DBA::e('SELECT * FROM `contact` WHERE `uid` = %d AND `url` = "%s"', intval(local_user()), DBA::escape($self_contact['url'])); if (count($r)) { // Delete the contact to the feed user and any @@ -338,32 +338,32 @@ function depublicise($a, $contact, $user) { // which will be brought back into the local_user's feed along // with the feed contact itself. 
foreach ($r as $my_contact) { - DBA:e('DELETE FROM `item` WHERE `contact-id` = %d', intval($my_contact['id'])); - DBA:e('DELETE FROM `contact` WHERE `id` = %d', intval($my_contact['id'])); + DBA::e('DELETE FROM `item` WHERE `contact-id` = %d', intval($my_contact['id'])); + DBA::e('DELETE FROM `contact` WHERE `id` = %d', intval($my_contact['id'])); } // Move the feed contact to local_user. Existing items stay // attached to the original feed contact, but must have their uid // updated. Also update the fields we scribbled over in // publicise_post_remote_hook. - DBA:e('UPDATE `contact` SET `uid` = %d, `reason` = "", hidden = 0 WHERE id = %d', + DBA::e('UPDATE `contact` SET `uid` = %d, `reason` = "", hidden = 0 WHERE id = %d', intval(local_user()), intval($contact['id'])); - DBA:e('UPDATE `item` SET `uid` = %d, `wall` = 0, `type` = "remote", `private` = 2 WHERE `contact-id` = %d', + DBA::e('UPDATE `item` SET `uid` = %d, `wall` = 0, `type` = "remote", `private` = 2 WHERE `contact-id` = %d', intval(local_user()), intval($contact['id'])); // Take ownership of any photos created by the feed user - DBA:e('UPDATE `photo` SET `uid` = %d WHERE `uid` = %d', + DBA::e('UPDATE `photo` SET `uid` = %d WHERE `uid` = %d', intval(local_user()), intval($user['uid'])); // Update the retriever config - $r = DBA:e("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d", + $r = DBA::e("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d", intval($owner['uid']), intval($contact['id'])); } // Set the account to removed and expired right now. 
It will be cleaned up by cron after 3 days, giving a chance to change your mind - DBA:e('UPDATE `user` SET `account_removed` = 1, `account_expired` = 1, `account_expires_on` = UTC_TIMESTAMP() WHERE `uid` = %d', + DBA::e('UPDATE `user` SET `account_removed` = 1, `account_expired` = 1, `account_expires_on` = UTC_TIMESTAMP() WHERE `uid` = %d', intval($user['uid'])); - DBA:e('UPDATE `profile` SET `publish` = 0, `net-publish` = 0 WHERE `uid` = %d AND `is-default` = 1', intval($user['uid'])); + DBA::e('UPDATE `profile` SET `publish` = 0, `net-publish` = 0 WHERE `uid` = %d AND `is-default` = 1', intval($user['uid'])); info(sprintf(t("Removed dedicated account for feed \"%s\""), $contact['name']) . EOL); } @@ -393,22 +393,22 @@ function publicise_addon_admin_post ($a) { } } if ($_POST['publicise-expire-' . $contact['id']] != $user['expire']) { - DBA:e('UPDATE `user` SET `expire` = %d WHERE `uid` = %d', + DBA::e('UPDATE `user` SET `expire` = %d WHERE `uid` = %d', intval($_POST['publicise-expire-' . $contact['id']]), intval($user['uid'])); } if ($_POST['publicise-comments-' . 
$contact['id']]) { if ($user['page-flags'] != PAGE_COMMUNITY) { - DBA:e('UPDATE `user` SET `page-flags` = %d WHERE `uid` = %d', + DBA::e('UPDATE `user` SET `page-flags` = %d WHERE `uid` = %d', intval(PAGE_COMMUNITY), intval($user['uid'])); - DBA:e('UPDATE `contact` SET `rel` = %d WHERE `uid` = %d AND `network` = "dfrn"', + DBA::e('UPDATE `contact` SET `rel` = %d WHERE `uid` = %d AND `network` = "dfrn"', intval(CONTACT_IS_SHARING), intval($user['uid'])); } } else { if ($user['page-flags'] != PAGE_SOAPBOX) { - DBA:e('UPDATE `user` SET `page-flags` = %d WHERE `uid` = %d', + DBA::e('UPDATE `user` SET `page-flags` = %d WHERE `uid` = %d', intval(PAGE_SOAPBOX), intval($user['uid'])); - DBA:e('UPDATE `contact` SET `rel` = %d WHERE `uid` = %d AND `network` = "dfrn"', + DBA::e('UPDATE `contact` SET `rel` = %d WHERE `uid` = %d AND `network` = "dfrn"', intval(CONTACT_IS_FOLLOWER), intval($user['uid'])); } } @@ -417,7 +417,7 @@ function publicise_addon_admin_post ($a) { } function publicise_post_remote_hook(&$a, &$item) { - $r1 = DBA:e("SELECT `uid` FROM `contact` WHERE `id` = %d AND `reason` = 'publicise'", intval($item['contact-id'])); + $r1 = DBA::e("SELECT `uid` FROM `contact` WHERE `id` = %d AND `reason` = 'publicise'", intval($item['contact-id'])); if (!$r1) { return; } From cf7da5a247860721b16a78a625a9b4628e6f811b Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 16:22:45 +0100 Subject: [PATCH 60/94] switch to new way of executing SQL --- retriever/retriever.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 60c9a2a2..5e7a783e 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -172,7 +172,7 @@ function retriever_retrieve_items($max_items) { */ function retriever_clean_up_completed_resources($max_items) { // TODO: figure out how to do this with DBA module - $r = q('SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM 
retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d', + $r = DBA::e('SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d', intval($max_items)); if (!$r) { $r = array(); @@ -209,10 +209,10 @@ function retriever_tidy() { DBA::delete('retriever_resource', ['completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)']); DBA::delete('retriever_resource', ['completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)']); - $r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); + $r = DBA::e("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); Logger::info('retriever_tidy: found ' . count($r) . 
' retriever_items with no retriever_resource'); foreach ($r as $rr) { - q('DELETE FROM retriever_item WHERE id = %d', intval($rr['id'])); + DBA::e('DELETE FROM retriever_item WHERE id = %d', intval($rr['id'])); } } From d997efe533a19cdc77d32915893710388da63d01 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 16:26:56 +0100 Subject: [PATCH 61/94] new style of http request --- retriever/retriever.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 5e7a783e..cd8aacea 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -277,7 +277,7 @@ function retrieve_resource($resource) { $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); file_put_contents($cookiejar, $rule_data['cookiedata']); } - $fetch_result = DI::httpRequest()->fetchFull($resource['url'], $redirects, '', $cookiejar); + $fetch_result = DI::httpClient()->fetchFull($resource['url'], $redirects, '', $cookiejar); if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) { $retriever_rule['data']['cookiedata'] = file_get_contents($cookiejar); DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], $retriever_rule); From 003d7b6aa06950a83e1a7da017e0b5cc78a81f19 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 16:39:12 +0100 Subject: [PATCH 62/94] attempt to handle one error --- retriever/retriever.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/retriever/retriever.php b/retriever/retriever.php index cd8aacea..263f5842 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -639,6 +639,9 @@ function retriever_extract($doc, $retriever) { */ function retriever_globalise_urls($doc, $resource) { $components = parse_url($resource['redirect-url']); + if (!array_key_exists('scheme', $components) || !array_key_exists('host', $components) || !array_key_exists('path', $components)) { + 
return $doc; + } $rooturl = $components['scheme'] . "://" . $components['host']; $dirurl = $rooturl . dirname($components['path']) . "/"; $params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl); From 2b56f3f25f65e94316d7bc83f4c7f6ef5201b568 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 16:44:54 +0100 Subject: [PATCH 63/94] perhaps it should be this style --- publicise/publicise.php | 421 +--------------------------------------- retriever/retriever.php | 4 +- 2 files changed, 3 insertions(+), 422 deletions(-) diff --git a/publicise/publicise.php b/publicise/publicise.php index a5dc2807..d2dbca24 100644 --- a/publicise/publicise.php +++ b/publicise/publicise.php @@ -1,423 +1,4 @@ - - */ - -use Friendica\Core\Addon; -use Friendica\Core\Logger; -use Friendica\Core\Renderer; -use Friendica\Database\DBA; - -function publicise_install() { - Addon::registerHook('post_remote', 'addon/publicise/publicise.php', 'publicise_post_remote_hook'); -} - -function publicise_uninstall() { - Addon::unregisterHook('post_remote', 'addon/publicise/publicise.php', 'publicise_post_remote_hook'); - Addon::unregisterHook('post_remote_end', 'addon/publicise/publicise.php', 'publicise_post_remote_end_hook'); -} - -function publicise_get_contacts() { - $query = <<$v) { - $enabled = ($v['reason'] === 'publicise') ? 1 : NULL; - $expire = 30; - $comments = 1; - $url = $v['url']; - if ($enabled) { - $r = DBA::e('SELECT * FROM `user` WHERE `uid` = %d', intval($v['uid'])); - $expire = $r[0]['expire']; - $url = $a->get_baseurl() . '/profile/' . $v['nick']; - if ($r[0]['page-flags'] == PAGE_SOAPBOX) { - $comments = NULL; - } - if ($r[0]['account_expired']) { - $enabled = NULL; - } - } - $contacts[$k]['enabled'] = array('publicise-enabled-' . $v['id'], NULL, $enabled); - $contacts[$k]['comments'] = array('publicise-comments-' . 
$v['id'], NULL, $comments); - $contacts[$k]['expire'] = $expire; - $contacts[$k]['url'] = $url; - } - $template = Renderer::getMarkupTemplate('admin.tpl', 'addon/publicise/'); - $o .= Renderer::replaceMacros($template, array( - '$feeds' => $contacts, - '$feed_t' => DI::l10n()->t('Feed'), - '$publicised_t' => DI::l10n()->t('Publicised'), - '$comments_t' => DI::l10n()->t('Allow Comments/Likes'), - '$expire_t' => DI::l10n()->t('Expire Articles After (Days)'), - '$submit_t' => DI::l10n()->t('Submit'))); -} - -function publicise_make_string($in) { - return "'" . DBA::escape($in) . "'"; -} - -function publicise_make_int($in) { - return intval($in) ? $in : 0; -} - -function publicise_create_user($owner, $contact) { - - $nick = $contact['nick']; - if (!$nick) { - notice(sprintf(t("Can't publicise feed \"%s\" because it doesn't have a nickname"), $contact['name']) . EOL); - return; - } - Logger::info('Publicise: create user, beginning key generation...'); - $res=openssl_pkey_new(array( - 'digest_alg' => 'sha1', - 'private_key_bits' => 4096, - 'encrypt_key' => false )); - $prvkey = ''; - openssl_pkey_export($res, $prvkey); - $pkey = openssl_pkey_get_details($res); - $pubkey = $pkey["key"]; - $sres=openssl_pkey_new(array( - 'digest_alg' => 'sha1', - 'private_key_bits' => 512, - 'encrypt_key' => false )); - $sprvkey = ''; - openssl_pkey_export($sres, $sprvkey); - $spkey = openssl_pkey_get_details($sres); - $spubkey = $spkey["key"]; - $guid = generate_user_guid(); - - $newuser = array( - 'guid' => publicise_make_string($guid), - 'username' => publicise_make_string($contact['name']), - 'password' => publicise_make_string($owner['password']), - 'nickname' => publicise_make_string($contact['nick']), - 'email' => publicise_make_string($owner['email']), - 'openid' => publicise_make_string($owner['openid']), - 'timezone' => publicise_make_string($owner['timezone']), - 'language' => publicise_make_string($owner['language']), - 'register_date' => 
publicise_make_string(datetime_convert()), - 'default-location' => publicise_make_string($owner['default-location']), - 'allow_location' => publicise_make_string($owner['allow_location']), - 'theme' => publicise_make_string($owner['theme']), - 'pubkey' => publicise_make_string($pubkey), - 'prvkey' => publicise_make_string($prvkey), - 'spubkey' => publicise_make_string($spubkey), - 'sprvkey' => publicise_make_string($sprvkey), - 'verified' => publicise_make_int($owner['verified']), - 'blocked' => publicise_make_int(0), - 'blockwall' => publicise_make_int(1), - 'hidewall' => publicise_make_int(0), - 'blocktags' => publicise_make_int(0), - 'notify-flags' => publicise_make_int($owner['notifyflags']), - 'page-flags' => publicise_make_int($comments ? PAGE_COMMUNITY : PAGE_SOAPBOX), - 'expire' => publicise_make_int($expire), - ); - Logger::debug('Publicise: creating user ' . print_r($newuser, true)); - $r = DBA::e("INSERT INTO `user` (`" - . implode("`, `", array_keys($newuser)) - . "`) VALUES (" - . implode(", ", array_values($newuser)) - . ")" ); - if (!$r) { - Logger::warning('Publicise: create user failed'); - return; - } - $r = DBA::e('SELECT * FROM `user` WHERE `guid` = "%s"', DBA::escape($guid)); - if (count($r) != 1) { - Logger::warning('Publicise: unexpected number of uids returned'); - return; - } - Logger::debug('Publicise: created user ID ' . $r[0]); - return $r[0]; -} - -function publicise_create_self_contact($a, $contact, $uid) { - $newcontact = array( - 'uid' => $uid, - 'created' => publicise_make_string(datetime_convert()), - 'self' => publicise_make_int(1), - 'name' => publicise_make_string($contact['name']), - 'nick' => publicise_make_string($contact['nick']), - 'photo' => publicise_make_string($contact['photo']), - 'thumb' => publicise_make_string($contact['thumb']), - 'micro' => publicise_make_string($contact['micro']), - 'blocked' => publicise_make_int(0), - 'pending' => publicise_make_int(0), - 'url' => publicise_make_string($a->get_baseurl() . 
'/profile/' . $contact['nick']), - 'nurl' => publicise_make_string($a->get_baseurl() . '/profile/' . $contact['nick']), - 'request' => publicise_make_string($a->get_baseurl() . '/dfrn_request/' . $contact['nick']), - 'notify' => publicise_make_string($a->get_baseurl() . '/dfrn_notify/' . $contact['nick']), - 'poll' => publicise_make_string($a->get_baseurl() . '/dfrn_poll/' . $contact['nick']), - 'confirm' => publicise_make_string($a->get_baseurl() . '/dfrn_confirm/' . $contact['nick']), - 'poco' => publicise_make_string($a->get_baseurl() . '/poco/' . $contact['nick']), - 'uri-date' => publicise_make_string(datetime_convert()), - 'avatar-date' => publicise_make_string(datetime_convert()), - 'closeness' => publicise_make_int(0), - ); - $existing = DBA::e("SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1", intval($uid)); - if (count($existing)) { - $newcontact = $existing[0]; - Logger::debug('Publicise: self contact already exists for user ' . $uid . ' id ' . $newcontact['id']); - } else { - Logger::debug('Publicise: create contact ' . print_r($newcontact, true)); - DBA::e("INSERT INTO `contact` (`" - . implode("`, `", array_keys($newcontact)) - . "`) VALUES (" - . implode(", ", array_values($newcontact)) - . ")" ); - $results = DBA::e("SELECT `id` FROM `contact` WHERE `uid` = %d AND `self` = 1", intval($uid)); - if (count($results) != 1) { - Logger::warning('Publicise: create self contact failed, will delete uid ' . $uid); - $r = DBA::e("DELETE FROM `user` WHERE `uid` = %d", intval($uid)); - return; - } - $newcontact = $results[0]; - Logger::debug('Publicise: created self contact for user ' . $uid . ' id ' . $newcontact['id']); - } - Logger::debug('Publicise: self contact for ' . $uid . ' nick ' . $contact['nick'] . ' is ' . 
$newcontact['id']); - return $newcontact['id']; -} - -function publicise_create_profile($contact, $uid) { - $newprofile = array( - 'uid' => $uid, - 'profile-name' => publicise_make_string('default'), - 'is-default' => publicise_make_int(1), - 'name' => publicise_make_string($contact['name']), - 'photo' => publicise_make_string($contact['photo']), - 'thumb' => publicise_make_string($contact['thumb']), - 'homepage' => publicise_make_string($contact['url']), - 'publish' => publicise_make_int(1), - 'net-publish' => publicise_make_int(1), - ); - Logger::debug('Publicise: create profile ' . print_r($newprofile, true)); - $r = DBA::e("INSERT INTO `profile` (`" - . implode("`, `", array_keys($newprofile)) - . "`) VALUES (" - . implode(", ", array_values($newprofile)) - . ")" ); - if (!$r) { - Logger::warning('Publicise: create profile failed'); - } - $newprofile = DBA::e('SELECT `id` FROM `profile` WHERE `uid` = %d AND `is-default` = 1', intval($uid)); - if (count($newprofile) != 1) { - Logger::warning('Publicise: create profile produced unexpected number of results'); - return; - } - Logger::debug('Publicise: created profile ' . $newprofile[0]['id']); - return $newprofile[0]['id']; -} - -function publicise_set_up_user($a, $contact, $owner) { - $user = publicise_create_user($owner, $contact); - if (!$user) { - notice(sprintf(t("Failed to create user for feed \"%s\""), $contact['name']) . EOL); - return; - } - $self_contact = publicise_create_self_contact($a, $contact, $user['uid']); - if (!$self_contact) { - notice(sprintf(t("Failed to create self contact for user \"%s\""), $contact['name']) . EOL); - Logger::warning("Publicise: unable to create self contact, deleting user " . $user['uid']); - DBA::e('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid'])); - return; - } - $profile = publicise_create_profile($contact, $user['uid']); - if (!$profile) { - notice(sprintf(t("Failed to create profile for user \"%s\""), $contact['name']) . 
EOL); - Logger::warning("Publicise: unable to create profile, deleting user $uid contact $self_contact"); - DBA::e('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid'])); - DBA::e('DELETE FROM `contact` WHERE `id` = %d', intval($self_contact)); - return; - } - return $user; -} - -function publicise($a, &$contact, &$owner) { - Logger::info('@@@ Publicise: publicise'); - if (!is_site_admin()) { - notice(t("Only admin users can publicise feeds")); - Logger::warning('Publicise: non-admin tried to publicise'); - return; - } - - // Check if we're changing our mind about a feed we earlier depublicised - Logger::info('@@@ Publicise: ' . 'SELECT * FROM `user` WHERE `account_expires_on` != "0000-00-00 00:00:00" AND `nickname` = "' . $contact['nick'] . '" AND `email` = "' . $owner['email'] . '" AND `page-flags` in (' . intval(PAGE_COMMUNITY) . ', ' . intval(PAGE_SOAPBOX) . ')'); - $existing = DBA::e('SELECT * FROM `user` WHERE `account_expires_on` != "0000-00-00 00:00:00" AND `nickname` = "%s" AND `email` = "%s" AND `page-flags` in (%d, %d)', - DBA::escape($contact['nick']), DBA::escape($owner['email']), intval(PAGE_COMMUNITY), intval(PAGE_SOAPBOX)); - if (count($existing) == 1) { - Logger::info('@@@ Publicise: there is existing'); - $owner = $existing[0]; - DBA::e('UPDATE `user` SET `account_expires_on` = "0000-00-00 00:00:00", `account_removed` = 0, `account_expired` = 0 WHERE `uid` = %d', intval($owner['uid'])); - DBA::e('UPDATE `profile` SET `publish` = 1, `net-publish` = 1 WHERE `uid` = %d AND `is-default` = 1', intval($owner['uid'])); - Logger::debug('Publicise: recycled previous user ' . $owner['uid']); - } - else { - Logger::info('@@@ Publicise: there is not existing'); - $owner = publicise_set_up_user($a, $contact, $owner); - if (!$owner) { - return; - } - Logger::debug("Publicise: created new user " . $owner['uid']); - } - Logger::info('Publicise: new contact user is ' . 
$owner['uid']); - - $r = DBA::e("UPDATE `contact` SET `uid` = %d, `reason` = 'publicise', `hidden` = 1 WHERE id = %d", intval($owner['uid']), intval($contact['id'])); - if (!$r) { - Logger::warning('Publicise: update contact failed, user is probably in a bad state ' . $user['uid']); - } - $contact['uid'] = $owner['uid']; - $contact['reason'] = 'publicise'; - $contact['hidden'] = 1; - $r = DBA::e("UPDATE `item` SET `uid` = %d, type = 'wall', wall = 1, private = 0 WHERE `contact-id` = %d", - intval($owner['uid']), intval($contact['id'])); - Logger::debug('Publicise: moved items from contact ' . $contact['id'] . ' to uid ' . $owner['uid']); - - // Update the retriever config - $r = DBA::e("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d", - intval($owner['uid']), intval($contact['id'])); - - info(sprintf(t("Moved feed \"%s\" to dedicated account"), $contact['name']) . EOL); - return true; -} - -function publicise_self_contact($uid) { - $r = DBA::e('SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1', intval($uid)); - if (count($r) != 1) { - Logger::warning('Publicise: unexpected number of self contacts for user ' . $uid); - return; - } - return $r[0]; -} - -function depublicise($a, $contact, $user) { - require_once('include/Contact.php'); - - if (!is_site_admin()) { - notice("Only admin users can depublicise feeds"); - Logger::warning('Publicise: non-admin tried to depublicise'); - return; - } - - Logger::debug('Publicise: about to depublicise contact ' . $contact['id'] . ' user ' . $user['uid']); - - $self_contact = publicise_self_contact($user['uid']); - - // If the local_user() is subscribed to the feed, take ownership - // of the feed and all its items and photos. Otherwise they will - // be deleted when the account expires. 
- $r = DBA::e('SELECT * FROM `contact` WHERE `uid` = %d AND `url` = "%s"', - intval(local_user()), DBA::escape($self_contact['url'])); - if (count($r)) { - // Delete the contact to the feed user and any - // copies of its items. These will be replaced by the originals, - // which will be brought back into the local_user's feed along - // with the feed contact itself. - foreach ($r as $my_contact) { - DBA::e('DELETE FROM `item` WHERE `contact-id` = %d', intval($my_contact['id'])); - DBA::e('DELETE FROM `contact` WHERE `id` = %d', intval($my_contact['id'])); - } - - // Move the feed contact to local_user. Existing items stay - // attached to the original feed contact, but must have their uid - // updated. Also update the fields we scribbled over in - // publicise_post_remote_hook. - DBA::e('UPDATE `contact` SET `uid` = %d, `reason` = "", hidden = 0 WHERE id = %d', - intval(local_user()), intval($contact['id'])); - DBA::e('UPDATE `item` SET `uid` = %d, `wall` = 0, `type` = "remote", `private` = 2 WHERE `contact-id` = %d', - intval(local_user()), intval($contact['id'])); - - // Take ownership of any photos created by the feed user - DBA::e('UPDATE `photo` SET `uid` = %d WHERE `uid` = %d', - intval(local_user()), intval($user['uid'])); - - // Update the retriever config - $r = DBA::e("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d", - intval($owner['uid']), intval($contact['id'])); - } - - // Set the account to removed and expired right now. It will be cleaned up by cron after 3 days, giving a chance to change your mind - DBA::e('UPDATE `user` SET `account_removed` = 1, `account_expired` = 1, `account_expires_on` = UTC_TIMESTAMP() WHERE `uid` = %d', - intval($user['uid'])); - DBA::e('UPDATE `profile` SET `publish` = 0, `net-publish` = 0 WHERE `uid` = %d AND `is-default` = 1', intval($user['uid'])); - - info(sprintf(t("Removed dedicated account for feed \"%s\""), $contact['name']) . 
EOL); -} - -function publicise_addon_admin_post ($a) { - Logger::info('@@@ publicise_addon_admin_post'); - if (!is_site_admin()) { - Logger::warning('Publicise: non-admin tried to do admin post'); - return; - } - - foreach (publicise_get_contacts() as $contact) { - Logger::info('@@@ publicise_addon_admin_post contact ' . $contact['id'] . ' ' . $contact['name']); - $user = publicise_get_user($contact['uid']); - if (!$_POST['publicise-enabled-' . $contact['id']]) { - if ($contact['reason'] === 'publicise') { - Logger::info('@@@ depublicise'); - depublicise($a, $contact, $user); - } - } - else { - if ($contact['reason'] !== 'publicise') { - Logger::info('@@@ publicise'); - if (!publicise($a, $contact, $user)) { - Logger::warning('Publicise: failed to publicise contact ' . $contact['id']); - continue; - } - } - if ($_POST['publicise-expire-' . $contact['id']] != $user['expire']) { - DBA::e('UPDATE `user` SET `expire` = %d WHERE `uid` = %d', - intval($_POST['publicise-expire-' . $contact['id']]), intval($user['uid'])); - } - if ($_POST['publicise-comments-' . 
$contact['id']]) { - if ($user['page-flags'] != PAGE_COMMUNITY) { - DBA::e('UPDATE `user` SET `page-flags` = %d WHERE `uid` = %d', - intval(PAGE_COMMUNITY), intval($user['uid'])); - DBA::e('UPDATE `contact` SET `rel` = %d WHERE `uid` = %d AND `network` = "dfrn"', - intval(CONTACT_IS_SHARING), intval($user['uid'])); - } - } - else { - if ($user['page-flags'] != PAGE_SOAPBOX) { - DBA::e('UPDATE `user` SET `page-flags` = %d WHERE `uid` = %d', - intval(PAGE_SOAPBOX), intval($user['uid'])); - DBA::e('UPDATE `contact` SET `rel` = %d WHERE `uid` = %d AND `network` = "dfrn"', - intval(CONTACT_IS_FOLLOWER), intval($user['uid'])); - } - } - } - } -} - -function publicise_post_remote_hook(&$a, &$item) { - $r1 = DBA::e("SELECT `uid` FROM `contact` WHERE `id` = %d AND `reason` = 'publicise'", intval($item['contact-id'])); +"SELECT `uid` FROM `contact` WHERE `id` = %d AND `reason` = 'publicise'", intval($item['contact-id'])); if (!$r1) { return; } diff --git a/retriever/retriever.php b/retriever/retriever.php index 263f5842..fc864129 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -172,7 +172,7 @@ function retriever_retrieve_items($max_items) { */ function retriever_clean_up_completed_resources($max_items) { // TODO: figure out how to do this with DBA module - $r = DBA::e('SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d', + $r = DBA::p('SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND 
retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d', intval($max_items)); if (!$r) { $r = array(); @@ -209,7 +209,7 @@ function retriever_tidy() { DBA::delete('retriever_resource', ['completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)']); DBA::delete('retriever_resource', ['completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)']); - $r = DBA::e("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); + $r = DBA::p("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); Logger::info('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource'); foreach ($r as $rr) { DBA::e('DELETE FROM retriever_item WHERE id = %d', intval($rr['id'])); From 2f5f240d78010994f183e7a05cff9b5c63ce6982 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 16:46:22 +0100 Subject: [PATCH 64/94] stray line --- mailstream/mailstream.php | 1 - 1 file changed, 1 deletion(-) diff --git a/mailstream/mailstream.php b/mailstream/mailstream.php index 73efa9d5..71b123ad 100644 --- a/mailstream/mailstream.php +++ b/mailstream/mailstream.php @@ -322,7 +322,6 @@ function mailstream_subject(array $item): string ); return DI::l10n()->t("Friendica post"); } - $contact = $r[0]; if ($contact['network'] === 'dfrn') { return DI::l10n()->t("Friendica post"); } From aa9fbc82bdb419f9b1c7a18a4a5edb68cbc84ea7 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 16:51:40 +0100 Subject: [PATCH 65/94] also update these queries --- phototrack/phototrack.php | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index f8a3f704..1b8ad738 100644 --- 
a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -86,12 +86,12 @@ function phototrack_photo_use($photo, $table, $field, $id) { if (strlen($photo) != 32) { return; } - $r = DBA::e("SELECT `resource-id` FROM `photo` WHERE `resource-id` = '%s' LIMIT 1", DBA::escape($photo)); + $r = DBA::p("SELECT `resource-id` FROM `photo` WHERE `resource-id` = '%s' LIMIT 1", DBA::escape($photo)); if (!count($r)) { return; } $rid = $r[0]['resource-id']; - $existing = DBA::e("SELECT id FROM phototrack_photo_use WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); + $existing = DBA::p("SELECT id FROM phototrack_photo_use WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); if (count($existing)) { DBA::e("UPDATE phototrack_photo_use SET checked = NOW() WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); } @@ -197,11 +197,11 @@ function phototrack_batch_size() { function phototrack_search_table($a, $table) { $batch_size = phototrack_batch_size(); - $rows = DBA::e("SELECT `$table`.* FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) ) ORDER BY phototrack_row_check.checked LIMIT $batch_size"); + $rows = DBA::p("SELECT `$table`.* FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) ) ORDER BY phototrack_row_check.checked LIMIT $batch_size"); foreach ($rows as $row) { phototrack_check_row($a, $table, $row); } - $r = DBA::e("SELECT COUNT(*) FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( 
phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) )"); + $r = DBA::p("SELECT COUNT(*) FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) )"); $remaining = $r[0]['COUNT(*)']; Logger::info('phototrack: searched ' . count($rows) . ' rows in table ' . $table . ', ' . $remaining . ' still remaining to search'); return $remaining; @@ -252,7 +252,7 @@ function phototrack_tidy() { $batch_size = phototrack_batch_size(); DBA::e('CREATE TABLE IF NOT EXISTS `phototrack-temp` (`resource-id` char(255) not null)'); DBA::e('INSERT INTO `phototrack-temp` SELECT DISTINCT(`resource-id`) FROM photo WHERE photo.`created` < DATE_SUB(NOW(), INTERVAL 2 MONTH)'); - $rows = DBA::e('SELECT `phototrack-temp`.`resource-id` FROM `phototrack-temp` LEFT OUTER JOIN phototrack_photo_use ON (`phototrack-temp`.`resource-id` = phototrack_photo_use.`resource-id`) WHERE phototrack_photo_use.id IS NULL limit ' . /*$batch_size*/1000); + $rows = DBA::p('SELECT `phototrack-temp`.`resource-id` FROM `phototrack-temp` LEFT OUTER JOIN phototrack_photo_use ON (`phototrack-temp`.`resource-id` = phototrack_photo_use.`resource-id`) WHERE phototrack_photo_use.id IS NULL limit ' . /*$batch_size*/1000); if (DBA::isResult($rows)) { foreach ($rows as $row) { Logger::debug('phototrack: remove photo ' . $row['resource-id']); @@ -261,7 +261,7 @@ function phototrack_tidy() { Logger::info('phototrack_tidy: deleted ' . count($rows) . 
' photos'); } DBA::e('DROP TABLE `phototrack-temp`'); - $rows = DBA::e('SELECT id FROM phototrack_photo_use WHERE checked < DATE_SUB(NOW(), INTERVAL 14 DAY)'); + $rows = DBA::p('SELECT id FROM phototrack_photo_use WHERE checked < DATE_SUB(NOW(), INTERVAL 14 DAY)'); foreach ($rows as $row) { DBA::e( 'DELETE FROM phototrack_photo_use WHERE id = ' . $row['id']); } From 25e033eb88eb432b0882f5e0777e0c157bbbb615 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 16:54:23 +0100 Subject: [PATCH 66/94] add anotehr check --- retriever/retriever.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/retriever/retriever.php b/retriever/retriever.php index fc864129..8083dcf7 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -210,6 +210,9 @@ function retriever_tidy() { DBA::delete('retriever_resource', ['completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)']); $r = DBA::p("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); + if (!DBA::isResult($r)) { + return; + } Logger::info('retriever_tidy: found ' . count($r) . 
' retriever_items with no retriever_resource'); foreach ($r as $rr) { DBA::e('DELETE FROM retriever_item WHERE id = %d', intval($rr['id'])); From dc9ebc732a73d4553ce3335347c9e6da74559df4 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 17:36:38 +0100 Subject: [PATCH 67/94] another migrated function --- phototrack/phototrack.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index 1b8ad738..ef2cb154 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -24,6 +24,7 @@ use Friendica\Core\Addon; use Friendica\Core\Logger; use Friendica\Object\Image; use Friendica\Database\DBA; +use Friendica\Util\Images; use Friendica\DI; if (!defined('PHOTOTRACK_DEFAULT_BATCH_SIZE')) { @@ -76,7 +77,7 @@ function phototrack_finished_row($table, $id) { function phototrack_photo_use($photo, $table, $field, $id) { Logger::debug('@@@ phototrack_photo_use ' . $photo); - foreach (Image::supportedTypes() as $m => $e) { + foreach (Images::supportedTypes() as $m => $e) { $photo = str_replace(".$e", '', $photo); } if (substr($photo, -2, 1) == '-') { From 9cd1c2b72a955e5c301eb970811a0d8f21adc7ee Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 21:05:23 +0100 Subject: [PATCH 68/94] this is more correct --- retriever/retriever.php | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 8083dcf7..8755dc92 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -174,11 +174,11 @@ function retriever_clean_up_completed_resources($max_items) { // TODO: figure out how to do this with DBA module $r = DBA::p('SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND 
retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d', intval($max_items)); - if (!$r) { - $r = array(); + if (!DBA::isResult($r)) { + return; } - Logger::debug('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . count($r)); - foreach ($r as $rr) { + Logger::debug('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . DBA::numRows($r)); + while ($rr = DBA::fetch($r)) { $retriever_item = DBA::selectFirst('retriever_item', [], ['id' => intval($rr['item'])]); if (!DBA::isResult($retriever_item)) { Logger::warning('retriever_clean_up_completed_resources: no retriever item with id ' . $rr['item']); @@ -210,11 +210,11 @@ function retriever_tidy() { DBA::delete('retriever_resource', ['completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)']); $r = DBA::p("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); - if (!DBA::isResult($r)) { - return; - } - Logger::info('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource'); - foreach ($r as $rr) { + if (!DBA::isResult($r)) { + return; + } + Logger::info('retriever_tidy: found ' . DBA::numRows($r) . 
' retriever_items with no retriever_resource'); + while ($rr = DBA::fetch($r)) { DBA::e('DELETE FROM retriever_item WHERE id = %d', intval($rr['id'])); } } From dee4f73b0a0df250c25085e7f43b20b43dabe999 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 21:20:20 +0100 Subject: [PATCH 69/94] this is more correcter --- phototrack/phototrack.php | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index ef2cb154..0e6db4c1 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -25,6 +25,7 @@ use Friendica\Core\Logger; use Friendica\Object\Image; use Friendica\Database\DBA; use Friendica\Util\Images; +use Friendica\Util\DateTimeFormat; use Friendica\DI; if (!defined('PHOTOTRACK_DEFAULT_BATCH_SIZE')) { @@ -68,10 +69,10 @@ function phototrack_module() {} function phototrack_finished_row($table, $id) { $existing = DBA::selectFirst('phototrack_row_check', ['id'], ['table' => $table, 'row-id' => $id]); if (!is_bool($existing)) { - DBA::e("UPDATE phototrack_row_check SET checked = NOW() WHERE `table` = '$table' AND `row-id` = '$id'"); + DBA::update('phototrack_row_check', ['checked' => DateTimeFormat::utcNow()], ['table' => $table, 'row-id' = $id]); } else { - DBA::e("INSERT INTO phototrack_row_check (`table`, `row-id`, `checked`) VALUES ('$table', '$id', NOW())"); + DBA::insert('phototrack_row_check', ['table' => $table, 'row-id' = $id, 'checked' => DateTimeFormat::utcNow()]); } } @@ -87,17 +88,17 @@ function phototrack_photo_use($photo, $table, $field, $id) { if (strlen($photo) != 32) { return; } - $r = DBA::p("SELECT `resource-id` FROM `photo` WHERE `resource-id` = '%s' LIMIT 1", DBA::escape($photo)); - if (!count($r)) { + $r = DBA::selectFirst('photo', ['resource-id'], ['resource-id' => $photo]); + if (!DBA::isResult($r)) { return; } - $rid = $r[0]['resource-id']; - $existing = DBA::p("SELECT id FROM phototrack_photo_use WHERE `resource-id` = 
'$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); - if (count($existing)) { - DBA::e("UPDATE phototrack_photo_use SET checked = NOW() WHERE `resource-id` = '$rid' AND `table` = '$table' AND `field` = '$field' AND `row-id` = '$id'"); + $rid = $r['resource-id']; + $existing = DBA::selectFirst('phototrack_photo_use', ['id'], ['resource-id' => $rid, 'table' => $table, 'field' => $field, 'row-id' = $id]); + if (DBA::isResult($existing)) { + DBA::update('phototrack_photo_use', ['checked' => DateTimeFormat::utcNow()], ['resource-id' => $rid, 'table' => $table, 'field' => $field, 'row-id' = $id]); } else { - DBA::e("INSERT INTO phototrack_photo_use (`resource-id`, `table`, `field`, `row-id`, `checked`) VALUES ('$rid', '$table', '$field', '$id', NOW())"); + DBA::insert('phototrack_photo_use', ['resource-id' => $rid, 'table' => $table, 'field' => $field, 'row-id' = $id, 'checked' => DateTimeFormat::utcNow()]); } } @@ -199,11 +200,13 @@ function phototrack_batch_size() { function phototrack_search_table($a, $table) { $batch_size = phototrack_batch_size(); $rows = DBA::p("SELECT `$table`.* FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) ) ORDER BY phototrack_row_check.checked LIMIT $batch_size"); - foreach ($rows as $row) { - phototrack_check_row($a, $table, $row); + if (DBA::isResult($rows)) { + while ($row = DBA::fetch($rows)) { + phototrack_check_row($a, $table, $row); + } } $r = DBA::p("SELECT COUNT(*) FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) )"); - $remaining = $r[0]['COUNT(*)']; + $remaining = 
DBA::fetch($r)['COUNT(*)']; Logger::info('phototrack: searched ' . count($rows) . ' rows in table ' . $table . ', ' . $remaining . ' still remaining to search'); return $remaining; } From 0cbcbadec0b0ed30ef6026f2d49ee48f928cf0fc Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 21:39:03 +0100 Subject: [PATCH 70/94] syntax errors --- phototrack/phototrack.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index 0e6db4c1..82b4bfd3 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -69,10 +69,10 @@ function phototrack_module() {} function phototrack_finished_row($table, $id) { $existing = DBA::selectFirst('phototrack_row_check', ['id'], ['table' => $table, 'row-id' => $id]); if (!is_bool($existing)) { - DBA::update('phototrack_row_check', ['checked' => DateTimeFormat::utcNow()], ['table' => $table, 'row-id' = $id]); + DBA::update('phototrack_row_check', ['checked' => DateTimeFormat::utcNow()], ['table' => $table, 'row-id' => $id]); } else { - DBA::insert('phototrack_row_check', ['table' => $table, 'row-id' = $id, 'checked' => DateTimeFormat::utcNow()]); + DBA::insert('phototrack_row_check', ['table' => $table, 'row-id' => $id, 'checked' => DateTimeFormat::utcNow()]); } } @@ -206,7 +206,7 @@ function phototrack_search_table($a, $table) { } } $r = DBA::p("SELECT COUNT(*) FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) )"); - $remaining = DBA::fetch($r)['COUNT(*)']; + $remaining = DBA::fetch($r)['count(*)']; Logger::info('phototrack: searched ' . count($rows) . ' rows in table ' . $table . ', ' . $remaining . 
' still remaining to search'); return $remaining; } From fa711292056035b7e34dc24b9c6282f3175c9434 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 22:02:01 +0100 Subject: [PATCH 71/94] syntax errors --- phototrack/phototrack.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index 82b4bfd3..e9f0a7cd 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -93,9 +93,9 @@ function phototrack_photo_use($photo, $table, $field, $id) { return; } $rid = $r['resource-id']; - $existing = DBA::selectFirst('phototrack_photo_use', ['id'], ['resource-id' => $rid, 'table' => $table, 'field' => $field, 'row-id' = $id]); + $existing = DBA::selectFirst('phototrack_photo_use', ['id'], ['resource-id' => $rid, 'table' => $table, 'field' => $field, 'row-id' => $id]); if (DBA::isResult($existing)) { - DBA::update('phototrack_photo_use', ['checked' => DateTimeFormat::utcNow()], ['resource-id' => $rid, 'table' => $table, 'field' => $field, 'row-id' = $id]); + DBA::update('phototrack_photo_use', ['checked' => DateTimeFormat::utcNow()], ['resource-id' => $rid, 'table' => $table, 'field' => $field, 'row-id' => $id]); } else { DBA::insert('phototrack_photo_use', ['resource-id' => $rid, 'table' => $table, 'field' => $field, 'row-id' = $id, 'checked' => DateTimeFormat::utcNow()]); From 2abe0398ad435b71687a87e55c250167999c215d Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 22:02:58 +0100 Subject: [PATCH 72/94] syntax errors --- phototrack/phototrack.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index e9f0a7cd..63c764fc 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -98,7 +98,7 @@ function phototrack_photo_use($photo, $table, $field, $id) { DBA::update('phototrack_photo_use', ['checked' => DateTimeFormat::utcNow()], ['resource-id' => $rid, 'table' => $table, 'field' 
=> $field, 'row-id' => $id]); } else { - DBA::insert('phototrack_photo_use', ['resource-id' => $rid, 'table' => $table, 'field' => $field, 'row-id' = $id, 'checked' => DateTimeFormat::utcNow()]); + DBA::insert('phototrack_photo_use', ['resource-id' => $rid, 'table' => $table, 'field' => $field, 'row-id' => $id, 'checked' => DateTimeFormat::utcNow()]); } } From 16cf075e828305860e5c9924713c41c8237711a4 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 22:05:28 +0100 Subject: [PATCH 73/94] syntax errors --- phototrack/phototrack.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index 63c764fc..8fb637b2 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -206,8 +206,8 @@ function phototrack_search_table($a, $table) { } } $r = DBA::p("SELECT COUNT(*) FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) )"); - $remaining = DBA::fetch($r)['count(*)']; - Logger::info('phototrack: searched ' . count($rows) . ' rows in table ' . $table . ', ' . $remaining . ' still remaining to search'); + $remaining = DBA::fetch($r)['count']; + Logger::info('phototrack: searched ' . DBA::numRows($rows) . ' rows in table ' . $table . ', ' . $remaining . ' still remaining to search'); return $remaining; } @@ -262,12 +262,12 @@ function phototrack_tidy() { Logger::debug('phototrack: remove photo ' . $row['resource-id']); DBA::e('DELETE FROM photo WHERE `resource-id` = "' . $row['resource-id'] . '"'); } - Logger::info('phototrack_tidy: deleted ' . count($rows) . ' photos'); + Logger::info('phototrack_tidy: deleted ' . DBA::numRows($rows) . 
' photos'); } DBA::e('DROP TABLE `phototrack-temp`'); $rows = DBA::p('SELECT id FROM phototrack_photo_use WHERE checked < DATE_SUB(NOW(), INTERVAL 14 DAY)'); foreach ($rows as $row) { DBA::e( 'DELETE FROM phototrack_photo_use WHERE id = ' . $row['id']); } - Logger::info('phototrack_tidy: deleted ' . count($rows) . ' phototrack_photo_use rows'); + Logger::info('phototrack_tidy: deleted ' . DBA::numRows($rows) . ' phototrack_photo_use rows'); } From 1e6de4af530ab5039a42326f483987694914c74f Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 2 Mar 2022 22:19:00 +0100 Subject: [PATCH 74/94] improvements --- phototrack/phototrack.php | 1 + retriever/retriever.php | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php index 8fb637b2..0ede2a1c 100644 --- a/phototrack/phototrack.php +++ b/phototrack/phototrack.php @@ -206,6 +206,7 @@ function phototrack_search_table($a, $table) { } } $r = DBA::p("SELECT COUNT(*) FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) )"); + Logger::info("@@@ phototrack_search_table " . print_r(DBA::fetch($r))); $remaining = DBA::fetch($r)['count']; Logger::info('phototrack: searched ' . DBA::numRows($rows) . ' rows in table ' . $table . ', ' . $remaining . ' still remaining to search'); return $remaining; diff --git a/retriever/retriever.php b/retriever/retriever.php index 8755dc92..7caa05e5 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -215,7 +215,7 @@ function retriever_tidy() { } Logger::info('retriever_tidy: found ' . DBA::numRows($r) . 
' retriever_items with no retriever_resource'); while ($rr = DBA::fetch($r)) { - DBA::e('DELETE FROM retriever_item WHERE id = %d', intval($rr['id'])); + DBA::delete('retriever_item', ['id' => intval($rr['id'])]); } } From 91e5b6337b8c8d4a32cb115270c893f6d36669d3 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Thu, 3 Mar 2022 07:37:37 +0100 Subject: [PATCH 75/94] fix sql syntax --- retriever/retriever.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 7caa05e5..047511c5 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -172,8 +172,7 @@ function retriever_retrieve_items($max_items) { */ function retriever_clean_up_completed_resources($max_items) { // TODO: figure out how to do this with DBA module - $r = DBA::p('SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d', - intval($max_items)); + $r = DBA::p("SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT $max_items"); if (!DBA::isResult($r)) { return; } From 061545c4567d4b0e548e8e6a2b30282026d1d297 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Thu, 3 Mar 2022 11:33:35 +0100 Subject: [PATCH 76/94] use new temppath function --- retriever/retriever.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retriever/retriever.php 
b/retriever/retriever.php index 047511c5..ef00bd33 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -276,7 +276,7 @@ function retrieve_resource($resource) { $redirects = 0; $cookiejar = ''; if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) { - $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-'); + $cookiejar = tempnam(System::getTempPath(), 'cookiejar-retriever-'); file_put_contents($cookiejar, $rule_data['cookiedata']); } $fetch_result = DI::httpClient()->fetchFull($resource['url'], $redirects, '', $cookiejar); From 8ad2b51492fe921f2bca7ae05e67b7d551c5b8b0 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sat, 7 May 2022 19:32:24 +0200 Subject: [PATCH 77/94] fix argv stuff --- retriever/retriever.php | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index ef00bd33..9dbe6170 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -830,7 +830,7 @@ function retriever_content($a) { $a->page['content'] .= "

Please log in

"; return; } - if ($a->argv[1] === 'help') { + if (isset(DI::args()->getArgv()[1]) and DI::args()->getArgv()[1] === 'help') { $feeds = DBA::selectToArray('contact', ['id', 'name', 'thumb'], ['uid' => local_user(), 'network' => 'feed']); for ($i = 0; $i < count($feeds); ++$i) { $feeds[$i]['url'] = DI::baseUrl()->get(true) . '/retriever/' . $feeds[$i]['id']; @@ -842,14 +842,15 @@ function retriever_content($a) { '$feeds' => $feeds)); return; } - if ($a->argv[1]) { - $retriever_rule = get_retriever_rule($a->argv[1], local_user(), false); + if (isset(DI::args()->getArgv()[1])) { + $arg1 = DI::args()->getArgv()[1] + $retriever_rule = get_retriever_rule($arg1, local_user(), false); if (!$retriever_rule) { - $retriever_rule = ['id' => 0, 'data' => ['enable' => 0, 'modurl' => '', 'pattern' => '', 'replace' => '', 'images' => 0, 'storecookies' => 0, 'cookiedata' => '', 'customxslt' => '', 'include' => '', 'exclude' => '']]; + $retriever_rule = ['id' => 0, 'data' => ['enable' => 0, 'modurl' => '', 'pattern' => '', 'replace' => '', 'images' => 0, 'storecookies' => 0, 'cookiedata' => '', 'customxslt' => '', 'include' => '', 'exclude' => '']]; } if (!empty($_POST["id"])) { - $retriever_rule = get_retriever_rule($a->argv[1], local_user(), true); + $retriever_rule = get_retriever_rule($arg1, local_user(), true); $retriever_rule['data'] = array(); foreach (array('modurl', 'pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) { if (empty($_POST['retriever_' . 
$setting])) { From 55dd0e84d89543680ac66d84bd1902c7c11a72bd Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sat, 7 May 2022 19:33:34 +0200 Subject: [PATCH 78/94] fix argv stuff --- retriever/retriever.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 9dbe6170..210c6542 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -843,7 +843,7 @@ function retriever_content($a) { return; } if (isset(DI::args()->getArgv()[1])) { - $arg1 = DI::args()->getArgv()[1] + $arg1 = DI::args()->getArgv()[1]; $retriever_rule = get_retriever_rule($arg1, local_user(), false); if (!$retriever_rule) { $retriever_rule = ['id' => 0, 'data' => ['enable' => 0, 'modurl' => '', 'pattern' => '', 'replace' => '', 'images' => 0, 'storecookies' => 0, 'cookiedata' => '', 'customxslt' => '', 'include' => '', 'exclude' => '']]; From adf160716c32f9c623a9341238cc0eba13f636e7 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 8 May 2022 14:46:06 +0100 Subject: [PATCH 79/94] correct use of fetchFull --- retriever/retriever.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 210c6542..42462bcb 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -279,7 +279,7 @@ function retrieve_resource($resource) { $cookiejar = tempnam(System::getTempPath(), 'cookiejar-retriever-'); file_put_contents($cookiejar, $rule_data['cookiedata']); } - $fetch_result = DI::httpClient()->fetchFull($resource['url'], $redirects, '', $cookiejar); + $fetch_result = DI::httpClient()->fetchFull($resource['url'], $redirects, 0, $cookiejar); if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) { $retriever_rule['data']['cookiedata'] = file_get_contents($cookiejar); DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], $retriever_rule); From 
a0fbb7b9bed818ba30e12acfdeec9875b74c2117 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 8 May 2022 21:41:30 +0200 Subject: [PATCH 80/94] fix comment --- retriever/retriever.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 42462bcb..1401f90d 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -925,7 +925,7 @@ function retriever_content($a) { 'retriever_cookiedata', DI::l10n()->t('Cookie Data'), $retriever_rule['data']['cookiedata'], - DI::l10n()->t("Latest cookie data for this feed. Netscape cookie file format.")), + DI::l10n()->t("Latest cookie data for this feed. Example: [{\"Name\":\"cookie-name\",\"Value\":\"cookie-value\",\"Domain\":\"example.com\",\"Path\":\"\\/path\\/\",\"Max-Age\":null,\"Expires\":1682450014,\"Secure\":true,\"Discard\":false,\"HttpOnly\":true}]")), '$customxslt' => array( 'retriever_customxslt', DI::l10n()->t('Custom XSLT'), From 52c287269f0771b721b3ede84bbab92a4728885e Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 22 Jun 2022 17:55:36 +0100 Subject: [PATCH 81/94] Use separate album and repair dox for ces --- retriever/retriever.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 1401f90d..83c357d3 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -797,7 +797,7 @@ function retriever_transform_images(&$item, $resource) { $path = parse_url($resource['url'], PHP_URL_PATH); $parts = pathinfo($path); $filename = $parts['filename'] . (array_key_exists('extension', $parts) ? '.' . $parts['extension'] : ''); - $album = 'Wall Photos'; + $album = 'Retriever'; $scale = 0; $desc = ''; // TODO: store alt text with resource when it's requested so we can fill this in Logger::debug('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . $uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . 
$album . ' scale ' . $scale . ' desc ' . $desc); From b7f43462111291b912fba21c0b5a143e60a4c918 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 2 Oct 2022 20:18:48 +0200 Subject: [PATCH 82/94] Update to correct collation mode --- retriever/database.sql | 12 ++++++------ retriever/retriever.php | 14 +++++++++++++- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/retriever/database.sql b/retriever/database.sql index 68480cfd..6139fea4 100644 --- a/retriever/database.sql +++ b/retriever/database.sql @@ -6,11 +6,11 @@ CREATE TABLE IF NOT EXISTS `retriever_rule` ( PRIMARY KEY (`id`), KEY `uid` (`uid`), KEY `contact-id` (`contact-id`) -) DEFAULT CHARSET=utf8 COLLATE=utf8_bin; +) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; CREATE TABLE IF NOT EXISTS `retriever_item` ( `id` int(11) unsigned NOT NULL AUTO_INCREMENT, - `item-uri` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL, + `item-uri` varbinary(255) NOT NULL, `item-uid` int(10) unsigned NOT NULL DEFAULT '0', `contact-id` int(10) unsigned NOT NULL DEFAULT '0', `resource` int(11) NOT NULL, @@ -20,7 +20,7 @@ CREATE TABLE IF NOT EXISTS `retriever_item` ( KEY `item-uid` (`item-uid`), KEY `all` (`item-uri`, `item-uid`, `contact-id`), PRIMARY KEY (`id`) -) DEFAULT CHARSET=utf8 COLLATE=utf8_bin; +) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; CREATE TABLE IF NOT EXISTS `retriever_resource` ( `id` int(11) unsigned NOT NULL AUTO_INCREMENT, @@ -28,15 +28,15 @@ CREATE TABLE IF NOT EXISTS `retriever_resource` ( `contact-id` int(10) unsigned NOT NULL DEFAULT '0', `type` char(255) NULL DEFAULT NULL, `binary` int(1) NOT NULL DEFAULT 0, - `url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL, + `url` varbinary(800) NOT NULL, `created` timestamp NOT NULL DEFAULT now(), `completed` timestamp NULL DEFAULT NULL, `last-try` timestamp NULL DEFAULT NULL, `num-tries` int(11) NOT NULL DEFAULT 0, `data` mediumblob NULL DEFAULT NULL, `http-code` smallint(1) unsigned NULL DEFAULT NULL, - 
`redirect-url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NULL DEFAULT NULL, + `redirect-url` varbinary(800) NOT NULL, KEY `url` (`url`), KEY `completed` (`completed`), PRIMARY KEY (`id`) -) DEFAULT CHARSET=utf8 COLLATE=utf8_bin +) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; diff --git a/retriever/retriever.php b/retriever/retriever.php index 83c357d3..714a33a1 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -32,7 +32,19 @@ function retriever_install() { Addon::registerHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); - if (DI::config()->get('retriever', 'dbversion') != '0.14') { + if (DI::config()->get('retriever', 'dbversion') == '0.14') { + if (!DBA::e("ALTER TABLE `retriever_rule` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci") || + !DBA::e("ALTER TABLE `retriever_item` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci") || + !DBA::e("ALTER TABLE `retriever_item` MODIFY `item-uri` varbinary(255) NOT NULL") || + !DBA::e("ALTER TABLE `retriever_resource` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci") || + !DBA::e("ALTER TABLE `retriever_resource` MODIFY `url` varbinary(800) NOT NULL") || + !DBA::e("ALTER TABLE `retriever_resource` MODIFY `redirect-url` varbinary(800) NOT NULL")) { + Logger::warning('Unable to update database tables: ' . 
DBA::errorMessage()); + return; + } + DI::config()->set('retriever', 'dbversion', '0.15'); + } + if (DI::config()->get('retriever', 'dbversion') != '0.15') { $schema = file_get_contents(dirname(__file__).'/database.sql'); $tables = explode(';', $schema); foreach ($tables as $table) { From ce6a14e0555662add54fff93c200c8eee7a85f4d Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 2 Oct 2022 21:09:16 +0200 Subject: [PATCH 83/94] Use new hook registration calls --- retriever/retriever.php | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 714a33a1..5bd078c6 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -7,6 +7,7 @@ */ use Friendica\Core\Addon; +use Friendica\Core\Hook; use Friendica\Core\Logger; use Friendica\Core\Renderer; use Friendica\Core\System; @@ -26,11 +27,11 @@ use Friendica\DI; * @brief Installation hook for retriever plugin */ function retriever_install() { - Addon::registerHook('addon_settings', 'addon/retriever/retriever.php', 'retriever_addon_settings'); - Addon::registerHook('addon_settings_post', 'addon/retriever/retriever.php', 'retriever_addon_settings_post'); - Addon::registerHook('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook'); - Addon::registerHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); - Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); + Hook::register('addon_settings', 'addon/retriever/retriever.php', 'retriever_addon_settings'); + Hook::register('addon_settings_post', 'addon/retriever/retriever.php', 'retriever_addon_settings_post'); + Hook::register('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook'); + Hook::register('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); + Hook::register('cron', 'addon/retriever/retriever.php', 'retriever_cron'); if 
(DI::config()->get('retriever', 'dbversion') == '0.14') { if (!DBA::e("ALTER TABLE `retriever_rule` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci") || @@ -62,13 +63,13 @@ function retriever_install() { * @brief Uninstallation hook for retriever plugin */ function retriever_uninstall() { - Addon::unregisterHook('addon_settings', 'addon/retriever/retriever.php', 'retriever_addon_settings'); - Addon::unregisterHook('addon_settings_post', 'addon/retriever/retriever.php', 'retriever_addon_settings_post'); - Addon::unregisterHook('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook'); - Addon::unregisterHook('addon_settings', 'addon/retriever/retriever.php', 'retriever_addon_settings'); - Addon::unregisterHook('addon_settings_post', 'addon/retriever/retriever.php', 'retriever_addon_settings_post'); - Addon::unregisterHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); - Addon::unregisterHook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); + Hook::unregister('addon_settings', 'addon/retriever/retriever.php', 'retriever_addon_settings'); + Hook::unregister('addon_settings_post', 'addon/retriever/retriever.php', 'retriever_addon_settings_post'); + Hook::unregister('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook'); + Hook::unregister('addon_settings', 'addon/retriever/retriever.php', 'retriever_addon_settings'); + Hook::unregister('addon_settings_post', 'addon/retriever/retriever.php', 'retriever_addon_settings_post'); + Hook::unregister('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); + Hook::unregister('cron', 'addon/retriever/retriever.php', 'retriever_cron'); } /** From 6a3175537e947c022244961902ddef70480251af Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 2 Oct 2022 21:19:08 +0200 Subject: [PATCH 84/94] Fix length of keys --- retriever/database.sql | 4 ++-- retriever/retriever.php | 10 +++++----- 2 files 
changed, 7 insertions(+), 7 deletions(-) diff --git a/retriever/database.sql b/retriever/database.sql index 6139fea4..2cabf9ef 100644 --- a/retriever/database.sql +++ b/retriever/database.sql @@ -28,14 +28,14 @@ CREATE TABLE IF NOT EXISTS `retriever_resource` ( `contact-id` int(10) unsigned NOT NULL DEFAULT '0', `type` char(255) NULL DEFAULT NULL, `binary` int(1) NOT NULL DEFAULT 0, - `url` varbinary(800) NOT NULL, + `url` varbinary(700) NOT NULL, `created` timestamp NOT NULL DEFAULT now(), `completed` timestamp NULL DEFAULT NULL, `last-try` timestamp NULL DEFAULT NULL, `num-tries` int(11) NOT NULL DEFAULT 0, `data` mediumblob NULL DEFAULT NULL, `http-code` smallint(1) unsigned NULL DEFAULT NULL, - `redirect-url` varbinary(800) NOT NULL, + `redirect-url` varbinary(700) NOT NULL, KEY `url` (`url`), KEY `completed` (`completed`), PRIMARY KEY (`id`) diff --git a/retriever/retriever.php b/retriever/retriever.php index 5bd078c6..977ed49a 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -38,8 +38,8 @@ function retriever_install() { !DBA::e("ALTER TABLE `retriever_item` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci") || !DBA::e("ALTER TABLE `retriever_item` MODIFY `item-uri` varbinary(255) NOT NULL") || !DBA::e("ALTER TABLE `retriever_resource` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci") || - !DBA::e("ALTER TABLE `retriever_resource` MODIFY `url` varbinary(800) NOT NULL") || - !DBA::e("ALTER TABLE `retriever_resource` MODIFY `redirect-url` varbinary(800) NOT NULL")) { + !DBA::e("ALTER TABLE `retriever_resource` MODIFY `url` varbinary(700) NOT NULL") || + !DBA::e("ALTER TABLE `retriever_resource` MODIFY `redirect-url` varbinary(700) NOT NULL")) { Logger::warning('Unable to update database tables: ' . 
DBA::errorMessage()); return; } @@ -479,9 +479,9 @@ function add_retriever_resource($url, $uid, $cid, $binary = false) { return $resource; } - // 800 characters is the size of this field in the database - if (strlen($url) > 800) { - Logger::warning('add_retriever_resource: URL is longer than 800 characters'); + // 700 characters is the size of this field in the database + if (strlen($url) > 700) { + Logger::warning('add_retriever_resource: URL is longer than 700 characters'); } $resource = DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)]); From d18a905022764d095142aef26afd3f23b5b50a18 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sun, 2 Oct 2022 20:19:53 +0100 Subject: [PATCH 85/94] add log lines to install --- retriever/retriever.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/retriever/retriever.php b/retriever/retriever.php index 977ed49a..640bc5cc 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -27,6 +27,8 @@ use Friendica\DI; * @brief Installation hook for retriever plugin */ function retriever_install() { + Logger::debug('Install retriever'); + Hook::register('addon_settings', 'addon/retriever/retriever.php', 'retriever_addon_settings'); Hook::register('addon_settings_post', 'addon/retriever/retriever.php', 'retriever_addon_settings_post'); Hook::register('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook'); @@ -63,6 +65,8 @@ function retriever_install() { * @brief Uninstallation hook for retriever plugin */ function retriever_uninstall() { + Logger::debug('Uninstall retriever'); + Hook::unregister('addon_settings', 'addon/retriever/retriever.php', 'retriever_addon_settings'); Hook::unregister('addon_settings_post', 'addon/retriever/retriever.php', 'retriever_addon_settings_post'); Hook::unregister('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook'); From 893ab076e8f91b8d488d83acc50a44eee46cc618 Mon Sep 17 
00:00:00 2001 From: Matthew Exon Date: Sun, 2 Oct 2022 21:29:16 +0200 Subject: [PATCH 86/94] fix order of upgrade commands --- retriever/retriever.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 640bc5cc..52f5f0b0 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -37,11 +37,11 @@ function retriever_install() { if (DI::config()->get('retriever', 'dbversion') == '0.14') { if (!DBA::e("ALTER TABLE `retriever_rule` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci") || - !DBA::e("ALTER TABLE `retriever_item` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci") || !DBA::e("ALTER TABLE `retriever_item` MODIFY `item-uri` varbinary(255) NOT NULL") || - !DBA::e("ALTER TABLE `retriever_resource` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci") || + !DBA::e("ALTER TABLE `retriever_item` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci") || !DBA::e("ALTER TABLE `retriever_resource` MODIFY `url` varbinary(700) NOT NULL") || !DBA::e("ALTER TABLE `retriever_resource` MODIFY `redirect-url` varbinary(700) NOT NULL")) { + !DBA::e("ALTER TABLE `retriever_resource` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci") || Logger::warning('Unable to update database tables: ' . 
DBA::errorMessage()); return; } From f780c6e5b282bf7560253423993e1a82c4ec9cca Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sat, 15 Oct 2022 18:02:43 +0200 Subject: [PATCH 87/94] add types to parameters --- retriever/retriever.php | 68 ++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 52f5f0b0..7d1229e6 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -77,19 +77,19 @@ function retriever_uninstall() { } /** - * @brief Module hook for retriever plugin - * - * TODO: figure out what this should be used for + * This is a statement rather than an actual function definition. The simple + * existence of this method is checked to figure out if the addon offers a + * module. */ function retriever_module() {} /** * @brief Admin page hook for retriever plugin * - * @param App $a App object (by ref) + * @param App $a App object (unused) * @param string $o HTML to append content to (by ref) */ -function retriever_addon_admin(&$a, &$o) { +function retriever_addon_admin(App $a, string &$o) { $template = Renderer::getMarkupTemplate('admin.tpl', 'addon/retriever/'); $downloads_per_cron = DI::config()->get('retriever', 'downloads_per_cron'); @@ -141,7 +141,7 @@ $retriever_item_count = 0; * * @param int $max_items Maximum number of items to retrieve in this call */ -function retriever_retrieve_items($max_items) { +function retriever_retrieve_items(int $max_items) { global $retriever_item_count; $retriever_schedule = array(array(1,'minute'), @@ -187,7 +187,7 @@ function retriever_retrieve_items($max_items) { * * @param int $max_items Maximum number of items to retrieve in this call */ -function retriever_clean_up_completed_resources($max_items) { +function retriever_clean_up_completed_resources(int $max_items) { // TODO: figure out how to do this with DBA module $r = DBA::p("SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM 
retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT $max_items"); if (!DBA::isResult($r)) { @@ -240,7 +240,7 @@ function retriever_tidy() { * * @param array $resource The row from the retriever_resource table */ -function retrieve_dataurl_resource($resource) { +function retrieve_dataurl_resource(array $resource) { if (!preg_match("/date:(.*);base64,(.*)/", $resource['url'], $matches)) { Logger::warning('retrieve_dataurl_resource: resource ' . $resource['id'] . ' does not match pattern'); } else { @@ -258,7 +258,7 @@ function retrieve_dataurl_resource($resource) { * * @param array $resource The row from the retriever_resource table */ -function retrieve_resource($resource) { +function retrieve_resource(array $resource) { $components = parse_url($resource['url']); if (!$components) { Logger::warning('retrieve_resource: URL ' . $resource['url'] . 
' could not be parsed'); @@ -325,7 +325,7 @@ function retrieve_resource($resource) { * @param boolean $create Whether to create a new configuration if none exists already * @return array The row from the retriever_rule database for this configuration */ -function get_retriever_rule($contact_id, $uid, $create) { +function get_retriever_rule(string $contact_id, string $uid, bool $create) { $retriever_rule = DBA::selectFirst('retriever_rule', [], ['contact-id' => intval($contact_id), 'uid' => intval($uid)]); if ($retriever_rule) { $retriever_rule['data'] = json_decode($retriever_rule['data'], true); @@ -344,7 +344,7 @@ function get_retriever_rule($contact_id, $uid, $create) { * @param array $retriever_item Row from the retriever_item table * @return array Item that was found, or undef if no item could be found */ -function retriever_get_item($retriever_item) { +function retriever_get_item(array $retriever_item) { $item = Post::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid']), 'contact-id' => intval($retriever_item['contact-id'])]); if (!DBA::isResult($item)) { Logger::warning('retriever_get_item: no item found for uri ' . $retriever_item['item-uri']); @@ -359,7 +359,7 @@ function retriever_get_item($retriever_item) { * @param int $retriever_item_id ID of the retriever item corresponding to this resource * @param array $resource The full details of the completed resource */ -function retriever_item_completed($retriever_item_id, $resource) { +function retriever_item_completed(string $retriever_item_id, array $resource) { Logger::debug('retriever_item_completed: id ' . $retriever_item_id . ' url ' . 
$resource['url']); $retriever_item = DBA::selectFirst('retriever_item', [], ['id' => intval($retriever_item_id)]); @@ -386,7 +386,7 @@ function retriever_item_completed($retriever_item_id, $resource) { * * @param array $resource The full details of the completed resource */ -function retriever_resource_completed($resource) { +function retriever_resource_completed(array $resource) { Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url']); foreach (DBA::selectToArray('retriever_item', ['id'], ['resource' => intval($resource['id'])]) as $retriever_item) { retriever_item_completed($retriever_item['id'], $resource); @@ -399,7 +399,7 @@ function retriever_resource_completed($resource) { * @param array $retriever The row from the retriever_rule table for the contact * @param int $num The number of existing items to queue for retrieval */ -function apply_retrospective($retriever, $num) { +function apply_retrospective(array $retriever, int $num) { foreach (Post::selectToArray([], ['contact-id' => intval($retriever['contact-id'])], ['order' => ['received' => true], 'limit' => $num]) as $item) { Item::update(['visible' => 0], ['id' => intval($item['id'])]); foreach (DBA::selectToArray('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]) as $retriever_item) { @@ -418,7 +418,7 @@ function apply_retrospective($retriever, $num) { * * TODO: This queries then inserts. It should use some kind of lock to avoid requesting the same resource twice. 
*/ -function retriever_on_item_insert($retriever, &$item) { +function retriever_on_item_insert(array $retriever, array &$item) { if (!$retriever || !$retriever['id']) { Logger::info('retriever_on_item_insert: No retriever supplied'); return; @@ -457,7 +457,7 @@ function retriever_on_item_insert($retriever, &$item) { * @param boolean $binary Specifies if this download should be done in binary mode * @return array The created resource */ -function add_retriever_resource($url, $uid, $cid, $binary = false) { +function add_retriever_resource(string $url, string $uid, string $cid, bool $binary = false) { Logger::debug('add_retriever_resource: url ' . $url . ' uid ' . $uid . ' contact-id ' . $cid); $scheme = parse_url($url, PHP_URL_SCHEME); @@ -505,7 +505,7 @@ function add_retriever_resource($url, $uid, $cid, $binary = false) { * @param array $resource Resource that the item needs to wait for. This must have already been stored in the database. * @return int ID of the retriever item that was created, or the existing one if present */ -function add_retriever_item($item, $resource) { +function add_retriever_item(array $item, array $resource) { Logger::debug('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); if (!array_key_exists('id', $resource) || !$resource['id']) { @@ -532,7 +532,7 @@ function add_retriever_item($item, $resource) { * @param array $resource The completed resource * @return string Character encoding, e.g. 
"utf-8" or "iso-8859-1" */ -function retriever_get_encoding($resource) { +function retriever_get_encoding(array $resource) { $matches = array(); if (preg_match('/charset=(.*)/', $resource['type'], $matches)) { return trim(array_pop($matches)); @@ -547,7 +547,7 @@ function retriever_get_encoding($resource) { * @param DOMDocument $doc Input to the XSLT template * @return DOMDocument Result of applying the template */ -function retriever_apply_xslt_text($xslt_text, $doc) { +function retriever_apply_xslt_text(string $xslt_text, DOMDocument $doc) { if (!$xslt_text) { Logger::info('retriever_apply_xslt_text: empty XSLT text'); return $doc; @@ -570,7 +570,7 @@ function retriever_apply_xslt_text($xslt_text, $doc) { * @param array &$item Item to be in which to store the new body (by ref). This may or may not be already stored in the database. * @param array $resource Newly completed resource, which should be text (HTML or XML) */ -function retriever_apply_dom_filter($retriever, &$item, $resource) { +function retriever_apply_dom_filter(array $retriever, array &$item, array $resource) { Logger::debug('retriever_apply_dom_filter: applying XSLT to uri ' . $item['uri'] . ' uid ' . $item['uid'] . ' contact ' . 
$item['contact-id']); if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) { @@ -614,7 +614,7 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) { * * @param array $resource The resource containing the text content */ -function retriever_load_into_dom($resource) { +function retriever_load_into_dom(array $resource) { $encoding = retriever_get_encoding($resource); $content = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $encoding); $doc = new DOMDocument('1.0', 'UTF-8'); @@ -634,7 +634,7 @@ function retriever_load_into_dom($resource) { * @param array $retriever The retriever configuration for this contact * @return DOMDocument New DOM document containing only the desired content */ -function retriever_extract($doc, $retriever) { +function retriever_extract(DOMDocument $doc, array $retriever) { $params = array('$spec' => $retriever['data']); $extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/'); $extract_xslt = Renderer::replaceMacros($extract_template, $params); @@ -656,7 +656,7 @@ function retriever_extract($doc, $retriever) { * @param array $resource Completed resource which contains the text in the DOM document * @return DOMDocument New DOM document with global URLs */ -function retriever_globalise_urls($doc, $resource) { +function retriever_globalise_urls(DOMDocument $doc, array $resource) { $components = parse_url($resource['redirect-url']); if (!array_key_exists('scheme', $components) || !array_key_exists('host', $components) || !array_key_exists('path', $components)) { return $doc; @@ -675,7 +675,7 @@ function retriever_globalise_urls($doc, $resource) { * * @param array $item Row from the item table */ -function retriever_get_body($item) { +function retriever_get_body(array $item) { if (!array_key_exists('uri-id', $item) || !$item['uri-id']) { // item has not yet been stored in database return $item['body']; @@ -703,7 +703,7 @@ function 
retriever_get_body($item) { * @param array &$item Item in which to set the body (by ref). This may or may not be already stored in the database. * @param string $body New body content */ -function retriever_set_body(&$item, $body) { +function retriever_set_body(array &$item, string $body) { $item['body'] = $body; if (!array_key_exists('id', $item) || !$item['id']) { // item has not yet been stored in database @@ -717,7 +717,7 @@ function retriever_set_body(&$item, $body) { * * @param array &$item Item to be searched for images and updated (by ref). This may or may not be already stored in the database. */ -function retrieve_images(&$item) { +function retrieve_images(array &$item) { if (!DI::config()->get('retriever', 'allow_images')) { return; } @@ -755,7 +755,7 @@ function retrieve_images(&$item) { * * @param array &$item Row from the item table (by ref) */ -function retriever_check_item_completed(&$item) +function retriever_check_item_completed(array &$item) { $waiting = DBA::selectFirst('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'contact-id' => intval($item['contact-id']), 'finished' => 0]); Logger::debug('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] . ' '. $item['contact-id'] . ' waiting for resources'); @@ -774,7 +774,7 @@ function retriever_check_item_completed(&$item) * @param array &$item Row from the item table (by ref) * @param array $resource The resource that has just been completed */ -function retriever_apply_completed_resource_to_item($retriever, &$item, $resource) { +function retriever_apply_completed_resource_to_item(array $retriever, array &$item, array $resource) { Logger::debug('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . 
$item['plink']); if (strpos($resource['type'], 'image') !== false) { retriever_transform_images($item, $resource); @@ -800,7 +800,7 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc * * TODO: split this into two functions, one to store the image, the other to change the item body */ -function retriever_transform_images(&$item, $resource) { +function retriever_transform_images(array &$item, array $resource) { if (!$resource['data']) { Logger::info('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url']); return; @@ -842,7 +842,7 @@ function retriever_transform_images(&$item, $resource) { * * @param App $a The App object */ -function retriever_content($a) { +function retriever_content(App $a) { if (!local_user()) { $a->page['content'] .= "

Please log in

"; return; @@ -973,7 +973,7 @@ function retriever_content($a) { * @param App $a The App object * @param array $args Contact menu details to be filled in (by ref) */ -function retriever_contact_photo_menu($a, &$args) { +function retriever_contact_photo_menu(App $a, array &$args) { if (!$args) { return; } @@ -988,7 +988,7 @@ function retriever_contact_photo_menu($a, &$args) { * @param App $a The App object (by ref) * @param array $item New item, which has not yet been inserted into database (by ref) */ -function retriever_post_remote_hook(&$a, &$item) { +function retriever_post_remote_hook(App &$a, array &$item) { Logger::info('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']); $retriever_rule = get_retriever_rule($item['contact-id'], $item["uid"], false); @@ -1015,7 +1015,7 @@ function retriever_post_remote_hook(&$a, &$item) { * @param App $a The App object (by ref) * @param string $s HTML string to which to append settings content (by ref) */ -function retriever_addon_settings(&$a, &$s) { +function retriever_addon_settings(App &$a, string &$s) { $all_photos = DI::config()->get(local_user(), 'retriever', 'all_photos'); $oembed = DI::config()->get(local_user(), 'retriever', 'oembed'); $template = Renderer::getMarkupTemplate('/settings.tpl', 'addon/retriever/'); @@ -1040,7 +1040,7 @@ function retriever_addon_settings(&$a, &$s) { * @param App $a The App object * @param array $post Posted content */ -function retriever_addon_settings_post($a, $post) { +function retriever_addon_settings_post(App $a, array $post) { if ($post['retriever_all_photos']) { DI::config()->set(local_user(), 'retriever', 'all_photos', $post['retriever_all_photos']); } From 025b362290b6c933b02eef9357197513b07f8abf Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sat, 7 Jan 2023 00:05:19 +0100 Subject: [PATCH 88/94] Add missing use statement --- retriever/retriever.php | 1 + 1 file changed, 1 insertion(+) diff --git a/retriever/retriever.php 
b/retriever/retriever.php index 7d1229e6..7b9a7bf4 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -22,6 +22,7 @@ use Friendica\Model\Item; use Friendica\Model\Post; use Friendica\Util\DateTimeFormat; use Friendica\DI; +use Friendica\App; /** * @brief Installation hook for retriever plugin From 1fc7334639363abbb70af91c9b47ded78e6847ab Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sat, 7 Jan 2023 11:21:50 +0100 Subject: [PATCH 89/94] need return value hint --- mailstream/mailstream.php | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mailstream/mailstream.php b/mailstream/mailstream.php index 71b123ad..ad28c18e 100644 --- a/mailstream/mailstream.php +++ b/mailstream/mailstream.php @@ -416,7 +416,7 @@ function mailstream_send(string $message_id, array $item, array $user): bool '$upstream' => DI::l10n()->t('Upstream'), '$local' => DI::l10n()->t('Local'), '$item' => $item]); - mailstream_html_wrap($mail->Body); + $mail->Body = mailstream_html_wrap($mail->Body); if (!$mail->Send()) { throw new Exception($mail->ErrorInfo); } @@ -437,7 +437,8 @@ function mailstream_send(string $message_id, array $item, array $user): bool * bbcode's output suitable for transmission, we try to break things * up so that lines are about 200 characters. 
* - * @param string $text text to word wrap - modified in-place + * @param string $text text to word wrap + * @return string wrapped text */ function mailstream_html_wrap(string &$text) { @@ -446,6 +447,7 @@ function mailstream_html_wrap(string &$text) $lines[$i] = preg_replace('/ /', "\n", $lines[$i], 1); } $text = implode($lines); + return $text; } /** From 3a2065b505400f4420d6ae993fc769b8739524f3 Mon Sep 17 00:00:00 2001 From: Michael Date: Mon, 17 Oct 2022 05:50:23 +0000 Subject: [PATCH 90/94] The priority is now a class constant --- ifttt/ifttt.php | 2 +- twitter/twitter.php | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ifttt/ifttt.php b/ifttt/ifttt.php index 6d4f402c..faaea2ca 100644 --- a/ifttt/ifttt.php +++ b/ifttt/ifttt.php @@ -180,5 +180,5 @@ function ifttt_message($uid, $item) $link = hash('ripemd128', $item['msg']); } - Post\Delayed::add($link, $post, Worker::PRIORITY_MEDIUM, Post\Delayed::PREPARED); + Post\Delayed::add($link, $post, Worker::PRIORITY_MEDIUM, Post\Delayed::UNPREPARED); } diff --git a/twitter/twitter.php b/twitter/twitter.php index c55489a0..59861a35 100644 --- a/twitter/twitter.php +++ b/twitter/twitter.php @@ -1363,7 +1363,7 @@ function twitter_fetchtimeline(int $uid): void Logger::info('Posting mirror post', ['twitter-id' => $post->id_str, 'uid' => $uid]); - Post\Delayed::add($mirrorpost['extid'], $mirrorpost, Worker::PRIORITY_MEDIUM, Post\Delayed::PREPARED); + Post\Delayed::add($mirrorpost['extid'], $mirrorpost, Worker::PRIORITY_MEDIUM, Post\Delayed::UNPREPARED); } } DI::pConfig()->set($uid, 'twitter', 'lastid', $lastid); From 48f4dc01753ca86121c2d4160f59276c7e918766 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 28 Dec 2022 19:40:50 +0100 Subject: [PATCH 91/94] replace local_user --- retriever/retriever.php | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 7b9a7bf4..d374ca80 100644 --- 
a/retriever/retriever.php +++ b/retriever/retriever.php @@ -10,6 +10,7 @@ use Friendica\Core\Addon; use Friendica\Core\Hook; use Friendica\Core\Logger; use Friendica\Core\Renderer; +use Friendica\Core\Session; use Friendica\Core\System; use Friendica\Content\Text\HTML; use Friendica\Content\Text\BBCode; @@ -844,12 +845,12 @@ function retriever_transform_images(array &$item, array $resource) { * @param App $a The App object */ function retriever_content(App $a) { - if (!local_user()) { + if (!Session::getLocalUser()) { $a->page['content'] .= "

Please log in

"; return; } if (isset(DI::args()->getArgv()[1]) and DI::args()->getArgv()[1] === 'help') { - $feeds = DBA::selectToArray('contact', ['id', 'name', 'thumb'], ['uid' => local_user(), 'network' => 'feed']); + $feeds = DBA::selectToArray('contact', ['id', 'name', 'thumb'], ['uid' => Session::getLocalUser(), 'network' => 'feed']); for ($i = 0; $i < count($feeds); ++$i) { $feeds[$i]['url'] = DI::baseUrl()->get(true) . '/retriever/' . $feeds[$i]['id']; } @@ -862,13 +863,13 @@ function retriever_content(App $a) { } if (isset(DI::args()->getArgv()[1])) { $arg1 = DI::args()->getArgv()[1]; - $retriever_rule = get_retriever_rule($arg1, local_user(), false); + $retriever_rule = get_retriever_rule($arg1, Session::getLocalUser(), false); if (!$retriever_rule) { $retriever_rule = ['id' => 0, 'data' => ['enable' => 0, 'modurl' => '', 'pattern' => '', 'replace' => '', 'images' => 0, 'storecookies' => 0, 'cookiedata' => '', 'customxslt' => '', 'include' => '', 'exclude' => '']]; } if (!empty($_POST["id"])) { - $retriever_rule = get_retriever_rule($arg1, local_user(), true); + $retriever_rule = get_retriever_rule($arg1, Session::getLocalUser(), true); $retriever_rule['data'] = array(); foreach (array('modurl', 'pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) { if (empty($_POST['retriever_' . 
$setting])) { @@ -1017,8 +1018,8 @@ function retriever_post_remote_hook(App &$a, array &$item) { * @param string $s HTML string to which to append settings content (by ref) */ function retriever_addon_settings(App &$a, string &$s) { - $all_photos = DI::config()->get(local_user(), 'retriever', 'all_photos'); - $oembed = DI::config()->get(local_user(), 'retriever', 'oembed'); + $all_photos = DI::config()->get(Session::getLocalUser(), 'retriever', 'all_photos'); + $oembed = DI::config()->get(Session::getLocalUser(), 'retriever', 'oembed'); $template = Renderer::getMarkupTemplate('/settings.tpl', 'addon/retriever/'); $config = array('$submit' => DI::l10n()->t('Save Settings'), '$title' => DI::l10n()->t('Retriever Settings'), @@ -1043,15 +1044,15 @@ function retriever_addon_settings(App &$a, string &$s) { */ function retriever_addon_settings_post(App $a, array $post) { if ($post['retriever_all_photos']) { - DI::config()->set(local_user(), 'retriever', 'all_photos', $post['retriever_all_photos']); + DI::config()->set(Session::getLocalUser(), 'retriever', 'all_photos', $post['retriever_all_photos']); } else { - DI::config()->delete(local_user(), 'retriever', 'all_photos'); + DI::config()->delete(Session::getLocalUser(), 'retriever', 'all_photos'); } if ($post['retriever_oembed']) { - DI::config()->set(local_user(), 'retriever', 'oembed', $post['retriever_oembed']); + DI::config()->set(Session::getLocalUser(), 'retriever', 'oembed', $post['retriever_oembed']); } else { - DI::config()->delete(local_user(), 'retriever', 'oembed'); + DI::config()->delete(Session::getLocalUser(), 'retriever', 'oembed'); } } From 9ae80d744542f322180c0f9916391f3883c30cb2 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Thu, 29 Dec 2022 19:57:03 +0100 Subject: [PATCH 92/94] fix contact photo menu callback --- retriever/retriever.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index d374ca80..46916647 100644 --- 
a/retriever/retriever.php +++ b/retriever/retriever.php @@ -975,7 +975,7 @@ function retriever_content(App $a) { * @param App $a The App object * @param array $args Contact menu details to be filled in (by ref) */ -function retriever_contact_photo_menu(App $a, array &$args) { +function retriever_contact_photo_menu(array &$args) { if (!$args) { return; } From d05c5c6dc5fb6b659e54a4baa4edb7856ae37d76 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Thu, 29 Dec 2022 20:02:18 +0100 Subject: [PATCH 93/94] fix contact photo menu callback really --- retriever/retriever.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index 46916647..a5e2f779 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -6,6 +6,7 @@ * Author: Matthew Exon */ +use Friendica\App; use Friendica\Core\Addon; use Friendica\Core\Hook; use Friendica\Core\Logger; @@ -972,10 +973,10 @@ function retriever_content(App $a) { /** * @brief Hook that adds the retriever option to the contact menu * - * @param App $a The App object + * @param App $a The App object (by ref) * @param array $args Contact menu details to be filled in (by ref) */ -function retriever_contact_photo_menu(array &$args) { +function retriever_contact_photo_menu(App &$a, array &$args) { if (!$args) { return; } From 3da587f07eb3f69824043a5b1415224408d3285c Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Sat, 7 Jan 2023 18:46:09 +0100 Subject: [PATCH 94/94] remove duplicate use directive --- retriever/retriever.php | 1 - 1 file changed, 1 deletion(-) diff --git a/retriever/retriever.php b/retriever/retriever.php index a5e2f779..9370271c 100644 --- a/retriever/retriever.php +++ b/retriever/retriever.php @@ -24,7 +24,6 @@ use Friendica\Model\Item; use Friendica\Model\Post; use Friendica\Util\DateTimeFormat; use Friendica\DI; -use Friendica\App; /** * @brief Installation hook for retriever plugin