friendica-addons/retriever/retriever.php

884 lines
43 KiB
PHP
Raw Normal View History

2019-07-07 15:45:23 +02:00
<?php
/**
2019-09-30 08:25:00 +02:00
* Name: Retriever
2019-07-07 15:45:23 +02:00
* Description: Follow the permalink of RSS/Atom feed items and replace the summary with the full content.
2019-09-30 08:25:00 +02:00
* Version: 1.0
2019-07-07 15:45:23 +02:00
* Author: Matthew Exon <http://mat.exon.name>
*/
use Friendica\Core\Addon;
use Friendica\Core\Config;
use Friendica\Core\PConfig;
2019-07-20 15:37:57 +02:00
use Friendica\Core\Logger;
2019-07-20 15:45:10 +02:00
use Friendica\Core\Renderer;
2019-09-22 11:47:30 +02:00
use Friendica\Core\System;
2019-07-07 15:45:23 +02:00
use Friendica\Content\Text\HTML;
use Friendica\Content\Text\BBCode;
2019-09-22 11:47:30 +02:00
use Friendica\Model\Photo;
2019-07-07 15:45:23 +02:00
use Friendica\Object\Image;
use Friendica\Util\Network;
use Friendica\Core\L10n;
use Friendica\Database\DBA;
2019-07-21 19:27:14 +02:00
use Friendica\Model\ItemURI;
2019-09-22 11:47:30 +02:00
use Friendica\Model\Item;
2019-10-11 18:47:32 +02:00
use Friendica\Util\DateTimeFormat;
2019-07-07 15:45:23 +02:00
2019-10-12 19:08:11 +02:00
/**
 * @brief Installation hook for retriever plugin
 *
 * Registers the addon's hooks and then migrates the addon's database tables
 * step by step, keyed on the 'dbversion' config value, until version 0.14 is
 * reached.  A database that is not yet at 0.14 after the incremental steps
 * (including a fresh install) has the statements in database.sql executed.
 */
function retriever_install() {
    Addon::registerHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings');
    Addon::registerHook('plugin_settings_post', 'addon/retriever/retriever.php', 'retriever_plugin_settings_post');
    Addon::registerHook('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook');
    Addon::registerHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu');
    Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron');

    if (Config::get('retriever', 'dbversion') == '0.10') {
        q('ALTER TABLE `retriever_resource` MODIFY COLUMN `type` char(255) NULL DEFAULT NULL');
        q('ALTER TABLE `retriever_resource` MODIFY COLUMN `data` mediumblob NULL DEFAULT NULL');
        q('ALTER TABLE `retriever_rule` MODIFY COLUMN `data` mediumtext NULL DEFAULT NULL');
        Config::set('retriever', 'dbversion', '0.11');
    }
    if (Config::get('retriever', 'dbversion') == '0.11') {
        q('ALTER TABLE `retriever_resource` ADD INDEX `url` (`url`)');
        q('ALTER TABLE `retriever_resource` ADD INDEX `completed` (`completed`)');
        q('ALTER TABLE `retriever_item` ADD INDEX `finished` (`finished`)');
        q('ALTER TABLE `retriever_item` ADD INDEX `item-uid` (`item-uid`)');
        Config::set('retriever', 'dbversion', '0.12');
    }
    if (Config::get('retriever', 'dbversion') == '0.12') {
        q("ALTER TABLE `retriever_resource` ADD COLUMN `contact-id` int(10) unsigned NOT NULL DEFAULT '0' AFTER `id`");
        q("ALTER TABLE `retriever_resource` ADD COLUMN `item-uid` int(10) unsigned NOT NULL DEFAULT '0' AFTER `id`");
        Config::set('retriever', 'dbversion', '0.13');
    }
    if (Config::get('retriever', 'dbversion') == '0.13') {
        // The version is deliberately not bumped here: the block below runs
        // next and sets it to 0.14.
        Config::set('retriever', 'downloads_per_cron', '100');
    }
    if (Config::get('retriever', 'dbversion') != '0.14') {
        $schema = file_get_contents(dirname(__FILE__) . '/database.sql');
        if ($schema === false) {
            Logger::warning('Unable to read database schema file database.sql');
            return;
        }
        foreach (explode(';', $schema) as $statement) {
            // Splitting on ';' leaves a blank fragment after the last
            // statement; executing it would be reported as a failure.
            if (trim($statement) === '') {
                continue;
            }
            if (!DBA::e($statement)) {
                Logger::warning('Unable to create database table: ' . DBA::errorMessage());
                return;
            }
        }
        Config::set('retriever', 'downloads_per_cron', '100');
        Config::set('retriever', 'dbversion', '0.14');
    }
}
2019-10-12 19:08:11 +02:00
/**
 * @brief Uninstallation hook for retriever plugin
 *
 * Unregisters every hook that retriever_install() registered.  Each hook is
 * unregistered exactly once.
 */
function retriever_uninstall() {
    $hooks = [
        'plugin_settings'      => 'retriever_plugin_settings',
        'plugin_settings_post' => 'retriever_plugin_settings_post',
        'post_remote'          => 'retriever_post_remote_hook',
        'contact_photo_menu'   => 'retriever_contact_photo_menu',
        'cron'                 => 'retriever_cron',
    ];
    foreach ($hooks as $hook => $function) {
        Addon::unregisterHook($hook, 'addon/retriever/retriever.php', $function);
    }
}
2019-10-12 19:08:11 +02:00
/**
 * @brief Module hook for retriever plugin
 *
 * Currently a no-op.
 *
 * TODO: figure out what this should be used for
 */
function retriever_module()
{
}
2019-10-12 19:08:11 +02:00
/**
 * @brief Admin page hook for retriever plugin
 *
 * Renders the admin form (a single "Downloads per Cron" field) and appends it
 * to $o.
 *
 * NOTE(review): the original took no parameters and appended to an undefined
 * local $o, so the rendered form was thrown away.  The two optional
 * by-reference parameters follow the usual Friendica `<addon>_addon_admin`
 * hook convention (app, output buffer) - confirm against the core caller.
 *
 * @param mixed  $a Application object passed by the caller (unused)
 * @param string $o Output buffer the rendered form is appended to (by ref)
 */
function retriever_addon_admin(&$a = null, &$o = '') {
    $downloads_per_cron = Config::get('retriever', 'downloads_per_cron');
    $template = Renderer::getMarkupTemplate('admin.tpl', 'addon/retriever/');
    $config = ['downloads_per_cron',
               L10n::t('Downloads per Cron'),
               $downloads_per_cron,
               L10n::t('Maximum number of downloads to attempt during each run of the cron job.')];
    $o .= Renderer::replaceMacros($template, [
        '$downloads_per_cron' => $config,
        '$submit' => L10n::t('Save Settings')]);
}
2019-10-12 19:08:11 +02:00
/**
 * @brief Admin page post hook for retriever plugin
 *
 * Stores the submitted 'downloads_per_cron' value.  The value is used
 * elsewhere as a numeric limit, so anything that is not a positive integer
 * is ignored rather than written to the configuration.
 */
function retriever_addon_admin_post () {
    if (empty($_POST['downloads_per_cron']) || !is_scalar($_POST['downloads_per_cron'])) {
        return;
    }
    $downloads_per_cron = intval($_POST['downloads_per_cron']);
    if ($downloads_per_cron <= 0) {
        return;
    }
    // Stored as a string, like the default written by retriever_install()
    Config::set('retriever', 'downloads_per_cron', strval($downloads_per_cron));
}
2019-10-12 19:08:11 +02:00
/**
 * @brief Cron jobs for retriever plugin
 *
 * Runs the three periodic tasks in order: clean up items whose resource has
 * already completed, retrieve pending resources, then tidy old rows.  The
 * configured 'downloads_per_cron' value is passed as the limit to the first
 * two tasks.
 */
function retriever_cron() {
    $limit = Config::get('retriever', 'downloads_per_cron');

    // Clean-up must come first, otherwise it can interfere with retriever_retrieve_items
    retriever_clean_up_completed_resources($limit);
    retriever_retrieve_items($limit);
    retriever_tidy();
}
// Number of resources retrieved so far; read and incremented by retriever_retrieve_items()
$retriever_item_count = 0;

/**
 * @brief Searches for items in the retriever_items table that should be retrieved and attempts to retrieve them
 *
 * Selects uncompleted rows from retriever_resource that have either never
 * been tried or whose retry delay has elapsed, and calls retrieve_resource()
 * on each until $max_items have been handled or nothing is left.
 *
 * @param int $max_items Maximum number of items to retrieve in this call
 */
function retriever_retrieve_items($max_items) {
    global $retriever_item_count;

    // Retry back-off: a resource with `num-tries` = N becomes eligible again
    // once entry N of this schedule has elapsed since `last-try`.
    $retriever_schedule = [
        [1, 'minute'],
        [10, 'minute'],
        [1, 'hour'],
        [1, 'day'],
        [2, 'day'],
        [1, 'week'],
        [1, 'month'],
    ];

    $schedule_clauses = [];
    foreach ($retriever_schedule as $tries => $delay) {
        list($num, $unit) = $delay;
        $schedule_clauses[] =
            '(`num-tries` = ' . $tries . ' AND TIMESTAMPADD(' . DBA::escape($unit) .
            ', ' . intval($num) . ', `last-try`) < now())';
    }
    // implode() takes the separator first; the reversed legacy order no
    // longer works on PHP 8.
    $condition = '`completed` IS NULL AND (`last-try` IS NULL OR ' . implode(' OR ', $schedule_clauses) . ')';

    $retrieve_items = $max_items - $retriever_item_count;
    // Nothing is queried when the budget is already used up.
    while ($retrieve_items > 0) {
        Logger::debug('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . intval($retriever_item_count) . ', retrieve ' . $retrieve_items);
        $retriever_resources = DBA::selectToArray('retriever_resource', [], [$condition], ['order' => ['last-try' => 0], 'limit' => $retrieve_items]);
        if (!is_array($retriever_resources) || count($retriever_resources) == 0) {
            break;
        }
        Logger::debug('retriever_retrieve_items: found ' . count($retriever_resources) . ' waiting resources in database');
        foreach ($retriever_resources as $retriever_resource) {
            retrieve_resource($retriever_resource);
            $retriever_item_count++;
        }
        $retrieve_items = $max_items - $retriever_item_count;
    }
    Logger::debug('retriever_retrieve_items: finished retrieving items');
}
2019-07-07 15:45:23 +02:00
2019-10-12 19:08:11 +02:00
/**
 * @brief Looks for items that are waiting even though the resource has completed. This shouldn't happen, but is worth cleaning up if it does.
 *
 * For each such retriever_item the completed resource is applied to the
 * item, the retriever_item is marked finished and the item's visibility is
 * re-evaluated.
 *
 * @param int $max_items Maximum number of items to retrieve in this call
 */
function retriever_clean_up_completed_resources($max_items) {
    // TODO: figure out how to do this with DBA module
    $r = q('SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d',
           intval($max_items));
    if (!$r) {
        $r = [];
    }
    Logger::debug('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . count($r));

    foreach ($r as $rr) {
        $retriever_item = retriever_get_retriever_item($rr['item']);
        if (!DBA::isResult($retriever_item)) {
            Logger::warning('retriever_clean_up_completed_resources: no retriever item with id ' . $rr['item']);
            continue;
        }
        $item = retriever_get_item($retriever_item);
        if (!$item) {
            Logger::warning('retriever_clean_up_completed_resources: no item ' . $retriever_item['item-uri']);
            continue;
        }
        $retriever_rule = get_retriever_rule($retriever_item['contact-id'], $item['uid']);
        if (!$retriever_rule) {
            // The retriever_item column is `item-uid`; there is no `uid` column to read here
            Logger::warning('retriever_clean_up_completed_resources: no retriever for uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['item-uid'] . ' ' . $retriever_item['contact-id']);
            continue;
        }
        $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]);
        if (!DBA::isResult($resource)) {
            // The row may have been deleted since the query above ran
            Logger::warning('retriever_clean_up_completed_resources: no resource with id ' . $rr['resource']);
            continue;
        }
        retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource);
        DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished' => 0]);
        retriever_check_item_completed($item);
    }
}
2019-10-12 19:08:11 +02:00
/**
 * @brief Deletes old rows from the retriever_item and retriever_resource table that are unlikely to be needed
 *
 * Removes resources completed more than a week ago, resources still
 * uncompleted three months after creation, and retriever_items whose
 * resource row no longer exists.
 */
function retriever_tidy() {
    DBA::delete('retriever_resource', ['completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)']);
    DBA::delete('retriever_resource', ['completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)']);

    $r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null");
    // Same normalisation as retriever_clean_up_completed_resources(): a
    // falsy result must not reach count() or foreach.
    if (!$r) {
        $r = [];
    }
    Logger::info('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource');
    foreach ($r as $rr) {
        q('DELETE FROM retriever_item WHERE id = %d', intval($rr['id']));
    }
}
/**
 * @brief Completes a resource whose URL is a base64 "data:" URL by decoding the URL itself
 *
 * The resource is marked completed whether or not the URL could be decoded,
 * because retrying cannot change the outcome.
 *
 * @param array $resource Row from the retriever_resource table
 */
function retrieve_dataurl_resource($resource) {
    // The pattern must look for the "data:" scheme that retrieve_resource()
    // dispatches on; the previous "date:" spelling never matched one.
    if (!preg_match("/data:(.*);base64,(.*)/", $resource['url'], $matches)) {
        Logger::warning('retrieve_dataurl_resource: resource ' . $resource['id'] . ' does not match pattern');
    } else {
        $resource['type'] = $matches[1];
        $resource['data'] = base64url_decode($matches[2]);
    }

    // Succeed or fail, there's no point retrying
    q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `completed` = now(), `data` = '%s', `type` = '%s' WHERE id = %d",
      DBA::escape(isset($resource['data']) ? $resource['data'] : ''),
      DBA::escape(isset($resource['type']) ? $resource['type'] : ''),
      intval($resource['id']));
    retriever_resource_completed($resource);
}
/**
 * @brief Attempts to download one resource and records the outcome
 *
 * "data:" URLs are handed to retrieve_dataurl_resource().  Otherwise the URL
 * is fetched, optionally with the cookies stored in the matching retriever
 * rule.  The attempt is always recorded on the retriever_resource row; when
 * a body was received the row is also marked completed and the waiting items
 * are processed.
 *
 * @param array $resource Row from the retriever_resource table
 */
function retrieve_resource($resource) {
    if (substr($resource['url'], 0, 5) == "data:") {
        return retrieve_dataurl_resource($resource);
    }

    $retriever_rule = get_retriever_rule($resource['contact-id'], $resource['item-uid']);
    if (!$retriever_rule) {
        Logger::warning('retrieve_resource: no rule found for contact ' . $resource['contact-id'] . ' item ' . $resource['item-uid']);
        return;
    }
    $rule_data = $retriever_rule['data'];
    if (!$rule_data) {
        Logger::warning('retrieve_resource: no rule data found for contact ' . $resource['contact-id'] . ' item ' . $resource['item-uid']);
        return;
    }

    $store_cookies = array_key_exists('storecookies', $rule_data) && $rule_data['storecookies'];
    $cookiejar = '';
    try {
        Logger::debug('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url']);
        $redirects = 0;
        if ($store_cookies) {
            $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-');
            file_put_contents($cookiejar, isset($rule_data['cookiedata']) ? $rule_data['cookiedata'] : '');
        }
        $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar);
        if ($store_cookies) {
            // Persist whatever cookies the fetch left in the jar back into the rule
            $retriever_rule['data']['cookiedata'] = file_get_contents($cookiejar);
            DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], $retriever_rule);
        }
        $resource['data'] = $fetch_result->getBody();
        $resource['http-code'] = $fetch_result->getReturnCode();
        $resource['type'] = $fetch_result->getContentType();
        $resource['redirect-url'] = $fetch_result->getRedirectUrl();
        Logger::debug('retrieve_resource: got code ' . $resource['http-code'] . ' retrieving resource ' . $resource['id'] . ' final url ' . $resource['redirect-url']);
    } catch (Exception $e) {
        Logger::info('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage());
    } finally {
        // Remove the temporary cookie jar even when the fetch threw
        if ($cookiejar && file_exists($cookiejar)) {
            unlink($cookiejar);
        }
    }

    // Argument order is (table, fields to set, condition), as in the other
    // DBA::update() calls in this file; these two calls had fields and
    // condition swapped.
    $condition = ['id' => intval($resource['id'])];
    DBA::update('retriever_resource', ['last-try' => DateTimeFormat::utcNow(), 'num-tries' => intval($resource['num-tries']) + 1, 'http-code' => intval($resource['http-code']), 'redirect-url' => $resource['redirect-url']], $condition, ['last-try' => false]);
    if ($resource['data']) {
        DBA::update('retriever_resource', ['completed' => DateTimeFormat::utcNow(), 'data' => $resource['data'], 'type' => $resource['type']], $condition, ['completed' => false]);
        retriever_resource_completed($resource);
    }
}
2019-09-22 11:47:30 +02:00
/**
 * @brief Loads the retriever rule for a contact and user, optionally creating it
 *
 * The rule's 'data' column holds JSON; it is returned decoded into an array
 * whether the rule already existed or was just created.
 *
 * @param int  $contact_id Contact the rule belongs to
 * @param int  $uid        User the rule belongs to
 * @param bool $create     Insert an empty rule when none exists
 *
 * @return array|null Row from the retriever_rule table with 'data' decoded, or null when there is no rule
 */
function get_retriever_rule($contact_id, $uid, $create = false) {
    $condition = ['contact-id' => intval($contact_id), 'uid' => intval($uid)];

    $retriever_rule = DBA::selectFirst('retriever_rule', [], $condition);
    if (!$retriever_rule && $create) {
        DBA::insert('retriever_rule', ['uid' => intval($uid), 'contact-id' => intval($contact_id)]);
        $retriever_rule = DBA::selectFirst('retriever_rule', [], $condition);
    }
    if (!$retriever_rule) {
        return null;
    }

    // A row without data has nothing to decode; leave it as it is
    if (is_string($retriever_rule['data'])) {
        $retriever_rule['data'] = json_decode($retriever_rule['data'], true);
    }
    return $retriever_rule;
}
/**
 * @brief Fetches a single row from the retriever_item table
 *
 * @param int $id Id of the retriever_item row
 *
 * @return mixed Whatever DBA::selectFirst() returns for that id
 */
function retriever_get_retriever_item($id) {
    $condition = ['id' => intval($id)];

    return DBA::selectFirst('retriever_item', [], $condition);
}
2019-07-07 15:45:23 +02:00
/**
 * @brief Looks up the item that a retriever_item row refers to
 *
 * The item is matched on uri, uid and contact-id.
 *
 * @param array $retriever_item Row from the retriever_item table
 *
 * @return array|null The item, or nothing when it cannot be found
 */
function retriever_get_item($retriever_item) {
    $condition = [
        'uri'        => $retriever_item['item-uri'],
        'uid'        => intval($retriever_item['item-uid']),
        'contact-id' => intval($retriever_item['contact-id']),
    ];
    $found = Item::selectFirst([], $condition);
    if (DBA::isResult($found)) {
        return $found;
    }

    Logger::warning('retriever_get_item: no item found for uri ' . $retriever_item['item-uri']);
    return;
}
2019-10-09 20:54:39 +02:00
/**
 * @brief Applies a completed resource to the item behind one retriever_item
 *
 * Afterwards the retriever_item is marked finished and the item's
 * visibility is re-evaluated.
 *
 * @param int   $retriever_item_id Id of the retriever_item row
 * @param array $resource          Row from the retriever_resource table
 */
function retriever_item_completed($retriever_item_id, $resource) {
    Logger::debug('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url']);

    $row = retriever_get_retriever_item($retriever_item_id);
    if (!DBA::isResult($row)) {
        Logger::info('retriever_item_completed: no retriever item with id ' . $retriever_item_id);
        return;
    }

    $item = retriever_get_item($row);
    if (!$item) {
        Logger::warning('retriever_item_completed: no item ' . $row['item-uri']);
        return;
    }

    // Note: the retriever might be null. Doesn't matter.
    $rule = get_retriever_rule($row['contact-id'], $row['item-uid']);
    retriever_apply_completed_resource_to_item($rule, $item, $resource);

    $row_id = intval($row['id']);
    DBA::update('retriever_item', ['finished' => 1], ['id' => $row_id], ['finished' => 0]);
    retriever_check_item_completed($item);
}
2019-10-09 20:54:39 +02:00
/**
 * @brief Processes every retriever_item that refers to a completed resource
 *
 * @param array $resource Row from the retriever_resource table
 */
function retriever_resource_completed($resource) {
    Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url']);

    $rows = DBA::selectToArray('retriever_item', ['id'], ['resource' => intval($resource['id'])]);
    foreach ($rows as $row) {
        retriever_item_completed($row['id'], $resource);
    }
}
2019-10-09 20:54:39 +02:00
/**
 * @brief Re-queues existing items of a contact for retrieval
 *
 * Each selected item is hidden, its existing retriever_item and
 * retriever_resource rows are deleted, and it is queued again through
 * retriever_on_item_insert().
 *
 * @param array $retriever Retriever rule configuration for this contact
 * @param int   $num       Maximum number of items to process
 */
function apply_retrospective($retriever, $num) {
    $condition = ['contact-id' => intval($retriever['contact-id'])];
    $params = ['order' => ['received' => true], 'limit' => $num];

    foreach (Item::selectToArray([], $condition, $params) as $item) {
        Item::update(['visible' => 0], ['id' => intval($item['id'])]);

        $stale_rows = DBA::selectToArray('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]);
        foreach ($stale_rows as $stale) {
            DBA::delete('retriever_resource', ['id' => $stale['resource']]);
            DBA::delete('retriever_item', ['id' => $stale['id']]);
        }

        retriever_on_item_insert($retriever, $item);
    }
}
2019-10-12 19:08:11 +02:00
/**
 * @brief Queues an item for retrieval. It does not actually perform the retrieval.
 *
 * The item's permalink is taken from the item itself or, failing that, from
 * its item-content row.  When the rule has 'modurl' set, the rule's
 * pattern/replace pair is applied to the permalink before it is queued.
 *
 * @param array $retriever Retriever rule configuration for this contact
 * @param array $item Item that should be retrieved. This may or may not have been already stored in the database.
 *
 * TODO: This queries then inserts. It should use some kind of lock to avoid requesting the same resource twice.
 */
function retriever_on_item_insert($retriever, &$item) {
    if (!$retriever || !$retriever['id']) {
        Logger::info('retriever_on_item_insert: No retriever supplied');
        return;
    }
    // The previous test `!$enable == "on"` parsed as `(!$enable) == "on"`,
    // which is a plain truthiness check.  It is written out as one here so
    // behaviour for stored values is unchanged.
    if (!is_array($retriever['data']) || empty($retriever['data']['enable'])) {
        return;
    }

    if (array_key_exists('plink', $item) && strlen($item['plink'])) {
        $url = $item['plink'];
    } else {
        if (!array_key_exists('uri-id', $item)) {
            Logger::warning('retriever_on_item_insert: item ' . $item['id'] . ' has no plink and no uri-id');
            return;
        }
        $content = DBA::selectFirst('item-content', [], ['uri-id' => $item['uri-id']]);
        if (!DBA::isResult($content) || empty($content['plink'])) {
            Logger::warning('retriever_on_item_insert: no plink found for uri-id ' . $item['uri-id']);
            return;
        }
        $url = $content['plink'];
    }

    if (array_key_exists('modurl', $retriever['data']) && $retriever['data']['modurl']) {
        $orig_url = $url;
        $url = preg_replace('/' . $retriever['data']['pattern'] . '/', $retriever['data']['replace'], $orig_url);
        if ($url === null) {
            // preg_replace() yields null when the configured pattern is
            // invalid; queue the unmodified URL rather than an empty one.
            Logger::warning('retriever_on_item_insert: could not apply URL pattern to ' . $orig_url);
            $url = $orig_url;
        } else {
            Logger::debug('retriever_on_item_insert: Changed ' . $orig_url . ' to ' . $url);
        }
    }

    $resource = add_retriever_resource($url, $item['uid'], $item['contact-id']);
    add_retriever_item($item, $resource);
}
2019-10-09 20:54:39 +02:00
/**
 * @brief Returns the retriever_resource row for a URL, creating it when necessary
 *
 * "data:" URLs are decoded immediately and stored as already-completed
 * resources under an "md5://<hash of the URL>" key.  Any other URL is
 * stored uncompleted so that a later cron run downloads it.
 *
 * @param string $url    URL of the resource
 * @param int    $uid    User id of the item the resource belongs to
 * @param int    $cid    Contact id of the item the resource belongs to
 * @param bool   $binary Value for the row's 'binary' column
 *
 * @return array|bool Row from the retriever_resource table, or false when it could not be found or created
 */
function add_retriever_resource($url, $uid, $cid, $binary = false) {
    Logger::debug('add_retriever_resource: url ' . $url . ' uid ' . $uid . ' contact-id ' . $cid);

    $scheme = parse_url($url, PHP_URL_SCHEME);
    if ($scheme == 'data') {
        $fp = fopen($url, 'r');
        if ($fp === false) {
            // A malformed data URL cannot be decoded; do not store an empty
            // resource that is flagged as completed.
            Logger::warning('add_retriever_resource: unable to decode data URL');
            return false;
        }
        $meta = stream_get_meta_data($fp);
        $type = isset($meta['mediatype']) ? $meta['mediatype'] : null;
        $data = stream_get_contents($fp);
        fclose($fp);

        $url = 'md5://' . hash('md5', $url);
        $condition = ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)];
        $resource = DBA::selectFirst('retriever_resource', [], $condition);
        if ($resource) {
            Logger::debug('add_retriever_resource: Resource ' . $url . ' already requested');
            return $resource;
        }

        DBA::insert('retriever_resource', ['item-uid' => intval($uid), 'contact-id' => intval($cid), 'type' => $type, 'binary' => ($binary ? 1 : 0), 'url' => $url, 'completed' => DateTimeFormat::utcNow(), 'data' => $data]);
        $resource = DBA::selectFirst('retriever_resource', [], $condition);
        if ($resource) {
            retriever_resource_completed($resource);
        }
        return $resource;
    }

    // 800 characters is the size of this field in the database
    if (strlen($url) > 800) {
        Logger::warning('add_retriever_resource: URL is longer than 800 characters');
    }

    $condition = ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)];
    $resource = DBA::selectFirst('retriever_resource', [], $condition);
    if ($resource) {
        Logger::debug('add_retriever_resource: Resource ' . $url . ' uid ' . $uid . ' cid ' . $cid . ' already requested');
        return $resource;
    }

    DBA::insert('retriever_resource', ['item-uid' => intval($uid), 'contact-id' => intval($cid), 'binary' => ($binary ? 1 : 0), 'url' => $url]);
    return DBA::selectFirst('retriever_resource', [], $condition);
}
/**
 * @brief Records that an item is waiting for a resource
 *
 * Inserts a retriever_item row linking the item to the resource unless one
 * already exists.
 *
 * @param array      &$item    Item that is waiting (by ref)
 * @param array|bool $resource Row from the retriever_resource table; a non-array or id-less value is rejected
 *
 * @return int|null Id of the new retriever_item row, or nothing when no row was created
 */
function add_retriever_item(&$item, $resource) {
    // add_retriever_resource() can hand back a falsy value, so validate
    // before touching any array key (including in the debug line below).
    if (!is_array($resource) || empty($resource['id'])) {
        Logger::warning('add_retriever_item: resource is empty');
        // TODO: check that this does not happen
        return;
    }
    Logger::debug('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);

    $condition = ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'resource' => intval($resource['id'])];
    if (DBA::selectFirst('retriever_item', [], $condition)) {
        Logger::info("add_retriever_item: retriever item already present for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
        return;
    }

    DBA::insert('retriever_item', ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'contact-id' => intval($item['contact-id']), 'resource' => intval($resource['id'])]);
    $retriever_item = DBA::selectFirst('retriever_item', ['id'], $condition);
    if (!$retriever_item) {
        Logger::info("add_retriever_item: couldn't create retriever item for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
        return;
    }
    Logger::debug('add_retriever_item: created retriever_item ' . $retriever_item['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
    return $retriever_item['id'];
}
/**
 * @brief Extracts the character set from a resource's content type
 *
 * Only the charset token itself is returned: surrounding quotes and any
 * parameters that follow it (e.g. "; boundary=...") are excluded, and the
 * parameter name is matched case-insensitively.
 *
 * @param array $resource Row from the retriever_resource table; its 'type' holds the content type
 *
 * @return string The charset named in the content type, or 'utf-8' when none is given
 */
function retriever_get_encoding($resource) {
    $type = isset($resource['type']) ? (string) $resource['type'] : '';
    $matches = [];
    if (preg_match('/charset\s*=\s*["\']?([^\s;"\']+)/i', $type, $matches)) {
        return trim($matches[1]);
    }
    return 'utf-8';
}
/**
 * @brief Applies an XSLT stylesheet, given as text, to a DOM document
 *
 * @param string      $xslt_text XSLT stylesheet source
 * @param DOMDocument $doc       Document to transform
 *
 * @return mixed The result of the transformation; the unchanged $doc when the stylesheet text is empty or is not loadable XML
 */
function retriever_apply_xslt_text($xslt_text, $doc) {
    if ($xslt_text) {
        $stylesheet = new DOMDocument();
        if ($stylesheet->loadXML($xslt_text)) {
            $processor = new XsltProcessor();
            $processor->importStylesheet($stylesheet);
            return $processor->transformToDoc($doc);
        }

        Logger::info('retriever_apply_xslt_text: could not load XML');
        return $doc;
    }

    Logger::info('retriever_apply_xslt_text: empty XSLT text');
    return $doc;
}
/**
 * @brief Replaces an item's body with the filtered content of a retrieved page
 *
 * The resource is parsed into a DOM, reduced by the rule's include/exclude
 * and custom XSLT, has its URLs fixed up, and is converted to BBCode.  A
 * "Retrieved <date>: <permalink>" footer is appended before the body is
 * stored with retriever_set_body().  Nothing is changed when any step
 * yields no result.
 *
 * @param array $retriever Retriever rule configuration for this contact
 * @param array &$item     Item whose body is replaced (by ref)
 * @param array $resource  Row from the retriever_resource table
 */
function retriever_apply_dom_filter($retriever, &$item, $resource) {
    Logger::debug('retriever_apply_dom_filter: applying XSLT to uri ' . $item['uri'] . ' uid ' . $item['uid'] . ' contact ' . $item['contact-id']);

    $has_filter = array_key_exists('include', $retriever['data']) || array_key_exists('customxslt', $retriever['data']);
    if (!$has_filter) {
        Logger::info('retriever_apply_dom_filter: no include and no customxslt');
        return;
    }
    if (!$resource['data']) {
        Logger::info('retriever_apply_dom_filter: no text to work with');
        return;
    }

    $dom = retriever_extract(retriever_load_into_dom($resource), $retriever);
    if (!$dom) {
        Logger::info('retriever_apply_dom_filter: failed to apply extract XSLT template');
        return;
    }

    $dom = retriever_globalise_urls($dom, $resource);
    if (!$dom) {
        Logger::info('retriever_apply_dom_filter: failed to apply fix urls XSLT template');
        return;
    }

    $body = HTML::toBBCode($dom->saveHTML());
    if (!strlen($body)) {
        Logger::info('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty');
        return;
    }

    // Footer that links back to the original page
    $body .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . ': [url=' . $item['plink'] . ']' . $item['plink'] . '[/url]';
    Logger::debug('retriever_apply_dom_filter: XSLT result \"' . $body . '\"');
    retriever_set_body($item, $body);
}
/**
 * @brief Parses a retrieved resource into a DOM document
 *
 * The data is converted to HTML entities from the charset reported by
 * retriever_get_encoding().  It is parsed as HTML when the content type
 * contains "html" and as XML otherwise.
 *
 * @param array $resource Row from the retriever_resource table
 *
 * @return DOMDocument The parsed document
 */
function retriever_load_into_dom($resource) {
    $charset = retriever_get_encoding($resource);
    $markup = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $charset);

    $dom = new DOMDocument('1.0', 'UTF-8');
    $is_html = strpos($resource['type'], 'html') !== false;
    if (!$is_html) {
        $dom->loadXML($markup);
        return $dom;
    }

    // Parser warnings from the HTML loader are suppressed
    @$dom->loadHTML($markup);
    return $dom;
}
2019-07-07 15:45:23 +02:00
2019-10-02 07:19:59 +02:00
/**
 * @brief Reduces a document to the parts selected by the retriever rule
 *
 * Applies the include/exclude stylesheet rendered from extract.tpl when the
 * rule has an 'include' specification, then the rule's custom XSLT when one
 * is set.
 *
 * @param DOMDocument $doc       Document to filter
 * @param array       $retriever Retriever rule configuration for this contact
 *
 * @return mixed The filtered document as returned by retriever_apply_xslt_text()
 */
function retriever_extract($doc, $retriever) {
    $params = ['$spec' => $retriever['data']];
    $extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/');
    $extract_xslt = Renderer::replaceMacros($extract_template, $params);

    // 'include' may be absent when only a custom XSLT is configured, so
    // check for the key in the same way as for 'customxslt' below.
    if (array_key_exists('include', $retriever['data']) && $retriever['data']['include']) {
        Logger::debug('retriever_extract: applying include/exclude template \"' . $extract_xslt . '\"');
        $doc = retriever_apply_xslt_text($extract_xslt, $doc);
    }
    if (array_key_exists('customxslt', $retriever['data']) && $retriever['data']['customxslt']) {
        Logger::debug('retriever_extract: applying custom XSLT \"' . $retriever['data']['customxslt'] . '\"');
        $doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc);
    }
    return $doc;
}
2019-07-07 15:45:23 +02:00
2019-10-02 07:19:59 +02:00
/**
 * @brief Rewrites the URLs in a document using the fix-urls.tpl stylesheet
 *
 * The stylesheet receives two base URLs derived from the address the
 * resource was finally fetched from: '$rooturl' (scheme, host and port) and
 * '$dirurl' (the root URL plus the path up to and including its last slash).
 *
 * @param DOMDocument $doc      Document to rewrite
 * @param array       $resource Row from the retriever_resource table
 *
 * @return mixed The rewritten document; the unchanged $doc when no base URL can be determined
 */
function retriever_globalise_urls($doc, $resource) {
    // Use the originally requested URL when no final URL was recorded
    $url = !empty($resource['redirect-url']) ? $resource['redirect-url'] : $resource['url'];
    $components = parse_url($url);
    if (!is_array($components) || empty($components['scheme']) || empty($components['host'])) {
        Logger::info('retriever_globalise_urls: cannot determine base URL from "' . $url . '"');
        return $doc;
    }

    $rooturl = $components['scheme'] . "://" . $components['host'];
    if (!empty($components['port'])) {
        $rooturl .= ':' . $components['port'];
    }

    // Directory part of the path: everything up to and including the last
    // slash.  dirname() returned "/" for top-level pages, which produced a
    // double slash once the trailing "/" was appended.
    $path = isset($components['path']) ? $components['path'] : '/';
    $last_slash = strrpos($path, '/');
    $dirurl = $rooturl . ($last_slash === false ? '/' : substr($path, 0, $last_slash + 1));

    $params = ['$dirurl' => $dirurl, '$rooturl' => $rooturl];
    $fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/');
    $fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params);
    return retriever_apply_xslt_text($fix_urls_xslt, $doc);
}
2019-10-11 18:47:32 +02:00
/**
 * @brief Returns the current body of an item
 *
 * For an item that has a 'uri-id' the body is read from the item-content
 * table; otherwise, or when that lookup yields nothing, the body held in
 * the item array is returned.
 *
 * @param array $item Row from the item table
 *
 * @return string The item's body
 */
function retriever_get_body($item) {
    $is_stored = array_key_exists('uri-id', $item) && $item['uri-id'];
    if (!$is_stored) {
        // item has not yet been stored in database
        return $item['body'];
    }

    // item has been stored in database, body is stored in the item-content table
    $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $item['uri-id']]);
    if (!$content) {
        Logger::warning('retriever_get_body: item-content uri-id ' . $item['uri-id'] . ' has no content');
        return $item['body'];
    }

    $stored_body = $content['body'];
    if (!$stored_body) {
        Logger::warning('retriever_get_body: item-content uri-id ' . $item['uri-id'] . ' has no body');
        // TODO: check that this never happens
        return $item['body'];
    }

    if ($stored_body != $item['body']) {
        // TODO: check for this case
        Logger::warning('@@@ this is probably bad @@@ content: ' . $content['body'] . ' @@@ item: ' . $item['body']);
    }
    return $stored_body;
}
2019-09-22 11:47:30 +02:00
2019-10-11 18:47:32 +02:00
/**
 * @brief Sets the body of an item, in the array and, for stored items, in the database
 *
 * An empty body is refused unless $allow_empty is set.
 *
 * @param array  &$item       Row from the item table (by ref)
 * @param string $body        New body text
 * @param bool   $allow_empty Whether an empty body may be stored
 */
function retriever_set_body(&$item, $body, $allow_empty = false) {
    if ($body || $allow_empty) {
        $item['body'] = $body;

        // Items without an id have not been stored in the database yet
        $is_stored = array_key_exists('id', $item) && $item['id'];
        if ($is_stored) {
            Item::update(['body' => $body], ['id' => intval($item['id'])]);
        }
        return;
    }

    Logger::debug('retriever_set_body: will not set empty body in item id ' . $item['id'] . ' uri ' . $item['uri']);
}
2019-09-22 11:47:30 +02:00
2019-10-11 18:47:32 +02:00
/**
 * @brief Searches for images in the item and adds corresponding retriever_items. If the images have already been downloaded, updates the body in the supplied item array.
 *
 * Image URLs are collected from the [img] tags in the item's body.  URLs
 * that contain this site's base URL are left alone.
 *
 * @param array &$item Row from the item table (by ref)
 */
function retrieve_images(&$item) {
    $body = retriever_get_body($item);
    if (!strlen($body)) {
        Logger::warning('retrieve_images: no body for item ' . $item['uri']);
        return;
    }

    // I suspect that the first two are not used any more?
    preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $body, $matches1);
    preg_match_all("/\[img\](.*?)\[\/img\]/ism", $body, $matches2);
    preg_match_all("/\[img\=([^\]]*)\]([^[]*)\[\/img\]/ism", $body, $matches3);

    $urls = [];
    foreach (array_merge($matches1[3], $matches2[1], $matches3[1]) as $candidate) {
        // The third pattern also matches [img=WxH]...[/img] and captures the
        // "WxH" size; that is not a URL and must not be queued.
        if (!$candidate || preg_match('/^[0-9]*x[0-9]*$/i', $candidate)) {
            continue;
        }
        $urls[] = $candidate;
    }
    // The same image may be matched by more than one pattern or appear twice
    $urls = array_unique($urls);
    Logger::debug('retrieve_images: found ' . count($urls) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);

    $base_url = get_app()->getBaseUrl();
    foreach ($urls as $url) {
        if (strpos($url, $base_url) !== false) {
            continue;
        }
        $resource = add_retriever_resource($url, $item['uid'], $item['contact-id'], true);
        if (!$resource) {
            Logger::warning('retrieve_images: could not queue image ' . $url);
            continue;
        }
        if (!$resource['completed']) {
            add_retriever_item($item, $resource);
        } else {
            retriever_transform_images($item, $resource);
        }
    }
}
/**
 * @brief Sets the item's visible flag according to whether any unfinished retriever_item row still exists for it.
 *
 * The item is hidden (visible = 0) while a retriever_item row with finished = 0 matches its
 * uri, uid and contact-id, and shown (visible = 1) otherwise. If the item has a positive id
 * and the flag changed, the new value is also written through Item::update().
 *
 * @param array &$item Row from the item table (by ref)
 */
function retriever_check_item_completed(&$item)
{
    $waiting = DBA::selectFirst('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'contact-id' => intval($item['contact-id']), 'finished' => 0]);
    // Report the actual state rather than always claiming the item is waiting
    Logger::debug('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] . ' '. $item['contact-id'] . ($waiting ? ' waiting for resources' : ' not waiting for resources'));

    // The supplied row may lack a visible flag; treat that as "unknown" instead of reading a missing key
    $old_visible = array_key_exists('visible', $item) ? $item['visible'] : null;
    $item['visible'] = $waiting ? 0 : 1;

    if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) {
        Logger::debug('retriever_check_item_completed: changing visible flag to ' . $item['visible']);
        Item::update(['visible' => $item['visible']], ['id' => intval($item['id'])]);
    }
}
2019-10-09 20:54:39 +02:00
/**
 * @brief Applies a downloaded resource to an item, depending on the resource's content type.
 *
 * Image resources are handed to retriever_transform_images() whether or not a retriever rule
 * is supplied. HTML and XML resources need a rule: they go through retriever_apply_dom_filter(),
 * followed by retrieve_images() when the rule's 'images' setting is on.
 *
 * @param array|null $retriever Retriever rule, or a falsy value if there is none
 * @param array      &$item     Row from the item table (by ref)
 * @param array      $resource  Row from the resource table
 */
function retriever_apply_completed_resource_to_item($retriever, &$item, $resource) {
    Logger::debug('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . $item['plink']);

    $content_type = $resource['type'];

    if (strpos($content_type, 'image') !== false) {
        retriever_transform_images($item, $resource);
    }

    if (!$retriever) {
        Logger::warning('retriever_apply_completed_resource_to_item: no retriever');
        return;
    }

    $is_markup = (strpos($content_type, 'html') !== false) || (strpos($content_type, 'xml') !== false);
    if (!$is_markup) {
        return;
    }

    retriever_apply_dom_filter($retriever, $item, $resource);
    if ($retriever['data']['images']) {
        retrieve_images($item);
    }
}
2019-10-09 20:54:39 +02:00
/**
 * @brief Stores the image downloaded in the supplied resource and updates the item body by replacing the remote URL with the local URL. The body will be updated in the supplied item array. If the item has already been stored, and therefore has an ID already, the row in the database will be updated too.
 *
 * The body is left untouched if the resource has no data, the data is not a valid image,
 * or the photo could not be stored.
 *
 * @param array &$item Row from the item table (by ref)
 * @param array $resource Row from the resource table containing successfully downloaded image
 */
// TODO: split this into two functions, one to store the image, the other to change the item body
function retriever_transform_images(&$item, $resource) {
    if (!$resource['data']) {
        Logger::info('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url']);
        return;
    }

    $data = $resource['data'];
    $type = $resource['type'];
    $uid = $item['uid'];
    $cid = $item['contact-id'];
    $rid = Photo::newResource();
    // parse_url() yields no string when the URL has no path component
    $path = parse_url($resource['url'], PHP_URL_PATH);
    $parts = pathinfo(is_string($path) ? $path : '');
    $filename = $parts['filename'] . (array_key_exists('extension', $parts) ? '.' . $parts['extension'] : '');
    $album = 'Wall Photos';
    $scale = 0;
    $desc = ''; // TODO: store alt text with resource when it's requested so we can fill this in
    Logger::debug('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . $uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . $album . ' scale ' . $scale . ' desc ' . $desc);

    $image = new Image($data, $type);
    if (!$image->isValid()) {
        Logger::warning('retriever_transform_images: invalid image found at URL ' . $resource['url'] . ' for item ' . (isset($item['id']) ? $item['id'] : 'none'));
        return;
    }

    // NOTE(review): assumes Photo::store() returns a falsy value on failure - confirm against the Photo model
    $stored = Photo::store($image, $uid, $cid, $rid, $filename, $album, $scale, 0, "", "", "", "", $desc);
    if (!$stored) {
        // Do not point the body at a photo that was never saved
        Logger::warning('retriever_transform_images: unable to store image ' . $resource['url'] . ' for item ' . $item['uri']);
        return;
    }

    $new_url = System::baseUrl() . '/photo/' . $rid . '-' . $scale . '.' . $image->getExt();

    $body = retriever_get_body($item);

    Logger::debug('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . $item['uri']);
    $body = str_replace($resource["url"], $new_url, $body);

    retriever_set_body($item, $body);
}
/**
 * @brief Renders the retriever module pages into $a->page['content'].
 *
 * - Not logged in: a "please log in" notice.
 * - First argument 'help': the help template, listing the user's feed contacts.
 * - Any other non-empty first argument: it is passed to get_retriever_rule() to look up the
 *   rule to configure. If the form was posted (non-empty $_POST['id']) the rule data is
 *   rebuilt from the posted fields and saved first, optionally re-applying it to a number
 *   of existing posts.
 *
 * @param object $a Application object providing argv, page and getBaseUrl()
 */
function retriever_content($a) {
    if (!local_user()) {
        $a->page['content'] .= "<p>Please log in</p>";
        return;
    }

    if ($a->argv[1] === 'help') {
        $feeds = DBA::selectToArray('contact', ['id', 'name', 'thumb'], ['uid' => local_user(), 'network' => 'feed']);
        for ($i = 0; $i < count($feeds); ++$i) {
            $feeds[$i]['url'] = $a->getBaseUrl() . '/retriever/' . $feeds[$i]['id'];
        }
        $template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/');
        $a->page['content'] .= Renderer::replaceMacros($template, array(
            '$config' => $a->getBaseUrl() . '/settings/addon',
            '$feeds' => $feeds));
        return;
    }

    if ($a->argv[1]) {
        $retriever_rule = get_retriever_rule($a->argv[1], local_user(), false);

        if (!empty($_POST["id"])) {
            // Third argument true this time, unlike the lookup above
            $retriever_rule = get_retriever_rule($a->argv[1], local_user(), true);
            $retriever_rule['data'] = array();
            foreach (array('modurl', 'pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) {
                if (empty($_POST['retriever_' . $setting])) {
                    $retriever_rule['data'][$setting] = NULL;
                }
                else {
                    $retriever_rule['data'][$setting] = $_POST['retriever_' . $setting];
                }
            }
            foreach ($_POST as $k=>$v) {
                if (preg_match("/retriever-(include|exclude)-(\d+)-(element|attribute|value)/", $k, $matches)) {
                    $retriever_rule['data'][$matches[1]][intval($matches[2])][$matches[3]] = $v;
                }
            }
            // You've gotta have an element, even if it's just "*"
            foreach (array('include', 'exclude') as $kind) {
                // No clauses of this kind were posted, so there is nothing to prune
                if (empty($retriever_rule['data'][$kind])) {
                    continue;
                }
                foreach ($retriever_rule['data'][$kind] as $k=>$clause) {
                    if (empty($clause['element'])) {
                        unset($retriever_rule['data'][$kind][$k]);
                    }
                }
            }
            DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], ['data' => '']);
            $a->page['content'] .= "<p><b>Settings Updated";
            if (!empty($_POST["retriever_retrospective"])) {
                // Never echo the raw posted value into the page; only a positive integer is meaningful here
                $retrospective_count = intval($_POST["retriever_retrospective"]);
                if ($retrospective_count > 0) {
                    apply_retrospective($retriever_rule, $retrospective_count);
                    $a->page['content'] .= " and retrospectively applied to " . $retrospective_count . " posts";
                }
            }
            $a->page['content'] .= ".</b></p>";
        }

        $template = Renderer::getMarkupTemplate('/rule-config.tpl', 'addon/retriever/');
        $a->page['content'] .= Renderer::replaceMacros($template, array(
            '$enable' => array(
                'retriever_enable',
                L10n::t('Enabled'),
                $retriever_rule['data']['enable']),
            '$modurl' => array(
                'retriever_modurl',
                L10n::t('Modify URL'),
                $retriever_rule['data']['modurl'],
                L10n::t("Modify each article's URL with regular expressions before retrieving.")),
            '$pattern' => array(
                'retriever_pattern',
                L10n::t('URL Pattern'),
                $retriever_rule['data']['pattern'],
                L10n::t('Regular expression matching part of the URL to replace')),
            '$replace' => array(
                'retriever_replace',
                L10n::t('URL Replace'),
                $retriever_rule['data']['replace'],
                L10n::t('Text to replace matching part of above regular expression')),
            '$images' => array(
                'retriever_images',
                L10n::t('Download Images'),
                $retriever_rule['data']['images']),
            '$retrospective' => array(
                'retriever_retrospective',
                L10n::t('Retrospectively Apply'),
                '0',
                L10n::t('Reapply the rules to this number of posts')),
            '$storecookies' => array(
                'retriever_storecookies',
                L10n::t('Store cookies'),
                $retriever_rule['data']['storecookies'],
                L10n::t("Preserve cookie data across fetches.")),
            '$cookiedata' => array(
                'retriever_cookiedata',
                L10n::t('Cookie Data'),
                $retriever_rule['data']['cookiedata'],
                L10n::t("Latest cookie data for this feed. Netscape cookie file format.")),
            '$customxslt' => array(
                'retriever_customxslt',
                L10n::t('Custom XSLT'),
                $retriever_rule['data']['customxslt'],
                L10n::t("When standard rules aren't enough, apply custom XSLT to the article")),
            '$title' => L10n::t('Retrieve Feed Content'),
            '$help' => $a->getBaseUrl() . '/retriever/help',
            '$help_t' => L10n::t('Get Help'),
            '$submit_t' => L10n::t('Submit'),
            '$submit' => L10n::t('Save Settings'),
            '$id' => ($retriever_rule["id"] ? $retriever_rule["id"] : "create"),
            '$tag_t' => L10n::t('Tag'),
            '$attribute_t' => L10n::t('Attribute'),
            '$value_t' => L10n::t('Value'),
            '$add_t' => L10n::t('Add'),
            '$remove_t' => L10n::t('Remove'),
            '$include_t' => L10n::t('Include'),
            '$include' => $retriever_rule['data']['include'],
            '$exclude_t' => L10n::t('Exclude'),
            '$exclude' => $retriever_rule['data']['exclude']));
        return;
    }
}
/**
 * @brief contact_photo_menu hook: adds a "Retriever" entry to the menu of feed contacts.
 *
 * Nothing is changed when $args is empty or the contact's network is not "feed".
 *
 * @param object $a     Application object providing getBaseUrl()
 * @param array  &$args Hook arguments holding 'contact' and 'menu' (by ref)
 */
function retriever_contact_photo_menu($a, &$args) {
    $is_feed_contact = $args && ($args["contact"]["network"] == "feed");
    if (!$is_feed_contact) {
        return;
    }

    $label = L10n::t('Retriever');
    $target = $a->getBaseUrl() . '/retriever/' . $args["contact"]['id'];
    $args["menu"]['retriever'] = [$label, $target];
}
/**
 * @brief post_remote hook: processes a newly received item.
 *
 * If a retriever rule exists for the item's contact, the item is handed to
 * retriever_on_item_insert(). Otherwise the per-user 'oembed' and 'all_photos'
 * settings decide whether the body is round-tripped through HTML and whether
 * its images are retrieved. In every case the item's completion state is
 * checked at the end.
 *
 * @param object &$a    Application object (unused here)
 * @param array  &$item Row for the item table (by ref)
 */
function retriever_post_remote_hook(&$a, &$item) {
    Logger::info('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);

    $rule = get_retriever_rule($item['contact-id'], $item["uid"], false);
    if (!$rule) {
        if (PConfig::get($item["uid"], 'retriever', 'oembed')) {
            // Convert to HTML and back to take advantage of bbcode's resolution of oembeds.
            retriever_set_body($item, HTML::toBBCode(BBCode::convert(retriever_get_body($item))));
        }
        if (PConfig::get($item["uid"], 'retriever', 'all_photos')) {
            retrieve_images($item);
        }
    }
    else {
        retriever_on_item_insert($rule, $item);
    }

    retriever_check_item_completed($item);
}
2019-10-12 19:08:11 +02:00
/**
 * @brief plugin_settings hook: appends the retriever settings form to the addon settings page.
 *
 * @param object &$a Application object providing getBaseUrl()
 * @param string &$s Settings page markup to append to (by ref)
 */
function retriever_plugin_settings(&$a, &$s) {
    $uid_all_photos = PConfig::get(local_user(), 'retriever', 'all_photos');
    $uid_oembed = PConfig::get(local_user(), 'retriever', 'oembed');
    $tpl = Renderer::getMarkupTemplate('/settings.tpl', 'addon/retriever/');

    $photos_field = [
        'retriever_all_photos',
        L10n::t('All Photos'),
        $uid_all_photos,
        L10n::t('Check this to retrieve photos for all posts'),
    ];
    $oembed_field = [
        'retriever_oembed',
        L10n::t('Resolve OEmbed'),
        $uid_oembed,
        L10n::t('Check this to attempt to retrieve embedded content for all posts - useful e.g. for Facebook posts'),
    ];

    $macros = [
        '$allphotos' => $photos_field,
        '$oembed' => $oembed_field,
        '$submit' => L10n::t('Save Settings'),
        '$title' => L10n::t('Retriever Settings'),
        '$help' => $a->getBaseUrl() . '/retriever/help',
    ];

    $s .= Renderer::replaceMacros($tpl, $macros);
}
/**
 * @brief plugin_settings_post hook: saves the per-user retriever settings from the posted form.
 *
 * Each setting is stored when its form field is present and non-empty, and deleted otherwise.
 *
 * @param object $a    Application object (unused here)
 * @param array  $post Posted data as supplied by the hook (unused; $_POST is read directly)
 */
function retriever_plugin_settings_post($a,$post) {
    foreach (['all_photos', 'oembed'] as $setting) {
        $field = 'retriever_' . $setting;
        // Unchecked checkboxes are not submitted at all, so the key may be missing
        if (!empty($_POST[$field])) {
            PConfig::set(local_user(), 'retriever', $setting, $_POST[$field]);
        }
        else {
            PConfig::del(local_user(), 'retriever', $setting);
        }
    }
}