Compare commits

...
Sign in to create a new pull request.

96 commits

Author SHA1 Message Date
Matthew Exon
8516079ed1 log uid but ignore results 2023-05-07 13:37:00 +02:00
Matthew Exon
260d17ed74 use new style of accessing baseUrl 2023-05-07 13:32:59 +02:00
Matthew Exon
7a8296f356 remove App arguments 2023-05-07 13:32:51 +02:00
Matthew Exon
b677b6a63f remove duplicate use directive 2023-05-07 12:40:36 +02:00
Matthew Exon
761a9d8c20 fix contact photo menu callback really 2023-05-07 12:40:36 +02:00
Matthew Exon
be44ac9b2b fix contact photo menu callback 2023-05-07 12:40:36 +02:00
Matthew Exon
6dac6a00e9 replace local_user 2023-05-07 12:40:36 +02:00
7702b6b8ce The priority is now a class constant 2023-05-07 12:40:36 +02:00
Matthew Exon
00b370c671 Add missing use statement 2023-05-07 12:39:54 +02:00
Matthew Exon
440ff9c6d3 add types to parameters 2023-05-07 12:39:54 +02:00
Matthew Exon
4b1f673ec7 fix order of upgrade commands 2023-05-07 12:39:54 +02:00
Matthew Exon
472daa8672 add log lines to install 2023-05-07 12:39:54 +02:00
Matthew Exon
301d6452dc Fix length of keys 2023-05-07 12:39:54 +02:00
Matthew Exon
b3ba24921e Use new hook registration calls 2023-05-07 12:39:54 +02:00
Matthew Exon
a21413dce3 Update to correct collation mode 2023-05-07 12:39:54 +02:00
Matthew Exon
70153bde61 Use separate album and repair dox for ces 2023-05-07 12:39:54 +02:00
Matthew Exon
63a8ac39f8 fix comment 2023-05-07 12:39:54 +02:00
Matthew Exon
41fd46600e correct use of fetchFull 2023-05-07 12:39:54 +02:00
Matthew Exon
430211ebc2 fix argv stuff 2023-05-07 12:39:54 +02:00
Matthew Exon
09933337b4 fix argv stuff 2023-05-07 12:39:54 +02:00
Matthew Exon
81433d0b43 use new temppath function 2023-05-07 12:39:54 +02:00
Matthew Exon
8b4e9c0e5d fix sql syntax 2023-05-07 12:39:54 +02:00
Matthew Exon
5e7311a588 improvements 2023-05-07 12:39:54 +02:00
Matthew Exon
3db0c551d6 syntax errors 2023-05-07 12:39:54 +02:00
Matthew Exon
fff186c0b0 syntax errors 2023-05-07 12:39:54 +02:00
Matthew Exon
f9353dea28 syntax errors 2023-05-07 12:39:54 +02:00
Matthew Exon
2247073e47 syntax errors 2023-05-07 12:39:54 +02:00
Matthew Exon
23106aaed4 this is more correcter 2023-05-07 12:39:54 +02:00
Matthew Exon
d9094aa032 this is more correct 2023-05-07 12:39:54 +02:00
Matthew Exon
497a11a032 another migrated function 2023-05-07 12:39:54 +02:00
Matthew Exon
05c37f3156 add anotehr check 2023-05-07 12:39:54 +02:00
Matthew Exon
e29060c438 also update these queries 2023-05-07 12:39:54 +02:00
Matthew Exon
7b1d398279 stray line 2023-05-07 12:39:54 +02:00
Matthew Exon
c720283533 perhaps it should be this style 2023-05-07 12:39:54 +02:00
Matthew Exon
194f987afd attempt to handle one error 2023-05-07 12:39:53 +02:00
Matthew Exon
a5241698c8 new style of http request 2023-05-07 12:39:53 +02:00
Matthew Exon
5442886f4c switch to new way of executing SQL 2023-05-07 12:39:53 +02:00
Matthew Exon
1f181edf6f switch to new way of executing SQL 2023-05-07 12:39:53 +02:00
Matthew Exon
6e12c60296 switch to new way of executing SQL 2023-05-07 12:39:53 +02:00
Matthew Exon
ff8516331f sync with submitted 2023-05-07 12:39:53 +02:00
Matthew Exon
b883a449fd error checking in retriever 2023-05-07 12:39:53 +02:00
Matthew Exon
7c915e621f fix another stupid mistake 2023-05-07 12:39:53 +02:00
Matthew Exon
8f09825d2c fix another stupid mistake 2023-05-07 12:39:53 +02:00
Matthew Exon
71404d970f Detect an error in mailstream 2023-05-07 12:39:53 +02:00
Matthew Exon
0db41cdbd9 fixed another obvious mistake 2023-05-07 12:39:53 +02:00
Matthew Exon
ba88e7e8cb Fix a typo 2023-05-07 12:39:53 +02:00
Matthew Exon
966f0525b7 another check for empty results 2023-05-07 12:39:53 +02:00
Matthew Exon
3d148ed9bc Adapt Item methods to Post methods 2023-05-07 12:39:53 +02:00
Matthew Exon
a40af26ec5 Remove binary field from httpRequest 2023-05-07 12:39:53 +02:00
Matthew Exon
88b508ae5d Replace fetchUrlFull with HTTPRequest version 2023-05-07 12:39:53 +02:00
Matthew Exon
9cb7c4bbff Remove unneeded get_app 2023-05-07 12:39:53 +02:00
Matthew Exon
a71e3a34e5 Fix page assembly 2023-05-07 12:39:53 +02:00
Matthew Exon
c31ed576fc Update with base url changes and strict key requirements 2023-05-07 12:39:53 +02:00
Matthew Exon
67cf46ae31 Further updates to 2020.03 2023-05-07 12:39:44 +02:00
Matthew Exon
275e0fcabf Use new L10n thing 2023-05-07 12:39:44 +02:00
Matthew Exon
62292fd297 Update to new module structure 2023-05-07 12:39:44 +02:00
Matthew Exon
235fa49d50 maybe this way works better 2023-05-07 12:39:44 +02:00
Matthew Exon
891cb4e876 New way of doing baseurl 2023-05-07 12:39:44 +02:00
Matthew Exon
82d1f71d20 Missing class 2023-05-07 12:39:44 +02:00
Matthew Exon
ccd0efd0f7 Update for new version 2023-05-07 12:39:44 +02:00
Matthew Exon
9dbc11be1b Fix bug in phototrack 2023-05-07 12:39:44 +02:00
Matthew Exon
22da732b99 remove help section if images not allowed 2023-05-07 12:39:44 +02:00
Matthew Exon
038952461b Almost finished, maybe not working 2023-05-07 12:39:44 +02:00
Matthew Exon
3dbf7aec36 working much better 2023-05-07 12:39:44 +02:00
Matthew Exon
de1d3a8032 I think this works 2023-05-07 12:39:44 +02:00
Matthew Exon
0d2b2fd6b8 small addition 2023-05-07 12:39:44 +02:00
Matthew Exon
c4e2590891 small cleanup 2023-05-07 12:39:44 +02:00
Matthew Exon
418426e8a8 working much better 2023-05-07 12:39:44 +02:00
Matthew Exon
86eeb59ae5 maybe broken again 2023-05-07 12:39:44 +02:00
Matthew Exon
29888f9e87 Now retriever works again 2023-05-07 12:39:44 +02:00
Matthew Exon
32056ea578 extensive refactoring 2023-05-07 12:39:44 +02:00
Matthew Exon
c287de8f59 retriever tweaks 2023-05-07 12:39:44 +02:00
Matthew Exon
86ea938621 Add phototrack and publicise 2023-05-07 12:39:44 +02:00
Matthew Exon
9a85904003 configurable number of requests 2023-05-07 12:39:44 +02:00
Matthew Exon
d19d96b580 update version number 2023-05-07 12:39:44 +02:00
Matthew Exon
396f64a476 Stuff in retriever 2023-05-07 12:39:44 +02:00
Matthew Exon
3abf8c6850 fixed image regex 2023-05-07 12:39:44 +02:00
Matthew Exon
26f231f395 more dba stuff 2023-05-07 12:39:44 +02:00
Matthew Exon
0e3805bb3c fakerei2 2023-05-07 12:39:44 +02:00
Matthew Exon
928911ea6d Fix bugs in retriever retrospective stuff 2023-05-07 12:39:44 +02:00
Matthew Exon
2153e7059b more retriever stuff 2023-05-07 12:39:44 +02:00
Administrator
dc068b16ec Fix retriever database problems 2023-05-07 12:39:44 +02:00
Matthew Exon
eab0467e35 retriever stuff 2023-05-07 12:39:44 +02:00
Matthew Exon
cdb3ac82e5 Change logging functions 2023-05-07 12:39:43 +02:00
Matthew Exon
68d3e6c7cf Improvement 2023-05-07 12:39:43 +02:00
Administrator
cc438120ac this is working OK 2023-05-07 12:39:43 +02:00
Matthew Exon
df1a5a8a3f fixed a bug and commented on another 2023-05-07 12:39:43 +02:00
Matthew Exon
4050928be7 fix 2023-05-07 12:39:43 +02:00
Matthew Exon
049fcc0494 tentative database work 2023-05-07 12:39:43 +02:00
Matthew Exon
c4543de561 More preparation for persistent cookies 2023-05-07 12:39:43 +02:00
Matthew Exon
d086f2b3bb beginnings of persistent cookiejar support 2023-05-07 12:39:43 +02:00
Matthew Exon
9e434957b0 now working retriever 2023-05-07 12:39:43 +02:00
Matthew Exon
aa12fec3f5 more fixes 2023-05-07 12:39:43 +02:00
Matthew Exon
ea8dca34db more fixes 2023-05-07 12:39:43 +02:00
Matthew Exon
65138c22ed Fixes for retriever 2023-05-07 12:39:43 +02:00
Matthew Exon
90eda43139 Latest version of retriever 2023-05-07 12:39:43 +02:00
15 changed files with 1852 additions and 16 deletions

View file

@ -180,5 +180,5 @@ function ifttt_message($uid, $item)
$link = hash('ripemd128', $item['msg']); $link = hash('ripemd128', $item['msg']);
} }
Post\Delayed::add($link, $post, Worker::PRIORITY_MEDIUM, Post\Delayed::PREPARED); Post\Delayed::add($link, $post, Worker::PRIORITY_MEDIUM, Post\Delayed::UNPREPARED);
} }

View file

@ -66,10 +66,9 @@ function mailstream_module() {}
/** /**
* Adds an item in "addon features" in the admin menu of the site * Adds an item in "addon features" in the admin menu of the site
* *
* @param App $a App object (unused)
* @param string $o HTML form data * @param string $o HTML form data
*/ */
function mailstream_addon_admin(App $a, string &$o) function mailstream_addon_admin(string &$o)
{ {
$frommail = DI::config()->get('mailstream', 'frommail'); $frommail = DI::config()->get('mailstream', 'frommail');
$template = Renderer::getMarkupTemplate('admin.tpl', 'addon/mailstream/'); $template = Renderer::getMarkupTemplate('admin.tpl', 'addon/mailstream/');
@ -103,14 +102,14 @@ function mailstream_addon_admin_post()
*/ */
function mailstream_generate_id(string $uri): string function mailstream_generate_id(string $uri): string
{ {
$host = DI::baseUrl()->getHostname(); $host = DI::baseUrl()->getHost();
$resource = hash('md5', $uri); $resource = hash('md5', $uri);
$message_id = "<" . $resource . "@" . $host . ">"; $message_id = "<" . $resource . "@" . $host . ">";
Logger::debug('mailstream: Generated message ID ' . $message_id . ' for URI ' . $uri); Logger::debug('mailstream: Generated message ID ' . $message_id . ' for URI ' . $uri);
return $message_id; return $message_id;
} }
function mailstream_send_hook(App $a, array $data) function mailstream_send_hook(array $data)
{ {
$criteria = array('uid' => $data['uid'], 'contact-id' => $data['contact-id'], 'uri' => $data['uri']); $criteria = array('uid' => $data['uid'], 'contact-id' => $data['contact-id'], 'uri' => $data['uri']);
$item = Post::selectFirst([], $criteria); $item = Post::selectFirst([], $criteria);
@ -138,17 +137,17 @@ function mailstream_send_hook(App $a, array $data)
* mailstream is enabled and the necessary data is available, forks a * mailstream is enabled and the necessary data is available, forks a
* workerqueue item to send the email. * workerqueue item to send the email.
* *
* @param App $a App object (unused)
* @param array $item content of the item (may or may not already be stored in the item table) * @param array $item content of the item (may or may not already be stored in the item table)
* @return void * @return void
*/ */
function mailstream_post_hook(App $a, array &$item) function mailstream_post_hook(array &$item)
{ {
mailstream_check_version(); mailstream_check_version();
Logger::debug('@@@ mailstream_post_hook', ['item-uid' => $item['uid']]);
if (!DI::pConfig()->get($item['uid'], 'mailstream', 'enabled')) { if (!DI::pConfig()->get($item['uid'], 'mailstream', 'enabled')) {
Logger::debug('mailstream: not enabled.', ['item' => $item['id'], ' uid ' => $item['uid']]); Logger::debug('mailstream: not enabled for item ' . $item['id'] . ' uid ' . $item['uid']);
return; // return;
} }
if (!$item['uid']) { if (!$item['uid']) {
Logger::debug('mailstream: no uid for item ' . $item['id']); Logger::debug('mailstream: no uid for item ' . $item['id']);
@ -414,7 +413,7 @@ function mailstream_send(string $message_id, array $item, array $user): bool
$template = Renderer::getMarkupTemplate('mail.tpl', 'addon/mailstream/'); $template = Renderer::getMarkupTemplate('mail.tpl', 'addon/mailstream/');
$mail->AltBody = BBCode::toPlaintext($item['body']); $mail->AltBody = BBCode::toPlaintext($item['body']);
$item['body'] = BBCode::convertForUriId($item['uri-id'], $item['body'], BBCode::CONNECTORS); $item['body'] = BBCode::convertForUriId($item['uri-id'], $item['body'], BBCode::CONNECTORS);
$item['url'] = DI::baseUrl()->get() . '/display/' . $item['guid']; $item['url'] = DI::baseUrl() . '/display/' . $item['guid'];
$mail->Body = Renderer::replaceMacros($template, [ $mail->Body = Renderer::replaceMacros($template, [
'$upstream' => DI::l10n()->t('Upstream'), '$upstream' => DI::l10n()->t('Upstream'),
'$uri' => DI::l10n()->t('URI'), '$uri' => DI::l10n()->t('URI'),
@ -468,7 +467,7 @@ function mailstream_convert_table_entries()
'message_id' => $ms_item_id['message-id'], 'message_id' => $ms_item_id['message-id'],
'tries' => 0); 'tries' => 0);
if (!$ms_item_id['message-id'] || !strlen($ms_item_id['message-id'])) { if (!$ms_item_id['message-id'] || !strlen($ms_item_id['message-id'])) {
Logger::info('mailstream_convert_table_entries: item has no message-id.', 'item' => $ms_item_id['id'], 'uri' => $ms_item_id['uri']]); Logger::info('mailstream_convert_table_entries: item has no message-id.', ['item' => $ms_item_id['id'], 'uri' => $ms_item_id['uri']]);
continue; continue;
} }
Logger::info('mailstream_convert_table_entries: convert item to workerqueue', $send_hook_data); Logger::info('mailstream_convert_table_entries: convert item to workerqueue', $send_hook_data);
@ -480,11 +479,10 @@ function mailstream_convert_table_entries()
/** /**
* Form for configuring mailstream features for a user * Form for configuring mailstream features for a user
* *
* @param App $a App object
* @param array $data Hook data array * @param array $data Hook data array
* @throws \Friendica\Network\HTTPException\ServiceUnavailableException * @throws \Friendica\Network\HTTPException\ServiceUnavailableException
*/ */
function mailstream_addon_settings(App &$a, array &$data) function mailstream_addon_settings(array &$data)
{ {
$enabled = DI::pConfig()->get(DI::userSession()->getLocalUserId(), 'mailstream', 'enabled'); $enabled = DI::pConfig()->get(DI::userSession()->getLocalUserId(), 'mailstream', 'enabled');
$address = DI::pConfig()->get(DI::userSession()->getLocalUserId(), 'mailstream', 'address'); $address = DI::pConfig()->get(DI::userSession()->getLocalUserId(), 'mailstream', 'address');
@ -528,11 +526,10 @@ function mailstream_addon_settings(App &$a, array &$data)
/** /**
* Process data submitted to user's mailstream features form * Process data submitted to user's mailstream features form
* @param App $a
* @param array $post POST data * @param array $post POST data
* @return void * @return void
*/ */
function mailstream_addon_settings_post(App $a, array $post) function mailstream_addon_settings_post(array $post)
{ {
if (!DI::userSession()->getLocalUserId() || empty($post['mailstream-submit'])) { if (!DI::userSession()->getLocalUserId() || empty($post['mailstream-submit'])) {
return; return;

23
phototrack/database.sql Normal file
View file

@ -0,0 +1,23 @@
CREATE TABLE IF NOT EXISTS `phototrack_photo_use` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`resource-id` char(64) NOT NULL,
`table` char(64) NOT NULL,
`field` char(64) NOT NULL,
`row-id` int(11) NOT NULL,
`checked` timestamp NOT NULL DEFAULT now(),
PRIMARY KEY (`id`),
INDEX `resource-id` (`resource-id`),
INDEX `row` (`table`,`field`,`row-id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin;
CREATE TABLE IF NOT EXISTS `phototrack_row_check` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`table` char(64) NOT NULL,
`row-id` int(11) NOT NULL,
`checked` timestamp NOT NULL DEFAULT now(),
PRIMARY KEY (`id`),
INDEX `row` (`table`,`row-id`),
INDEX `checked` (`checked`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin;
SELECT TRUE

274
phototrack/phototrack.php Normal file
View file

@ -0,0 +1,274 @@
<?php
/**
* Name: Photo Track
* Description: Track which photos are actually being used and delete any others
* Version: 1.0
* Author: Matthew Exon <http://mat.exon.name>
*/
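/*
* List of tables and the fields that are checked:
*
* contact: photo thumb micro about
* fcontact: photo
* fsuggest: photo
* gcontact: photo about
* item: body
* item-content: body
* mail: from-photo
* notify: photo
* profile: photo thumb about
*
* Note: phototrack_check_row() and phototrack_cron() currently only handle
* item, item-content, contact, fcontact, fsuggest and gcontact. The mail,
* notify and profile tables are listed here but are not scanned yet.
*/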
use Friendica\Core\Addon;
use Friendica\Core\Logger;
use Friendica\Object\Image;
use Friendica\Database\DBA;
use Friendica\Util\Images;
use Friendica\Util\DateTimeFormat;
use Friendica\DI;
// Fallback batch size returned by phototrack_batch_size() when the
// 'phototrack'/'batch_size' config value is not greater than zero.
if (!defined('PHOTOTRACK_DEFAULT_BATCH_SIZE')) {
define('PHOTOTRACK_DEFAULT_BATCH_SIZE', 1000);
}
// Time in *minutes* between searching for photo uses; used by
// phototrack_cron_time() when 'phototrack'/'search_interval' is unset or zero.
if (!defined('PHOTOTRACK_DEFAULT_SEARCH_INTERVAL')) {
define('PHOTOTRACK_DEFAULT_SEARCH_INTERVAL', 10);
}
/**
 * Installation hook for the phototrack addon.
 *
 * Registers the addon's four hooks and, unless the stored schema version is
 * already '0.1', executes the statements in database.sql one at a time. The
 * schema version is only recorded once every statement has succeeded.
 *
 * @return void
 */
function phototrack_install() {
	Addon::registerHook('post_local_end', 'addon/phototrack/phototrack.php', 'phototrack_post_local_end');
	Addon::registerHook('post_remote_end', 'addon/phototrack/phototrack.php', 'phototrack_post_remote_end');
	Addon::registerHook('notifier_end', 'addon/phototrack/phototrack.php', 'phototrack_notifier_end');
	Addon::registerHook('cron', 'addon/phototrack/phototrack.php', 'phototrack_cron');

	if (DI::config()->get('phototrack', 'dbversion') == '0.1') {
		return;
	}

	$schema = file_get_contents(dirname(__FILE__) . '/database.sql');
	if ($schema === false) {
		Logger::warning('Unable to read phototrack database schema');
		return;
	}

	foreach (explode(';', $schema) as $statement) {
		// Splitting on ';' can leave an empty or whitespace-only fragment
		// (e.g. after a trailing semicolon); that is not a failed statement.
		if (trim($statement) === '') {
			continue;
		}
		if (!DBA::e($statement)) {
			Logger::warning('Unable to create database table: ' . DBA::errorMessage());
			return;
		}
	}

	DI::config()->set('phototrack', 'dbversion', '0.1');
}
/**
 * Uninstallation hook for the phototrack addon.
 *
 * Unregisters every hook that phototrack_install() registered. The database
 * tables are left in place.
 *
 * @return void
 */
function phototrack_uninstall() {
	$callbacks = [
		'post_local_end'  => 'phototrack_post_local_end',
		'post_remote_end' => 'phototrack_post_remote_end',
		'notifier_end'    => 'phototrack_notifier_end',
		'cron'            => 'phototrack_cron',
	];

	foreach ($callbacks as $hook => $callback) {
		Addon::unregisterHook($hook, 'addon/phototrack/phototrack.php', $callback);
	}
}
/**
 * Intentionally empty.
 *
 * NOTE(review): presumably only the existence of this function matters, as a
 * marker that the addon offers a module - confirm against the addon loader.
 */
function phototrack_module() {}
/**
 * Records in phototrack_row_check that a row has just been scanned.
 *
 * Updates the 'checked' timestamp of the existing entry for this table/row
 * pair, or inserts a new entry when none exists yet.
 *
 * @param string $table Name of the table the scanned row belongs to
 * @param int    $id    ID of the scanned row
 * @return void
 */
function phototrack_finished_row($table, $id) {
	$key = ['table' => $table, 'row-id' => $id];

	$known = DBA::selectFirst('phototrack_row_check', ['id'], $key);
	if (is_bool($known)) {
		// No entry yet for this row: create one.
		DBA::insert('phototrack_row_check', $key + ['checked' => DateTimeFormat::utcNow()]);
		return;
	}

	DBA::update('phototrack_row_check', ['checked' => DateTimeFormat::utcNow()], $key);
}
/**
 * Records that a photo resource is referenced by a given table/field/row.
 *
 * The photo identifier is normalised first: every value returned by
 * Images::supportedTypes() is treated as a file extension and stripped, and a
 * trailing "-<digit>" suffix is removed. Only 32-character identifiers that
 * exist in the photo table are recorded. An existing phototrack_photo_use
 * entry has its 'checked' timestamp refreshed; otherwise a new entry is
 * inserted.
 *
 * @param string $photo Photo resource ID, optionally with "-N" suffix and file extension
 * @param string $table Table in which the reference was found
 * @param string $field Field in which the reference was found
 * @param int    $id    ID of the row in which the reference was found
 * @return void
 */
function phototrack_photo_use($photo, $table, $field, $id) {
	Logger::debug('@@@ phototrack_photo_use ' . $photo);

	foreach (Images::supportedTypes() as $extension) {
		$photo = str_replace(".$extension", '', $photo);
	}

	// Drop the single-digit "-N" suffix; its value is not needed here.
	if (substr($photo, -2, 1) == '-') {
		$photo = substr($photo, 0, -2);
	}

	if (strlen($photo) != 32) {
		return;
	}

	$r = DBA::selectFirst('photo', ['resource-id'], ['resource-id' => $photo]);
	if (!DBA::isResult($r)) {
		return;
	}

	$use = ['resource-id' => $r['resource-id'], 'table' => $table, 'field' => $field, 'row-id' => $id];

	$existing = DBA::selectFirst('phototrack_photo_use', ['id'], $use);
	if (DBA::isResult($existing)) {
		DBA::update('phototrack_photo_use', ['checked' => DateTimeFormat::utcNow()], $use);
	} else {
		DBA::insert('phototrack_photo_use', $use + ['checked' => DateTimeFormat::utcNow()]);
	}
}
/**
 * Checks whether a URL points at a local photo and, if so, records the use.
 *
 * The URL must contain this site's base URL followed somewhere by "/photo/".
 * The first run of 32 characters from [0-9a-z] after that is passed to
 * phototrack_photo_use().
 *
 * @param mixed  $a     Unused; only kept so existing callers keep working
 * @param string $table Table the URL was found in
 * @param string $field Field the URL was found in
 * @param int    $id    ID of the row the URL was found in
 * @param string $url   URL to inspect
 * @return void
 */
function phototrack_check_field_url($a, $table, $field, $id, $url) {
	Logger::info('@@@ phototrack_check_field_url table ' . $table . ' field ' . $field . ' id ' . $id . ' url ' . $url);

	// Rows may carry NULL or empty values; there is nothing to inspect then.
	if (!is_string($url) || $url === '') {
		return;
	}

	$baseurl = DI::baseUrl()->get(true);
	$base_pos = strpos($url, $baseurl);
	if ($base_pos === false) {
		return;
	}
	// Cut relative to where the base URL was actually found, not offset 0,
	// so a base URL that is not at the very start is still stripped correctly.
	$url = substr($url, $base_pos + strlen($baseurl));
	Logger::info('@@@ phototrack_check_field_url funny url stuff ' . $url . ' base ' . $baseurl);

	$photo_pos = strpos($url, '/photo/');
	if ($photo_pos === false) {
		return;
	}
	$url = substr($url, $photo_pos + strlen('/photo/'));
	Logger::info('@@@ phototrack_check_field_url more url stuff ' . $url);

	if (preg_match('/([0-9a-z]{32})/', $url, $matches)) {
		$rid = $matches[0];
		Logger::info('@@@ phototrack_check_field_url rid ' . $rid);
		phototrack_photo_use($rid, $table, $field, $id);
	}
}
/**
 * Scans a BBCode value for [img]...[/img] tags and checks each image URL.
 *
 * Both the plain [img] form and the sized [img=WxH] form are matched. Every
 * URL found is handed to phototrack_check_field_url().
 *
 * @param mixed  $a     Passed through unchanged to phototrack_check_field_url()
 * @param string $table Table the value was found in
 * @param string $field Field the value was found in
 * @param int    $id    ID of the row the value was found in
 * @param string $value BBCode text to scan
 * @return void
 */
function phototrack_check_field_bbcode($a, $table, $field, $id, $value) {
	// NULL or empty fields cannot contain image tags.
	if (!is_string($value) || $value === '') {
		return;
	}

	$matches = [];
	preg_match_all("/\[img(\=([0-9]*)x([0-9]*))?\](.*?)\[\/img\]/ism", $value, $matches);

	// Capture group 4 holds the URL between the opening and closing tag.
	foreach ($matches[4] as $url) {
		phototrack_check_field_url($a, $table, $field, $id, $url);
	}
}
/**
 * Hook callback registered for 'post_local_end'.
 *
 * Scans the item for photo uses, once as an 'item' row and once as an
 * 'item-content' row.
 *
 * @param mixed $a    Passed through to phototrack_check_row()
 * @param array $item Item data
 * @return void
 */
function phototrack_post_local_end(&$a, &$item) {
	foreach (['item', 'item-content'] as $table) {
		phototrack_check_row($a, $table, $item);
	}
}
/**
 * Hook callback registered for 'post_remote_end'.
 *
 * Scans the item for photo uses, once as an 'item' row and once as an
 * 'item-content' row.
 *
 * @param mixed $a    Passed through to phototrack_check_row()
 * @param array $item Item data
 * @return void
 */
function phototrack_post_remote_end(&$a, &$item) {
	foreach (['item', 'item-content'] as $table) {
		phototrack_check_row($a, $table, $item);
	}
}
/**
 * Hook callback registered for 'notifier_end'. Currently does nothing.
 *
 * @param mixed $item Hook data (unused)
 */
function phototrack_notifier_end($item) {
}
/**
 * Scans one database row for references to local photos.
 *
 * Each known table maps its photo-bearing fields to the kind of content they
 * hold ('url' or 'bbcode'). Fields that are absent or empty in the row are
 * skipped. Afterwards the row is marked as checked, including rows from
 * tables with no known fields.
 *
 * @param mixed  $a     Passed through to the field checkers
 * @param string $table Name of the table the row comes from
 * @param array  $row   Row data; must contain an 'id' entry
 * @return void
 */
function phototrack_check_row($a, $table, $row) {
	$fields_by_table = [
		'item'         => ['body' => 'bbcode'],
		'item-content' => ['body' => 'bbcode'],
		'contact'      => ['photo' => 'url', 'thumb' => 'url', 'micro' => 'url', 'about' => 'bbcode'],
		'fcontact'     => ['photo' => 'url'],
		'fsuggest'     => ['photo' => 'url'],
		'gcontact'     => ['photo' => 'url', 'about' => 'bbcode'],
	];

	$fields = $fields_by_table[$table] ?? [];

	foreach ($fields as $field => $type) {
		// The row may not carry every listed column (or the column may be
		// NULL); there is nothing to scan in that case.
		if (empty($row[$field])) {
			continue;
		}

		switch ($type) {
			case 'bbcode':
				phototrack_check_field_bbcode($a, $table, $field, $row['id'], $row[$field]);
				break;
			case 'url':
				phototrack_check_field_url($a, $table, $field, $row['id'], $row[$field]);
				break;
		}
	}

	phototrack_finished_row($table, $row['id']);
}
/**
 * Returns the number of rows to process per table in one search run.
 *
 * Reads 'phototrack'/'batch_size' from the config and falls back to
 * PHOTOTRACK_DEFAULT_BATCH_SIZE when it is not a positive number. The value
 * is cast to an integer because phototrack_search_table() interpolates it
 * into an SQL LIMIT clause.
 *
 * @return int
 */
function phototrack_batch_size() {
	$batch_size = intval(DI::config()->get('phototrack', 'batch_size'));
	if ($batch_size > 0) {
		return $batch_size;
	}
	return PHOTOTRACK_DEFAULT_BATCH_SIZE;
}
/**
 * Scans one batch of rows from a table for photo uses.
 *
 * Selects rows that have never been checked, or were last checked more than
 * a month ago, oldest check first, limited to the configured batch size.
 * Each row is passed to phototrack_check_row(). The number of rows still
 * pending afterwards is then counted and returned.
 *
 * The table name is interpolated directly into the SQL, so it must only ever
 * be one of the fixed names passed by phototrack_cron().
 *
 * @param mixed  $a     Passed through to phototrack_check_row()
 * @param string $table Name of the table to scan (trusted, not user input)
 * @return int Number of rows in the table that still need to be checked
 */
function phototrack_search_table($a, $table) {
	$batch_size = intval(phototrack_batch_size());

	// Shared by the batch query and the count query so they cannot drift apart.
	$pending = "FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) )";

	$searched = 0;
	$rows = DBA::p("SELECT `$table`.* $pending ORDER BY phototrack_row_check.checked LIMIT $batch_size");
	if (DBA::isResult($rows)) {
		while ($row = DBA::fetch($rows)) {
			phototrack_check_row($a, $table, $row);
			$searched++;
		}
	}
	if (is_object($rows)) {
		DBA::close($rows);
	}

	// Alias the aggregate so it can be read by name, and fetch the single
	// result row exactly once.
	$r = DBA::p("SELECT COUNT(*) AS `count` $pending");
	$count_row = DBA::fetch($r);
	if (is_object($r)) {
		DBA::close($r);
	}
	Logger::info("@@@ phototrack_search_table " . print_r($count_row, true));

	$remaining = is_array($count_row) ? intval($count_row['count']) : 0;
	Logger::info('phototrack: searched ' . $searched . ' rows in table ' . $table . ', ' . $remaining . ' still remaining to search');
	return $remaining;
}
/**
 * Decides whether the cron job should run a search now.
 *
 * A search is always due while the stored count of remaining items exceeds
 * ten batches. Otherwise it is due when no previous search time is stored, or
 * when the configured interval (in minutes, defaulting to
 * PHOTOTRACK_DEFAULT_SEARCH_INTERVAL) has passed since the last search.
 *
 * @return bool True when a search should run
 */
function phototrack_cron_time() {
	$backlog_threshold = 10 * phototrack_batch_size();
	if (DI::config()->get('phototrack', 'remaining_items') > $backlog_threshold) {
		Logger::debug('phototrack: more than ' . $backlog_threshold . ' items remaining');
		return true;
	}

	$last = DI::config()->get('phototrack', 'last_search');
	$minutes = intval(DI::config()->get('phototrack', 'search_interval'));
	if (!$minutes) {
		$minutes = PHOTOTRACK_DEFAULT_SEARCH_INTERVAL;
	}

	if ($last && ($last + ($minutes * 60)) > time()) {
		Logger::debug('phototrack: search interval not reached');
		return false;
	}

	return true;
}
/**
 * Cron hook: searches each tracked table for photo uses.
 *
 * Does nothing unless phototrack_cron_time() says a search is due. Stores the
 * time of this search and the total number of rows still to be checked, and
 * runs phototrack_tidy() once nothing remains.
 *
 * @param mixed $a Passed through to phototrack_search_table()
 * @param mixed $b Hook data (unused)
 * @return void
 */
function phototrack_cron($a, $b) {
	if (!phototrack_cron_time()) {
		return;
	}

	DI::config()->set('phototrack', 'last_search', time());

	$remaining = 0;
	foreach (['item', 'item-content', 'contact', 'fcontact', 'fsuggest', 'gcontact'] as $table) {
		$remaining += phototrack_search_table($a, $table);
	}
	DI::config()->set('phototrack', 'remaining_items', $remaining);

	if ($remaining === 0) {
		phototrack_tidy();
	}
}
/**
 * Deletes photos with no recorded use and prunes stale use records.
 *
 * Step 1: resource IDs of photos created more than two months ago are copied
 * into a scratch table. Up to 1000 of those without any phototrack_photo_use
 * entry are deleted from the photo table, then the scratch table is dropped.
 *
 * Step 2: use records that have not been refreshed for two months are
 * removed. Rows are only re-scanned once their last check is more than one
 * month old (see phototrack_search_table()), so a live use record can
 * legitimately be up to a month old. Pruning earlier than that would strip
 * the record from photos that are still in use and make them eligible for
 * deletion on the next run.
 *
 * NOTE(review): photos are removed with a plain DELETE on the photo table;
 * confirm whether externally stored image data needs separate cleanup.
 *
 * @return void
 */
function phototrack_tidy() {
	DBA::e('CREATE TABLE IF NOT EXISTS `phototrack-temp` (`resource-id` char(255) not null)');
	DBA::e('INSERT INTO `phototrack-temp` SELECT DISTINCT(`resource-id`) FROM photo WHERE photo.`created` < DATE_SUB(NOW(), INTERVAL 2 MONTH)');

	// The limit is deliberately fixed rather than taken from phototrack_batch_size().
	$rows = DBA::p('SELECT `phototrack-temp`.`resource-id` FROM `phototrack-temp` LEFT OUTER JOIN phototrack_photo_use ON (`phototrack-temp`.`resource-id` = phototrack_photo_use.`resource-id`) WHERE phototrack_photo_use.id IS NULL limit ' . 1000);
	if (DBA::isResult($rows)) {
		$deleted = 0;
		while ($row = DBA::fetch($rows)) {
			Logger::debug('phototrack: remove photo ' . $row['resource-id']);
			// Condition array instead of string concatenation, so the value is bound as a parameter.
			DBA::delete('photo', ['resource-id' => $row['resource-id']]);
			$deleted++;
		}
		Logger::info('phototrack_tidy: deleted ' . $deleted . ' photos');
	}
	if (is_object($rows)) {
		DBA::close($rows);
	}
	DBA::e('DROP TABLE `phototrack-temp`');

	$pruned = 0;
	$rows = DBA::p('SELECT id FROM phototrack_photo_use WHERE checked < DATE_SUB(NOW(), INTERVAL 2 MONTH)');
	if (DBA::isResult($rows)) {
		while ($row = DBA::fetch($rows)) {
			DBA::delete('phototrack_photo_use', ['id' => $row['id']]);
			$pruned++;
		}
	}
	if (is_object($rows)) {
		DBA::close($rows);
	}
	Logger::info('phototrack_tidy: deleted ' . $pruned . ' phototrack_photo_use rows');
}

11
publicise/publicise.php Normal file
View file

@ -0,0 +1,11 @@
"SELECT `uid` FROM `contact` WHERE `id` = %d AND `reason` = 'publicise'", intval($item['contact-id']));
if (!$r1) {
return;
}
Logger::debug('Publicise: moving to wall: ' . $item['uid'] . ' ' . $item['contact-id'] . ' ' . $item['uri']);
$item['type'] = 'wall';
$item['wall'] = 1;
$item['private'] = 0;
}

View file

@ -0,0 +1,39 @@
{{*
* AUTOMATICALLY GENERATED TEMPLATE
* DO NOT EDIT THIS FILE, CHANGES WILL BE OVERWRITTEN
*
*}}
<form method="post">
<table>
<thead>
<tr>
<th>{{$feed_t}}</th>
<th>{{$publicised_t}}</th>
<th>{{$comments_t}}</th>
<th>{{$expire_t}}</th>
</tr>
</thead>
<tbody>
{{foreach $feeds as $f}}
<tr>
<td>
<a href="{{$f.url}}">
<img style="vertical-align:middle" src='{{$f.micro}}'>
<span style="margin-left:1em">{{$f.name}}</span>
</a>
</td>
<td>
{{include file="field_yesno.tpl" field=$f.enabled}}
</td>
<td>
{{include file="field_yesno.tpl" field=$f.comments}}
</td>
<td>
<input name="publicise-expire-{{$f.id}}" value="{{$f.expire}}">
</td>
</tr>
{{/foreach}}
</tbody>
</table>
<input type="submit" size="70" value="{{$submit_t}}">
</form>

42
retriever/database.sql Normal file
View file

@ -0,0 +1,42 @@
CREATE TABLE IF NOT EXISTS `retriever_rule` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`uid` int(11) NOT NULL,
`contact-id` int(11) NOT NULL,
`data` mediumtext NULL DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `uid` (`uid`),
KEY `contact-id` (`contact-id`)
) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
CREATE TABLE IF NOT EXISTS `retriever_item` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`item-uri` varbinary(255) NOT NULL,
`item-uid` int(10) unsigned NOT NULL DEFAULT '0',
`contact-id` int(10) unsigned NOT NULL DEFAULT '0',
`resource` int(11) NOT NULL,
`finished` tinyint(1) unsigned NOT NULL DEFAULT '0',
KEY `resource` (`resource`),
KEY `finished` (`finished`),
KEY `item-uid` (`item-uid`),
KEY `all` (`item-uri`, `item-uid`, `contact-id`),
PRIMARY KEY (`id`)
) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
CREATE TABLE IF NOT EXISTS `retriever_resource` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`item-uid` int(10) unsigned NOT NULL DEFAULT '0',
`contact-id` int(10) unsigned NOT NULL DEFAULT '0',
`type` char(255) NULL DEFAULT NULL,
`binary` int(1) NOT NULL DEFAULT 0,
`url` varbinary(700) NOT NULL,
`created` timestamp NOT NULL DEFAULT now(),
`completed` timestamp NULL DEFAULT NULL,
`last-try` timestamp NULL DEFAULT NULL,
`num-tries` int(11) NOT NULL DEFAULT 0,
`data` mediumblob NULL DEFAULT NULL,
`http-code` smallint(1) unsigned NULL DEFAULT NULL,
`redirect-url` varbinary(700) NOT NULL,
KEY `url` (`url`),
KEY `completed` (`completed`),
PRIMARY KEY (`id`)
) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;

1058
retriever/retriever.php Normal file
View file

@ -0,0 +1,1058 @@
<?php
/**
* Name: Retriever
* Description: Follow the permalink of RSS/Atom feed items and replace the summary with the full content.
* Version: 1.0
* Author: Matthew Exon <http://mat.exon.name>
*/
use Friendica\App;
use Friendica\Core\Addon;
use Friendica\Core\Hook;
use Friendica\Core\Logger;
use Friendica\Core\Renderer;
use Friendica\Core\Session;
use Friendica\Core\System;
use Friendica\Content\Text\HTML;
use Friendica\Content\Text\BBCode;
use Friendica\Model\Photo;
use Friendica\Object\Image;
use Friendica\Util\Network;
use Friendica\Database\DBA;
use Friendica\Model\ItemURI;
use Friendica\Model\Item;
use Friendica\Model\Post;
use Friendica\Util\DateTimeFormat;
use Friendica\DI;
/**
 * @brief Installation hook for retriever plugin
 *
 * Registers the addon's hooks, then brings the database up to date:
 *  - a schema at version 0.14 is upgraded in place to 0.15;
 *  - any schema version other than 0.15 has the tables from database.sql
 *    created (if they do not already exist) and is then recorded as 0.14.
 *
 * NOTE(review): recording 0.14 after creating the tables means the in-place
 * upgrade only runs on the next install call - confirm this is intended.
 */
function retriever_install() {
	Logger::debug('Install retriever');

	Hook::register('addon_settings', 'addon/retriever/retriever.php', 'retriever_addon_settings');
	Hook::register('addon_settings_post', 'addon/retriever/retriever.php', 'retriever_addon_settings_post');
	Hook::register('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook');
	Hook::register('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu');
	Hook::register('cron', 'addon/retriever/retriever.php', 'retriever_cron');

	if (DI::config()->get('retriever', 'dbversion') == '0.14') {
		// Every statement must succeed before the new version is recorded.
		$upgrades = [
			"ALTER TABLE `retriever_rule` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci",
			"ALTER TABLE `retriever_item` MODIFY `item-uri` varbinary(255) NOT NULL",
			"ALTER TABLE `retriever_item` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci",
			"ALTER TABLE `retriever_resource` MODIFY `url` varbinary(700) NOT NULL",
			"ALTER TABLE `retriever_resource` MODIFY `redirect-url` varbinary(700) NOT NULL",
			"ALTER TABLE `retriever_resource` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci",
		];
		foreach ($upgrades as $sql) {
			if (!DBA::e($sql)) {
				Logger::warning('Unable to update database tables: ' . DBA::errorMessage());
				return;
			}
		}
		DI::config()->set('retriever', 'dbversion', '0.15');
	}

	if (DI::config()->get('retriever', 'dbversion') != '0.15') {
		$schema = file_get_contents(dirname(__FILE__) . '/database.sql');
		if ($schema === false) {
			Logger::warning('Unable to read retriever database schema');
			return;
		}

		foreach (explode(';', $schema) as $statement) {
			// The schema file ends with ';', so the last fragment is empty.
			// Skip it instead of treating it as a failed statement.
			if (trim($statement) === '') {
				continue;
			}
			if (!DBA::e($statement)) {
				Logger::warning('Unable to create database table: ' . DBA::errorMessage());
				return;
			}
		}
		DI::config()->set('retriever', 'downloads_per_cron', '100');
		DI::config()->set('retriever', 'dbversion', '0.14');
	}
}
/**
 * @brief Uninstallation hook for retriever plugin
 *
 * Unregisters each hook registered by retriever_install() exactly once. The
 * database tables and configuration are left in place.
 */
function retriever_uninstall() {
	Logger::debug('Uninstall retriever');

	$callbacks = [
		'addon_settings'      => 'retriever_addon_settings',
		'addon_settings_post' => 'retriever_addon_settings_post',
		'post_remote'         => 'retriever_post_remote_hook',
		'contact_photo_menu'  => 'retriever_contact_photo_menu',
		'cron'                => 'retriever_cron',
	];

	foreach ($callbacks as $hook => $callback) {
		Hook::unregister($hook, 'addon/retriever/retriever.php', $callback);
	}
}
/**
* This is a statement rather than an actual function definition. The simple
* existence of this method is checked to figure out if the addon offers a
* module.
*/
function retriever_module() {}
/**
 * @brief Admin page hook for retriever plugin
 *
 * Renders the admin.tpl template with the current 'downloads_per_cron' and
 * 'allow_images' settings and appends the result to the page content.
 *
 * @param App $a App object (unused)
 * @param string $o HTML to append content to (by ref)
 */
function retriever_addon_admin(App $a, string &$o) {
	$template = Renderer::getMarkupTemplate('admin.tpl', 'addon/retriever/');

	// Each field is [name, label, current value, help text].
	$macros = [
		'$downloads_per_cron' => [
			'downloads_per_cron',
			DI::l10n()->t('Downloads per Cron'),
			DI::config()->get('retriever', 'downloads_per_cron'),
			DI::l10n()->t('Maximum number of downloads to attempt during each run of the cron job.'),
		],
		'$allow_images' => [
			'allow_images',
			DI::l10n()->t('Allow Retrieving Images'),
			DI::config()->get('retriever', 'allow_images'),
			DI::l10n()->t('Allow users to request images be downloaded as well as text.<br><b>Warning: the images are not automatically deleted and may fill up your database.</b>'),
		],
		'$submit' => DI::l10n()->t('Save Settings'),
	];

	$o .= Renderer::replaceMacros($template, $macros);
}
/**
 * @brief Admin page post hook for retriever plugin
 *
 * Stores the submitted settings. 'downloads_per_cron' is only changed when a
 * non-empty value was submitted. 'allow_images' is always written, and is
 * stored as null when the field is absent from the submitted form.
 */
function retriever_addon_admin_post () {
	if (!empty($_POST['downloads_per_cron'])) {
		DI::config()->set('retriever', 'downloads_per_cron', $_POST['downloads_per_cron']);
	}
	// The key may be missing from the POST data; avoid reading an undefined
	// array key while still storing the same value as before.
	DI::config()->set('retriever', 'allow_images', $_POST['allow_images'] ?? null);
}
/**
 * @brief Cron jobs for retriever plugin
 *
 * Runs the three periodic tasks in order, using the configured
 * 'downloads_per_cron' value as the limit for the first two.
 */
function retriever_cron() {
	$limit = DI::config()->get('retriever', 'downloads_per_cron');

	// Clean-up has to come first, otherwise it can interfere with retriever_retrieve_items
	retriever_clean_up_completed_resources($limit);

	retriever_retrieve_items($limit);

	retriever_tidy();
}
// This global variable is used to track the number of items that have been retrieved during the course of this process.
// retriever_retrieve_items() increments it once per attempted resource and subtracts it from its $max_items
// argument to work out how many more resources it may still fetch.
$retriever_item_count = 0;
/**
 * @brief Searches for items in the retriever_items table that should be retrieved and attempts to retrieve them
 *
 * Resources are eligible when they are not completed and either have never
 * been tried or have waited long enough since the last attempt according to
 * the back-off schedule below. The process-wide counter
 * $retriever_item_count is charged for every attempt, so repeated calls in
 * the same process share one budget.
 *
 * @param int $max_items Maximum number of items to retrieve in this call
 */
function retriever_retrieve_items(int $max_items) {
    global $retriever_item_count;

    // Back-off schedule: entry N is the minimum wait after `last-try` before
    // a resource that has already been tried N times is attempted again.
    $retriever_schedule = [
        [1, 'minute'],
        [10, 'minute'],
        [1, 'hour'],
        [1, 'day'],
        [2, 'day'],
        [1, 'week'],
        [1, 'month'],
    ];

    $schedule_clauses = [];
    foreach ($retriever_schedule as $tries => list($num, $unit)) {
        $schedule_clauses[] = '(`num-tries` = ' . $tries . ' AND TIMESTAMPADD(' . DBA::escape($unit) .
            ', ' . intval($num) . ', `last-try`) < now())';
    }

    // implode() takes the separator first; the legacy (array, separator)
    // order is a TypeError on PHP 8.
    $condition = ['`completed` IS NULL AND (`last-try` IS NULL OR ' . implode(' OR ', $schedule_clauses) . ')'];

    $retrieve_items = $max_items - $retriever_item_count;
    // Checking the budget before the first query avoids asking the database
    // for a zero or negative number of rows when the budget is already spent.
    while ($retrieve_items > 0) {
        Logger::debug('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . intval($retriever_item_count) . ', retrieve ' . $retrieve_items);
        $retriever_resources = DBA::selectToArray('retriever_resource', [], $condition, ['order' => ['last-try' => 0], 'limit' => $retrieve_items]);
        if (!is_array($retriever_resources) || count($retriever_resources) == 0) {
            break;
        }
        Logger::debug('retriever_retrieve_items: found ' . count($retriever_resources) . ' waiting resources in database');
        foreach ($retriever_resources as $retriever_resource) {
            retrieve_resource($retriever_resource);
            $retriever_item_count++;
        }
        $retrieve_items = $max_items - $retriever_item_count;
    }
    Logger::debug('retriever_retrieve_items: finished retrieving items');
}
/**
 * @brief Looks for items that are waiting even though the resource has completed. This shouldn't happen, but is worth cleaning up if it does.
 *
 * For every such retriever_item the completed resource is applied to the
 * item, the retriever_item is flagged as finished and the item's visibility
 * is re-evaluated. Rows whose item, rule or resource can no longer be found
 * are logged and skipped.
 *
 * @param int $max_items Maximum number of items to retrieve in this call
 */
function retriever_clean_up_completed_resources(int $max_items) {
    // TODO: figure out how to do this with DBA module
    $r = DBA::p("SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT $max_items");
    if (!DBA::isResult($r)) {
        return;
    }
    Logger::debug('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . DBA::numRows($r));
    while ($rr = DBA::fetch($r)) {
        $retriever_item = DBA::selectFirst('retriever_item', [], ['id' => intval($rr['item'])]);
        if (!DBA::isResult($retriever_item)) {
            Logger::warning('retriever_clean_up_completed_resources: no retriever item with id ' . $rr['item']);
            continue;
        }
        $item = retriever_get_item($retriever_item);
        if (!$item) {
            Logger::warning('retriever_clean_up_completed_resources: no item ' . $retriever_item['item-uri']);
            continue;
        }
        $retriever_rule = get_retriever_rule($retriever_item['contact-id'], $item['uid'], false);
        if (!$retriever_rule) {
            // The user id column of retriever_item is called `item-uid`; there is no `uid` key in the row
            Logger::warning('retriever_clean_up_completed_resources: no retriever for uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['item-uid'] . ' ' . $retriever_item['contact-id']);
            continue;
        }
        $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]);
        if (!DBA::isResult($resource)) {
            // The resource may have been deleted since the query above ran; the callee requires an array
            Logger::warning('retriever_clean_up_completed_resources: no resource with id ' . $rr['resource']);
            continue;
        }
        retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource);
        // NOTE(review): the 4th argument is presumably the "old fields" hint of DBA::update,
        // i.e. the values about to be replaced - confirm against the DBA documentation
        DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished' => 0]);
        retriever_check_item_completed($item);
    }
}
/**
 * @brief Housekeeping for the retriever tables
 *
 * Removes resources that completed more than a week ago, resources that were
 * created more than three months ago without ever completing, and finally
 * every retriever_item whose resource row no longer exists.
 */
function retriever_tidy() {
    $expired_resource_conditions = [
        'completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)',
        'completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)',
    ];
    foreach ($expired_resource_conditions as $expired_condition) {
        DBA::delete('retriever_resource', [$expired_condition]);
    }

    $orphans = DBA::p("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null");
    if (DBA::isResult($orphans)) {
        Logger::info('retriever_tidy: found ' . DBA::numRows($orphans) . ' retriever_items with no retriever_resource');
        while ($orphan = DBA::fetch($orphans)) {
            DBA::delete('retriever_item', ['id' => intval($orphan['id'])]);
        }
    }
}
/**
 * @brief Special case of retrieving a resource: if the URL is a data URL, do not use cURL, decode the URL directly
 *
 * The resource is always marked as completed afterwards: a data URL that
 * cannot be decoded now will not become decodable later.
 *
 * @param array $resource The row from the retriever_resource table
 */
function retrieve_dataurl_resource(array $resource) {
    // Start from whatever the row already holds so both keys are defined
    // even when the URL does not match.
    $resource['type'] = $resource['type'] ?? null;
    $resource['data'] = $resource['data'] ?? null;

    // The scheme of a data URL is "data:" (the pattern previously looked for "date:" and never matched)
    if (!preg_match("/data:(.*);base64,(.*)/s", $resource['url'], $matches)) {
        Logger::warning('retrieve_dataurl_resource: resource ' . $resource['id'] . ' does not match pattern');
    } else {
        $resource['type'] = $matches[1];
        // Accept both the standard and the URL-safe base64 alphabet
        $resource['data'] = base64_decode(strtr($matches[2], '-_', '+/'));
    }

    // Succeed or fail, there's no point retrying
    $now = DateTimeFormat::utcNow();
    $fields = [
        'last-try'  => $now,
        'num-tries' => intval($resource['num-tries']) + 1,
        'completed' => $now,
        'data'      => $resource['data'],
        'type'      => $resource['type'],
    ];
    // Argument order is (table, fields to set, condition, old fields); fields and condition used to be swapped here
    DBA::update('retriever_resource', $fields, ['id' => intval($resource['id'])], ['last-try' => false]);
    retriever_resource_completed($resource);
}
/**
 * @brief Makes an attempt to retrieve the supplied resource, and updates the row in the table with the results
 *
 * Resources that can never succeed (unparseable URL, unsupported scheme, no
 * rule or no rule data for the contact) are marked as completed without data
 * so they are not retried. Otherwise one HTTP fetch is attempted; the attempt
 * is always recorded (last-try, num-tries, http-code, redirect-url) and the
 * resource is only completed when the fetch produced data.
 *
 * @param array $resource The row from the retriever_resource table
 */
function retrieve_resource(array $resource) {
    // Completes the resource without data and notifies the items waiting for it
    $give_up = static function () use ($resource) {
        DBA::update('retriever_resource', ['completed' => DateTimeFormat::utcNow()], ['id' => intval($resource['id'])], ['completed' => false]);
        retriever_resource_completed($resource);
    };

    $components = parse_url($resource['url']);
    if (!$components) {
        Logger::warning('retrieve_resource: URL ' . $resource['url'] . ' could not be parsed');
        // Nothing below can work without URL components, and retrying will not help
        $give_up();
        return;
    }

    // Relative URLs have no scheme at all; schemes are case-insensitive
    $scheme = strtolower($components['scheme'] ?? '');
    if ($scheme == "data") {
        retrieve_dataurl_resource($resource);
        return;
    }
    if (($scheme != "http") && ($scheme != "https")) {
        Logger::warning('retrieve_resource: URL scheme not supported for ' . $resource['url']);
        $give_up();
        return;
    }

    $retriever_rule = get_retriever_rule($resource['contact-id'], $resource['item-uid'], false);
    if (!$retriever_rule) {
        Logger::warning('retrieve_resource: no rule found for resource id ' . $resource['id'] . ' contact ' . $resource['contact-id'] . ' item ' . $resource['item-uid']);
        $give_up();
        return;
    }
    $rule_data = $retriever_rule['data'];
    if (!$rule_data) {
        Logger::warning('retrieve_resource: no rule data found for resource id ' . $resource['id'] . ' contact ' . $resource['contact-id'] . ' item ' . $resource['item-uid']);
        $give_up();
        return;
    }

    $store_cookies = !empty($rule_data['storecookies']);
    $cookiejar = '';
    try {
        Logger::debug('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url']);
        $redirects = 0;
        if ($store_cookies) {
            // Seed a temporary cookie jar with the cookies remembered from earlier fetches
            $cookiejar = tempnam(System::getTempPath(), 'cookiejar-retriever-');
            file_put_contents($cookiejar, $rule_data['cookiedata'] ?? '');
        }
        $fetch_result = DI::httpClient()->fetchFull($resource['url'], $redirects, 0, $cookiejar);
        if ($store_cookies) {
            // Persist whatever cookies the server set for the next fetch
            $retriever_rule['data']['cookiedata'] = file_get_contents($cookiejar);
            DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], $retriever_rule);
        }
        $resource['data'] = $fetch_result->getBody();
        $resource['http-code'] = $fetch_result->getReturnCode();
        $resource['type'] = $fetch_result->getContentType();
        $resource['redirect-url'] = $fetch_result->getRedirectUrl();
        Logger::debug('retrieve_resource: got code ' . $resource['http-code'] . ' retrieving resource ' . $resource['id'] . ' final url ' . $resource['redirect-url']);
    } catch (Exception $e) {
        Logger::info('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage());
    } finally {
        // Remove the temporary cookie jar even when the fetch threw
        if ($cookiejar && file_exists($cookiejar)) {
            unlink($cookiejar);
        }
    }

    // Record the attempt whether or not it succeeded; after a failed fetch
    // the values still come from the database row (or are absent).
    DBA::update('retriever_resource', ['last-try' => DateTimeFormat::utcNow(), 'num-tries' => intval($resource['num-tries']) + 1, 'http-code' => intval($resource['http-code'] ?? 0), 'redirect-url' => $resource['redirect-url'] ?? null], ['id' => intval($resource['id'])], ['last-try' => false]);
    if (!empty($resource['data'])) {
        DBA::update('retriever_resource', ['completed' => DateTimeFormat::utcNow(), 'data' => $resource['data'], 'type' => $resource['type'] ?? null], ['id' => intval($resource['id'])], ['completed' => false]);
        retriever_resource_completed($resource);
    }
}
/**
 * @brief Looks up the retriever rule of a contact, optionally creating an empty one
 *
 * An existing rule is returned with its "data" column decoded from JSON into
 * an associative array. A rule that is created by this call is returned
 * exactly as read back from the database, i.e. without decoding "data".
 *
 * @param int     $contact_id The Contact ID of the retriever configuration
 * @param int     $uid        The User ID of the retriever configuration
 * @param boolean $create     Whether to create a new configuration if none exists already
 * @return array The row from the retriever_rule database for this configuration; null when there is none and $create is false
 */
function get_retriever_rule(string $contact_id, string $uid, bool $create) {
    $rule_key = ['contact-id' => intval($contact_id), 'uid' => intval($uid)];

    $existing = DBA::selectFirst('retriever_rule', [], $rule_key);
    if ($existing) {
        $existing['data'] = json_decode($existing['data'], true);
        return $existing;
    }

    if (!$create) {
        return null;
    }

    DBA::insert('retriever_rule', ['uid' => intval($uid), 'contact-id' => intval($contact_id)]);
    return DBA::selectFirst('retriever_rule', [], $rule_key);
}
/**
 * @brief Fetches the post that a retriever_item row refers to
 *
 * The post is identified by the URI, user ID and contact ID stored in the
 * retriever_item.
 *
 * @param array $retriever_item Row from the retriever_item table
 * @return array Item that was found, or null if no item could be found
 */
function retriever_get_item(array $retriever_item) {
    $lookup = [
        'uri'        => $retriever_item['item-uri'],
        'uid'        => intval($retriever_item['item-uid']),
        'contact-id' => intval($retriever_item['contact-id']),
    ];
    $found = Post::selectFirst([], $lookup);
    if (DBA::isResult($found)) {
        return $found;
    }
    Logger::warning('retriever_get_item: no item found for uri ' . $retriever_item['item-uri']);
    return null;
}
/**
 * @brief Follow-up work for one retriever item once its resource has completed
 *
 * Loads the retriever item and the post it belongs to, applies the resource
 * to the post, flags the retriever item as finished and then re-evaluates
 * whether the post can be made visible.
 *
 * @param int   $retriever_item_id ID of the retriever item corresponding to this resource
 * @param array $resource          The full details of the completed resource
 */
function retriever_item_completed(string $retriever_item_id, array $resource) {
    Logger::debug('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url']);

    $waiting_row = DBA::selectFirst('retriever_item', [], ['id' => intval($retriever_item_id)]);
    if (!DBA::isResult($waiting_row)) {
        Logger::info('retriever_item_completed: no retriever item with id ' . $retriever_item_id);
        return;
    }

    $post = retriever_get_item($waiting_row);
    if (!$post) {
        Logger::warning('retriever_item_completed: no item ' . $waiting_row['item-uri']);
        return;
    }

    // Note: the retriever might be null. Doesn't matter.
    $rule = get_retriever_rule($waiting_row['contact-id'], $waiting_row['item-uid'], false);
    retriever_apply_completed_resource_to_item($rule, $post, $resource);

    DBA::update('retriever_item', ['finished' => 1], ['id' => intval($waiting_row['id'])], ['finished' => 0]);
    retriever_check_item_completed($post);
}
/**
 * @brief Notifies every retriever item that is attached to a resource that the resource has completed
 *
 * @param array $resource The full details of the completed resource
 */
function retriever_resource_completed(array $resource) {
    Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url']);

    $waiting_rows = DBA::selectToArray('retriever_item', ['id'], ['resource' => intval($resource['id'])]);
    foreach ($waiting_rows as $waiting_row) {
        retriever_item_completed($waiting_row['id'], $resource);
    }
}
/**
 * @brief Re-queues already received items of a contact for retrieval
 *
 * Selects up to $num posts of the contact (ordered by "received"), hides
 * each of them, deletes the retriever items and resources recorded for it
 * and then queues it again as if it had just been inserted. The posts stay
 * invisible until they are completed again.
 *
 * @param array $retriever The row from the retriever_rule table for the contact
 * @param int   $num       The number of existing items to queue for retrieval
 */
function apply_retrospective(array $retriever, int $num) {
    $recent_posts = Post::selectToArray(
        [],
        ['contact-id' => intval($retriever['contact-id'])],
        ['order' => ['received' => true], 'limit' => $num]
    );

    foreach ($recent_posts as $item) {
        Item::update(['visible' => 0], ['id' => intval($item['id'])]);

        $previous_rows = DBA::selectToArray('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]);
        foreach ($previous_rows as $previous_row) {
            DBA::delete('retriever_resource', ['id' => $previous_row['resource']]);
            DBA::delete('retriever_item', ['id' => $previous_row['id']]);
        }

        retriever_on_item_insert($retriever, $item);
    }
}
/**
 * @brief Queues an item for retrieval. It does not actually perform the retrieval.
 *
 * The URL to download is the item's plink (looked up in the item-content
 * table when the item array does not carry one), optionally rewritten with
 * the rule's pattern/replace pair. Nothing is queued when the rule is
 * missing or not enabled, or when no URL can be determined.
 *
 * @param array $retriever Retriever rule configuration for this contact
 * @param array $item      Item that should be retrieved. This may or may not have been already stored in the database.
 *
 * TODO: This queries then inserts. It should use some kind of lock to avoid requesting the same resource twice.
 */
function retriever_on_item_insert(array $retriever, array &$item) {
    if (!$retriever || empty($retriever['id'])) {
        Logger::info('retriever_on_item_insert: No retriever supplied');
        return;
    }
    // The previous test `!$enable == "on"` negated first and compared
    // second, so it was effectively a truthiness test. This states that
    // directly and also copes with a rule whose data is null.
    if (empty($retriever['data']['enable'])) {
        return;
    }

    if (isset($item['plink']) && strlen($item['plink'])) {
        $url = $item['plink'];
    }
    else {
        if (!array_key_exists('uri-id', $item)) {
            Logger::warning('retriever_on_item_insert: item ' . ($item['id'] ?? '') . ' has no plink and no uri-id');
            return;
        }
        $content = DBA::selectFirst('item-content', [], ['uri-id' => $item['uri-id']]);
        if (!DBA::isResult($content) || empty($content['plink'])) {
            // Without a stored plink there is nothing to download
            Logger::warning('retriever_on_item_insert: no plink stored for uri-id ' . $item['uri-id']);
            return;
        }
        $url = $content['plink'];
    }

    if (!empty($retriever['data']['modurl'])) {
        $orig_url = $url;
        $url = preg_replace('/' . $retriever['data']['pattern'] . '/', $retriever['data']['replace'], $orig_url);
        Logger::debug('retriever_on_item_insert: Changed ' . $orig_url . ' to ' . $url);
    }

    $resource = add_retriever_resource($url, $item['uid'], $item['contact-id']);
    if (!$resource) {
        // add_retriever_item() requires an array
        Logger::warning('retriever_on_item_insert: could not create resource for ' . $url);
        return;
    }
    add_retriever_item($item, $resource);
}
/**
 * @brief Creates a new resource to be downloaded from the supplied URL. Unique resources are created for each URL, UID and contact ID, because different contact IDs may have different rules for how to retrieve them. If the URL is actually a data URL, the resource is completed immediately.
 *
 * Data URLs are decoded on the spot and stored under a synthetic
 * "md5://<hash of the data URL>" URL. All other URLs are only recorded;
 * downloading happens later.
 *
 * @param string  $url    URL of the resource to be downloaded
 * @param int     $uid    User ID that this resource is being downloaded for
 * @param int     $cid    Contact ID of the item that triggered the downloading of this resource
 * @param boolean $binary Specifies if this download should be done in binary mode
 * @return array The created (or already existing) resource, or an empty array if it could not be created
 */
function add_retriever_resource(string $url, string $uid, string $cid, bool $binary = false) {
    Logger::debug('add_retriever_resource: url ' . $url . ' uid ' . $uid . ' contact-id ' . $cid);

    $completed = false;
    $fields = ['item-uid' => intval($uid), 'contact-id' => intval($cid), 'binary' => ($binary ? 1 : 0)];

    $scheme = parse_url($url, PHP_URL_SCHEME);
    if ($scheme == 'data') {
        $fp = fopen($url, 'r');
        if ($fp === false) {
            // A malformed data URL cannot be opened; the stream functions below require a valid handle
            Logger::warning('add_retriever_resource: could not decode data URL');
            return [];
        }
        $meta = stream_get_meta_data($fp);
        $fields['type'] = $meta['mediatype'];
        $fields['data'] = stream_get_contents($fp);
        fclose($fp);
        $fields['completed'] = DateTimeFormat::utcNow();
        // The data URL itself is replaced by a short digest of it
        $url = 'md5://' . hash('md5', $url);
        $completed = true;
    }
    // 700 characters is the size of this field in the database
    elseif (strlen($url) > 700) {
        Logger::warning('add_retriever_resource: URL is longer than 700 characters');
    }

    $identity = ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)];

    $resource = DBA::selectFirst('retriever_resource', [], $identity);
    if ($resource) {
        Logger::debug('add_retriever_resource: Resource ' . $url . ' uid ' . $uid . ' cid ' . $cid . ' already requested');
        return $resource;
    }

    $fields['url'] = $url;
    DBA::insert('retriever_resource', $fields);

    $resource = DBA::selectFirst('retriever_resource', [], $identity);
    if (!$resource) {
        // Callers type-hint the result as array, so never hand back false
        Logger::warning('add_retriever_resource: could not store resource ' . $url);
        return [];
    }
    if ($completed) {
        retriever_resource_completed($resource);
    }
    return $resource;
}
/**
 * @brief Records that an item is waiting for a resource
 *
 * Inserts a retriever_item row linking the item (by URI, user ID and contact
 * ID) to the resource. Nothing is inserted when the resource has no ID or
 * when a row for the same item URI, user ID and resource already exists.
 *
 * @param array $item     Item that is waiting for the resource. This may or may not have been already stored in the database.
 * @param array $resource Resource that the item needs to wait for. This must have already been stored in the database.
 * @return int ID of the retriever item that was created; null if the resource is empty, a matching row already exists, or the row could not be created
 */
function add_retriever_item(array $item, array $resource) {
    $item_label = $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'];
    Logger::debug('add_retriever_item: ' . $resource['url'] . ' for ' . $item_label);

    $has_id = array_key_exists('id', $resource) && $resource['id'];
    if (!$has_id) {
        Logger::warning('add_retriever_item: resource is empty');
        return null;
    }

    $link = [
        'item-uri' => $item['uri'],
        'item-uid' => intval($item['uid']),
        'resource' => intval($resource['id']),
    ];

    if (DBA::selectFirst('retriever_item', [], $link)) {
        Logger::info("add_retriever_item: retriever item already present for " . $item_label);
        return null;
    }

    DBA::insert('retriever_item', ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'contact-id' => intval($item['contact-id']), 'resource' => intval($resource['id'])]);

    $created = DBA::selectFirst('retriever_item', ['id'], $link);
    if ($created) {
        Logger::debug('add_retriever_item: created retriever_item ' . $created['id'] . ' for item ' . $item_label);
        return $created['id'];
    }

    Logger::info("add_retriever_item: couldn't create retriever item for " . $item_label);
    return null;
}
/**
 * @brief Analyse a completed text resource (such as HTML) for the character encoding used
 *
 * The encoding is taken from the "charset" parameter of the resource's
 * content type. The parameter name is matched case-insensitively, optional
 * quotes around the value are removed, and the value ends at the next ";",
 * quote or whitespace so that further parameters are not swallowed.
 *
 * @param array $resource The completed resource
 * @return string Character encoding, e.g. "utf-8" or "iso-8859-1"; "utf-8" when no charset is given
 */
function retriever_get_encoding(array $resource) {
    // The content type may be missing or null for resources that never got one
    $content_type = isset($resource['type']) ? (string) $resource['type'] : '';
    $matches = [];
    if (preg_match('/charset\s*=\s*["\']?([^"\';\s]+)/i', $content_type, $matches)) {
        return $matches[1];
    }
    return 'utf-8';
}
/**
 * @brief Runs an XSLT stylesheet, given as text, over a DOM document
 *
 * When the stylesheet text is empty or is not well-formed XML, the input
 * document is handed back untouched.
 *
 * @param string      $xslt_text Text of the XSLT template
 * @param DOMDocument $doc       Input to the XSLT template
 * @return DOMDocument Result of applying the template
 */
function retriever_apply_xslt_text(string $xslt_text, DOMDocument $doc) {
    if ($xslt_text) {
        $stylesheet = new DOMDocument();
        if ($stylesheet->loadXML($xslt_text)) {
            $processor = new XsltProcessor();
            $processor->importStylesheet($stylesheet);
            return $processor->transformToDoc($doc);
        }
        Logger::info('retriever_apply_xslt_text: could not load XML');
        return $doc;
    }
    Logger::info('retriever_apply_xslt_text: empty XSLT text');
    return $doc;
}
/**
 * @brief Replaces the body of an item with content extracted from a downloaded resource
 *
 * The resource text is parsed into a DOM document, reduced by the rule's
 * include/exclude and custom XSLT settings, has its URLs made global, and is
 * converted to BBCode. A "Retrieved <date>" line linking to the item's plink
 * is appended before the new body is stored. The item is left alone when the
 * rule defines neither "include" nor "customxslt", when the resource has no
 * data, when a transformation fails or when the resulting text is empty.
 *
 * @param array $retriever Retriever rules as stored in the database, with the "data" element already decoded from JSON
 * @param array &$item     Item in which to store the new body (by ref). This may or may not be already stored in the database.
 * @param array $resource  Newly completed resource, which should be text (HTML or XML)
 */
function retriever_apply_dom_filter(array $retriever, array &$item, array $resource) {
    Logger::debug('retriever_apply_dom_filter: applying XSLT to uri ' . $item['uri'] . ' uid ' . $item['uid'] . ' contact ' . $item['contact-id']);

    $has_include = array_key_exists('include', $retriever['data']);
    $has_custom = array_key_exists('customxslt', $retriever['data']);
    if (!($has_include || $has_custom)) {
        Logger::info('retriever_apply_dom_filter: no include and no customxslt');
        return;
    }
    if (!$resource['data']) {
        Logger::info('retriever_apply_dom_filter: no text to work with');
        return;
    }

    $extracted = retriever_extract(retriever_load_into_dom($resource), $retriever);
    if (!$extracted) {
        Logger::info('retriever_apply_dom_filter: failed to apply extract XSLT template');
        return;
    }

    $globalised = retriever_globalise_urls($extracted, $resource);
    if (!$globalised) {
        Logger::info('retriever_apply_dom_filter: failed to apply fix urls XSLT template');
        return;
    }

    $new_body = HTML::toBBCode($globalised->saveHTML());
    if (!strlen($new_body)) {
        Logger::info('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty');
        return;
    }

    // Footer telling the reader when the content was fetched and from where
    $new_body .= "\n\n" . DI::l10n()->t('Retrieved') . ' ' . date("Y-m-d") . ': [url=' . $item['plink'] . ']' . $item['plink'] . '[/url]';

    Logger::debug('retriever_apply_dom_filter: XSLT result \"' . $new_body . '\"');
    retriever_set_body($item, $new_body);
}
/**
 * @brief Converts the completed resource, which must be HTML or XML, into a DOM document
 *
 * The text is first converted to UTF-8 using the charset announced in the
 * resource's content type, then every non-ASCII character is written as a
 * numeric character reference so that the parser does not have to guess the
 * encoding. The HTML parser is used when the content type contains "html",
 * the XML parser otherwise. Parser diagnostics are collected internally and
 * discarded instead of being emitted as PHP warnings.
 *
 * @param array $resource The resource containing the text content
 * @return DOMDocument The parsed document (possibly empty if parsing failed)
 */
function retriever_load_into_dom(array $resource) {
    $encoding = retriever_get_encoding($resource);

    // An unknown charset name makes mb_convert_encoding() throw on PHP 8 and
    // return false on PHP 7; in both cases fall back to the raw text.
    try {
        $utf8 = mb_convert_encoding($resource['data'], 'UTF-8', $encoding);
    } catch (\ValueError $e) {
        Logger::info('retriever_load_into_dom: unsupported encoding ' . $encoding);
        $utf8 = false;
    }
    if ($utf8 === false) {
        $utf8 = $resource['data'];
    }

    // Replacement for the 'HTML-ENTITIES' conversion target, which is deprecated since PHP 8.2
    $content = mb_encode_numericentity($utf8, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');

    $doc = new DOMDocument('1.0', 'UTF-8');
    $previous = libxml_use_internal_errors(true);
    if (strpos((string) ($resource['type'] ?? ''), 'html') !== false) {
        $doc->loadHTML($content);
    }
    else {
        $doc->loadXML($content);
    }
    libxml_clear_errors();
    libxml_use_internal_errors($previous);
    return $doc;
}
/**
 * @brief Applies the retriever rules, including configuration for included and excluded portions, to the DOM document
 *
 * Two optional steps run in order: the include/exclude stylesheet rendered
 * from extract.tpl (only when the rule has "include" clauses), then the
 * rule's custom XSLT (only when one is configured).
 *
 * @param DOMDocument $doc       The original DOM document downloaded from the link
 * @param array       $retriever The retriever configuration for this contact
 * @return DOMDocument New DOM document containing only the desired content; a falsy value if a transformation failed
 */
function retriever_extract(DOMDocument $doc, array $retriever) {
    $spec = $retriever['data'] ?? [];

    // "include" is absent for rules that only define a custom XSLT
    if (!empty($spec['include'])) {
        $extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/');
        $extract_xslt = Renderer::replaceMacros($extract_template, ['$spec' => $spec]);
        Logger::debug('retriever_extract: applying include/exclude template \"' . $extract_xslt . '\"');
        $doc = retriever_apply_xslt_text($extract_xslt, $doc);
        if (!$doc) {
            // The transformation failed; the next step requires a DOMDocument, so report the failure to the caller
            return $doc;
        }
    }

    if (!empty($spec['customxslt'])) {
        Logger::debug('retriever_extract: applying custom XSLT \"' . $spec['customxslt'] . '\"');
        $doc = retriever_apply_xslt_text($spec['customxslt'], $doc);
    }

    return $doc;
}
/**
 * @brief Converts local URLs in the DOM document to global URLs
 *
 * The base for the conversion is the resource's "redirect-url": the root URL
 * is scheme, host and (if present) port, and the directory URL is the root
 * URL plus the path up to and including its last "/". Both are handed to the
 * fix-urls.tpl stylesheet. The document is returned unchanged when the
 * redirect URL is missing, unparseable or lacks scheme, host or path.
 *
 * @param DOMDocument $doc      DOM document potentially containing links
 * @param array       $resource Completed resource which contains the text in the DOM document
 * @return DOMDocument New DOM document with global URLs
 */
function retriever_globalise_urls(DOMDocument $doc, array $resource) {
    $components = parse_url((string) ($resource['redirect-url'] ?? ''));
    // parse_url() returns false for seriously malformed URLs
    if (!is_array($components) || !isset($components['scheme'], $components['host'], $components['path'])) {
        return $doc;
    }

    $rooturl = $components['scheme'] . "://" . $components['host'];
    if (isset($components['port'])) {
        $rooturl .= ':' . $components['port'];
    }

    // Keep everything up to and including the last slash. Unlike dirname()
    // this yields "/" for "/page.html" (not "//" after appending a slash)
    // and "/a/b/" for a path that already ends in a slash.
    $path = $components['path'];
    $last_slash = strrpos($path, '/');
    $directory = ($last_slash === false) ? '/' : substr($path, 0, $last_slash + 1);
    $dirurl = $rooturl . $directory;

    $params = ['$dirurl' => $dirurl, '$rooturl' => $rooturl];
    $fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/');
    $fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params);
    return retriever_apply_xslt_text($fix_urls_xslt, $doc);
}
/**
 * @brief Determines the current body text of an item
 *
 * Items without a "uri-id" are treated as not yet stored and their in-memory
 * body is returned. For stored items the body is read from the item-content
 * table; the in-memory body is only used as a fallback when that lookup
 * yields no row or an empty body.
 *
 * @param array $item Row from the item table
 * @return string The body text
 */
function retriever_get_body(array $item) {
    $is_stored = array_key_exists('uri-id', $item) && $item['uri-id'];
    if (!$is_stored) {
        // item has not yet been stored in database
        return $item['body'];
    }

    // item has been stored in database, body is stored in the item-content table
    $stored = DBA::selectFirst('item-content', ['body'], ['uri-id' => $item['uri-id']]);
    if (!$stored) {
        Logger::warning('retriever_get_body: item-content uri-id ' . $item['uri-id'] . ' has no content');
        return $item['body'];
    }

    $stored_body = $stored['body'];
    if (!$stored_body) {
        Logger::warning('retriever_get_body: item-content uri-id ' . $item['uri-id'] . ' has no body');
        return $item['body'];
    }

    // A mismatch between the two copies is unexpected and worth a loud log line
    if ($stored_body != $item['body']) {
        Logger::warning('@@@ this is probably bad @@@ content: ' . $stored_body . ' @@@ item: ' . $item['body']);
    }
    return $stored_body;
}
/**
 * @brief Sets a new body text on an item
 *
 * The in-memory item is always updated. The database is only touched when
 * the item carries a non-empty "id", i.e. has been stored already.
 *
 * @param array  &$item Item in which to set the body (by ref). This may or may not be already stored in the database.
 * @param string $body  New body content
 */
function retriever_set_body(array &$item, string $body) {
    $item['body'] = $body;

    // Only items that already have a database id need the stored copy updated
    $is_stored = array_key_exists('id', $item) && $item['id'];
    if ($is_stored) {
        Item::update(['body' => $body], ['id' => intval($item['id'])]);
    }
}
/**
 * @brief Searches for images in the item and adds corresponding retriever_items. If the images have already been downloaded, updates the body in the supplied item array.
 *
 * Does nothing unless the site setting "allow_images" is enabled. Images
 * that are already served from this site's base URL are skipped. Each
 * distinct remote URL is handled once.
 *
 * @param array &$item Item to be searched for images and updated (by ref). This may or may not be already stored in the database.
 */
function retrieve_images(array &$item) {
    if (!DI::config()->get('retriever', 'allow_images')) {
        return;
    }
    $body = retriever_get_body($item);
    if (!strlen($body)) {
        Logger::warning('retrieve_images: no body for item ' . $item['uri']);
        return;
    }

    // I suspect that the first two are not used any more?
    preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $body, $matches1);
    preg_match_all("/\[img\](.*?)\[\/img\]/ism", $body, $matches2);
    preg_match_all("/\[img\=([^\]]*)\]([^[]*)\[\/img\]/ism", $body, $matches3);

    // The third pattern also matches the [img=WIDTHxHEIGHT] form handled by
    // the first one, where its capture is a size and not a URL - drop those.
    $attribute_urls = preg_grep('/^[0-9]*x[0-9]*$/', $matches3[1], PREG_GREP_INVERT);

    // Handle every URL once, even if the image appears several times
    $urls = array_unique(array_filter(array_merge($matches1[3], $matches2[1], $attribute_urls)));
    Logger::debug('retrieve_images: found ' . count($urls) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);

    $base_url = DI::baseUrl()->get(true);
    foreach ($urls as $url) {
        if (strpos($url, $base_url) !== false) {
            // already a local image
            continue;
        }
        $resource = add_retriever_resource($url, $item['uid'], $item['contact-id'], true);
        if (!$resource) {
            Logger::warning('retrieve_images: could not create resource for ' . $url);
            continue;
        }
        if (empty($resource['completed'])) {
            add_retriever_item($item, $resource);
        }
        else {
            retriever_transform_images($item, $resource);
        }
    }
}
/**
 * @brief Re-evaluates the visibility of an item based on its outstanding retriever items
 *
 * The item is visible exactly when no unfinished retriever_item row exists
 * for it. The in-memory flag is always set; the database is only updated
 * for items that have an ID and whose flag actually changed.
 *
 * @param array &$item Row from the item table (by ref)
 */
function retriever_check_item_completed(array &$item)
{
    $pending = DBA::selectFirst('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => intval($item['uid']), 'contact-id' => intval($item['contact-id']), 'finished' => 0]);
    Logger::debug('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] . ' '. $item['contact-id'] . ' waiting for resources');

    $previous_flag = $item['visible'];
    if ($pending) {
        $item['visible'] = 0;
    } else {
        $item['visible'] = 1;
    }

    $is_stored = array_key_exists('id', $item) && ($item['id'] > 0);
    if (!$is_stored || ($previous_flag == $item['visible'])) {
        return;
    }

    Logger::debug('retriever_check_item_completed: changing visible flag to ' . $item['visible']);
    Item::update(['visible' => $item['visible']], ['id' => intval($item['id'])]);
}
/**
 * @brief Updates an item with a completed resource. If the resource was text, update the body with the new content. If the resource was an image, replace remote images in the body with a local version.
 *
 * Image resources are applied even without a rule. Text resources need the
 * rule, because it describes how to extract the content.
 *
 * @param array|null $retriever Rule configuration for this contact, or null if there is none
 * @param array      &$item     Row from the item table (by ref)
 * @param array      $resource  The resource that has just been completed
 */
function retriever_apply_completed_resource_to_item(?array $retriever, array &$item, array $resource) {
    // A resource that was completed without a successful download has no content type
    $type = (string) ($resource['type'] ?? '');

    Logger::debug('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . ($item['plink'] ?? ''));

    if (strpos($type, 'image') !== false) {
        retriever_transform_images($item, $resource);
    }

    // Callers may legitimately pass null here, hence the nullable parameter
    if (!$retriever) {
        Logger::warning('retriever_apply_completed_resource_to_item: no retriever');
        return;
    }

    if ((strpos($type, 'html') !== false) || (strpos($type, 'xml') !== false)) {
        retriever_apply_dom_filter($retriever, $item, $resource);
        if (!empty($retriever['data']['images'])) {
            retrieve_images($item);
        }
    }
}
/**
 * @brief Stores a downloaded image locally and points the item body at the local copy
 *
 * The image data of the resource is stored as a photo in the "Retriever"
 * album of the item's user, and every occurrence of the resource URL in the
 * item body is replaced with the URL of the stored photo. The body is
 * updated in the supplied item array and, through retriever_set_body(), in
 * the database if the item has an ID. Nothing happens when the resource has
 * no data or the data is not a valid image.
 *
 * @param array &$item    Row from the item table (by ref)
 * @param array $resource Row from the resource table containing successfully downloaded image
 *
 * TODO: split this into two functions, one to store the image, the other to change the item body
 */
function retriever_transform_images(array &$item, array $resource) {
    $data = $resource['data'];
    if (!$data) {
        Logger::info('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url']);
        return;
    }

    $type = $resource['type'];
    $uid = $item['uid'];
    $cid = $item['contact-id'];
    $rid = Photo::newResource();

    // Use the last path segment of the URL as the file name of the stored photo
    $parts = pathinfo(parse_url($resource['url'], PHP_URL_PATH));
    $filename = $parts['filename'];
    if (array_key_exists('extension', $parts)) {
        $filename .= '.' . $parts['extension'];
    }

    $album = 'Retriever';
    $scale = 0;
    $desc = ''; // TODO: store alt text with resource when it's requested so we can fill this in
    Logger::debug('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . $uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . $album . ' scale ' . $scale . ' desc ' . $desc);

    $image = new Image($data, $type);
    if (!$image->isValid()) {
        Logger::warning('retriever_transform_images: invalid image found at URL ' . $resource['url'] . ' for item ' . $item['id']);
        return;
    }

    // The result of storing the photo is deliberately not inspected
    Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc);

    $new_url = DI::baseUrl()->get(true) . '/photo/' . $rid . '-0.' . $image->getExt();
    if ($new_url === '') {
        Logger::warning('retriever_transform_images: no replacement URL for image ' . $resource['url']);
        return;
    }

    $old_body = retriever_get_body($item);
    Logger::debug('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . $item['uri']);
    retriever_set_body($item, str_replace($resource["url"], $new_url, $old_body));
}
/**
 * @brief Displays the retriever configuration page for a contact. Alternatively, if the user clicked the "help" button, display the help content.
 *
 * The first path argument selects what is shown: the literal "help" renders
 * the help page with a list of the user's feed contacts, anything else is
 * passed to get_retriever_rule() as the contact whose rule is edited.  When
 * the form was posted (non-empty "id" field) the rule is rebuilt from the
 * posted fields and saved before the form is rendered again.  Without a
 * first path argument nothing is rendered.
 *
 * All output is appended through DI::page(), the same way the configuration
 * form itself is emitted.
 *
 * @param App $a The App object (unused, kept for the module callback signature)
 */
function retriever_content(App $a) {
	$uid = Session::getLocalUser();
	if (!$uid) {
		DI::page()['content'] .= "<p>Please log in</p>";
		return;
	}

	$argv = DI::args()->getArgv();
	if (!isset($argv[1])) {
		return;
	}
	$arg1 = $argv[1];

	if ($arg1 === 'help') {
		$feeds = DBA::selectToArray('contact', ['id', 'name', 'thumb'], ['uid' => $uid, 'network' => 'feed']);
		foreach ($feeds as $i => $feed) {
			$feeds[$i]['url'] = DI::baseUrl()->get(true) . '/retriever/' . $feed['id'];
		}
		$template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/');
		DI::page()['content'] .= Renderer::replaceMacros($template, [
			'$config' => DI::baseUrl()->get(true) . '/settings/addon',
			'$allow_images' => DI::config()->get('retriever', 'allow_images'),
			'$feeds' => $feeds,
		]);
		return;
	}

	// Values shown for every setting that a rule does not define
	$defaults = ['enable' => 0, 'modurl' => '', 'pattern' => '', 'replace' => '', 'images' => 0, 'storecookies' => 0, 'cookiedata' => '', 'customxslt' => '', 'include' => '', 'exclude' => ''];

	$retriever_rule = get_retriever_rule($arg1, $uid, false);
	if (!$retriever_rule) {
		$retriever_rule = ['id' => 0, 'data' => $defaults];
	}

	if (!empty($_POST["id"])) {
		$retriever_rule = get_retriever_rule($arg1, $uid, true);

		// Start from empty clause lists so the loops below always have
		// something to iterate over, even when no clause row was posted.
		$data = ['include' => [], 'exclude' => []];
		foreach (['modurl', 'pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata'] as $setting) {
			$data[$setting] = empty($_POST['retriever_' . $setting]) ? null : $_POST['retriever_' . $setting];
		}

		// Clause inputs are named retriever-<include|exclude>-<row>-<field>
		foreach ($_POST as $k => $v) {
			if (preg_match('/^retriever-(include|exclude)-(\d+)-(element|attribute|value)$/', (string)$k, $matches)) {
				$data[$matches[1]][intval($matches[2])][$matches[3]] = $v;
			}
		}

		// You've gotta have an element, even if it's just "*"
		foreach (['include', 'exclude'] as $section) {
			foreach ($data[$section] as $k => $clause) {
				if (empty($clause['element'])) {
					unset($data[$section][$k]);
				}
			}
		}

		$retriever_rule['data'] = $data;
		DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], ['data' => '']);

		$message = "<p><b>Settings Updated";
		if (!empty($_POST["retriever_retrospective"])) {
			apply_retrospective($retriever_rule, $_POST["retriever_retrospective"]);
			// Never echo the raw request value back into the page
			$message .= " and retrospectively applied to " . intval($_POST["retriever_retrospective"]) . " posts";
		}
		$message .= ".</b></p>";
		DI::page()['content'] .= $message;
	}

	// A stored rule may lack settings that were introduced later
	$data = (array)($retriever_rule['data'] ?? []) + $defaults;

	$template = Renderer::getMarkupTemplate('/rule-config.tpl', 'addon/retriever/');
	DI::page()['content'] .= Renderer::replaceMacros($template, [
		'$enable' => [
			'retriever_enable',
			DI::l10n()->t('Enabled'),
			$data['enable']],
		'$modurl' => [
			'retriever_modurl',
			DI::l10n()->t('Modify URL'),
			$data['modurl'],
			DI::l10n()->t("Modify each article's URL with regular expressions before retrieving.")],
		'$pattern' => [
			'retriever_pattern',
			DI::l10n()->t('URL Pattern'),
			$data['pattern'],
			DI::l10n()->t('Regular expression matching part of the URL to replace')],
		'$replace' => [
			'retriever_replace',
			DI::l10n()->t('URL Replace'),
			$data['replace'],
			DI::l10n()->t('Text to replace matching part of above regular expression')],
		'$allow_images' => DI::config()->get('retriever', 'allow_images'),
		'$images' => [
			'retriever_images',
			DI::l10n()->t('Download Images'),
			$data['images']],
		'$retrospective' => [
			'retriever_retrospective',
			DI::l10n()->t('Retrospectively Apply'),
			'0',
			DI::l10n()->t('Reapply the rules to this number of posts')],
		'$storecookies' => [
			'retriever_storecookies',
			DI::l10n()->t('Store cookies'),
			$data['storecookies'],
			DI::l10n()->t("Preserve cookie data across fetches.")],
		'$cookiedata' => [
			'retriever_cookiedata',
			DI::l10n()->t('Cookie Data'),
			$data['cookiedata'],
			DI::l10n()->t("Latest cookie data for this feed. Example: [{\"Name\":\"cookie-name\",\"Value\":\"cookie-value\",\"Domain\":\"example.com\",\"Path\":\"\\/path\\/\",\"Max-Age\":null,\"Expires\":1682450014,\"Secure\":true,\"Discard\":false,\"HttpOnly\":true}]")],
		'$customxslt' => [
			'retriever_customxslt',
			DI::l10n()->t('Custom XSLT'),
			$data['customxslt'],
			DI::l10n()->t("When standard rules aren't enough, apply custom XSLT to the article")],
		'$title' => DI::l10n()->t('Retrieve Feed Content'),
		'$help' => DI::baseUrl()->get(true) . '/retriever/help',
		'$help_t' => DI::l10n()->t('Get Help'),
		'$submit_t' => DI::l10n()->t('Submit'),
		'$submit' => DI::l10n()->t('Save Settings'),
		'$id' => ($retriever_rule["id"] ? $retriever_rule["id"] : "create"),
		'$tag_t' => DI::l10n()->t('Tag'),
		'$attribute_t' => DI::l10n()->t('Attribute'),
		'$value_t' => DI::l10n()->t('Value'),
		'$add_t' => DI::l10n()->t('Add'),
		'$remove_t' => DI::l10n()->t('Remove'),
		'$include_t' => DI::l10n()->t('Include'),
		'$include' => $data['include'],
		'$exclude_t' => DI::l10n()->t('Exclude'),
		'$exclude' => $data['exclude'],
	]);
}
/**
 * @brief Hook that offers a "Retriever" entry in the menu of feed contacts
 *
 * Contacts on any other network, and calls without menu details, are left
 * untouched.
 *
 * @param App $a The App object (by ref)
 * @param array $args Contact menu details to be filled in (by ref)
 */
function retriever_contact_photo_menu(App &$a, array &$args) {
	$is_feed_contact = $args && ($args["contact"]["network"] == "feed");
	if ($is_feed_contact) {
		$label = DI::l10n()->t('Retriever');
		$config_url = DI::baseUrl()->get(true) . '/retriever/' . $args["contact"]['id'];
		$args["menu"]['retriever'] = [$label, $config_url];
	}
}
/**
 * @brief Hook for processing new incoming items
 *
 * Items from a contact that has a retriever rule are handed to
 * retriever_on_item_insert().  For all other items the owner's personal
 * "oembed" and "all_photos" settings decide whether embedded content is
 * resolved and whether images are retrieved.
 *
 * The personal settings are read from the per-user configuration
 * (DI::pConfig()), the same store the settings hooks below write to.
 *
 * @param App $a The App object (by ref)
 * @param array $item New item, which has not yet been inserted into database (by ref)
 */
function retriever_post_remote_hook(App &$a, array &$item) {
	Logger::info('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
	$retriever_rule = get_retriever_rule($item['contact-id'], $item["uid"], false);
	if ($retriever_rule) {
		retriever_on_item_insert($retriever_rule, $item);
	}
	else {
		$uid = intval($item["uid"]);
		if (DI::pConfig()->get($uid, 'retriever', 'oembed')) {
			// Convert to HTML and back to take advantage of bbcode's resolution of oembeds.
			$body = retriever_get_body($item);
			$body = HTML::toBBCode(BBCode::convert($body));
			retriever_set_body($item, $body);
		}
		if (DI::pConfig()->get($uid, 'retriever', 'all_photos')) {
			retrieve_images($item);
		}
	}
	retriever_check_item_completed($item);
}
/**
 * @brief Hook for adding per-user retriever settings to the user's settings page
 *
 * Nothing is appended when no local user is logged in.
 *
 * @param App $a The App object (by ref)
 * @param string $s HTML string to which to append settings content (by ref)
 */
function retriever_addon_settings(App &$a, string &$s) {
	$uid = Session::getLocalUser();
	if (!$uid) {
		return;
	}
	$all_photos = DI::pConfig()->get($uid, 'retriever', 'all_photos');
	$oembed = DI::pConfig()->get($uid, 'retriever', 'oembed');
	$template = Renderer::getMarkupTemplate('/settings.tpl', 'addon/retriever/');
	$config = array('$submit' => DI::l10n()->t('Save Settings'),
		'$title' => DI::l10n()->t('Retriever Settings'),
		'$help' => DI::baseUrl()->get(true) . '/retriever/help',
		'$allow_images' => DI::config()->get('retriever', 'allow_images'));
	$config['$allphotos'] = array('retriever_all_photos',
		DI::l10n()->t('All Photos'),
		$all_photos,
		DI::l10n()->t('Check this to retrieve photos for all posts'));
	$config['$oembed'] = array('retriever_oembed',
		DI::l10n()->t('Resolve OEmbed'),
		$oembed,
		DI::l10n()->t('Check this to attempt to retrieve embedded content for all posts'));
	$s .= Renderer::replaceMacros($template, $config);
}
/**
 * @brief Hook for processing post results from user's settings page
 *
 * Each checkbox is stored in the per-user configuration when it was ticked
 * and removed from it otherwise (an unticked checkbox is not part of the
 * posted data at all).
 *
 * NOTE(review): nothing here checks that the submission came from the
 * retriever settings form - confirm the hook is only invoked for it.
 *
 * @param App $a The App object
 * @param array $post Posted content
 */
function retriever_addon_settings_post(App $a, array $post) {
	$uid = Session::getLocalUser();
	if (!$uid) {
		return;
	}
	foreach (['all_photos', 'oembed'] as $setting) {
		if (!empty($post['retriever_' . $setting])) {
			DI::pConfig()->set($uid, 'retriever', $setting, $post['retriever_' . $setting]);
		}
		else {
			DI::pConfig()->delete($uid, 'retriever', $setting);
		}
	}
}

View file

@ -0,0 +1,9 @@
{{*
* AUTOMATICALLY GENERATED TEMPLATE
* DO NOT EDIT THIS FILE, CHANGES WILL BE OVERWRITTEN
*
*}}
{{include file="field_input.tpl" field=$downloads_per_cron}}
{{include file="field_checkbox.tpl" field=$allow_images}}
<div class="submit"><input type="submit" name="page_site" value="{{$submit}}"></div>

View file

@ -0,0 +1,24 @@
<?xml version="1.0" encoding="utf-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<!-- Stylesheet template: keeps only the elements matched by the "include" clauses of a rule and, inside those, drops the elements matched by its "exclude" clauses. -->
<xsl:output method="html" indent="yes" version="4.0"/>
<!-- Default mode: text nodes produce no output, so only the include templates below contribute content. -->
<xsl:template match="text()"/>
<!-- The clause_xpath template function defined on the next line builds the match pattern for one clause: the bare element when no attribute is given, a contains() test against the space-padded class list when the attribute is "class", otherwise an exact comparison of the attribute value. NOTE(review): the class value itself is not space-padded, so it matches as a substring of the class list; confirm this is intended. -->
{{function clause_xpath}}{{if !$clause.attribute}}{{$clause.element}}{{elseif $clause.attribute == 'class'}}{{$clause.element}}[contains(concat(' ', normalize-space(@class), ' '), '{{$clause.value}}')]{{else}}{{$clause.element}}[@{{$clause.attribute}}='{{$clause.value}}']{{/if}}{{/function}}
<!-- One template per include clause: copy the matched element and process its children and attributes in "remove" mode. -->
{{foreach $spec.include as $clause}}
<xsl:template match="{{clause_xpath clause=$clause}}">
<xsl:copy>
<xsl:apply-templates select="node()|@*" mode="remove"/>
</xsl:copy>
</xsl:template>{{/foreach}}
<!-- One empty template per exclude clause: in "remove" mode a matched element produces no output. -->
{{foreach $spec.exclude as $clause}}
<xsl:template match="{{clause_xpath clause=$clause}}" mode="remove"/>{{/foreach}}
<!-- Fallback for "remove" mode: copy every other node and attribute, recursing into children. -->
<xsl:template match="node()|@*" mode="remove">
<xsl:copy>
<xsl:apply-templates select="node()|@*" mode="remove"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>

View file

@ -0,0 +1,26 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- attempt to replace relative URLs with absolute URLs -->
<!-- http://stackoverflow.com/questions/3824631/replace-href-value-in-anchor-tags-of-html-using-xslt -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output method="html" indent="yes" version="4.0"/>
<!-- Identity transform: everything not matched by a more specific template below is copied as it is. -->
<xsl:template match="node()|@*">
<xsl:copy>
<xsl:apply-templates select="node()|@*"/>
</xsl:copy>
</xsl:template>
<!-- A src attribute starting with "." gets the dirurl template variable put in front of it. -->
<xsl:template match="*/@src[starts-with(.,'.')]">
<xsl:attribute name="src">
<xsl:value-of select="concat('{{$dirurl}}',.)"/>
</xsl:attribute>
</xsl:template>
<!-- A src attribute starting with "/" gets the rooturl template variable put in front of it. Only src attributes are rewritten by this stylesheet. -->
<xsl:template match="*/@src[starts-with(.,'/')]">
<xsl:attribute name="src">
<xsl:value-of select="concat('{{$rooturl}}',.)"/>
</xsl:attribute>
</xsl:template>
</xsl:stylesheet>

View file

@ -0,0 +1,163 @@
<h2>Retriever Plugin Help</h2>
<p>
This plugin replaces the short excerpts you normally get in RSS feeds
with the full content of the article from the source website. You
specify which part of the page you're interested in with a set of
rules. When each item arrives, the plugin downloads the full page
from the website, extracts content using the rules, and replaces the
original article.
</p>
<p>
There are a few reasons you may want to do this. The source website
might be slow or overloaded. The source website might be
untrustworthy, in which case using Friendica to scrub the HTML is a
good idea. You might be on a LAN that blacklists certain websites.
It also works neatly with the mailstream plugin, allowing you to read
a news stream comfortably without needing continuous Internet
connectivity.
</p>
<p>
However, setting up retriever can be quite tricky since it depends on
the internal design of the website. That was designed to make life
easy for the website's developers, not for you. You'll need to have
some familiarity with HTML, and be willing to adapt when the website
suddenly changes everything without notice.
</p>
<h3>Configuring Retriever for a feed</h3>
<p>
To set up retriever for an RSS feed, go to the "Contacts" page and
find your feed. Then click on the drop-down menu on the contact.
Select "Retriever" to get to the retriever configuration.
</p>
<p>
The "Include" configuration section specifies parts of the page to
include in the article. Each row has three components:
</p>
<ul>
<li>An HTML tag (e.g. "div", "span", "p")</li>
<li>An attribute (usually "class" or "id")</li>
<li>A value for the attribute</li>
</ul>
<p>
A simple case is when the article is wrapped in a "div" element:
</p>
<pre>
...
&lt;div class="ArticleWrapper"&gt;
&lt;h2&gt;Man Bites Dog&lt;/h2&gt;
&lt;img src="mbd.jpg"&gt;
&lt;p&gt;
Residents of the sleepy community of Nowheresville were
shocked yesterday by the sight of creepy local weirdo Jim
McOddman assaulting innocent local dog Snufflekins with his
false teeth.
&lt;/p&gt;
...
&lt;/div&gt;
...
</pre>
<p>
You then specify the tag "div", attribute "class", and value
"ArticleWrapper". Everything else in the page, such as navigation
panels and menus and footers and so on, will be discarded. If there
is more than one section of the page you want to include, specify each
one on a separate row. If the matching section contains some sections
you want to remove, specify those in the "Exclude" section in the same
way.
</p>
<p>
Once you've got a configuration that you think will work, you can try
it out on some existing articles. Type a number into the
"Retrospectively Apply" box and click "Submit". After a while
(exactly how long depends on your system's cron configuration) the new
articles should be available.
</p>
<h3>Techniques</h3>
<p>
You can leave the attribute and value blank to include all the
corresponding elements with the specified tag name. You can also use
a tag name of just an asterisk ("*"), which will match any element type with the
specified attribute regardless of the tag.
</p>
<p>
Note that the "class" attribute is a special case. Many web page
templates will put multiple different classes in the same element,
separated by spaces. If you specify an attribute of "class" it will
match an element if any of its classes matches the specified value.
For example:
</p>
<pre>
&lt;div class="article breaking-news"&gt;
</pre>
<p>
In this case you can specify a value of "article", or "breaking-news".
You can also specify "article breaking-news", but that won't match if
the website suddenly changes to "breaking-news article", so that's not
recommended.
</p>
<p>
One useful trick you can try is using the website's "print" pages.
Many news sites have print versions of all their articles. These are
usually drastically simplified compared to the live website page.
Sometimes this is a good way to get the whole article when it's
normally split across multiple pages.
</p>
<p>
Hopefully the URL for the print page is a predictable variant of the
normal article URL. For example, an article URL like:
</p>
<pre>
http://www.newssite.com/article-8636.html
</pre>
<p>
...might have a print version at:
</p>
<pre>
http://www.newssite.com/print/article-8636.html
</pre>
<p>
To change the URL used to retrieve the page, use the "URL Pattern" and
"URL Replace" fields. The pattern is a regular expression matching
part of the URL to replace. In this case, you might use a pattern of
"/article" and a replace string of "/print/article". A common pattern
is simply a dollar sign ("$"), used to add the replace string to the end of the URL.
</p>
<h3>Background Processing</h3>
<p>
Note that retrieving and processing the articles can take some time,
so it's done in the background. Incoming articles will be marked as
invisible while they're in the process of being downloaded. If a URL
fails, the plugin will keep trying at progressively longer intervals
for up to a month, in case the website is temporarily overloaded or
the network is down.
</p>
{{if $allow_images}}
<h3>Retrieving Images</h3>
<p>
Retriever can also optionally download images and store them in the
local Friendica instance. Just check the "Download Images" box. You
can also download images in every item from your network, whether it's
an RSS feed or not. Go to the "Settings" page and
click <a href="{{$config}}">"Plugin settings"</a>. Then check the "All
Photos" box in the "Retriever Settings" section and click "Submit".
</p>
{{/if}}
<h2>Configure Feeds:</h2>
<div>
{{foreach $feeds as $feed}}
<div class="contact-entry-wrapper" id="contact-entry-wrapper-{{$feed.id}}">
<a href="{{$feed.url}}" title="{{$feed.img_hover}}">
<div class="contact-entry-photo-wrapper">
<div class="contact-entry-photo mframe" id="contact-entry-photo-{{$feed.id}}">
<img src="{{$feed.thumb}}" {{$feed.sparkle}} alt="{{$feed.name}}"/>
</div>
</div>
<div class="contact-entry-desc">
<div class="contact-entry-name" id="contact-entry-name-{{$feed.id}}">
{{$feed.name}}
</div>
</div>
</a>
</div>
{{/foreach}}
</div>

View file

@ -0,0 +1,154 @@
<div class="settings-block">
<script language="javascript">
/**
 * Appends a new, empty clause row to a clause table: one text input each for
 * element, attribute and value, plus a button that removes the row again.
 *
 * Rows are numbered after the last row currently in the table.  When every
 * row has been removed, numbering starts again at 0.
 *
 * @param id  Id of the tbody; also the prefix of the row ids and input names.
 */
function retriever_add_row(id)
{
    var tbody = document.getElementById(id);
    var last = tbody.rows[tbody.rows.length - 1];
    // Row ids look like "<id>-<number>"; without any row left there is nothing to continue from
    var count = last ? (+last.id.replace(id + '-', '')) + 1 : 0;

    var row = document.createElement('tr');
    row.id = id + '-' + count;

    // The three text inputs only differ in the suffix of their name
    ['element', 'attribute', 'value'].forEach(function (field) {
        var cell = document.createElement('td');
        var input = document.createElement('input');
        input.name = row.id + '-' + field;
        cell.appendChild(input);
        row.appendChild(cell);
    });

    var remcell = document.createElement('td');
    var butrem = document.createElement('input');
    butrem.id = row.id + '-rem';
    butrem.type = 'button';
    butrem.onclick = function(){retriever_remove_row(id, count)};
    butrem.value = '{{$remove_t}}';
    remcell.appendChild(butrem);
    row.appendChild(remcell);

    tbody.appendChild(row);
}
/**
 * Removes one clause row from its table body.
 *
 * @param id      Id of the tbody that holds the row.
 * @param number  Number of the row; the row's own id is "<id>-<number>".
 */
function retriever_remove_row(id, number)
{
    var container = document.getElementById(id);
    var target = document.getElementById([id, number].join('-'));
    container.removeChild(target);
}
/**
 * Shows the wrappers of the "URL Pattern" and "URL Replace" fields while the
 * "Modify URL" checkbox is ticked and hides them otherwise.
 */
function retriever_toggle_url_block()
{
    ["#id_retriever_pattern", "#id_retriever_replace"].forEach(function (selector) {
        var wrapper = document.querySelector(selector).parentNode;
        var enabled = document.querySelector("#id_retriever_modurl").checked;
        wrapper.style.display = enabled ? "block" : "none";
    });
}
/**
 * Shows the wrapper of the "Cookie Data" field while the "Store cookies"
 * checkbox is ticked and hides it otherwise.
 */
function retriever_toggle_cookiedata_block()
{
    var wrapper = document.querySelector("#id_retriever_cookiedata").parentNode;
    var enabled = document.querySelector("#id_retriever_storecookies").checked;
    wrapper.style.display = enabled ? "block" : "none";
}
// Once the document is loaded, bring both optional sections in line with their
// checkbox and re-run the matching toggle whenever that checkbox changes.
document.addEventListener('DOMContentLoaded', function() {
    [
        ["#id_retriever_modurl", retriever_toggle_url_block],
        ["#id_retriever_storecookies", retriever_toggle_cookiedata_block]
    ].forEach(function (binding) {
        var toggle = binding[1];
        toggle();
        document.querySelector(binding[0]).addEventListener('change', toggle, false);
    });
}, false);
</script>
<h2>{{$title}}</h2>
<p><a href="{{$help}}">{{$help_t}}</a></p>
<form method="post">
<input type="hidden" name="id" value="{{$id}}">
{{include file="field_checkbox.tpl" field=$enable}}
<h3>{{$include_t}}:</h3>
<div>
<table>
<thead>
<tr><th>{{$tag_t}}</th><th>{{$attribute_t}}</th><th>{{$value_t}}</th></tr>
</thead>
<tbody id="retriever-include">
{{if $include}}
{{foreach $include as $k=>$m}}
<tr id="retriever-include-{{$k}}">
<td><input name="retriever-include-{{$k}}-element" value="{{$m.element}}"></td>
<td><input name="retriever-include-{{$k}}-attribute" value="{{$m.attribute}}"></td>
<td><input name="retriever-include-{{$k}}-value" value="{{$m.value}}"></td>
<td><input id="retrieve-include-{{$k}}-rem" type="button" onclick="retriever_remove_row('retriever-include', {{$k}})" value="{{$remove_t}}"></td>
</tr>
{{/foreach}}
{{else}}
<tr id="retriever-include-0">
<td><input name="retriever-include-0-element"></td>
<td><input name="retriever-include-0-attribute"></td>
<td><input name="retriever-include-0-value"></td>
<td><input id="retrieve-include-0-rem" type="button" onclick="retriever_remove_row('retriever-include', 0)" value="{{$remove_t}}"></td>
</tr>
{{/if}}
</tbody>
</table>
<input type="button" onclick="retriever_add_row('retriever-include')" value="{{$add_t}}">
</div>
<h3>{{$exclude_t}}:</h3>
<div>
<table>
<thead>
<tr><th>{{$tag_t}}</th><th>{{$attribute_t}}</th><th>{{$value_t}}</th></tr>
</thead>
<tbody id="retriever-exclude">
{{if $exclude}}
{{foreach $exclude as $k=>$r}}
<tr id="retriever-exclude-{{$k}}">
<td><input name="retriever-exclude-{{$k}}-element" value="{{$r.element}}"></td>
<td><input name="retriever-exclude-{{$k}}-attribute" value="{{$r.attribute}}"></td>
<td><input name="retriever-exclude-{{$k}}-value" value="{{$r.value}}"></td>
<td><input id="retrieve-exclude-{{$k}}-rem" type="button" onclick="retriever_remove_row('retriever-exclude', {{$k}})" value="{{$remove_t}}"></td>
</tr>
{{/foreach}}
{{else}}
<tr id="retriever-exclude-0">
<td><input name="retriever-exclude-0-element"></td>
<td><input name="retriever-exclude-0-attribute"></td>
<td><input name="retriever-exclude-0-value"></td>
<td><input id="retrieve-exclude-0-rem" type="button" onclick="retriever_remove_row('retriever-exclude', 0)" value="{{$remove_t}}"></td>
</tr>
{{/if}}
</tbody>
</table>
<input type="button" onclick="retriever_add_row('retriever-exclude')" value="{{$add_t}}">
</div>
{{include file="field_checkbox.tpl" field=$modurl}}
{{include file="field_input.tpl" field=$pattern}}
{{include file="field_input.tpl" field=$replace}}
{{if $allow_images}}
{{include file="field_checkbox.tpl" field=$images}}
{{/if}}
{{include file="field_textarea.tpl" field=$customxslt}}
{{include file="field_checkbox.tpl" field=$storecookies}}
{{include file="field_textarea.tpl" field=$cookiedata}}
{{include file="field_input.tpl" field=$retrospective}}
<input type="submit" size="70" value="{{$submit_t}}">
</form>
</div>

View file

@ -0,0 +1,16 @@
<span id="settings_retriever_inflated" class="settings-block fakelink" style="display: block;" onclick="openClose('settings_retriever_expanded'); openClose('settings_retriever_inflated');">
<h3>{{$title}}</h3>
</span>
<div id="settings_retriever_expanded" class="settings-block" style="display: none;">
<span class="fakelink" onclick="openClose('settings_retriever_expanded'); openClose('settings_retriever_inflated');">
<h3>{{$title}}</h3>
</span>
<p>
<a href="{{$help}}">Get Help</a>
</p>
{{if $allow_images}}
{{include file="field_checkbox.tpl" field=$allphotos}}
{{/if}}
{{include file="field_checkbox.tpl" field=$oembed}}
<input type="submit" value="{{$submit}}">
</div>

View file

@ -1363,7 +1363,7 @@ function twitter_fetchtimeline(int $uid): void
Logger::info('Posting mirror post', ['twitter-id' => $post->id_str, 'uid' => $uid]); Logger::info('Posting mirror post', ['twitter-id' => $post->id_str, 'uid' => $uid]);
Post\Delayed::add($mirrorpost['extid'], $mirrorpost, Worker::PRIORITY_MEDIUM, Post\Delayed::PREPARED); Post\Delayed::add($mirrorpost['extid'], $mirrorpost, Worker::PRIORITY_MEDIUM, Post\Delayed::UNPREPARED);
} }
} }
DI::pConfig()->set($uid, 'twitter', 'lastid', $lastid); DI::pConfig()->set($uid, 'twitter', 'lastid', $lastid);