Compare commits

...

94 commits

Author SHA1 Message Date
Matthew Exon c646c4785f log uid but ignore results 2023-05-27 15:51:03 +02:00
Matthew Exon cdaa2bf421 remove duplicate use directive 2023-05-27 15:51:03 +02:00
Matthew Exon 3f672c2f17 fix contact photo menu callback really 2023-05-27 15:51:03 +02:00
Matthew Exon 41963cdbd9 fix contact photo menu callback 2023-05-27 15:51:03 +02:00
Matthew Exon c6bdf2c0fb replace local_user 2023-05-27 15:51:03 +02:00
Michael 15659608d0 The priority is now a class constant 2023-05-27 15:51:03 +02:00
Matthew Exon 237fbd8271 Add missing use statement 2023-05-27 15:51:03 +02:00
Matthew Exon 1df3a190b4 add types to parameters 2023-05-27 15:51:03 +02:00
Matthew Exon 590bbeddce fix order of upgrade commands 2023-05-27 15:51:03 +02:00
Matthew Exon 672e2affe2 add log lines to install 2023-05-27 15:51:03 +02:00
Matthew Exon 83649f7784 Fix length of keys 2023-05-27 15:51:03 +02:00
Matthew Exon c3d2ee3ef2 Use new hook registration calls 2023-05-27 15:51:03 +02:00
Matthew Exon bf3668fcac Update to correct collation mode 2023-05-27 15:51:03 +02:00
Matthew Exon fa742f4015 Use separate album and repair dox for ces 2023-05-27 15:51:03 +02:00
Matthew Exon 4e0e3e83fe fix comment 2023-05-27 15:51:03 +02:00
Matthew Exon 6b8bc583f0 correct use of fetchFull 2023-05-27 15:51:03 +02:00
Matthew Exon 09ae53d804 fix argv stuff 2023-05-27 15:51:02 +02:00
Matthew Exon 55d6d5aec2 fix argv stuff 2023-05-27 15:51:02 +02:00
Matthew Exon 71e9d28d48 use new temppath function 2023-05-27 15:51:02 +02:00
Matthew Exon 56519cf49a fix sql syntax 2023-05-27 15:51:02 +02:00
Matthew Exon f582ba89bd improvements 2023-05-27 15:51:02 +02:00
Matthew Exon af23ecf94f syntax errors 2023-05-27 15:51:02 +02:00
Matthew Exon 5a7baf109e syntax errors 2023-05-27 15:51:02 +02:00
Matthew Exon 25b77bc318 syntax errors 2023-05-27 15:51:02 +02:00
Matthew Exon 0412e84a4a syntax errors 2023-05-27 15:51:02 +02:00
Matthew Exon cd4c4f21bc this is more correcter 2023-05-27 15:51:02 +02:00
Matthew Exon 7a9e5c5766 this is more correct 2023-05-27 15:51:02 +02:00
Matthew Exon d5fbfd2b47 another migrated function 2023-05-27 15:51:02 +02:00
Matthew Exon b58ed5cd50 add anotehr check 2023-05-27 15:51:02 +02:00
Matthew Exon 4b777745ad also update these queries 2023-05-27 15:51:02 +02:00
Matthew Exon 74fe4edacd stray line 2023-05-27 15:51:02 +02:00
Matthew Exon 84ae02e9da perhaps it should be this style 2023-05-27 15:51:02 +02:00
Matthew Exon 7f936a4c00 attempt to handle one error 2023-05-27 15:51:02 +02:00
Matthew Exon 1dfdd64a63 new style of http request 2023-05-27 15:51:02 +02:00
Matthew Exon f27c942c5e switch to new way of executing SQL 2023-05-27 15:51:02 +02:00
Matthew Exon 5bf4e741b1 switch to new way of executing SQL 2023-05-27 15:51:02 +02:00
Matthew Exon 9e82ec5d07 switch to new way of executing SQL 2023-05-27 15:51:02 +02:00
Matthew Exon 11413a9a71 sync with submitted 2023-05-27 15:51:02 +02:00
Matthew Exon 837e36fd1c error checking in retriever 2023-05-27 15:51:02 +02:00
Matthew Exon f14ce5cf2c fix another stupid mistake 2023-05-27 15:51:02 +02:00
Matthew Exon 089caf8953 fix another stupid mistake 2023-05-27 15:51:02 +02:00
Matthew Exon 514c054789 Detect an error in mailstream 2023-05-27 15:51:02 +02:00
Matthew Exon 9639865f5d fixed another obvious mistake 2023-05-27 15:51:02 +02:00
Matthew Exon c16659f1ed Fix a typo 2023-05-27 15:51:02 +02:00
Matthew Exon e312ab04b9 another check for empty results 2023-05-27 15:51:02 +02:00
Matthew Exon b3d5e6b286 Adapt Item methods to Post methods 2023-05-27 15:51:02 +02:00
Matthew Exon c8ad0a5120 Remove binary field from httpRequest 2023-05-27 15:51:02 +02:00
Matthew Exon 87e24e1e17 Replace fetchUrlFull with HTTPRequest version 2023-05-27 15:51:02 +02:00
Matthew Exon ec04215f98 Remove unneeded get_app 2023-05-27 15:51:02 +02:00
Matthew Exon f7cfb2613c Fix page assembly 2023-05-27 15:51:02 +02:00
Matthew Exon c6ff8e577b Update with base url changes and strict key requirements 2023-05-27 15:51:02 +02:00
Matthew Exon d50a503823 Further updates to 2020.03 2023-05-27 15:51:02 +02:00
Matthew Exon 012952e86f Use new L10n thing 2023-05-27 15:51:02 +02:00
Matthew Exon f0f64f001e Update to new module structure 2023-05-27 15:51:02 +02:00
Matthew Exon f8af9c77f9 maybe this way works better 2023-05-27 15:51:02 +02:00
Matthew Exon f7eb26d9b4 New way of doing baseurl 2023-05-27 15:51:02 +02:00
Matthew Exon 62b2e5c40e Missing class 2023-05-27 15:51:02 +02:00
Matthew Exon 641c01bb90 Update for new version 2023-05-27 15:51:02 +02:00
Matthew Exon 4d5a68c6a4 Fix bug in phototrack 2023-05-27 15:51:02 +02:00
Matthew Exon 3859a41118 remove help section if images not allowed 2023-05-27 15:51:02 +02:00
Matthew Exon 4be5839afe Almost finished, maybe not working 2023-05-27 15:51:02 +02:00
Matthew Exon 78ae25d054 working much better 2023-05-27 15:51:02 +02:00
Matthew Exon 79928b6aee I think this works 2023-05-27 15:51:02 +02:00
Matthew Exon 7ca21e944f small addition 2023-05-27 15:51:01 +02:00
Matthew Exon 154c8b3d94 small cleanup 2023-05-27 15:51:01 +02:00
Matthew Exon deb608aba2 working much better 2023-05-27 15:51:01 +02:00
Matthew Exon 2a7c3f0227 maybe broken again 2023-05-27 15:51:01 +02:00
Matthew Exon 21a2221785 Now retriever works again 2023-05-27 15:51:01 +02:00
Matthew Exon 3234c9e370 extensive refactoring 2023-05-27 15:51:01 +02:00
Matthew Exon a6ef544b00 retriever tweaks 2023-05-27 15:51:01 +02:00
Matthew Exon bd23d0c600 Add phototrack and publicise 2023-05-27 15:51:01 +02:00
Matthew Exon c4dfeb72cd configurable number of requests 2023-05-27 15:51:01 +02:00
Matthew Exon b1f809d95e update version number 2023-05-27 15:51:01 +02:00
Matthew Exon 24d610ffaa Stuff in retriever 2023-05-27 15:51:01 +02:00
Matthew Exon e5b8cd23f6 fixed image regex 2023-05-27 15:51:01 +02:00
Matthew Exon dff09edf5a more dba stuff 2023-05-27 15:51:01 +02:00
Matthew Exon 0f04231bbe fakerei2 2023-05-27 15:51:01 +02:00
Matthew Exon 86ded88814 Fix bugs in retriever retrospective stuff 2023-05-27 15:51:01 +02:00
Matthew Exon b82ba57d14 more retriever stuff 2023-05-27 15:51:01 +02:00
Administrator 3ffdb0e9ec Fix retriever database problems 2023-05-27 15:51:01 +02:00
Matthew Exon 67470bda94 retriever stuff 2023-05-27 15:51:01 +02:00
Matthew Exon a0cc15b536 Change logging functions 2023-05-27 15:51:01 +02:00
Matthew Exon 5ca66335d8 Improvement 2023-05-27 15:51:01 +02:00
Administrator d80120e0be this is working OK 2023-05-27 15:51:01 +02:00
Matthew Exon 9cb20a1d58 fixed a bug and commented on another 2023-05-27 15:51:01 +02:00
Matthew Exon 8dcfaa87b8 fix 2023-05-27 15:51:01 +02:00
Matthew Exon c35e659cee tentative database work 2023-05-27 15:51:01 +02:00
Matthew Exon 98c8001be1 More preparation for persistent cookies 2023-05-27 15:51:01 +02:00
Matthew Exon b11d820fb5 beginnings of persistent cookiejar support 2023-05-27 15:51:01 +02:00
Matthew Exon 3842a1292f now working retriever 2023-05-27 15:51:01 +02:00
Matthew Exon 4088d22ab4 more fixes 2023-05-27 15:51:01 +02:00
Matthew Exon b45f659321 more fixes 2023-05-27 15:51:01 +02:00
Matthew Exon 00fb460a2a Fixes for retriever 2023-05-27 15:51:01 +02:00
Matthew Exon a3a61cdf60 Latest version of retriever 2023-05-27 15:51:01 +02:00
15 changed files with 1844 additions and 4 deletions

View file

@ -180,5 +180,5 @@ function ifttt_message($uid, $item)
$link = hash('ripemd128', $item['msg']);
}
Post\Delayed::add($link, $post, Worker::PRIORITY_MEDIUM, Post\Delayed::PREPARED);
Post\Delayed::add($link, $post, Worker::PRIORITY_MEDIUM, Post\Delayed::UNPREPARED);
}

View file

@ -143,10 +143,11 @@ function mailstream_send_hook(array $data)
function mailstream_post_hook(array &$item)
{
mailstream_check_version();
Logger::debug('@@@ mailstream_post_hook', ['item-uid' => $item['uid']]);
if (!DI::pConfig()->get($item['uid'], 'mailstream', 'enabled')) {
Logger::debug('mailstream: not enabled.', ['item' => $item['id'], ' uid ' => $item['uid']]);
return;
Logger::debug('mailstream: not enabled for item ' . $item['id'] . ' uid ' . $item['uid']);
// return;
}
if (!$item['uid']) {
Logger::debug('mailstream: no uid for item ' . $item['id']);

23
phototrack/database.sql Normal file
View file

@ -0,0 +1,23 @@
CREATE TABLE IF NOT EXISTS `phototrack_photo_use` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`resource-id` char(64) NOT NULL,
`table` char(64) NOT NULL,
`field` char(64) NOT NULL,
`row-id` int(11) NOT NULL,
`checked` timestamp NOT NULL DEFAULT now(),
PRIMARY KEY (`id`),
INDEX `resource-id` (`resource-id`),
INDEX `row` (`table`,`field`,`row-id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin;
CREATE TABLE IF NOT EXISTS `phototrack_row_check` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`table` char(64) NOT NULL,
`row-id` int(11) NOT NULL,
`checked` timestamp NOT NULL DEFAULT now(),
PRIMARY KEY (`id`),
INDEX `row` (`table`,`row-id`),
INDEX `checked` (`checked`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin;
SELECT TRUE

274
phototrack/phototrack.php Normal file
View file

@ -0,0 +1,274 @@
<?php
/**
* Name: Photo Track
* Description: Track which photos are actually being used and delete any others
* Version: 1.0
* Author: Matthew Exon <http://mat.exon.name>
*/
/*
* List of tables and the fields that are checked:
*
* contact: photo thumb micro about
* fcontact: photo
* fsuggest: photo
* gcontact: photo about
* item: body
* item-content: body
* mail: from-photo
* notify: photo
* profile: photo thumb about
*/
use Friendica\Core\Addon;
use Friendica\Core\Logger;
use Friendica\Object\Image;
use Friendica\Database\DBA;
use Friendica\Util\Images;
use Friendica\Util\DateTimeFormat;
use Friendica\DI;
if (!defined('PHOTOTRACK_DEFAULT_BATCH_SIZE')) {
define('PHOTOTRACK_DEFAULT_BATCH_SIZE', 1000);
}
// Time in *minutes* between searching for photo uses
if (!defined('PHOTOTRACK_DEFAULT_SEARCH_INTERVAL')) {
define('PHOTOTRACK_DEFAULT_SEARCH_INTERVAL', 10);
}
function phototrack_install() {
global $db;
Addon::registerHook('post_local_end', 'addon/phototrack/phototrack.php', 'phototrack_post_local_end');
Addon::registerHook('post_remote_end', 'addon/phototrack/phototrack.php', 'phototrack_post_remote_end');
Addon::registerHook('notifier_end', 'addon/phototrack/phototrack.php', 'phototrack_notifier_end');
Addon::registerHook('cron', 'addon/phototrack/phototrack.php', 'phototrack_cron');
if (DI::config()->get('phototrack', 'dbversion') != '0.1') {
$schema = file_get_contents(dirname(__file__).'/database.sql');
$arr = explode(';', $schema);
foreach ($arr as $a) {
if (!DBA::e($a)) {
Logger::warning('Unable to create database table: ' . DBA::errorMessage());
return;
}
}
DI::config()->set('phototrack', 'dbversion', '0.1');
}
}
function phototrack_uninstall() {
Addon::unregisterHook('post_local_end', 'addon/phototrack/phototrack.php', 'phototrack_post_local_end');
Addon::unregisterHook('post_remote_end', 'addon/phototrack/phototrack.php', 'phototrack_post_remote_end');
Addon::unregisterHook('notifier_end', 'addon/phototrack/phototrack.php', 'phototrack_notifier_end');
Addon::unregisterHook('cron', 'addon/phototrack/phototrack.php', 'phototrack_cron');
}
function phototrack_module() {}
function phototrack_finished_row($table, $id) {
$existing = DBA::selectFirst('phototrack_row_check', ['id'], ['table' => $table, 'row-id' => $id]);
if (!is_bool($existing)) {
DBA::update('phototrack_row_check', ['checked' => DateTimeFormat::utcNow()], ['table' => $table, 'row-id' => $id]);
}
else {
DBA::insert('phototrack_row_check', ['table' => $table, 'row-id' => $id, 'checked' => DateTimeFormat::utcNow()]);
}
}
function phototrack_photo_use($photo, $table, $field, $id) {
Logger::debug('@@@ phototrack_photo_use ' . $photo);
foreach (Images::supportedTypes() as $m => $e) {
$photo = str_replace(".$e", '', $photo);
}
if (substr($photo, -2, 1) == '-') {
$resolution = intval(substr($photo,-1,1));
$photo = substr($photo,0,-2);
}
if (strlen($photo) != 32) {
return;
}
$r = DBA::selectFirst('photo', ['resource-id'], ['resource-id' => $photo]);
if (!DBA::isResult($r)) {
return;
}
$rid = $r['resource-id'];
$existing = DBA::selectFirst('phototrack_photo_use', ['id'], ['resource-id' => $rid, 'table' => $table, 'field' => $field, 'row-id' => $id]);
if (DBA::isResult($existing)) {
DBA::update('phototrack_photo_use', ['checked' => DateTimeFormat::utcNow()], ['resource-id' => $rid, 'table' => $table, 'field' => $field, 'row-id' => $id]);
}
else {
DBA::insert('phototrack_photo_use', ['resource-id' => $rid, 'table' => $table, 'field' => $field, 'row-id' => $id, 'checked' => DateTimeFormat::utcNow()]);
}
}
function phototrack_check_field_url($a, $table, $field, $id, $url) {
Logger::info('@@@ phototrack_check_field_url table ' . $table . ' field ' . $field . ' id ' . $id . ' url ' . $url);
$baseurl = DI::baseUrl()->get(true);
if (strpos($url, $baseurl) === FALSE) {
return;
}
else {
$url = substr($url, strlen($baseurl));
Logger::info('@@@ phototrack_check_field_url funny url stuff ' . $url . ' base ' . $baseurl);
}
if (strpos($url, '/photo/') === FALSE) {
return;
}
else {
$url = substr($url, strlen('/photo/'));
Logger::info('@@@ phototrack_check_field_url more url stuff ' . $url);
}
if (preg_match('/([0-9a-z]{32})/', $url, $matches)) {
$rid = $matches[0];
Logger::info('@@@ phototrack_check_field_url rid ' . $rid);
phototrack_photo_use($rid, $table, $field, $id);
}
}
function phototrack_check_field_bbcode($a, $table, $field, $id, $value) {
$baseurl = DI::baseUrl()->get(true);
$matches = array();
preg_match_all("/\[img(\=([0-9]*)x([0-9]*))?\](.*?)\[\/img\]/ism", $value, $matches);
foreach ($matches[4] as $url) {
phototrack_check_field_url($a, $table, $field, $id, $url);
}
}
function phototrack_post_local_end(&$a, &$item) {
phototrack_check_row($a, 'item', $item);
phototrack_check_row($a, 'item-content', $item);
}
function phototrack_post_remote_end(&$a, &$item) {
phototrack_check_row($a, 'item', $item);
phototrack_check_row($a, 'item-content', $item);
}
function phototrack_notifier_end($item) {
}
function phototrack_check_row($a, $table, $row) {
switch ($table) {
case 'item':
$fields = array(
'body' => 'bbcode');
break;
case 'item-content':
$fields = array(
'body' => 'bbcode');
break;
case 'contact':
$fields = array(
'photo' => 'url',
'thumb' => 'url',
'micro' => 'url',
'about' => 'bbcode');
break;
case 'fcontact':
$fields = array(
'photo' => 'url');
break;
case 'fsuggest':
$fields = array(
'photo' => 'url');
break;
case 'gcontact':
$fields = array(
'photo' => 'url',
'about' => 'bbcode');
break;
default: $fields = array(); break;
}
foreach ($fields as $field => $type) {
switch ($type) {
case 'bbcode': phototrack_check_field_bbcode($a, $table, $field, $row['id'], $row[$field]); break;
case 'url': phototrack_check_field_url($a, $table, $field, $row['id'], $row[$field]); break;
}
}
phototrack_finished_row($table, $row['id']);
}
function phototrack_batch_size() {
$batch_size = DI::config()->get('phototrack', 'batch_size');
if ($batch_size > 0) {
return $batch_size;
}
return PHOTOTRACK_DEFAULT_BATCH_SIZE;
}
function phototrack_search_table($a, $table) {
$batch_size = phototrack_batch_size();
$rows = DBA::p("SELECT `$table`.* FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) ) ORDER BY phototrack_row_check.checked LIMIT $batch_size");
if (DBA::isResult($rows)) {
while ($row = DBA::fetch($rows)) {
phototrack_check_row($a, $table, $row);
}
}
$r = DBA::p("SELECT COUNT(*) FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) )");
Logger::info("@@@ phototrack_search_table " . print_r(DBA::fetch($r)));
$remaining = DBA::fetch($r)['count'];
Logger::info('phototrack: searched ' . DBA::numRows($rows) . ' rows in table ' . $table . ', ' . $remaining . ' still remaining to search');
return $remaining;
}
function phototrack_cron_time() {
$prev_remaining = DI::config()->get('phototrack', 'remaining_items');
if ($prev_remaining > 10 * phototrack_batch_size()) {
Logger::debug('phototrack: more than ' . (10 * phototrack_batch_size()) . ' items remaining');
return true;
}
$last = DI::config()->get('phototrack', 'last_search');
$search_interval = intval(DI::config()->get('phototrack', 'search_interval'));
if (!$search_interval) {
$search_interval = PHOTOTRACK_DEFAULT_SEARCH_INTERVAL;
}
if ($last) {
$next = $last + ($search_interval * 60);
if ($next > time()) {
Logger::debug('phototrack: search interval not reached');
return false;
}
}
return true;
}
function phototrack_cron($a, $b) {
if (!phototrack_cron_time()) {
return;
}
DI::config()->set('phototrack', 'last_search', time());
$remaining = 0;
$remaining += phototrack_search_table($a, 'item');
$remaining += phototrack_search_table($a, 'item-content');
$remaining += phototrack_search_table($a, 'contact');
$remaining += phototrack_search_table($a, 'fcontact');
$remaining += phototrack_search_table($a, 'fsuggest');
$remaining += phototrack_search_table($a, 'gcontact');
DI::config()->set('phototrack', 'remaining_items', $remaining);
if ($remaining === 0) {
phototrack_tidy();
}
}
function phototrack_tidy() {
$batch_size = phototrack_batch_size();
DBA::e('CREATE TABLE IF NOT EXISTS `phototrack-temp` (`resource-id` char(255) not null)');
DBA::e('INSERT INTO `phototrack-temp` SELECT DISTINCT(`resource-id`) FROM photo WHERE photo.`created` < DATE_SUB(NOW(), INTERVAL 2 MONTH)');
$rows = DBA::p('SELECT `phototrack-temp`.`resource-id` FROM `phototrack-temp` LEFT OUTER JOIN phototrack_photo_use ON (`phototrack-temp`.`resource-id` = phototrack_photo_use.`resource-id`) WHERE phototrack_photo_use.id IS NULL limit ' . /*$batch_size*/1000);
if (DBA::isResult($rows)) {
foreach ($rows as $row) {
Logger::debug('phototrack: remove photo ' . $row['resource-id']);
DBA::e('DELETE FROM photo WHERE `resource-id` = "' . $row['resource-id'] . '"');
}
Logger::info('phototrack_tidy: deleted ' . DBA::numRows($rows) . ' photos');
}
DBA::e('DROP TABLE `phototrack-temp`');
$rows = DBA::p('SELECT id FROM phototrack_photo_use WHERE checked < DATE_SUB(NOW(), INTERVAL 14 DAY)');
foreach ($rows as $row) {
DBA::e( 'DELETE FROM phototrack_photo_use WHERE id = ' . $row['id']);
}
Logger::info('phototrack_tidy: deleted ' . DBA::numRows($rows) . ' phototrack_photo_use rows');
}

11
publicise/publicise.php Normal file
View file

@ -0,0 +1,11 @@
"SELECT `uid` FROM `contact` WHERE `id` = %d AND `reason` = 'publicise'", intval($item['contact-id']));
if (!$r1) {
return;
}
Logger::debug('Publicise: moving to wall: ' . $item['uid'] . ' ' . $item['contact-id'] . ' ' . $item['uri']);
$item['type'] = 'wall';
$item['wall'] = 1;
$item['private'] = 0;
}

View file

@ -0,0 +1,39 @@
{{*
* AUTOMATICALLY GENERATED TEMPLATE
* DO NOT EDIT THIS FILE, CHANGES WILL BE OVERWRITTEN
*
*}}
<form method="post">
<table>
<thead>
<tr>
<th>{{$feed_t}}</th>
<th>{{$publicised_t}}</th>
<th>{{$comments_t}}</th>
<th>{{$expire_t}}</th>
</tr>
</thead>
<tbody>
{{foreach $feeds as $f}}
<tr>
<td>
<a href="{{$f.url}}">
<img style="vertical-align:middle" src='{{$f.micro}}'>
<span style="margin-left:1em">{{$f.name}}</span>
</a>
</td>
<td>
{{include file="field_yesno.tpl" field=$f.enabled}}
</td>
<td>
{{include file="field_yesno.tpl" field=$f.comments}}
</td>
<td>
<input name="publicise-expire-{{$f.id}}" value="{{$f.expire}}">
</td>
</tr>
{{/foreach}}
</tbody>
</table>
<input type="submit" size="70" value="{{$submit_t}}">
</form>

42
retriever/database.sql Normal file
View file

@ -0,0 +1,42 @@
CREATE TABLE IF NOT EXISTS `retriever_rule` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`uid` int(11) NOT NULL,
`contact-id` int(11) NOT NULL,
`data` mediumtext NULL DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `uid` (`uid`),
KEY `contact-id` (`contact-id`)
) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
CREATE TABLE IF NOT EXISTS `retriever_item` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`item-uri` varbinary(255) NOT NULL,
`item-uid` int(10) unsigned NOT NULL DEFAULT '0',
`contact-id` int(10) unsigned NOT NULL DEFAULT '0',
`resource` int(11) NOT NULL,
`finished` tinyint(1) unsigned NOT NULL DEFAULT '0',
KEY `resource` (`resource`),
KEY `finished` (`finished`),
KEY `item-uid` (`item-uid`),
KEY `all` (`item-uri`, `item-uid`, `contact-id`),
PRIMARY KEY (`id`)
) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
CREATE TABLE IF NOT EXISTS `retriever_resource` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`item-uid` int(10) unsigned NOT NULL DEFAULT '0',
`contact-id` int(10) unsigned NOT NULL DEFAULT '0',
`type` char(255) NULL DEFAULT NULL,
`binary` int(1) NOT NULL DEFAULT 0,
`url` varbinary(700) NOT NULL,
`created` timestamp NOT NULL DEFAULT now(),
`completed` timestamp NULL DEFAULT NULL,
`last-try` timestamp NULL DEFAULT NULL,
`num-tries` int(11) NOT NULL DEFAULT 0,
`data` mediumblob NULL DEFAULT NULL,
`http-code` smallint(1) unsigned NULL DEFAULT NULL,
`redirect-url` varbinary(700) NOT NULL,
KEY `url` (`url`),
KEY `completed` (`completed`),
PRIMARY KEY (`id`)
) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;

1058
retriever/retriever.php Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,9 @@
{{*
* AUTOMATICALLY GENERATED TEMPLATE
* DO NOT EDIT THIS FILE, CHANGES WILL BE OVERWRITTEN
*
*}}
{{include file="field_input.tpl" field=$downloads_per_cron}}
{{include file="field_checkbox.tpl" field=$allow_images}}
<div class="submit"><input type="submit" name="page_site" value="{{$submit}}"></div>

View file

@ -0,0 +1,24 @@
<?xml version="1.0" encoding="utf-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output method="html" indent="yes" version="4.0"/>
<xsl:template match="text()"/>
{{function clause_xpath}}{{if !$clause.attribute}}{{$clause.element}}{{elseif $clause.attribute == 'class'}}{{$clause.element}}[contains(concat(' ', normalize-space(@class), ' '), '{{$clause.value}}')]{{else}}{{$clause.element}}[@{{$clause.attribute}}='{{$clause.value}}']{{/if}}{{/function}}
{{foreach $spec.include as $clause}}
<xsl:template match="{{clause_xpath clause=$clause}}">
<xsl:copy>
<xsl:apply-templates select="node()|@*" mode="remove"/>
</xsl:copy>
</xsl:template>{{/foreach}}
{{foreach $spec.exclude as $clause}}
<xsl:template match="{{clause_xpath clause=$clause}}" mode="remove"/>{{/foreach}}
<xsl:template match="node()|@*" mode="remove">
<xsl:copy>
<xsl:apply-templates select="node()|@*" mode="remove"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>

View file

@ -0,0 +1,26 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- attempt to replace relative URLs with absolute URLs -->
<!-- http://stackoverflow.com/questions/3824631/replace-href-value-in-anchor-tags-of-html-using-xslt -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output method="html" indent="yes" version="4.0"/>
<xsl:template match="node()|@*">
<xsl:copy>
<xsl:apply-templates select="node()|@*"/>
</xsl:copy>
</xsl:template>
<xsl:template match="*/@src[starts-with(.,'.')]">
<xsl:attribute name="src">
<xsl:value-of select="concat('{{$dirurl}}',.)"/>
</xsl:attribute>
</xsl:template>
<xsl:template match="*/@src[starts-with(.,'/')]">
<xsl:attribute name="src">
<xsl:value-of select="concat('{{$rooturl}}',.)"/>
</xsl:attribute>
</xsl:template>
</xsl:stylesheet>

View file

@ -0,0 +1,163 @@
<h2>Retriever Plugin Help</h2>
<p>
This plugin replaces the short excerpts you normally get in RSS feeds
with the full content of the article from the source website. You
specify which part of the page you're interested in with a set of
rules. When each item arrives, the plugin downloads the full page
from the website, extracts content using the rules, and replaces the
original article.
</p>
<p>
There's a few reasons you may want to do this. The source website
might be slow or overloaded. The source website might be
untrustworthy, in which case using Friendica to scrub the HTML is a
good idea. You might be on a LAN that blacklists certain websites.
It also works neatly with the mailstream plugin, allowing you to read
a news stream comfortably without needing continuous Internet
connectivity.
</p>
<p>
However, setting up retriever can be quite tricky since it depends on
the internal design of the website. That was designed to make life
easy for the website's developers, not for you. You'll need to have
some familiarity with HTML, and be willing to adapt when the website
suddenly changes everything without notice.
</p>
<h3>Configuring Retriever for a feed</h3>
<p>
To set up retriever for an RSS feed, go to the "Contacts" page and
find your feed. Then click on the drop-down menu on the contact.
Select "Retriever" to get to the retriever configuration.
</p>
<p>
The "Include" configuration section specifies parts of the page to
include in the article. Each row has three components:
</p>
<ul>
<li>An HTML tag (e.g. "div", "span", "p")</li>
<li>An attribute (usually "class" or "id")</li>
<li>A value for the attribute</li>
</ul>
<p>
A simple case is when the article is wrapped in a "div" element:
</p>
<pre>
...
&lt;div class="ArticleWrapper"&gt;
&lt;h2&gt;Man Bites Dog&lt;/h2&gt;
&lt;img src="mbd.jpg"&gt;
&lt;p&gt;
Residents of the sleepy community of Nowheresville were
shocked yesterday by the sight of creepy local weirdo Jim
McOddman assaulting innocent local dog Snufflekins with his
false teeth.
&lt;/p&gt;
...
&lt;/div&gt;
...
</pre>
<p>
You then specify the tag "div", attribute "class", and value
"ArticleWrapper". Everything else in the page, such as navigation
panels and menus and footers and so on, will be discarded. If there
is more than one section of the page you want to include, specify each
one on a separate row. If the matching section contains some sections
you want to remove, specify those in the "Exclude" section in the same
way.
</p>
<p>
Once you've got a configuration that you think will work, you can try
it out on some existing articles. Type a number into the
"Retrospectively Apply" box and click "Submit". After a while
(exactly how long depends on your system's cron configuration) the new
articles should be available.
</p>
<h3>Techniques</h3>
<p>
You can leave the attribute and value blank to include all the
corresponding elements with the specified tag name. You can also use
a tag name of just an asterisk ("*"), which will match any element type with the
specified attribute regardless of the tag.
</p>
<p>
Note that the "class" attribute is a special case. Many web page
templates will put multiple different classes in the same element,
separated by spaces. If you specify an attribute of "class" it will
match an element if any of its classes matches the specified value.
For example:
</p>
<pre>
&lt;div class="article breaking-news"&gt;
</pre>
<p>
In this case you can specify a value of "article", or "breaking-news".
You can also specify "article breaking-news", but that won't match if
the website suddenly changes to "breaking-news article", so that's not
recommended.
</p>
<p>
One useful trick you can try is using the website's "print" pages.
Many news sites have print versions of all their articles. These are
usually drastically simplified compared to the live website page.
Sometimes this is a good way to get the whole article when it's
normally split across multiple pages.
</p>
<p>
Hopefully the URL for the print page is a predictable variant of the
normal article URL. For example, an article URL like:
</p>
<pre>
http://www.newssite.com/article-8636.html
</pre>
<p>
...might have a print version at:
</p>
<pre>
http://www.newssite.com/print/article-8636.html
</pre>
<p>
To change the URL used to retrieve the page, use the "URL Pattern" and
"URL Replace" fields. The pattern is a regular expression matching
part of the URL to replace. In this case, you might use a pattern of
"/article" and a replace string of "/print/article". A common pattern
is simply a dollar sign ("$"), used to add the replace string to the end of the URL.
</p>
<h3>Background Processing</h3>
<p>
Note that retrieving and processing the articles can take some time,
so it's done in the background. Incoming articles will be marked as
invisible while they're in the process of being downloaded. If a URL
fails, the plugin will keep trying at progressively longer intervals
for up to a month, in case the website is temporarily overloaded or
the network is down.
</p>
{{if $allow_images}}
<h3>Retrieving Images</h3>
<p>
Retriever can also optionally download images and store them in the
local Friendica instance. Just check the "Download Images" box. You
can also download images in every item from your network, whether it's
an RSS feed or not. Go to the "Settings" page and
click <a href="$config">"Plugin settings"</a>. Then check the "All
Photos" box in the "Retriever Settings" section and click "Submit".
</p>
{{/if}}
<h2>Configure Feeds:</h2>
<div>
{{foreach $feeds as $feed}}
<div class="contact-entry-wrapper" id="contact-entry-wrapper-{{$feed.id}}">
<a href="{{$feed.url}} title="{{$feed.img_hover}}">
<div class="contact-entry-photo-wrapper">
<div class="contact-entry-photo mframe" id="contact-entry-photo-{{$feed.id}}">
<img src="{{$feed.thumb}}" {{$feed.sparkle}} alt="{{$feed.name}}"/>
</div>
</div>
<div class="contact-entry-desc">
<div class="contact-entry-name" id="contact-entry-name-{{$feed.id}}">
{{$feed.name}}
</div>
</div>
</a>
</div>
{{/foreach}}
</div>

View file

@ -0,0 +1,154 @@
<div class="settings-block">
<script language="javascript">
function retriever_add_row(id)
{
var tbody = document.getElementById(id);
var last = tbody.rows[tbody.childElementCount - 1];
var count = +last.id.replace(id + '-', '');
count++;
var row = document.createElement('tr');
row.id = id + '-' + count;
var cell1 = document.createElement('td');
var inptag = document.createElement('input');
inptag.name = row.id + '-element';
cell1.appendChild(inptag);
row.appendChild(cell1);
var cell2 = document.createElement('td');
var inpatt = document.createElement('input');
inpatt.name = row.id + '-attribute';
cell2.appendChild(inpatt);
row.appendChild(cell2);
var cell3 = document.createElement('td');
var inpval = document.createElement('input');
inpval.name = row.id + '-value';
cell3.appendChild(inpval);
row.appendChild(cell3);
var cell4 = document.createElement('td');
var butrem = document.createElement('input');
butrem.id = row.id + '-rem';
butrem.type = 'button';
butrem.onclick = function(){retriever_remove_row(id, count)};
butrem.value = '{{$remove_t}}';
cell4.appendChild(butrem);
row.appendChild(cell4);
tbody.appendChild(row);
}
function retriever_remove_row(id, number)
{
var tbody = document.getElementById(id);
var row = document.getElementById(id + '-' + number);
tbody.removeChild(row);
}
function retriever_toggle_url_block()
{
var pattern = document.querySelector("#id_retriever_pattern").parentNode;
if (document.querySelector("#id_retriever_modurl").checked) {
pattern.style.display = "block";
}
else {
pattern.style.display = "none";
}
var replace = document.querySelector("#id_retriever_replace").parentNode;
if (document.querySelector("#id_retriever_modurl").checked) {
replace.style.display = "block";
}
else {
replace.style.display = "none";
}
}
function retriever_toggle_cookiedata_block()
{
var div = document.querySelector("#id_retriever_cookiedata").parentNode;
if (document.querySelector("#id_retriever_storecookies").checked) {
div.style.display = "block";
}
else {
div.style.display = "none";
}
}
document.addEventListener('DOMContentLoaded', function() {
retriever_toggle_url_block();
document.querySelector("#id_retriever_modurl").addEventListener('change', retriever_toggle_url_block, false);
retriever_toggle_cookiedata_block();
document.querySelector("#id_retriever_storecookies").addEventListener('change', retriever_toggle_cookiedata_block, false);
}, false);
</script>
<h2>{{$title}}</h2>
<p><a href="{{$help}}">{{$help_t}}</a></p>
<form method="post">
<input type="hidden" name="id" value="{{$id}}">
{{include file="field_checkbox.tpl" field=$enable}}
<h3>{{$include_t}}:</h3>
<div>
<table>
<thead>
<tr><th>{{$tag_t}}</th><th>{{$attribute_t}}</th><th>{{$value_t}}</th></tr>
</thead>
<tbody id="retriever-include">
{{if $include}}
{{foreach $include as $k=>$m}}
<tr id="retriever-include-{{$k}}">
<td><input name="retriever-include-{{$k}}-element" value="{{$m.element}}"></td>
<td><input name="retriever-include-{{$k}}-attribute" value="{{$m.attribute}}"></td>
<td><input name="retriever-include-{{$k}}-value" value="{{$m.value}}"></td>
<td><input id="retrieve-include-{{$k}}-rem" type="button" onclick="retriever_remove_row('retriever-include', {{$k}})" value="{{$remove_t}}"></td>
</tr>
{{/foreach}}
{{else}}
<tr id="retriever-include-0">
<td><input name="retriever-include-0-element"></td>
<td><input name="retriever-include-0-attribute"></td>
<td><input name="retriever-include-0-value"></td>
<td><input id="retrieve-include-0-rem" type="button" onclick="retriever_remove_row('retriever-include', 0)" value="{{$remove_t}}"></td>
</tr>
{{/if}}
</tbody>
</table>
<input type="button" onclick="retriever_add_row('retriever-include')" value="{{$add_t}}">
</div>
<h3>{{$exclude_t}}:</h3>
<div>
<table>
<thead>
<tr><th>{{$tag_t}}</th><th>{{$attribute_t}}</th><th>{{$value_t}}</th></tr>
</thead>
<tbody id="retriever-exclude">
{{if $exclude}}
{{foreach $exclude as $k=>$r}}
<tr id="retriever-exclude-{{$k}}">
<td><input name="retriever-exclude-{{$k}}-element" value="{{$r.element}}"></td>
<td><input name="retriever-exclude-{{$k}}-attribute" value="{{$r.attribute}}"></td>
<td><input name="retriever-exclude-{{$k}}-value" value="{{$r.value}}"></td>
<td><input id="retrieve-exclude-{{$k}}-rem" type="button" onclick="retriever_remove_row('retriever-exclude', {{$k}})" value="{{$remove_t}}"></td>
</tr>
{{/foreach}}
{{else}}
<tr id="retriever-exclude-0">
<td><input name="retriever-exclude-0-element"></td>
<td><input name="retriever-exclude-0-attribute"></td>
<td><input name="retriever-exclude-0-value"></td>
<td><input id="retrieve-exclude-0-rem" type="button" onclick="retriever_remove_row('retriever-exclude', 0)" value="{{$remove_t}}"></td>
</tr>
{{/if}}
</tbody>
</table>
<input type="button" onclick="retriever_add_row('retriever-exclude')" value="{{$add_t}}">
</div>
{{include file="field_checkbox.tpl" field=$modurl}}
{{include file="field_input.tpl" field=$pattern}}
{{include file="field_input.tpl" field=$replace}}
{{if $allow_images}}
{{include file="field_checkbox.tpl" field=$images}}
{{/if}}
{{include file="field_textarea.tpl" field=$customxslt}}
{{include file="field_checkbox.tpl" field=$storecookies}}
{{include file="field_textarea.tpl" field=$cookiedata}}
{{include file="field_input.tpl" field=$retrospective}}
<input type="submit" size="70" value="{{$submit_t}}">
</form>
</div>

View file

@ -0,0 +1,16 @@
<span id="settings_retriever_inflated" class="settings-block fakelink" style="display: block;" onclick="openClose('settings_retriever_expanded'); openClose('settings_retriever_inflated');">
<h3>{{$title}}</h3>
</span>
<div id="settings_retriever_expanded" class="settings-block" style="display: none;">
<span class="fakelink" onclick="openClose('settings_retriever_expanded'); openClose('settings_retriever_inflated');">
<h3>{{$title}}</h3>
</span>
<p>
<a href="{{$help}}">Get Help</a>
</p>
{{if $allow_images}}
{{include file="field_checkbox.tpl" field=$allphotos}}
{{/if}}
{{include file="field_checkbox.tpl" field=$oembed}}
<input type="submit" value="{{$submit}}">
</div>

View file

@ -1368,7 +1368,7 @@ function twitter_fetchtimeline(int $uid): void
Logger::info('Posting mirror post', ['twitter-id' => $post->id_str, 'uid' => $uid]);
Post\Delayed::add($mirrorpost['extid'], $mirrorpost, Worker::PRIORITY_MEDIUM, Post\Delayed::PREPARED);
Post\Delayed::add($mirrorpost['extid'], $mirrorpost, Worker::PRIORITY_MEDIUM, Post\Delayed::UNPREPARED);
}
}
DI::pConfig()->set($uid, 'twitter', 'lastid', $lastid);