diff --git a/ifttt/ifttt.php b/ifttt/ifttt.php index 6d4f402c..faaea2ca 100644 --- a/ifttt/ifttt.php +++ b/ifttt/ifttt.php @@ -180,5 +180,5 @@ function ifttt_message($uid, $item) $link = hash('ripemd128', $item['msg']); } - Post\Delayed::add($link, $post, Worker::PRIORITY_MEDIUM, Post\Delayed::PREPARED); + Post\Delayed::add($link, $post, Worker::PRIORITY_MEDIUM, Post\Delayed::UNPREPARED); } diff --git a/mailstream/mailstream.php b/mailstream/mailstream.php index 542a1a42..5ece12a1 100644 --- a/mailstream/mailstream.php +++ b/mailstream/mailstream.php @@ -66,10 +66,9 @@ function mailstream_module() {} /** * Adds an item in "addon features" in the admin menu of the site * - * @param App $a App object (unused) * @param string $o HTML form data */ -function mailstream_addon_admin(App $a, string &$o) +function mailstream_addon_admin(string &$o) { $frommail = DI::config()->get('mailstream', 'frommail'); $template = Renderer::getMarkupTemplate('admin.tpl', 'addon/mailstream/'); @@ -103,14 +102,14 @@ function mailstream_addon_admin_post() */ function mailstream_generate_id(string $uri): string { - $host = DI::baseUrl()->getHostname(); + $host = DI::baseUrl()->getHost(); $resource = hash('md5', $uri); $message_id = "<" . $resource . "@" . $host . ">"; Logger::debug('mailstream: Generated message ID ' . $message_id . ' for URI ' . $uri); return $message_id; } -function mailstream_send_hook(App $a, array $data) +function mailstream_send_hook(array $data) { $criteria = array('uid' => $data['uid'], 'contact-id' => $data['contact-id'], 'uri' => $data['uri']); $item = Post::selectFirst([], $criteria); @@ -138,17 +137,17 @@ function mailstream_send_hook(App $a, array $data) * mailstream is enabled and the necessary data is available, forks a * workerqueue item to send the email. 
* - * @param App $a App object (unused) * @param array $item content of the item (may or may not already be stored in the item table) * @return void */ -function mailstream_post_hook(App $a, array &$item) +function mailstream_post_hook(array &$item) { mailstream_check_version(); + Logger::debug('@@@ mailstream_post_hook', ['item-uid' => $item['uid']]); if (!DI::pConfig()->get($item['uid'], 'mailstream', 'enabled')) { - Logger::debug('mailstream: not enabled.', ['item' => $item['id'], ' uid ' => $item['uid']]); - return; + Logger::debug('mailstream: not enabled for item ' . $item['id'] . ' uid ' . $item['uid']); + // return; } if (!$item['uid']) { Logger::debug('mailstream: no uid for item ' . $item['id']); @@ -414,7 +413,7 @@ function mailstream_send(string $message_id, array $item, array $user): bool $template = Renderer::getMarkupTemplate('mail.tpl', 'addon/mailstream/'); $mail->AltBody = BBCode::toPlaintext($item['body']); $item['body'] = BBCode::convertForUriId($item['uri-id'], $item['body'], BBCode::CONNECTORS); - $item['url'] = DI::baseUrl()->get() . '/display/' . $item['guid']; + $item['url'] = DI::baseUrl() . '/display/' . 
$item['guid']; $mail->Body = Renderer::replaceMacros($template, [ '$upstream' => DI::l10n()->t('Upstream'), '$uri' => DI::l10n()->t('URI'), @@ -468,7 +467,7 @@ function mailstream_convert_table_entries() 'message_id' => $ms_item_id['message-id'], 'tries' => 0); if (!$ms_item_id['message-id'] || !strlen($ms_item_id['message-id'])) { - Logger::info('mailstream_convert_table_entries: item has no message-id.', 'item' => $ms_item_id['id'], 'uri' => $ms_item_id['uri']]); + Logger::info('mailstream_convert_table_entries: item has no message-id.', ['item' => $ms_item_id['id'], 'uri' => $ms_item_id['uri']]); continue; } Logger::info('mailstream_convert_table_entries: convert item to workerqueue', $send_hook_data); @@ -480,11 +479,10 @@ function mailstream_convert_table_entries() /** * Form for configuring mailstream features for a user * - * @param App $a App object * @param array $data Hook data array * @throws \Friendica\Network\HTTPException\ServiceUnavailableException */ -function mailstream_addon_settings(App &$a, array &$data) +function mailstream_addon_settings(array &$data) { $enabled = DI::pConfig()->get(DI::userSession()->getLocalUserId(), 'mailstream', 'enabled'); $address = DI::pConfig()->get(DI::userSession()->getLocalUserId(), 'mailstream', 'address'); @@ -528,11 +526,10 @@ function mailstream_addon_settings(App &$a, array &$data) /** * Process data submitted to user's mailstream features form - * @param App $a * @param array $post POST data * @return void */ -function mailstream_addon_settings_post(App $a, array $post) +function mailstream_addon_settings_post(array $post) { if (!DI::userSession()->getLocalUserId() || empty($post['mailstream-submit'])) { return; diff --git a/phototrack/database.sql b/phototrack/database.sql new file mode 100644 index 00000000..f1b58f6b --- /dev/null +++ b/phototrack/database.sql @@ -0,0 +1,23 @@ +CREATE TABLE IF NOT EXISTS `phototrack_photo_use` ( + `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `resource-id` char(64) NOT 
NULL, + `table` char(64) NOT NULL, + `field` char(64) NOT NULL, + `row-id` int(11) NOT NULL, + `checked` timestamp NOT NULL DEFAULT now(), + PRIMARY KEY (`id`), + INDEX `resource-id` (`resource-id`), + INDEX `row` (`table`,`field`,`row-id`) +) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; + +CREATE TABLE IF NOT EXISTS `phototrack_row_check` ( + `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `table` char(64) NOT NULL, + `row-id` int(11) NOT NULL, + `checked` timestamp NOT NULL DEFAULT now(), + PRIMARY KEY (`id`), + INDEX `row` (`table`,`row-id`), + INDEX `checked` (`checked`) +) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; + +SELECT TRUE diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php new file mode 100644 index 00000000..0ede2a1c --- /dev/null +++ b/phototrack/phototrack.php @@ -0,0 +1,274 @@ + + */ + +/* + * List of tables and the fields that are checked: + * + * contact: photo thumb micro about + * fcontact: photo + * fsuggest: photo + * gcontact: photo about + * item: body + * item-content: body + * mail: from-photo + * notify: photo + * profile: photo thumb about + */ + +use Friendica\Core\Addon; +use Friendica\Core\Logger; +use Friendica\Object\Image; +use Friendica\Database\DBA; +use Friendica\Util\Images; +use Friendica\Util\DateTimeFormat; +use Friendica\DI; + +if (!defined('PHOTOTRACK_DEFAULT_BATCH_SIZE')) { + define('PHOTOTRACK_DEFAULT_BATCH_SIZE', 1000); +} +// Time in *minutes* between searching for photo uses +if (!defined('PHOTOTRACK_DEFAULT_SEARCH_INTERVAL')) { + define('PHOTOTRACK_DEFAULT_SEARCH_INTERVAL', 10); +} + +function phototrack_install() { + global $db; + + Addon::registerHook('post_local_end', 'addon/phototrack/phototrack.php', 'phototrack_post_local_end'); + Addon::registerHook('post_remote_end', 'addon/phototrack/phototrack.php', 'phototrack_post_remote_end'); + Addon::registerHook('notifier_end', 'addon/phototrack/phototrack.php', 'phototrack_notifier_end'); + Addon::registerHook('cron', 
'addon/phototrack/phototrack.php', 'phototrack_cron'); + + if (DI::config()->get('phototrack', 'dbversion') != '0.1') { + $schema = file_get_contents(dirname(__file__).'/database.sql'); + $arr = explode(';', $schema); + foreach ($arr as $a) { + if (!DBA::e($a)) { + Logger::warning('Unable to create database table: ' . DBA::errorMessage()); + return; + } + } + DI::config()->set('phototrack', 'dbversion', '0.1'); + } +} + +function phototrack_uninstall() { + Addon::unregisterHook('post_local_end', 'addon/phototrack/phototrack.php', 'phototrack_post_local_end'); + Addon::unregisterHook('post_remote_end', 'addon/phototrack/phototrack.php', 'phototrack_post_remote_end'); + Addon::unregisterHook('notifier_end', 'addon/phototrack/phototrack.php', 'phototrack_notifier_end'); + Addon::unregisterHook('cron', 'addon/phototrack/phototrack.php', 'phototrack_cron'); +} + +function phototrack_module() {} + +function phototrack_finished_row($table, $id) { + $existing = DBA::selectFirst('phototrack_row_check', ['id'], ['table' => $table, 'row-id' => $id]); + if (!is_bool($existing)) { + DBA::update('phototrack_row_check', ['checked' => DateTimeFormat::utcNow()], ['table' => $table, 'row-id' => $id]); + } + else { + DBA::insert('phototrack_row_check', ['table' => $table, 'row-id' => $id, 'checked' => DateTimeFormat::utcNow()]); + } +} + +function phototrack_photo_use($photo, $table, $field, $id) { + Logger::debug('@@@ phototrack_photo_use ' . 
$photo); + foreach (Images::supportedTypes() as $m => $e) { + $photo = str_replace(".$e", '', $photo); + } + if (substr($photo, -2, 1) == '-') { + $resolution = intval(substr($photo,-1,1)); + $photo = substr($photo,0,-2); + } + if (strlen($photo) != 32) { + return; + } + $r = DBA::selectFirst('photo', ['resource-id'], ['resource-id' => $photo]); + if (!DBA::isResult($r)) { + return; + } + $rid = $r['resource-id']; + $existing = DBA::selectFirst('phototrack_photo_use', ['id'], ['resource-id' => $rid, 'table' => $table, 'field' => $field, 'row-id' => $id]); + if (DBA::isResult($existing)) { + DBA::update('phototrack_photo_use', ['checked' => DateTimeFormat::utcNow()], ['resource-id' => $rid, 'table' => $table, 'field' => $field, 'row-id' => $id]); + } + else { + DBA::insert('phototrack_photo_use', ['resource-id' => $rid, 'table' => $table, 'field' => $field, 'row-id' => $id, 'checked' => DateTimeFormat::utcNow()]); + } +} + +function phototrack_check_field_url($a, $table, $field, $id, $url) { + Logger::info('@@@ phototrack_check_field_url table ' . $table . ' field ' . $field . ' id ' . $id . ' url ' . $url); + $baseurl = DI::baseUrl()->get(true); + if (strpos($url, $baseurl) === FALSE) { + return; + } + else { + $url = substr($url, strlen($baseurl)); + Logger::info('@@@ phototrack_check_field_url funny url stuff ' . $url . ' base ' . $baseurl); + } + if (strpos($url, '/photo/') === FALSE) { + return; + } + else { + $url = substr($url, strlen('/photo/')); + Logger::info('@@@ phototrack_check_field_url more url stuff ' . $url); + } + if (preg_match('/([0-9a-z]{32})/', $url, $matches)) { + $rid = $matches[0]; + Logger::info('@@@ phototrack_check_field_url rid ' . 
$rid); + phototrack_photo_use($rid, $table, $field, $id); + } +} + +function phototrack_check_field_bbcode($a, $table, $field, $id, $value) { + $baseurl = DI::baseUrl()->get(true); + $matches = array(); + preg_match_all("/\[img(\=([0-9]*)x([0-9]*))?\](.*?)\[\/img\]/ism", $value, $matches); + foreach ($matches[4] as $url) { + phototrack_check_field_url($a, $table, $field, $id, $url); + } +} + +function phototrack_post_local_end(&$a, &$item) { + phototrack_check_row($a, 'item', $item); + phototrack_check_row($a, 'item-content', $item); +} + +function phototrack_post_remote_end(&$a, &$item) { + phototrack_check_row($a, 'item', $item); + phototrack_check_row($a, 'item-content', $item); +} + +function phototrack_notifier_end($item) { +} + +function phototrack_check_row($a, $table, $row) { + switch ($table) { + case 'item': + $fields = array( + 'body' => 'bbcode'); + break; + case 'item-content': + $fields = array( + 'body' => 'bbcode'); + break; + case 'contact': + $fields = array( + 'photo' => 'url', + 'thumb' => 'url', + 'micro' => 'url', + 'about' => 'bbcode'); + break; + case 'fcontact': + $fields = array( + 'photo' => 'url'); + break; + case 'fsuggest': + $fields = array( + 'photo' => 'url'); + break; + case 'gcontact': + $fields = array( + 'photo' => 'url', + 'about' => 'bbcode'); + break; + default: $fields = array(); break; + } + foreach ($fields as $field => $type) { + switch ($type) { + case 'bbcode': phototrack_check_field_bbcode($a, $table, $field, $row['id'], $row[$field]); break; + case 'url': phototrack_check_field_url($a, $table, $field, $row['id'], $row[$field]); break; + } + } + phototrack_finished_row($table, $row['id']); +} + +function phototrack_batch_size() { + $batch_size = DI::config()->get('phototrack', 'batch_size'); + if ($batch_size > 0) { + return $batch_size; + } + return PHOTOTRACK_DEFAULT_BATCH_SIZE; +} + +function phototrack_search_table($a, $table) { + $batch_size = phototrack_batch_size(); + $rows = DBA::p("SELECT `$table`.* FROM 
`$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) ) ORDER BY phototrack_row_check.checked LIMIT $batch_size"); + if (DBA::isResult($rows)) { + while ($row = DBA::fetch($rows)) { + phototrack_check_row($a, $table, $row); + } + } + $r = DBA::p("SELECT COUNT(*) FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) )"); + Logger::info("@@@ phototrack_search_table " . print_r(DBA::fetch($r))); + $remaining = DBA::fetch($r)['count']; + Logger::info('phototrack: searched ' . DBA::numRows($rows) . ' rows in table ' . $table . ', ' . $remaining . ' still remaining to search'); + return $remaining; +} + +function phototrack_cron_time() { + $prev_remaining = DI::config()->get('phototrack', 'remaining_items'); + if ($prev_remaining > 10 * phototrack_batch_size()) { + Logger::debug('phototrack: more than ' . (10 * phototrack_batch_size()) . 
' items remaining'); + return true; + } + $last = DI::config()->get('phototrack', 'last_search'); + $search_interval = intval(DI::config()->get('phototrack', 'search_interval')); + if (!$search_interval) { + $search_interval = PHOTOTRACK_DEFAULT_SEARCH_INTERVAL; + } + if ($last) { + $next = $last + ($search_interval * 60); + if ($next > time()) { + Logger::debug('phototrack: search interval not reached'); + return false; + } + } + return true; +} + +function phototrack_cron($a, $b) { + if (!phototrack_cron_time()) { + return; + } + DI::config()->set('phototrack', 'last_search', time()); + + $remaining = 0; + $remaining += phototrack_search_table($a, 'item'); + $remaining += phototrack_search_table($a, 'item-content'); + $remaining += phototrack_search_table($a, 'contact'); + $remaining += phototrack_search_table($a, 'fcontact'); + $remaining += phototrack_search_table($a, 'fsuggest'); + $remaining += phototrack_search_table($a, 'gcontact'); + + DI::config()->set('phototrack', 'remaining_items', $remaining); + if ($remaining === 0) { + phototrack_tidy(); + } +} + +function phototrack_tidy() { + $batch_size = phototrack_batch_size(); + DBA::e('CREATE TABLE IF NOT EXISTS `phototrack-temp` (`resource-id` char(255) not null)'); + DBA::e('INSERT INTO `phototrack-temp` SELECT DISTINCT(`resource-id`) FROM photo WHERE photo.`created` < DATE_SUB(NOW(), INTERVAL 2 MONTH)'); + $rows = DBA::p('SELECT `phototrack-temp`.`resource-id` FROM `phototrack-temp` LEFT OUTER JOIN phototrack_photo_use ON (`phototrack-temp`.`resource-id` = phototrack_photo_use.`resource-id`) WHERE phototrack_photo_use.id IS NULL limit ' . /*$batch_size*/1000); + if (DBA::isResult($rows)) { + foreach ($rows as $row) { + Logger::debug('phototrack: remove photo ' . $row['resource-id']); + DBA::e('DELETE FROM photo WHERE `resource-id` = "' . $row['resource-id'] . '"'); + } + Logger::info('phototrack_tidy: deleted ' . DBA::numRows($rows) . 
' photos'); + } + DBA::e('DROP TABLE `phototrack-temp`'); + $rows = DBA::p('SELECT id FROM phototrack_photo_use WHERE checked < DATE_SUB(NOW(), INTERVAL 14 DAY)'); + foreach ($rows as $row) { + DBA::e( 'DELETE FROM phototrack_photo_use WHERE id = ' . $row['id']); + } + Logger::info('phototrack_tidy: deleted ' . DBA::numRows($rows) . ' phototrack_photo_use rows'); +} diff --git a/publicise/publicise.php b/publicise/publicise.php new file mode 100644 index 00000000..d2dbca24 --- /dev/null +++ b/publicise/publicise.php @@ -0,0 +1,11 @@ +"SELECT `uid` FROM `contact` WHERE `id` = %d AND `reason` = 'publicise'", intval($item['contact-id'])); + if (!$r1) { + return; + } + + Logger::debug('Publicise: moving to wall: ' . $item['uid'] . ' ' . $item['contact-id'] . ' ' . $item['uri']); + $item['type'] = 'wall'; + $item['wall'] = 1; + $item['private'] = 0; +} + diff --git a/publicise/templates/admin.tpl b/publicise/templates/admin.tpl new file mode 100644 index 00000000..b10c3546 --- /dev/null +++ b/publicise/templates/admin.tpl @@ -0,0 +1,39 @@ +{{* + * AUTOMATICALLY GENERATED TEMPLATE + * DO NOT EDIT THIS FILE, CHANGES WILL BE OVERWRITTEN + * + *}} +
diff --git a/retriever/database.sql b/retriever/database.sql new file mode 100644 index 00000000..2cabf9ef --- /dev/null +++ b/retriever/database.sql @@ -0,0 +1,42 @@ +CREATE TABLE IF NOT EXISTS `retriever_rule` ( + `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `uid` int(11) NOT NULL, + `contact-id` int(11) NOT NULL, + `data` mediumtext NULL DEFAULT NULL, + PRIMARY KEY (`id`), + KEY `uid` (`uid`), + KEY `contact-id` (`contact-id`) +) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; + +CREATE TABLE IF NOT EXISTS `retriever_item` ( + `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `item-uri` varbinary(255) NOT NULL, + `item-uid` int(10) unsigned NOT NULL DEFAULT '0', + `contact-id` int(10) unsigned NOT NULL DEFAULT '0', + `resource` int(11) NOT NULL, + `finished` tinyint(1) unsigned NOT NULL DEFAULT '0', + KEY `resource` (`resource`), + KEY `finished` (`finished`), + KEY `item-uid` (`item-uid`), + KEY `all` (`item-uri`, `item-uid`, `contact-id`), + PRIMARY KEY (`id`) +) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; + +CREATE TABLE IF NOT EXISTS `retriever_resource` ( + `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `item-uid` int(10) unsigned NOT NULL DEFAULT '0', + `contact-id` int(10) unsigned NOT NULL DEFAULT '0', + `type` char(255) NULL DEFAULT NULL, + `binary` int(1) NOT NULL DEFAULT 0, + `url` varbinary(700) NOT NULL, + `created` timestamp NOT NULL DEFAULT now(), + `completed` timestamp NULL DEFAULT NULL, + `last-try` timestamp NULL DEFAULT NULL, + `num-tries` int(11) NOT NULL DEFAULT 0, + `data` mediumblob NULL DEFAULT NULL, + `http-code` smallint(1) unsigned NULL DEFAULT NULL, + `redirect-url` varbinary(700) NOT NULL, + KEY `url` (`url`), + KEY `completed` (`completed`), + PRIMARY KEY (`id`) +) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; diff --git a/retriever/retriever.php b/retriever/retriever.php new file mode 100644 index 00000000..9370271c --- /dev/null +++ b/retriever/retriever.php @@ -0,0 +1,1058 @@ + + */ + +use Friendica\App; 
+use Friendica\Core\Addon;
+use Friendica\Core\Hook;
+use Friendica\Core\Logger;
+use Friendica\Core\Renderer;
+use Friendica\Core\Session;
+use Friendica\Core\System;
+use Friendica\Content\Text\HTML;
+use Friendica\Content\Text\BBCode;
+use Friendica\Model\Photo;
+use Friendica\Object\Image;
+use Friendica\Util\Network;
+use Friendica\Database\DBA;
+use Friendica\Model\ItemURI;
+use Friendica\Model\Item;
+use Friendica\Model\Post;
+use Friendica\Util\DateTimeFormat;
+use Friendica\DI;
+
+/**
+ * @brief Installation hook for retriever plugin
+ *
+ * Registers the addon's hooks and then brings the database up to date:
+ * a schema recorded as version 0.14 is migrated in place to 0.15, and any
+ * other state is (re)created from database.sql. On any database failure a
+ * warning is logged and the stored version is left untouched so that the
+ * step is retried on the next install.
+ *
+ * @return void
+ */
+function retriever_install() {
+    Logger::debug('Install retriever');
+
+    Hook::register('addon_settings', 'addon/retriever/retriever.php', 'retriever_addon_settings');
+    Hook::register('addon_settings_post', 'addon/retriever/retriever.php', 'retriever_addon_settings_post');
+    Hook::register('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook');
+    Hook::register('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu');
+    Hook::register('cron', 'addon/retriever/retriever.php', 'retriever_cron');
+
+    if (DI::config()->get('retriever', 'dbversion') == '0.14') {
+        // All six statements belong to the migration; the || chain stops at
+        // the first one that fails. Previously the last statement sat inside
+        // the failure branch and was never executed on the success path.
+        if (!DBA::e("ALTER TABLE `retriever_rule` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci") ||
+            !DBA::e("ALTER TABLE `retriever_item` MODIFY `item-uri` varbinary(255) NOT NULL") ||
+            !DBA::e("ALTER TABLE `retriever_item` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci") ||
+            !DBA::e("ALTER TABLE `retriever_resource` MODIFY `url` varbinary(700) NOT NULL") ||
+            !DBA::e("ALTER TABLE `retriever_resource` MODIFY `redirect-url` varbinary(700) NOT NULL") ||
+            !DBA::e("ALTER TABLE `retriever_resource` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci")) {
+            Logger::warning('Unable to update database tables: ' . DBA::errorMessage());
+            return;
+        }
+        DI::config()->set('retriever', 'dbversion', '0.15');
+    }
+    if (DI::config()->get('retriever', 'dbversion') != '0.15') {
+        $schema = file_get_contents(__DIR__ . '/database.sql');
+        if ($schema === false) {
+            Logger::warning('Unable to create database table: cannot read database.sql');
+            return;
+        }
+        foreach (explode(';', $schema) as $statement) {
+            // The text after the final ';' is only whitespace; do not send
+            // an empty query to the database.
+            if (trim($statement) === '') {
+                continue;
+            }
+            if (!DBA::e($statement)) {
+                Logger::warning('Unable to create database table: ' . DBA::errorMessage());
+                return;
+            }
+        }
+        DI::config()->set('retriever', 'downloads_per_cron', '100');
+        // database.sql already describes the migrated (varbinary / utf8mb4)
+        // layout, so a fresh install is at 0.15 and needs no migration pass.
+        DI::config()->set('retriever', 'dbversion', '0.15');
+    }
+}
+
+/**
+ * @brief Uninstallation hook for retriever plugin
+ *
+ * Unregisters every hook that retriever_install() registered. Database
+ * tables and configuration values are left in place.
+ *
+ * @return void
+ */
+function retriever_uninstall() {
+    Logger::debug('Uninstall retriever');
+
+    Hook::unregister('addon_settings', 'addon/retriever/retriever.php', 'retriever_addon_settings');
+    Hook::unregister('addon_settings_post', 'addon/retriever/retriever.php', 'retriever_addon_settings_post');
+    Hook::unregister('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook');
+    Hook::unregister('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu');
+    Hook::unregister('cron', 'addon/retriever/retriever.php', 'retriever_cron');
+}
+
+/**
+ * This is a statement rather than an actual function definition. The simple
+ * existence of this method is checked to figure out if the addon offers a
+ * module.
+ */ +function retriever_module() {} + +/** + * @brief Admin page hook for retriever plugin + * + * @param App $a App object (unused) + * @param string $o HTML to append content to (by ref) + */ +function retriever_addon_admin(App $a, string &$o) { + $template = Renderer::getMarkupTemplate('admin.tpl', 'addon/retriever/'); + + $downloads_per_cron = DI::config()->get('retriever', 'downloads_per_cron'); + $downloads_per_cron_config = ['downloads_per_cron', + DI::l10n()->t('Downloads per Cron'), + $downloads_per_cron, + DI::l10n()->t('Maximum number of downloads to attempt during each run of the cron job.')]; + + $allow_images = DI::config()->get('retriever', 'allow_images'); + $allow_images_config = ['allow_images', + DI::l10n()->t('Allow Retrieving Images'), + $allow_images, + DI::l10n()->t('Allow users to request images be downloaded as well as text.Please log in
"; + return; + } + if (isset(DI::args()->getArgv()[1]) and DI::args()->getArgv()[1] === 'help') { + $feeds = DBA::selectToArray('contact', ['id', 'name', 'thumb'], ['uid' => Session::getLocalUser(), 'network' => 'feed']); + for ($i = 0; $i < count($feeds); ++$i) { + $feeds[$i]['url'] = DI::baseUrl()->get(true) . '/retriever/' . $feeds[$i]['id']; + } + $template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/'); + $a->page['content'] .= Renderer::replaceMacros($template, array( + '$config' => DI::baseUrl()->get(true) . '/settings/addon', + '$allow_images' => DI::config()->get('retriever', 'allow_images'), + '$feeds' => $feeds)); + return; + } + if (isset(DI::args()->getArgv()[1])) { + $arg1 = DI::args()->getArgv()[1]; + $retriever_rule = get_retriever_rule($arg1, Session::getLocalUser(), false); + if (!$retriever_rule) { + $retriever_rule = ['id' => 0, 'data' => ['enable' => 0, 'modurl' => '', 'pattern' => '', 'replace' => '', 'images' => 0, 'storecookies' => 0, 'cookiedata' => '', 'customxslt' => '', 'include' => '', 'exclude' => '']]; + } + + if (!empty($_POST["id"])) { + $retriever_rule = get_retriever_rule($arg1, Session::getLocalUser(), true); + $retriever_rule['data'] = array(); + foreach (array('modurl', 'pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) { + if (empty($_POST['retriever_' . $setting])) { + $retriever_rule['data'][$setting] = NULL; + } + else { + $retriever_rule['data'][$setting] = $_POST['retriever_' . 
$setting]; + } + } + foreach ($_POST as $k=>$v) { + if (preg_match("/retriever-(include|exclude)-(\d+)-(element|attribute|value)/", $k, $matches)) { + $retriever_rule['data'][$matches[1]][intval($matches[2])][$matches[3]] = $v; + } + } + // You've gotta have an element, even if it's just "*" + foreach ($retriever_rule['data']['include'] as $k=>$clause) { + if (!$clause['element']) { + unset($retriever_rule['data']['include'][$k]); + } + } + foreach ($retriever_rule['data']['exclude'] as $k=>$clause) { + if (!$clause['element']) { + unset($retriever_rule['data']['exclude'][$k]); + } + } + DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])], ['data' => '']); + $a->page['content'] .= "Settings Updated"; + if (!empty($_POST["retriever_retrospective"])) { + apply_retrospective($retriever_rule, $_POST["retriever_retrospective"]); + $a->page['content'] .= " and retrospectively applied to " . $_POST["retriever_retrospective"] . " posts"; + } + $a->page['content'] .= ".
"; + } + + $template = Renderer::getMarkupTemplate('/rule-config.tpl', 'addon/retriever/'); + DI::page()['content'] .= Renderer::replaceMacros($template, array( + '$enable' => array( + 'retriever_enable', + DI::l10n()->t('Enabled'), + $retriever_rule['data']['enable']), + '$modurl' => array( + 'retriever_modurl', + DI::l10n()->t('Modify URL'), + $retriever_rule['data']['modurl'], + DI::l10n()->t("Modify each article's URL with regular expressions before retrieving.")), + '$pattern' => array( + 'retriever_pattern', + DI::l10n()->t('URL Pattern'), + $retriever_rule['data']['pattern'], + DI::l10n()->t('Regular expression matching part of the URL to replace')), + '$replace' => array( + 'retriever_replace', + DI::l10n()->t('URL Replace'), + $retriever_rule['data']['replace'], + DI::l10n()->t('Text to replace matching part of above regular expression')), + '$allow_images' => DI::config()->get('retriever', 'allow_images'), + '$images' => array( + 'retriever_images', + DI::l10n()->t('Download Images'), + $retriever_rule['data']['images']), + '$retrospective' => array( + 'retriever_retrospective', + DI::l10n()->t('Retrospectively Apply'), + '0', + DI::l10n()->t('Reapply the rules to this number of posts')), + 'storecookies' => array( + 'retriever_storecookies', + DI::l10n()->t('Store cookies'), + $retriever_rule['data']['storecookies'], + DI::l10n()->t("Preserve cookie data across fetches.")), + '$cookiedata' => array( + 'retriever_cookiedata', + DI::l10n()->t('Cookie Data'), + $retriever_rule['data']['cookiedata'], + DI::l10n()->t("Latest cookie data for this feed. 
Example: [{\"Name\":\"cookie-name\",\"Value\":\"cookie-value\",\"Domain\":\"example.com\",\"Path\":\"\\/path\\/\",\"Max-Age\":null,\"Expires\":1682450014,\"Secure\":true,\"Discard\":false,\"HttpOnly\":true}]")), + '$customxslt' => array( + 'retriever_customxslt', + DI::l10n()->t('Custom XSLT'), + $retriever_rule['data']['customxslt'], + DI::l10n()->t("When standard rules aren't enough, apply custom XSLT to the article")), + '$title' => DI::l10n()->t('Retrieve Feed Content'), + '$help' => DI::baseUrl()->get(true) . '/retriever/help', + '$help_t' => DI::l10n()->t('Get Help'), + '$submit_t' => DI::l10n()->t('Submit'), + '$submit' => DI::l10n()->t('Save Settings'), + '$id' => ($retriever_rule["id"] ? $retriever_rule["id"] : "create"), + '$tag_t' => DI::l10n()->t('Tag'), + '$attribute_t' => DI::l10n()->t('Attribute'), + '$value_t' => DI::l10n()->t('Value'), + '$add_t' => DI::l10n()->t('Add'), + '$remove_t' => DI::l10n()->t('Remove'), + '$include_t' => DI::l10n()->t('Include'), + '$include' => $retriever_rule['data']['include'], + '$exclude_t' => DI::l10n()->t('Exclude'), + '$exclude' => $retriever_rule['data']['exclude'])); + return; + } +} + +/** + * @brief Hook that adds the retriever option to the contact menu + * + * @param App $a The App object (by ref) + * @param array $args Contact menu details to be filled in (by ref) + */ +function retriever_contact_photo_menu(App &$a, array &$args) { + if (!$args) { + return; + } + if ($args["contact"]["network"] == "feed") { + $args["menu"]['retriever'] = array(DI::l10n()->t('Retriever'), DI::baseUrl()->get(true) . '/retriever/' . $args["contact"]['id']); + } +} + +/** + * @brief Hook for processing new incoming items + * + * @param App $a The App object (by ref) + * @param array $item New item, which has not yet been inserted into database (by ref) + */ +function retriever_post_remote_hook(App &$a, array &$item) { + Logger::info('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . 
$item['contact-id']); + + $retriever_rule = get_retriever_rule($item['contact-id'], $item["uid"], false); + if ($retriever_rule) { + retriever_on_item_insert($retriever_rule, $item); + } + else { + if (DI::config()->get($item["uid"], 'retriever', 'oembed')) { + // Convert to HTML and back to take advantage of bbcode's resolution of oembeds. + $body = retriever_get_body($item); + $body = HTML::toBBCode(BBCode::convert($body)); + retriever_set_body($item, $body); + } + if (DI::config()->get($item["uid"], 'retriever', 'all_photos')) { + retrieve_images($item); + } + } + retriever_check_item_completed($item); +} + +/** + * @brief Hook for adding per-user retriever settings to the user's settings page + * + * @param App $a The App object (by ref) + * @param string $s HTML string to which to append settings content (by ref) + */ +function retriever_addon_settings(App &$a, string &$s) { + $all_photos = DI::config()->get(Session::getLocalUser(), 'retriever', 'all_photos'); + $oembed = DI::config()->get(Session::getLocalUser(), 'retriever', 'oembed'); + $template = Renderer::getMarkupTemplate('/settings.tpl', 'addon/retriever/'); + $config = array('$submit' => DI::l10n()->t('Save Settings'), + '$title' => DI::l10n()->t('Retriever Settings'), + '$help' => DI::baseUrl()->get(true) . 
'/retriever/help', + '$allow_images' => DI::config()->get('retriever', 'allow_images')); + $config['$allphotos'] = array('retriever_all_photos', + DI::l10n()->t('All Photos'), + $all_photos, + DI::l10n()->t('Check this to retrieve photos for all posts')); + $config['$oembed'] = array('retriever_oembed', + DI::l10n()->t('Resolve OEmbed'), + $oembed, + DI::l10n()->t('Check this to attempt to retrieve embedded content for all posts')); + $s .= Renderer::replaceMacros($template, $config); +} + +/** + * @brief Hook for processing post results from user's settings page + * + * @param App $a The App object + * @param array $post Posted content + */ +function retriever_addon_settings_post(App $a, array $post) { + if ($post['retriever_all_photos']) { + DI::config()->set(Session::getLocalUser(), 'retriever', 'all_photos', $post['retriever_all_photos']); + } + else { + DI::config()->delete(Session::getLocalUser(), 'retriever', 'all_photos'); + } + if ($post['retriever_oembed']) { + DI::config()->set(Session::getLocalUser(), 'retriever', 'oembed', $post['retriever_oembed']); + } + else { + DI::config()->delete(Session::getLocalUser(), 'retriever', 'oembed'); + } +} diff --git a/retriever/templates/admin.tpl b/retriever/templates/admin.tpl new file mode 100644 index 00000000..71c8506e --- /dev/null +++ b/retriever/templates/admin.tpl @@ -0,0 +1,9 @@ +{{* + * AUTOMATICALLY GENERATED TEMPLATE + * DO NOT EDIT THIS FILE, CHANGES WILL BE OVERWRITTEN + * + *}} +{{include file="field_input.tpl" field=$downloads_per_cron}} +{{include file="field_checkbox.tpl" field=$allow_images}} + + diff --git a/retriever/templates/extract.tpl b/retriever/templates/extract.tpl new file mode 100644 index 00000000..ca67f683 --- /dev/null +++ b/retriever/templates/extract.tpl @@ -0,0 +1,24 @@ + ++This plugin replaces the short excerpts you normally get in RSS feeds +with the full content of the article from the source website. 
You +specify which part of the page you're interested in with a set of +rules. When each item arrives, the plugin downloads the full page +from the website, extracts content using the rules, and replaces the +original article. +
++There are a few reasons you may want to do this. The source website +might be slow or overloaded. The source website might be +untrustworthy, in which case using Friendica to scrub the HTML is a +good idea. You might be on a LAN that blacklists certain websites. +It also works neatly with the mailstream plugin, allowing you to read +a news stream comfortably without needing continuous Internet +connectivity. +
++However, setting up retriever can be quite tricky since it depends on +the internal design of the website. That was designed to make life +easy for the website's developers, not for you. You'll need to have +some familiarity with HTML, and be willing to adapt when the website +suddenly changes everything without notice. +
++To set up retriever for an RSS feed, go to the "Contacts" page and +find your feed. Then click on the drop-down menu on the contact. +Select "Retriever" to get to the retriever configuration. +
++The "Include" configuration section specifies parts of the page to +include in the article. Each row has three components: a tag name, +an attribute name, and an attribute value. +
++A simple case is when the article is wrapped in a "div" element: +
++ ... + <div class="ArticleWrapper"> + <h2>Man Bites Dog</h2> + <img src="mbd.jpg"> + <p> + Residents of the sleepy community of Nowheresville were + shocked yesterday by the sight of creepy local weirdo Jim + McOddman assaulting innocent local dog Snufflekins with his + false teeth. + </p> + ... + </div> + ... ++
+You then specify the tag "div", attribute "class", and value +"ArticleWrapper". Everything else in the page, such as navigation +panels and menus and footers and so on, will be discarded. If there +is more than one section of the page you want to include, specify each +one on a separate row. If the matching section contains some sections +you want to remove, specify those in the "Exclude" section in the same +way. +
++Once you've got a configuration that you think will work, you can try +it out on some existing articles. Type a number into the +"Retrospectively Apply" box and click "Submit". After a while +(exactly how long depends on your system's cron configuration) the new +articles should be available. +
++You can leave the attribute and value blank to include all the +corresponding elements with the specified tag name. You can also use +a tag name of just an asterisk ("*"), which will match any element type with the +specified attribute regardless of the tag. +
++Note that the "class" attribute is a special case. Many web page +templates will put multiple different classes in the same element, +separated by spaces. If you specify an attribute of "class" it will +match an element if any of its classes matches the specified value. +For example: +
++ <div class="article breaking-news"> ++
+In this case you can specify a value of "article", or "breaking-news". +You can also specify "article breaking-news", but that won't match if +the website suddenly changes to "breaking-news article", so that's not +recommended. +
++One useful trick you can try is using the website's "print" pages. +Many news sites have print versions of all their articles. These are +usually drastically simplified compared to the live website page. +Sometimes this is a good way to get the whole article when it's +normally split across multiple pages. +
++Hopefully the URL for the print page is a predictable variant of the +normal article URL. For example, an article URL like: +
++ http://www.newssite.com/article-8636.html ++
+...might have a print version at: +
++ http://www.newssite.com/print/article-8636.html ++
+To change the URL used to retrieve the page, use the "URL Pattern" and +"URL Replace" fields. The pattern is a regular expression matching +part of the URL to replace. In this case, you might use a pattern of +"/article" and a replace string of "/print/article". A common pattern +is simply a dollar sign ("$"), used to add the replace string to the end of the URL. +
++Note that retrieving and processing the articles can take some time, +so it's done in the background. Incoming articles will be marked as +invisible while they're in the process of being downloaded. If a URL +fails, the plugin will keep trying at progressively longer intervals +for up to a month, in case the website is temporarily overloaded or +the network is down. +
+{{if $allow_images}} ++Retriever can also optionally download images and store them in the +local Friendica instance. Just check the "Download Images" box. You +can also download images in every item from your network, whether it's +an RSS feed or not. Go to the "Settings" page and +click "Plugin settings". Then check the "All +Photos" box in the "Retriever Settings" section and click "Save Settings". +
+{{/if}} +