";
$b .= DI::l10n()->t("This website is tracked using the Matomo analytics tool.");
$b .= " ";
- $the_url = "http://".$baseurl ."index.php?module=CoreAdminHome&action=optOut";
+ $the_url = "http://{$baseurl}index.php?module=CoreAdminHome&action=optOut";
$b .= DI::l10n()->t("If you do not want that your visits are logged in this way you can set a cookie to prevent Matomo / Piwik from tracking further visits of the site (opt-out).", $the_url);
$b .= "
";
}
@@ -104,6 +110,7 @@ function piwik_addon_admin (string &$o)
'$siteid' => ['siteid', DI::l10n()->t('Site ID'), DI::config()->get('piwik','siteid' ), ''],
'$optout' => ['optout', DI::l10n()->t('Show opt-out cookie link?'), DI::config()->get('piwik','optout' ), ''],
'$async' => ['async', DI::l10n()->t('Asynchronous tracking'), DI::config()->get('piwik','async' ), ''],
+ '$shortendpoint' => ['shortendpoint', DI::l10n()->t("Shortcut path to the script ('/js/' instead of '/piwik.js')"), DI::config()->get('piwik','shortendpoint' ), ''],
]);
}
@@ -113,4 +120,5 @@ function piwik_addon_admin_post()
DI::config()->set('piwik', 'siteid', trim($_POST['siteid'] ?? ''));
DI::config()->set('piwik', 'optout', trim($_POST['optout'] ?? ''));
DI::config()->set('piwik', 'async', trim($_POST['async'] ?? ''));
+ DI::config()->set('piwik', 'shortendpoint', trim($_POST['shortendpoint'] ?? ''));
}
diff --git a/piwik/templates/admin.tpl b/piwik/templates/admin.tpl
index 2ab1869b..7a9b5d71 100644
--- a/piwik/templates/admin.tpl
+++ b/piwik/templates/admin.tpl
@@ -2,4 +2,5 @@
{{include file="field_input.tpl" field=$siteid}}
{{include file="field_checkbox.tpl" field=$optout}}
{{include file="field_checkbox.tpl" field=$async}}
+{{include file="field_checkbox.tpl" field=$shortendpoint}}
From 13da605435662fe53daf92dc1ef00c0a01784551 Mon Sep 17 00:00:00 2001
From: Tobias Diekershoff
Date: Mon, 1 May 2023 07:40:12 +0200
Subject: [PATCH 006/551] [piwik] regenerated messages.po file
---
piwik/lang/C/messages.po | 22 +++++++++++++---------
1 file changed, 13 insertions(+), 9 deletions(-)
diff --git a/piwik/lang/C/messages.po b/piwik/lang/C/messages.po
index 5a078056..881bd796 100644
--- a/piwik/lang/C/messages.po
+++ b/piwik/lang/C/messages.po
@@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: \n"
"Report-Msgid-Bugs-To: \n"
-"POT-Creation-Date: 2021-02-01 18:15+0100\n"
+"POT-Creation-Date: 2023-05-01 07:39+0200\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME \n"
"Language-Team: LANGUAGE \n"
@@ -17,13 +17,13 @@ msgstr ""
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-#: piwik.php:87
+#: piwik.php:96
msgid ""
"This website is tracked using the Matomo "
"analytics tool."
msgstr ""
-#: piwik.php:90
+#: piwik.php:99
#, php-format
msgid ""
"If you do not want that your visits are logged in this way you (opt-out)."
msgstr ""
-#: piwik.php:97
+#: piwik.php:108
msgid "Save Settings"
msgstr ""
-#: piwik.php:98
+#: piwik.php:109
msgid "Matomo (Piwik) Base URL"
msgstr ""
-#: piwik.php:98
+#: piwik.php:109
msgid ""
"Absolute path to your Matomo (Piwik) installation. (without protocol (http/"
"s), with trailing slash)"
msgstr ""
-#: piwik.php:99
+#: piwik.php:110
msgid "Site ID"
msgstr ""
-#: piwik.php:100
+#: piwik.php:111
msgid "Show opt-out cookie link?"
msgstr ""
-#: piwik.php:101
+#: piwik.php:112
msgid "Asynchronous tracking"
msgstr ""
+
+#: piwik.php:113
+msgid "Shortcut path to the script ('/js/' instead of '/piwik.js')"
+msgstr ""
From ca134e9ed3ff5ad556d602598568bf13fccdb9e6 Mon Sep 17 00:00:00 2001
From: Tobias Diekershoff
Date: Mon, 1 May 2023 10:05:20 +0200
Subject: [PATCH 007/551] [piwik] updated DE translation
---
piwik/lang/de/messages.po | 26 +++++++++++++++-----------
piwik/lang/de/strings.php | 1 +
2 files changed, 16 insertions(+), 11 deletions(-)
diff --git a/piwik/lang/de/messages.po b/piwik/lang/de/messages.po
index 1348b58d..0fdaa7b3 100644
--- a/piwik/lang/de/messages.po
+++ b/piwik/lang/de/messages.po
@@ -7,15 +7,15 @@
# Andreas H., 2014-2015
# Till Mohr , 2021
# Tobias Diekershoff , 2014
-# Tobias Diekershoff , 2019
+# Tobias Diekershoff , 2019,2023
# Ulf Rompe , 2019
msgid ""
msgstr ""
"Project-Id-Version: friendica\n"
"Report-Msgid-Bugs-To: \n"
-"POT-Creation-Date: 2021-02-01 18:15+0100\n"
+"POT-Creation-Date: 2023-05-01 07:39+0200\n"
"PO-Revision-Date: 2014-06-23 11:18+0000\n"
-"Last-Translator: Till Mohr , 2021\n"
+"Last-Translator: Tobias Diekershoff , 2019,2023\n"
"Language-Team: German (http://app.transifex.com/Friendica/friendica/language/de/)\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
@@ -23,13 +23,13 @@ msgstr ""
"Language: de\n"
"Plural-Forms: nplurals=2; plural=(n != 1);\n"
-#: piwik.php:87
+#: piwik.php:96
msgid ""
"This website is tracked using the Matomo"
" analytics tool."
msgstr "Diese Website benutzt Matomo, eine Open-Source-Software zur statistischen Auswertung der Besucherzugriffe."
-#: piwik.php:90
+#: piwik.php:99
#, php-format
msgid ""
"If you do not want that your visits are logged in this way you (opt-out)."
msgstr "Wenn du nicht willst, dass Deine Besuche auf diese Weise gespeichert werden, kannst du ein Cookie setzen. Dann wird Matomo / Piwik dich auf dieser Website nicht mehr verfolgen (opt-out)."
-#: piwik.php:97
+#: piwik.php:108
msgid "Save Settings"
msgstr "Einstellungen speichern"
-#: piwik.php:98
+#: piwik.php:109
msgid "Matomo (Piwik) Base URL"
msgstr "Matomo-Basis-URL (Piwik-Basis-URL)"
-#: piwik.php:98
+#: piwik.php:109
msgid ""
"Absolute path to your Matomo (Piwik) installation. (without protocol "
"(http/s), with trailing slash)"
msgstr "Absoluter Pfad zu deiner Matomo-/Piwik-Installation (ohne \"http://\" oder \"https://\"), mit abschließendem Schrägstrich"
-#: piwik.php:99
+#: piwik.php:110
msgid "Site ID"
msgstr "Seiten-ID"
-#: piwik.php:100
+#: piwik.php:111
msgid "Show opt-out cookie link?"
msgstr "Link zum Setzen des Opt-Out-Cookies anzeigen?"
-#: piwik.php:101
+#: piwik.php:112
msgid "Asynchronous tracking"
msgstr "Asynchrones Tracking"
+
+#: piwik.php:113
+msgid "Shortcut path to the script ('/js/' instead of '/piwik.js')"
+msgstr "Shortcut Pfad zum Script ('/js/' anstelle von '/piwik.js')"
diff --git a/piwik/lang/de/strings.php b/piwik/lang/de/strings.php
index e218d6e3..9e126972 100644
--- a/piwik/lang/de/strings.php
+++ b/piwik/lang/de/strings.php
@@ -13,3 +13,4 @@ $a->strings['Absolute path to your Matomo (Piwik) installation. (without protoco
$a->strings['Site ID'] = 'Seiten-ID';
$a->strings['Show opt-out cookie link?'] = 'Link zum Setzen des Opt-Out-Cookies anzeigen?';
$a->strings['Asynchronous tracking'] = 'Asynchrones Tracking';
+$a->strings['Shortcut path to the script (\'/js/\' instead of \'/piwik.js\')'] = 'Shortcut Pfad zum Script (\'/js/\' anstelle von \'/piwik.js\')';
From a0574ab045fa0f90abea186766397ecfefdae1e9 Mon Sep 17 00:00:00 2001
From: Hypolite Petovan
Date: Thu, 4 May 2023 23:29:45 -0400
Subject: [PATCH 008/551] [tumblr] Have tumblr_get_contact_by_url return null
for unsuccessful probe
- Add result setting for unsuccessful authoritative probe
---
tumblr/tumblr.php | 30 ++++++++++++++++++------------
1 file changed, 18 insertions(+), 12 deletions(-)
diff --git a/tumblr/tumblr.php b/tumblr/tumblr.php
index c5713acd..21a7504a 100644
--- a/tumblr/tumblr.php
+++ b/tumblr/tumblr.php
@@ -70,7 +70,7 @@ function tumblr_load_config(ConfigFileManager $loader)
function tumblr_check_item_notification(array &$notification_data)
{
- if (!tumblr_enabled_for_user($notification_data['uid'])) {
+ if (!tumblr_enabled_for_user($notification_data['uid'])) {
return;
}
@@ -99,6 +99,11 @@ function tumblr_probe_detect(array &$hookData)
}
$hookData['result'] = tumblr_get_contact_by_url($hookData['uri']);
+
+ // Authoritative probe should set the result even if the probe was unsuccessful
+ if ($hookData['network'] == Protocol::TUMBLR && empty($hookData['result'])) {
+ $hookData['result'] = [];
+ }
}
function tumblr_item_by_link(array &$hookData)
@@ -115,7 +120,7 @@ function tumblr_item_by_link(array &$hookData)
if (!preg_match('#^https?://www\.tumblr.com/blog/view/(.+)/(\d+).*#', $hookData['uri'], $matches) && !preg_match('#^https?://www\.tumblr.com/(.+)/(\d+).*#', $hookData['uri'], $matches)) {
return;
}
-
+
Logger::debug('Found tumblr post', ['url' => $hookData['uri'], 'blog' => $matches[1], 'id' => $matches[2]]);
$parameters = ['id' => $matches[2], 'reblog_info' => false, 'notes_info' => false, 'npf' => false];
@@ -1201,7 +1206,7 @@ function tumblr_get_blogs(int $uid): array
return $blogs;
}
-function tumblr_enabled_for_user(int $uid)
+function tumblr_enabled_for_user(int $uid)
{
return !empty($uid) && !empty(DI::pConfig()->get($uid, 'tumblr', 'access_token')) &&
!empty(DI::pConfig()->get($uid, 'tumblr', 'refresh_token')) &&
@@ -1213,24 +1218,25 @@ function tumblr_enabled_for_user(int $uid)
* Get a contact array from a Tumblr url
*
* @param string $url
- * @return array
+ * @return array|null
+ * @throws \Friendica\Network\HTTPException\InternalServerErrorException
*/
-function tumblr_get_contact_by_url(string $url): array
+function tumblr_get_contact_by_url(string $url): ?array
{
$consumer_key = DI::config()->get('tumblr', 'consumer_key');
if (empty($consumer_key)) {
- return [];
+ return null;
}
if (!preg_match('#^https?://tumblr.com/(.+)#', $url, $matches) && !preg_match('#^https?://www\.tumblr.com/(.+)#', $url, $matches) && !preg_match('#^https?://(.+)\.tumblr.com#', $url, $matches)) {
try {
$curlResult = DI::httpClient()->get($url);
} catch (\Exception $e) {
- return [];
+ return null;
}
$html = $curlResult->getBody();
if (empty($html)) {
- return [];
+ return null;
}
$doc = new DOMDocument();
@$doc->loadHTML($html);
@@ -1244,7 +1250,7 @@ function tumblr_get_contact_by_url(string $url): array
}
if (empty($blog)) {
- return [];
+ return null;
}
Logger::debug('Update Tumblr blog data', ['url' => $url]);
@@ -1253,7 +1259,7 @@ function tumblr_get_contact_by_url(string $url): array
$body = $curlResult->getBody();
$data = json_decode($body);
if (empty($data)) {
- return [];
+ return null;
}
$baseurl = 'https://tumblr.com';
@@ -1403,13 +1409,13 @@ function tumblr_get_token(int $uid, string $code = ''): string
Logger::info('Error fetching token', ['uid' => $uid, 'code' => $code, 'result' => $curlResult->getBody(), 'parameters' => $parameters]);
return '';
}
-
+
$result = json_decode($curlResult->getBody());
if (empty($result)) {
Logger::info('Invalid result when updating token', ['uid' => $uid]);
return '';
}
-
+
$expires_at = time() + $result->expires_in;
Logger::debug('Renewed token', ['uid' => $uid, 'expires_at' => date('c', $expires_at)]);
}
From f5d8604e596f8f50f5e6fa281827c196cbc8148f Mon Sep 17 00:00:00 2001
From: Hypolite Petovan
Date: Thu, 4 May 2023 23:32:29 -0400
Subject: [PATCH 009/551] [twitter] Return null in the probe_detect hook result
key on unsuccessful probe
- Add authoritative probe result setting
---
twitter/twitter.php | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/twitter/twitter.php b/twitter/twitter.php
index c55489a0..aaa378d1 100644
--- a/twitter/twitter.php
+++ b/twitter/twitter.php
@@ -526,7 +526,12 @@ function twitter_probe_detect(array &$hookData)
$user = twitter_fetchuser($nick);
if ($user) {
- $hookData['result'] = twitter_user_to_contact($user);
+ $hookData['result'] = twitter_user_to_contact($user) ?: null;
+ }
+
+ // Authoritative probe should set the result even if the probe was unsuccessful
+ if ($hookData['network'] == Protocol::TWITTER && empty($hookData['result'])) {
+ $hookData['result'] = [];
}
}
From 8886c90d1ea0d8598bc298db77ddedb54dd2352a Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Fri, 5 May 2023 17:36:11 +0200
Subject: [PATCH 010/551] remove App arguments
---
mailstream/mailstream.php | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/mailstream/mailstream.php b/mailstream/mailstream.php
index 542a1a42..1392d0d1 100644
--- a/mailstream/mailstream.php
+++ b/mailstream/mailstream.php
@@ -66,10 +66,9 @@ function mailstream_module() {}
/**
* Adds an item in "addon features" in the admin menu of the site
*
- * @param App $a App object (unused)
* @param string $o HTML form data
*/
-function mailstream_addon_admin(App $a, string &$o)
+function mailstream_addon_admin(string &$o)
{
$frommail = DI::config()->get('mailstream', 'frommail');
$template = Renderer::getMarkupTemplate('admin.tpl', 'addon/mailstream/');
@@ -110,7 +109,7 @@ function mailstream_generate_id(string $uri): string
return $message_id;
}
-function mailstream_send_hook(App $a, array $data)
+function mailstream_send_hook(array $data)
{
$criteria = array('uid' => $data['uid'], 'contact-id' => $data['contact-id'], 'uri' => $data['uri']);
$item = Post::selectFirst([], $criteria);
@@ -138,11 +137,10 @@ function mailstream_send_hook(App $a, array $data)
* mailstream is enabled and the necessary data is available, forks a
* workerqueue item to send the email.
*
- * @param App $a App object (unused)
* @param array $item content of the item (may or may not already be stored in the item table)
* @return void
*/
-function mailstream_post_hook(App $a, array &$item)
+function mailstream_post_hook(array &$item)
{
mailstream_check_version();
@@ -468,7 +466,7 @@ function mailstream_convert_table_entries()
'message_id' => $ms_item_id['message-id'],
'tries' => 0);
if (!$ms_item_id['message-id'] || !strlen($ms_item_id['message-id'])) {
- Logger::info('mailstream_convert_table_entries: item has no message-id.', 'item' => $ms_item_id['id'], 'uri' => $ms_item_id['uri']]);
+ Logger::info('mailstream_convert_table_entries: item has no message-id.', ['item' => $ms_item_id['id'], 'uri' => $ms_item_id['uri']]);
continue;
}
Logger::info('mailstream_convert_table_entries: convert item to workerqueue', $send_hook_data);
@@ -480,11 +478,10 @@ function mailstream_convert_table_entries()
/**
* Form for configuring mailstream features for a user
*
- * @param App $a App object
* @param array $data Hook data array
* @throws \Friendica\Network\HTTPException\ServiceUnavailableException
*/
-function mailstream_addon_settings(App &$a, array &$data)
+function mailstream_addon_settings(array &$data)
{
$enabled = DI::pConfig()->get(DI::userSession()->getLocalUserId(), 'mailstream', 'enabled');
$address = DI::pConfig()->get(DI::userSession()->getLocalUserId(), 'mailstream', 'address');
@@ -528,11 +525,10 @@ function mailstream_addon_settings(App &$a, array &$data)
/**
* Process data submitted to user's mailstream features form
- * @param App $a
* @param array $post POST data
* @return void
*/
-function mailstream_addon_settings_post(App $a, array $post)
+function mailstream_addon_settings_post(array $post)
{
if (!DI::userSession()->getLocalUserId() || empty($post['mailstream-submit'])) {
return;
From 631bfd83e901ff41d38798669be928d4a55bf9d8 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Fri, 5 May 2023 18:08:51 +0200
Subject: [PATCH 011/551] use new style of accessing baseUrl
---
mailstream/mailstream.php | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/mailstream/mailstream.php b/mailstream/mailstream.php
index 1392d0d1..f53d4fe9 100644
--- a/mailstream/mailstream.php
+++ b/mailstream/mailstream.php
@@ -102,7 +102,7 @@ function mailstream_addon_admin_post()
*/
function mailstream_generate_id(string $uri): string
{
- $host = DI::baseUrl()->getHostname();
+ $host = DI::baseUrl()->getHost();
$resource = hash('md5', $uri);
$message_id = "<" . $resource . "@" . $host . ">";
Logger::debug('mailstream: Generated message ID ' . $message_id . ' for URI ' . $uri);
@@ -412,7 +412,7 @@ function mailstream_send(string $message_id, array $item, array $user): bool
$template = Renderer::getMarkupTemplate('mail.tpl', 'addon/mailstream/');
$mail->AltBody = BBCode::toPlaintext($item['body']);
$item['body'] = BBCode::convertForUriId($item['uri-id'], $item['body'], BBCode::CONNECTORS);
- $item['url'] = DI::baseUrl()->get() . '/display/' . $item['guid'];
+ $item['url'] = DI::baseUrl() . '/display/' . $item['guid'];
$mail->Body = Renderer::replaceMacros($template, [
'$upstream' => DI::l10n()->t('Upstream'),
'$uri' => DI::l10n()->t('URI'),
From 90eda431399cb9b37c20105d96f6d1ed84f3fa94 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sun, 7 Jul 2019 14:45:23 +0100
Subject: [PATCH 012/551] Latest version of retriever
---
retriever/database.sql | 40 ++
retriever/retriever.php | 832 ++++++++++++++++++++++++++++
retriever/templates/extract.tpl | 32 ++
retriever/templates/fix-urls.tpl | 26 +
retriever/templates/help.tpl | 148 +++++
retriever/templates/rule-config.tpl | 112 ++++
retriever/templates/settings.tpl | 9 +
7 files changed, 1199 insertions(+)
create mode 100644 retriever/database.sql
create mode 100644 retriever/retriever.php
create mode 100644 retriever/templates/extract.tpl
create mode 100644 retriever/templates/fix-urls.tpl
create mode 100644 retriever/templates/help.tpl
create mode 100644 retriever/templates/rule-config.tpl
create mode 100644 retriever/templates/settings.tpl
diff --git a/retriever/database.sql b/retriever/database.sql
new file mode 100644
index 00000000..340e33eb
--- /dev/null
+++ b/retriever/database.sql
@@ -0,0 +1,40 @@
+CREATE TABLE IF NOT EXISTS `retriever_rule` ( /* one retrieval rule per user and contact, `data` holds the JSON-encoded rule */
+ `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
+ `uid` int(11) NOT NULL,
+ `contact-id` int(11) NOT NULL,
+ `data` mediumtext NULL DEFAULT NULL,
+ PRIMARY KEY (`id`),
+ KEY `uid` (`uid`),
+ KEY `contact-id` (`contact-id`)
+) DEFAULT CHARSET=utf8 COLLATE=utf8_bin;
+
+CREATE TABLE IF NOT EXISTS `retriever_item` ( /* links an item to the resource fetched for it, `finished` is set once the resource was applied */
+ `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
+ `item-uri` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL,
+ `item-uid` int(10) unsigned NOT NULL DEFAULT '0',
+ `contact-id` int(10) unsigned NOT NULL DEFAULT '0',
+ `resource` int(11) NOT NULL,
+ `finished` tinyint(1) unsigned NOT NULL DEFAULT '0',
+ KEY `resource` (`resource`),
+ KEY `finished` (`finished`),
+ KEY `item-uid` (`item-uid`),
+ KEY `all` (`item-uri`, `item-uid`, `contact-id`),
+ PRIMARY KEY (`id`)
+) DEFAULT CHARSET=utf8 COLLATE=utf8_bin;
+
+CREATE TABLE IF NOT EXISTS `retriever_resource` ( /* a URL to fetch, with retry bookkeeping and the fetched body, type, HTTP code and redirect URL */
+ `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
+ `type` char(255) NULL DEFAULT NULL,
+ `binary` int(1) NOT NULL DEFAULT 0,
+ `url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL,
+ `created` timestamp NOT NULL DEFAULT now(),
+ `completed` timestamp NULL DEFAULT NULL,
+ `last-try` timestamp NULL DEFAULT NULL,
+ `num-tries` int(11) NOT NULL DEFAULT 0,
+ `data` mediumblob NULL DEFAULT NULL,
+ `http-code` smallint(1) unsigned NULL DEFAULT NULL,
+ `redirect-url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NULL DEFAULT NULL,
+ KEY `url` (`url`),
+ KEY `completed` (`completed`),
+ PRIMARY KEY (`id`)
+) DEFAULT CHARSET=utf8 COLLATE=utf8_bin
diff --git a/retriever/retriever.php b/retriever/retriever.php
new file mode 100644
index 00000000..78fe575f
--- /dev/null
+++ b/retriever/retriever.php
@@ -0,0 +1,832 @@
+
+ * Status: Unsupported
+ */
+
+use Friendica\Core\Addon;
+use Friendica\Core\Config;
+use Friendica\Core\PConfig;
+use Friendica\Content\Text\HTML;
+use Friendica\Content\Text\BBCode;
+use Friendica\Object\Image;
+use Friendica\Util\Network;
+use Friendica\Core\L10n;
+use Friendica\Database\DBA;
+
+/**
+ * Converts a legacy '|'-separated XPath rule string into a list of clauses.
+ *
+ * Two component shapes are recognised:
+ *  - element[@attribute='value']
+ *  - element[contains(concat(' ',normalize-space(@class),' '),' value ')]
+ * Components matching neither shape are dropped.
+ *
+ * @param string $xpath Legacy rule string
+ * @return array List of arrays with the keys 'element', 'attribute' and 'value'
+ */
+function retriever_install_convert_xpath($xpath) {
+	$clauses = array();
+	foreach (explode('|', $xpath) as $component) {
+		$matches = array();
+		if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[@([A-Za-z][a-z0-9]*)='([^']*)'\]/", $component, $matches)) {
+			$clauses[] = array(
+				'element' => $matches[1],
+				'attribute' => $matches[2],
+				'value' => $matches[3]);
+		}
+		// The parentheses of the XPath function calls are literal text and must be
+		// escaped; the only capture groups are the element name and the class name.
+		if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[contains\(concat\(' ',normalize-space\(@class\),' '\),' ([^ ']+) '\)\]/", $component, $matches)) {
+			$clauses[] = array(
+				'element' => $matches[1],
+				'attribute' => 'class',
+				'value' => $matches[2]);
+		}
+	}
+	return $clauses;
+}
+
+/**
+ * Registers the addon hooks and brings the database schema up to date.
+ *
+ * The schema version is tracked in the 'retriever'/'dbversion' config value.
+ * Each step below upgrades exactly one version, so an old installation walks
+ * through all of them in order. If the version is still not '0.12' afterwards
+ * (for example on a fresh install), the statements in database.sql are run.
+ */
+function retriever_install() {
+	Addon::registerHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings');
+	Addon::registerHook('plugin_settings_post', 'addon/retriever/retriever.php', 'retriever_plugin_settings_post');
+	Addon::registerHook('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook');
+	Addon::registerHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu');
+	Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron');
+
+	// 0.1 -> 0.2: move rules stored in pconfig into the retriever_rule table
+	$r = q("SELECT `id` FROM `pconfig` WHERE `cat` LIKE 'retriever_%%'");
+	if ((is_array($r) && count($r)) || (Config::get('retriever', 'dbversion') == '0.1')) {
+		$retrievers = array();
+		$r = q("SELECT SUBSTRING(`cat`, 10) AS `contact`, `k`, `v` FROM `pconfig` WHERE `cat` LIKE 'retriever%%'");
+		if (!is_array($r)) {
+			$r = array();
+		}
+		foreach ($r as $rr) {
+			$retrievers[$rr['contact']][$rr['k']] = $rr['v'];
+		}
+		foreach ($retrievers as $k => $v) {
+			$rr = q("SELECT `uid` FROM `contact` WHERE `id` = %d", intval($k));
+			$uid = $rr[0]['uid'];
+			$v['images'] = 'on';
+			q("INSERT INTO `retriever_rule` (`uid`, `contact-id`, `data`) VALUES (%d, %d, '%s')",
+				intval($uid), intval($k), DBA::escape(json_encode($v)));
+		}
+		q("DELETE FROM `pconfig` WHERE `cat` LIKE 'retriever_%%'");
+		Config::set('retriever', 'dbversion', '0.2');
+	}
+	if (Config::get('retriever', 'dbversion') == '0.2') {
+		q("ALTER TABLE `retriever_resource` DROP COLUMN `retriever`");
+		Config::set('retriever', 'dbversion', '0.3');
+	}
+	if (Config::get('retriever', 'dbversion') == '0.3') {
+		q("ALTER TABLE `retriever_item` MODIFY COLUMN `item-uri` varchar(800) CHARACTER SET ascii NOT NULL");
+		q("ALTER TABLE `retriever_resource` MODIFY COLUMN `url` varchar(800) CHARACTER SET ascii NOT NULL");
+		Config::set('retriever', 'dbversion', '0.4');
+	}
+	if (Config::get('retriever', 'dbversion') == '0.4') {
+		q("ALTER TABLE `retriever_item` ADD COLUMN `finished` tinyint(1) unsigned NOT NULL DEFAULT '0'");
+		Config::set('retriever', 'dbversion', '0.5');
+	}
+	if (Config::get('retriever', 'dbversion') == '0.5') {
+		q('ALTER TABLE `retriever_resource` CHANGE `created` `created` timestamp NOT NULL DEFAULT now()');
+		q('ALTER TABLE `retriever_resource` CHANGE `completed` `completed` timestamp NULL DEFAULT NULL');
+		q('ALTER TABLE `retriever_resource` CHANGE `last-try` `last-try` timestamp NULL DEFAULT NULL');
+		q('ALTER TABLE `retriever_item` DROP KEY `all`');
+		q('ALTER TABLE `retriever_item` ADD KEY `all` (`item-uri`, `item-uid`, `contact-id`)');
+		Config::set('retriever', 'dbversion', '0.6');
+	}
+	if (Config::get('retriever', 'dbversion') == '0.6') {
+		q('ALTER TABLE `retriever_item` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin');
+		q('ALTER TABLE `retriever_item` CHANGE `item-uri` `item-uri` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL');
+		q('ALTER TABLE `retriever_resource` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin');
+		q('ALTER TABLE `retriever_resource` CHANGE `url` `url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL');
+		q('ALTER TABLE `retriever_rule` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin');
+		Config::set('retriever', 'dbversion', '0.7');
+	}
+	// 0.7 -> 0.8: rewrite every stored rule from the XPath string format
+	// ('match' / 'remove') to the clause list format ('include' / 'exclude')
+	if (Config::get('retriever', 'dbversion') == '0.7') {
+		$rules = q("SELECT `id`, `data` FROM `retriever_rule`");
+		if (!is_array($rules)) {
+			$rules = array();
+		}
+		foreach ($rules as $rr) {
+			logger('retriever_install: retriever ' . $rr['id'] . ' old config ' . $rr['data'], LOGGER_DATA);
+			$data = json_decode($rr['data'], true);
+			if (!empty($data['pattern'])) {
+				// Strip the surrounding regex delimiters from the stored pattern
+				$matches = array();
+				if (preg_match("/\/(.*)\//", $data['pattern'], $matches)) {
+					$data['pattern'] = $matches[1];
+				}
+			}
+			if (!empty($data['match'])) {
+				$data['include'] = retriever_install_convert_xpath($data['match']);
+				unset($data['match']);
+			}
+			if (!empty($data['remove'])) {
+				$data['exclude'] = retriever_install_convert_xpath($data['remove']);
+				unset($data['remove']);
+			}
+			q('UPDATE `retriever_rule` SET `data` = "%s" WHERE `id` = %d', DBA::escape(json_encode($data)), intval($rr['id']));
+			logger('retriever_install: retriever ' . $rr['id'] . ' new config ' . json_encode($data), LOGGER_DATA);
+		}
+		Config::set('retriever', 'dbversion', '0.8');
+	}
+	if (Config::get('retriever', 'dbversion') == '0.8') {
+		q("ALTER TABLE `retriever_resource` ADD COLUMN `http-code` smallint(1) unsigned NULL DEFAULT NULL");
+		Config::set('retriever', 'dbversion', '0.9');
+	}
+	if (Config::get('retriever', 'dbversion') == '0.9') {
+		q("ALTER TABLE `retriever_item` DROP COLUMN `parent`");
+		q("ALTER TABLE `retriever_resource` ADD COLUMN `redirect-url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NULL DEFAULT NULL");
+		Config::set('retriever', 'dbversion', '0.10');
+	}
+	if (Config::get('retriever', 'dbversion') == '0.10') {
+		q("ALTER TABLE `retriever_resource` MODIFY COLUMN `type` char(255) NULL DEFAULT NULL");
+		q("ALTER TABLE `retriever_resource` MODIFY COLUMN `data` mediumblob NULL DEFAULT NULL");
+		q("ALTER TABLE `retriever_rule` MODIFY COLUMN `data` mediumtext NULL DEFAULT NULL");
+		Config::set('retriever', 'dbversion', '0.11');
+	}
+	if (Config::get('retriever', 'dbversion') == '0.11') {
+		q("ALTER TABLE `retriever_resource` ADD INDEX `url` (`url`)");
+		q("ALTER TABLE `retriever_resource` ADD INDEX `completed` (`completed`)");
+		q("ALTER TABLE `retriever_item` ADD INDEX `finished` (`finished`)");
+		q("ALTER TABLE `retriever_item` ADD INDEX `item-uid` (`item-uid`)");
+		Config::set('retriever', 'dbversion', '0.12');
+	}
+	// No known version reached: create the tables from the bundled schema file
+	if (Config::get('retriever', 'dbversion') != '0.12') {
+		$schema = file_get_contents(dirname(__file__).'/database.sql');
+		foreach (explode(';', $schema) as $statement) {
+			// Splitting on ';' can leave empty or whitespace-only chunks (and
+			// yields a single empty chunk if the file could not be read)
+			if (trim($statement) === '') {
+				continue;
+			}
+			q($statement);
+		}
+		Config::set('retriever', 'dbversion', '0.12');
+	}
+}
+
+/**
+ * Unregisters the hooks that retriever_install() registers.
+ *
+ * Each hook is unregistered exactly once; the database tables are left in place.
+ */
+function retriever_uninstall() {
+	$hooks = array(
+		'plugin_settings' => 'retriever_plugin_settings',
+		'plugin_settings_post' => 'retriever_plugin_settings_post',
+		'post_remote' => 'retriever_post_remote_hook',
+		'contact_photo_menu' => 'retriever_contact_photo_menu',
+		'cron' => 'retriever_cron',
+	);
+	foreach ($hooks as $hook => $function) {
+		Addon::unregisterHook($hook, 'addon/retriever/retriever.php', $function);
+	}
+}
+
+// Intentionally empty. NOTE(review): presumably its mere existence tells the
+// addon loader that this addon provides module pages - confirm against the loader.
+function retriever_module() {}
+
+/**
+ * Cron hook: processes a batch of waiting resources, then cleans up old rows.
+ *
+ * @param mixed $a First hook argument, handed on to retriever_retrieve_items()
+ *                 (presumably the App object - confirm against the hook caller)
+ * @param mixed $b Second hook argument (unused)
+ */
+function retriever_cron($a, $b) {
+	// 100 is a nice sane number. Maybe this should be configurable.
+	$batch_size = 100;
+
+	retriever_retrieve_items($batch_size, $a);
+	retriever_tidy();
+}
+
+// Number of resources attempted so far; retriever_retrieve_items() counts
+// against this so repeated calls share one overall budget.
+$retriever_item_count = 0;
+
+/**
+ * Fetches resources that are due for a (re)try and applies completed
+ * resources to items that are still waiting for them.
+ *
+ * A resource that has never been tried is always due. Otherwise it is due
+ * when the delay belonging to its current number of tries has passed since
+ * the last try; once the schedule is exhausted it is not retried any more.
+ *
+ * @param int   $max_items Upper bound on resources to attempt, counted together
+ *                         with those already attempted via $retriever_item_count
+ * @param mixed $a         Passed through to retriever_apply_completed_resource_to_item()
+ */
+function retriever_retrieve_items($max_items, $a) {
+	global $retriever_item_count;
+
+	// Delay before the next attempt, indexed by the number of tries so far
+	$retriever_schedule = array(array(1,'minute'),
+		array(10,'minute'),
+		array(1,'hour'),
+		array(1,'day'),
+		array(2,'day'),
+		array(1,'week'),
+		array(1,'month'));
+
+	$schedule_clauses = array();
+	foreach ($retriever_schedule as $tries => $delay) {
+		list($num, $unit) = $delay;
+		$schedule_clauses[] = '(`num-tries` = ' . $tries . ' AND TIMESTAMPADD(' . DBA::escape($unit) .
+			', ' . intval($num) . ', `last-try`) < now())';
+	}
+
+	$retrieve_items = $max_items - $retriever_item_count;
+	logger('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items, LOGGER_DEBUG);
+	do {
+		// implode() takes the separator first; the reversed legacy order is
+		// no longer accepted by PHP 8
+		$r = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d",
+			DBA::escape(implode(' OR ', $schedule_clauses)),
+			intval($retrieve_items));
+		if (!is_array($r)) {
+			break;
+		}
+		if (count($r) == 0) {
+			break;
+		}
+		logger('retriever_retrieve_items: found ' . count($r) . ' waiting resources in database', LOGGER_DEBUG);
+		foreach ($r as $rr) {
+			retrieve_resource($rr);
+			$retriever_item_count++;
+		}
+		$retrieve_items = $max_items - $retriever_item_count;
+	}
+	while ($retrieve_items > 0);
+
+	/* Look for items that are waiting even though the resource has
+	 * completed. This usually happens because we've been asked to
+	 * retrospectively apply a config change. It could also happen
+	 * due to a cron job dying or something. */
+	$r = q("SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d",
+		intval($retrieve_items));
+	if (!$r) {
+		$r = array();
+	}
+	logger('retriever_retrieve_items: items waiting even though resource has completed: ' . count($r), LOGGER_DEBUG);
+	foreach ($r as $rr) {
+		$resource = q("SELECT * FROM retriever_resource WHERE `id` = %d", intval($rr['resource']));
+		$retriever_item = retriever_get_retriever_item($rr['item']);
+		if (!$retriever_item) {
+			logger('retriever_retrieve_items: no retriever item with id ' . $rr['item'], LOGGER_INFO);
+			continue;
+		}
+		$item = retriever_get_item($retriever_item);
+		if (!$item) {
+			logger('retriever_retrieve_items: no item ' . $retriever_item['item-uri'], LOGGER_INFO);
+			continue;
+		}
+		$retriever = get_retriever($item['contact-id'], $item['uid']);
+		if (!$retriever) {
+			// The retriever_item table names this column `item-uid`, not `uid`
+			logger('retriever_retrieve_items: no retriever for item ' .
+				$retriever_item['item-uri'] . ' ' . $retriever_item['item-uid'] . ' ' . $item['contact-id'],
+				LOGGER_INFO);
+			continue;
+		}
+		retriever_apply_completed_resource_to_item($retriever, $item, $resource[0], $a);
+		q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d",
+			intval($retriever_item['id']));
+		retriever_check_item_completed($item);
+	}
+}
+
+/**
+ * Removes stale rows from the retriever tables.
+ *
+ * Completed resources are dropped after one week, resources that never
+ * completed after three months, and retriever_item rows whose resource no
+ * longer exists are deleted.
+ */
+function retriever_tidy() {
+	q("DELETE FROM retriever_resource WHERE completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)");
+	q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)");
+
+	$r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null");
+	// A failed query does not yield an array; treat it as "nothing to delete"
+	// rather than calling count() and foreach on a non-array
+	if (!is_array($r)) {
+		$r = array();
+	}
+	logger('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource');
+	foreach ($r as $rr) {
+		q('DELETE FROM retriever_item WHERE id = %d', intval($rr['id']));
+	}
+}
+
+/**
+ * Completes a resource whose URL is a base64 "data:" URL, without any
+ * network access.
+ *
+ * The resource is marked completed whether or not the URL could be parsed,
+ * because retrying cannot change the outcome.
+ *
+ * @param array $resource Row from the retriever_resource table
+ */
+function retrieve_dataurl_resource($resource) {
+	// Same app handle that retrieve_resource() hands to the completion step
+	$a = get_app();
+
+	if (!preg_match("/data:(.*);base64,(.*)/", $resource['url'], $matches)) {
+		logger('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern');
+	} else {
+		$resource['type'] = $matches[1];
+		$resource['data'] = base64url_decode($matches[2]);
+	}
+
+	// Succeed or fail, there's no point retrying
+	q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `completed` = now(), `data` = '%s', `type` = '%s' WHERE id = %d",
+		DBA::escape($resource['data']),
+		DBA::escape($resource['type']),
+		intval($resource['id']));
+	retriever_resource_completed($resource, $a);
+}
+
+/**
+ * Attempts to fetch one resource and records the outcome.
+ *
+ * The try counter, HTTP code and redirect URL are always updated. The
+ * resource is only marked completed, and handed to
+ * retriever_resource_completed(), when a non-empty body is available.
+ * "data:" URLs are delegated to retrieve_dataurl_resource().
+ *
+ * @param array $resource Row from the retriever_resource table
+ */
+function retrieve_resource($resource) {
+	if (substr($resource['url'], 0, 5) == "data:") {
+		return retrieve_dataurl_resource($resource);
+	}
+
+	$a = get_app();
+
+	$cookiejar = false;
+	try {
+		logger('retrieve_resource: ' . ($resource['num-tries'] + 1) .
+			' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], LOGGER_DEBUG);
+		$redirects = 0;
+		$cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-');
+		$fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, array('cookiejar' => $cookiejar));
+		$resource['data'] = $fetch_result['body'];
+		$resource['http-code'] = $a->get_curl_code();
+		$resource['type'] = $a->get_curl_content_type();
+		$resource['redirect-url'] = $fetch_result['redirect_url'];
+		logger('retrieve_resource: got code ' . $resource['http-code'] .
+			' retrieving resource ' . $resource['id'] .
+			' final url ' . $resource['redirect-url'], LOGGER_DEBUG);
+	} catch (Exception $e) {
+		logger('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage());
+	} finally {
+		// Remove the temporary cookie jar even when the fetch threw
+		if ($cookiejar && file_exists($cookiejar)) {
+			unlink($cookiejar);
+		}
+	}
+	q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `http-code` = %d, `redirect-url` = '%s' WHERE id = %d",
+		intval($resource['http-code']),
+		DBA::escape($resource['redirect-url']),
+		intval($resource['id']));
+	if ($resource['data']) {
+		q("UPDATE `retriever_resource` SET `completed` = now(), `data` = '%s', `type` = '%s' WHERE id = %d",
+			DBA::escape($resource['data']),
+			DBA::escape($resource['type']),
+			intval($resource['id']));
+		retriever_resource_completed($resource, $a);
+	}
+}
+
+/**
+ * Load the retriever rule for a contact/user pair, optionally creating it.
+ *
+ * The row's `data` column is JSON-decoded into an array on every path, so
+ * callers get the same shape whether the rule already existed or was just
+ * inserted.
+ *
+ * @param int  $contact_id contact the rule belongs to
+ * @param int  $uid        owning user id
+ * @param bool $create     insert an empty rule when none exists
+ * @return array|null the rule row with decoded 'data', or null when there is
+ *                    no rule (and none was created)
+ */
+function get_retriever($contact_id, $uid, $create = false) {
+	$r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d",
+		intval($contact_id), intval($uid));
+	if (!$r && $create) {
+		q("INSERT INTO `retriever_rule` (`uid`, `contact-id`) VALUES (%d, %d)",
+			intval($uid), intval($contact_id));
+		$r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d",
+			intval($contact_id), intval($uid));
+	}
+	// Covers "no rule", a failed query and a failed insert alike.
+	if (!is_array($r) || !count($r)) {
+		return null;
+	}
+
+	$rule = $r[0];
+	// An empty or NULL column decodes to null, exactly as json_decode() yields
+	// for any non-JSON input.
+	$rule['data'] = json_decode((string)$rule['data'], true);
+	return $rule;
+}
+
+/**
+ * Fetch a single retriever_item row by its id.
+ *
+ * @param int $id retriever_item primary key
+ * @return array|null the row, or null (after logging) unless exactly one
+ *                    row matched
+ */
+function retriever_get_retriever_item($id) {
+	$rows = q("SELECT * FROM `retriever_item` WHERE id = %d", intval($id));
+	if (count($rows) == 1) {
+		return $rows[0];
+	}
+
+	logger('retriever_get_retriever_item: unable to find retriever_item ' . $id, LOGGER_INFO);
+	return;
+}
+
+/**
+ * Look up the item row a retriever_item refers to.
+ *
+ * The item is identified by the retriever_item's 'item-uri', 'item-uid' and
+ * 'contact-id' values.
+ *
+ * @param array $retriever_item retriever_item row
+ * @return array|null the item row, or null (after logging) unless exactly
+ *                    one item matched
+ */
+function retriever_get_item($retriever_item) {
+	$items = q("SELECT * FROM `item` WHERE `uri` = '%s' AND `uid` = %d AND `contact-id` = %d",
+		DBA::escape($retriever_item['item-uri']),
+		intval($retriever_item['item-uid']),
+		intval($retriever_item['contact-id']));
+	if (count($items) != 1) {
+		// Report the actual search keys; there are no $uri/$uid/$cid locals here.
+		logger('retriever_get_item: unexpected number of results ' .
+			count($items) . ' when searching for item ' .
+			$retriever_item['item-uri'] . ' ' .
+			$retriever_item['item-uid'] . ' ' .
+			$retriever_item['contact-id'], LOGGER_INFO);
+		return;
+	}
+	return $items[0];
+}
+
+/**
+ * Apply a completed resource to the item that one retriever_item tracks.
+ *
+ * Does nothing when either the retriever_item or its item cannot be found.
+ * Otherwise the resource is applied to the item, the retriever_item is
+ * flagged as finished and the item's completion state is re-checked.
+ *
+ * @param int   $retriever_item_id retriever_item primary key
+ * @param array $resource          completed retriever_resource row
+ * @param mixed $a                 application object, passed through
+ */
+function retriever_item_completed($retriever_item_id, $resource, $a) {
+	logger('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url'], LOGGER_DEBUG);
+
+	$pending = retriever_get_retriever_item($retriever_item_id);
+	if ($pending) {
+		// The rule may legitimately be null; the apply step copes with that.
+		$rule = get_retriever($pending['contact-id'], $pending['item-uid']);
+		$item = retriever_get_item($pending);
+		if ($item) {
+			retriever_apply_completed_resource_to_item($rule, $item, $resource, $a);
+
+			$pending_id = intval($pending['id']);
+			q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", $pending_id);
+			retriever_check_item_completed($item);
+		}
+	}
+}
+
+/**
+ * Notify every retriever_item that references a resource that it is done.
+ *
+ * @param array $resource completed retriever_resource row
+ * @param mixed $a        application object, passed through
+ */
+function retriever_resource_completed($resource, $a) {
+	logger('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url'], LOGGER_DEBUG);
+
+	$dependants = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource['id']);
+	foreach ($dependants as $dependant) {
+		$dependant_id = $dependant['id'];
+		retriever_item_completed($dependant_id, $resource, $a);
+	}
+}
+
+/**
+ * Re-run a retriever rule over a contact's most recently received items.
+ *
+ * Each selected item (and its thread row) is hidden and then passed to
+ * retriever_on_item_insert() as though it had just arrived.
+ *
+ * @param mixed $a         application object, passed through
+ * @param array $retriever retriever rule; 'contact-id' selects the items
+ * @param int   $num       how many of the newest items to process
+ */
+function apply_retrospective($a, $retriever, $num) {
+	$contact_id = intval($retriever['contact-id']);
+	$limit = intval($num);
+	$recent = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d",
+		$contact_id, $limit);
+
+	foreach ($recent as $item) {
+		// Hide the item first, then queue it for retrieval.
+		$item_id = $item['id'];
+		q('UPDATE `item` SET `visible` = 0 WHERE `id` = %d', $item_id);
+		q('UPDATE `thread` SET `visible` = 0 WHERE `iid` = %d', $item_id);
+		retriever_on_item_insert($a, $retriever, $item);
+	}
+}
+
+/**
+ * Queue retrieval of the full article for a newly inserted item.
+ *
+ * Nothing happens when no rule is supplied or the rule's 'enable' setting is
+ * empty.  Otherwise the item's plink, optionally rewritten with the rule's
+ * 'pattern'/'replace' settings, is registered as a resource and linked to
+ * the item.
+ *
+ * @param mixed $a         application object, passed through
+ * @param array $retriever retriever rule with decoded 'data'
+ * @param array $item      item row (by reference); 'plink' is read
+ */
+function retriever_on_item_insert($a, $retriever, &$item) {
+	if (!$retriever || !$retriever['id']) {
+		logger('retriever_on_item_insert: No retriever supplied', LOGGER_INFO);
+		return;
+	}
+	$settings = is_array($retriever['data'] ?? null) ? $retriever['data'] : [];
+
+	// The previous test `!$x == "on"` parsed as `(!$x) == "on"`, i.e. a plain
+	// truthiness check.  Spell that out, which also tolerates a missing key.
+	if (empty($settings['enable'])) {
+		return;
+	}
+
+	$url = $item['plink'];
+	if (!empty($settings['pattern'])) {
+		// Escape unescaped slashes so a pattern such as "/article" cannot
+		// terminate the "/" regex delimiter early.
+		$delimited = '/' . preg_replace('~(?<!\\\\)/~', '\\\\/', $settings['pattern']) . '/';
+		$rewritten = preg_replace($delimited, $settings['replace'] ?? '', $item['plink']);
+		if ($rewritten === null) {
+			// Invalid user-supplied pattern: keep the original link.
+			logger('retriever_on_item_insert: invalid pattern ' . $settings['pattern'] . ', using ' . $item['plink'], LOGGER_INFO);
+		} else {
+			$url = $rewritten;
+			logger('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url, LOGGER_DATA);
+		}
+	}
+
+	$resource = add_retriever_resource($a, $url);
+	add_retriever_item($item, $resource);
+}
+
+/**
+ * Register a URL as a retriever_resource, reusing an existing row if any.
+ *
+ * "data:" URLs are decoded immediately: the payload is stored as an already
+ * completed resource keyed by "md5://" plus the MD5 of the URL, and the
+ * completion handlers run at once.  Any other URL is stored as a pending
+ * resource.
+ *
+ * @param mixed  $a      application object, passed to completion handlers
+ * @param string $url    URL to retrieve
+ * @param bool   $binary stored in the row's `binary` column
+ * @return array|null the retriever_resource row, or null when it could not
+ *                    be read back after the insert
+ */
+function add_retriever_resource($a, $url, $binary = false) {
+	logger('add_retriever_resource: ' . $url, LOGGER_DEBUG);
+
+	$scheme = parse_url($url, PHP_URL_SCHEME);
+	if ($scheme == 'data') {
+		$fp = fopen($url, 'r');
+		$meta = stream_get_meta_data($fp);
+		$type = $meta['mediatype'];
+		$data = stream_get_contents($fp);
+		fclose($fp);
+
+		// Key the row by a digest of the URL rather than the URL itself.
+		$url = 'md5://' . hash('md5', $url);
+		$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url));
+		// Check for a result before indexing into it.
+		if (count($r)) {
+			logger('add_retriever_resource: Resource ' . $url . ' already requested', LOGGER_DEBUG);
+			return $r[0];
+		}
+
+		// Log the media type read from the stream; no row exists yet.
+		logger('add_retriever_resource: got data URL type ' . $type, LOGGER_DEBUG);
+		q("INSERT INTO `retriever_resource` (`type`, `binary`, `url`, `completed`, `data`) " .
+			"VALUES ('%s', %d, '%s', now(), '%s')",
+			DBA::escape($type),
+			intval($binary ? 1 : 0),
+			DBA::escape($url),
+			DBA::escape($data));
+		$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url));
+		if (!count($r)) {
+			logger('add_retriever_resource: could not store resource ' . $url, LOGGER_INFO);
+			return null;
+		}
+		retriever_resource_completed($r[0], $a);
+		return $r[0];
+	}
+
+	if (strlen($url) > 800) {
+		logger('add_retriever_resource: URL is longer than 800 characters', LOGGER_INFO);
+	}
+
+	$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url));
+	if (count($r)) {
+		logger('add_retriever_resource: Resource ' . $url . ' already requested', LOGGER_DEBUG);
+		return $r[0];
+	}
+
+	q("INSERT INTO `retriever_resource` (`binary`, `url`) " .
+		"VALUES (%d, '%s')", intval($binary ? 1 : 0), DBA::escape($url));
+	$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url));
+	if (!count($r)) {
+		logger('add_retriever_resource: could not store resource ' . $url, LOGGER_INFO);
+		return null;
+	}
+	return $r[0];
+}
+
+/**
+ * Link an item to a resource by inserting a retriever_item row.
+ *
+ * @param array $item     item row (by reference); 'uri', 'uid' and
+ *                        'contact-id' are read
+ * @param array $resource retriever_resource row; 'id' and 'url' are read
+ * @return int|null id of the newest matching retriever_item, or null (after
+ *                  logging) when no row can be found after the insert
+ */
+function add_retriever_item(&$item, $resource) {
+	// The same item description is used in every log line below.
+	$ident = $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'];
+	logger('add_retriever_item: ' . $resource['url'] . ' for ' . $ident, LOGGER_DEBUG);
+
+	q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " .
+		"VALUES ('%s', %d, %d, %d)",
+		DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource["id"]));
+
+	// Read the row back, newest first.
+	$created = q("SELECT id FROM `retriever_item` WHERE " .
+		"`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d ORDER BY id DESC",
+		DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id']));
+	if (count($created)) {
+		$new_id = $created[0]['id'];
+		logger('add_retriever_item: created retriever_item ' . $new_id . ' for item ' . $ident, LOGGER_DEBUG);
+		return $new_id;
+	}
+
+	logger("add_retriever_item: couldn't create retriever item for " . $ident,
+		LOGGER_INFO);
+	return;
+}
+
+/**
+ * Extract the character set from a resource's content type.
+ *
+ * Only the charset token itself is returned: matching is case-insensitive,
+ * surrounding quotes are dropped and the value stops at whitespace or the
+ * next ";" so that trailing parameters are not included.
+ *
+ * @param array $resource resource row; 'type' holds the content type
+ * @return string the declared charset, or 'utf-8' when none is declared
+ */
+function retriever_get_encoding($resource) {
+	$matches = [];
+	$type = (string)($resource['type'] ?? '');
+	if (preg_match('/charset\s*=\s*["\']?([^\s;"\']+)/i', $type, $matches)) {
+		return $matches[1];
+	}
+	return 'utf-8';
+}
+
+/**
+ * Run an XSLT stylesheet, given as text, over a DOM document.
+ *
+ * @param string      $xslt_text stylesheet source
+ * @param DOMDocument $doc       document to transform
+ * @return mixed the result of XSLTProcessor::transformToDoc(), or $doc
+ *               unchanged (after logging) when the stylesheet text is empty
+ *               or cannot be loaded as XML
+ */
+function retriever_apply_xslt_text($xslt_text, $doc) {
+	if ($xslt_text) {
+		$stylesheet = new DOMDocument();
+		if ($stylesheet->loadXML($xslt_text)) {
+			$processor = new XsltProcessor();
+			$processor->importStylesheet($stylesheet);
+			return $processor->transformToDoc($doc);
+		}
+
+		logger('retriever_apply_xslt_text: could not load XML', LOGGER_INFO);
+		return $doc;
+	}
+
+	logger('retriever_apply_xslt_text: empty XSLT text', LOGGER_INFO);
+	return $doc;
+}
+
+/**
+ * Replace an item's body with content extracted from a retrieved page.
+ *
+ * The resource data is parsed as HTML or XML, filtered through the
+ * extract.tpl stylesheet (when the rule has 'include' clauses) and the
+ * rule's custom XSLT (when present), passed through the fix-urls.tpl
+ * stylesheet, converted to BBCode and written to the item row together with
+ * a "Retrieved" footer linking to the item's plink.
+ *
+ * @param array $retriever retriever rule with decoded 'data'
+ * @param array $item      item row (by reference); 'body' is overwritten
+ * @param array $resource  completed resource; 'data', 'type', 'url' and
+ *                         'redirect-url' are read
+ */
+function retriever_apply_dom_filter($retriever, &$item, $resource) {
+	logger('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'], LOGGER_DEBUG);
+
+	// empty() tolerates rules that never had these keys set.
+	$has_include = !empty($retriever['data']['include']);
+	$has_custom = !empty($retriever['data']['customxslt']);
+	if (!$has_include && !$has_custom) {
+		return;
+	}
+	if (!$resource['data']) {
+		logger('retriever_apply_dom_filter: no text to work with', LOGGER_INFO);
+		return;
+	}
+
+	$encoding = retriever_get_encoding($resource);
+	$content = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $encoding);
+	$doc = new DOMDocument('1.0', 'UTF-8');
+	if (strpos($resource['type'], 'html') !== false) {
+		@$doc->loadHTML($content);
+	}
+	else {
+		$doc->loadXML($content);
+	}
+
+	// Check the result after each transform so that a failed step is never
+	// handed to the next one.
+	if ($has_include) {
+		$params = array('$spec' => $retriever['data']);
+		$extract_template = get_markup_template('extract.tpl', 'addon/retriever/');
+		$extract_xslt = replace_macros($extract_template, $params);
+		$doc = retriever_apply_xslt_text($extract_xslt, $doc);
+		if (!$doc) {
+			logger('retriever_apply_dom_filter: failed to apply extract XSLT template', LOGGER_INFO);
+			return;
+		}
+	}
+	if ($has_custom) {
+		$doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc);
+		if (!$doc) {
+			logger('retriever_apply_dom_filter: failed to apply custom XSLT', LOGGER_INFO);
+			return;
+		}
+	}
+
+	// Base URLs for link fixing.  Fall back to the requested URL when no final
+	// URL was recorded, and keep an explicit port.
+	$base = !empty($resource['redirect-url']) ? $resource['redirect-url'] : $resource['url'];
+	$components = parse_url($base);
+	$rooturl = ($components['scheme'] ?? '') . "://" . ($components['host'] ?? '');
+	if (isset($components['port'])) {
+		$rooturl .= ':' . $components['port'];
+	}
+	// The directory is everything up to and including the last slash, so that
+	// "/a/b/" yields "/a/b/" and "/a/b.html" yields "/a/".
+	$path = $components['path'] ?? '/';
+	$last_slash = strrpos($path, '/');
+	$dirurl = $rooturl . ($last_slash === false ? '/' : substr($path, 0, $last_slash + 1));
+	$params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl);
+	$fix_urls_template = get_markup_template('fix-urls.tpl', 'addon/retriever/');
+	$fix_urls_xslt = replace_macros($fix_urls_template, $params);
+	$doc = retriever_apply_xslt_text($fix_urls_xslt, $doc);
+	if (!$doc) {
+		logger('retriever_apply_dom_filter: failed to apply fix urls XSLT template', LOGGER_INFO);
+		return;
+	}
+
+	$item['body'] = HTML::toBBCode($doc->saveHTML());
+	if (!strlen($item['body'])) {
+		logger('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', LOGGER_INFO);
+		return;
+	}
+	$item['body'] .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . ': [url=';
+	$item['body'] .= $item['plink'];
+	$item['body'] .= ']' . $item['plink'] . '[/url]';
+	q("UPDATE `item` SET `body` = '%s' WHERE `id` = %d",
+		DBA::escape($item['body']), intval($item['id']));
+}
+
+/**
+ * Queue or apply retrieval for every [img] URL in an item's body.
+ *
+ * URLs containing this site's base URL are skipped.  For each remaining URL
+ * a binary resource is registered; if it is already completed the image is
+ * substituted immediately, otherwise the item is linked to the resource.
+ *
+ * @param array $item item row (by reference); 'body' is scanned
+ * @param mixed $a    application object, passed through
+ */
+function retrieve_images(&$item, $a) {
+	// [img=WxH]url[/img] keeps the URL in group 3, [img]url[/img] in group 1.
+	$sized = array();
+	preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $item["body"], $sized);
+	$plain = array();
+	preg_match_all("/\[img\](.*?)\[\/img\]/ism", $item["body"], $plain);
+	$urls = array_merge($sized[3], $plain[1]);
+
+	logger('retrieve_images: found ' . count($urls) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG);
+
+	foreach ($urls as $url) {
+		if (strpos($url, get_app()->get_baseurl()) !== FALSE) {
+			continue;
+		}
+
+		$resource = add_retriever_resource($a, $url, true);
+		if ($resource['completed']) {
+			retriever_transform_images($a, $item, $resource);
+		}
+		else {
+			add_retriever_item($item, $resource);
+		}
+	}
+}
+
+/**
+ * Recompute an item's visibility from its outstanding retriever_items.
+ *
+ * $item['visible'] becomes 0 while any unfinished retriever_item exists for
+ * the item and 1 otherwise.  When the item has a positive 'id' and the flag
+ * actually changed, the new value is written to the item and thread tables.
+ * (The log text mentions a notifier, but no notifier call is made here.)
+ *
+ * @param array $item item row (by reference); 'visible' is updated
+ */
+function retriever_check_item_completed(&$item)
+{
+	$pending = q('SELECT count(*) FROM retriever_item WHERE `item-uri` = "%s" ' .
+		'AND `item-uid` = %d AND `contact-id` = %d AND `finished` = 0',
+		DBA::escape($item['uri']), intval($item['uid']),
+		intval($item['contact-id']));
+	$waiting = $pending[0]['count(*)'];
+	logger('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid']
+		. ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources', LOGGER_DEBUG);
+
+	$was_visible = $item['visible'];
+	if ($waiting) {
+		$item['visible'] = 0;
+	} else {
+		$item['visible'] = 1;
+	}
+
+	// Nothing to persist for unsaved items or an unchanged flag.
+	if (!array_key_exists('id', $item) || !($item['id'] > 0) || ($was_visible == $item['visible'])) {
+		return;
+	}
+
+	logger('retriever_check_item_completed: changing visible flag to ' . $item['visible'] . ' and invoking notifier ("edit_post", ' . $item['id'] . ')', LOGGER_DEBUG);
+	$visible = intval($item['visible']);
+	$item_id = intval($item['id']);
+	q("UPDATE `item` SET `visible` = %d WHERE `id` = %d", $visible, $item_id);
+	q("UPDATE `thread` SET `visible` = %d WHERE `iid` = %d", $visible, $item_id);
+}
+
+/**
+ * Dispatch a completed resource to the right handler for an item.
+ *
+ * Image resources are substituted into the item body even without a rule.
+ * HTML/XML resources additionally need a rule: the DOM filter is applied
+ * and, if the rule's 'images' setting is set, the resulting body's images
+ * are retrieved too.
+ *
+ * @param array|null $retriever retriever rule, or null when there is none
+ * @param array      $item      item row (by reference)
+ * @param array      $resource  completed resource row
+ * @param mixed      $a         application object, passed through
+ */
+function retriever_apply_completed_resource_to_item($retriever, &$item, $resource, $a) {
+	logger('retriever_apply_completed_resource_to_item: retriever ' .
+		($retriever ? $retriever['id'] : 'none') .
+		' resource ' . $resource['url'] . ' plink ' . $item['plink'], LOGGER_DEBUG);
+
+	$content_type = $resource['type'];
+	if (strpos($content_type, 'image') !== false) {
+		retriever_transform_images($a, $item, $resource);
+	}
+	if (!$retriever) {
+		return;
+	}
+
+	$is_markup = (strpos($content_type, 'html') !== false) ||
+		(strpos($content_type, 'xml') !== false);
+	if (!$is_markup) {
+		return;
+	}
+
+	retriever_apply_dom_filter($retriever, $item, $resource);
+	if ($retriever["data"]['images'] ) {
+		retrieve_images($item, $a);
+	}
+}
+
+/**
+ * Store a retrieved image locally and point the item body at the copy.
+ *
+ * The image data is handed to Image::storePhoto(); every occurrence of the
+ * resource URL in the item body is then replaced by the stored photo's
+ * 'full' URL and the item row is updated.  Nothing is written when there is
+ * no data, storing fails, no 'full' URL comes back, or the body does not
+ * contain the resource URL.
+ *
+ * @param mixed $a        application object, passed to Image::storePhoto()
+ * @param array $item     item row (by reference); 'body' may be rewritten
+ * @param array $resource completed resource; 'data' and 'url' are read
+ */
+function retriever_transform_images($a, &$item, $resource) {
+	if (!$resource["data"]) {
+		logger('retriever_transform_images: no data available for '
+			. $resource['id'] . ' ' . $resource['url'], LOGGER_INFO);
+		return;
+	}
+
+	try {
+		$photo = Image::storePhoto($a, $item['uid'], $resource['data'], $resource['url']);
+	} catch (Exception $e) {
+		logger('retriever_transform_images caught exception ' . $e->getMessage());
+		return;
+	}
+	// Without a replacement URL the substitution below would strip the image
+	// URL from the body, so stop here instead.
+	if (!is_array($photo) || empty($photo['full'])) {
+		logger('retriever_transform_images: no stored photo URL for '
+			. $resource['id'] . ' ' . $resource['url'], LOGGER_INFO);
+		return;
+	}
+
+	$new_url = $photo['full'];
+	logger('retriever_transform_images: replacing ' . $resource['url'] . ' with ' .
+		$new_url . ' in item ' . $item['plink'], LOGGER_DEBUG);
+	$transformed = str_replace($resource["url"], $new_url, $item['body']);
+	if ($transformed === $item['body']) {
+		return;
+	}
+
+	$item['body'] = $transformed;
+	q("UPDATE `item` SET `body` = '%s' WHERE `plink` = '%s' AND `uid` = %d AND `contact-id` = %d",
+		DBA::escape($item['body']),
+		DBA::escape($item['plink']),
+		intval($item['uid']),
+		intval($item['contact-id']));
+}
+
+function retriever_content($a) {
+ if (!local_user()) {
+ $a->page['content'] .= "
Please log in
";
+ return;
+ }
+ if ($a->argv[1] === 'help') {
+ $feeds = q("SELECT `id`, `name`, `thumb` FROM contact WHERE `uid` = %d AND `network` = 'feed'",
+ local_user());
+ foreach ($feeds as $k=>$v) {
+ $feeds[$k]['url'] = $a->get_baseurl() . '/retriever/' . $v['id'];
+ }
+ $template = get_markup_template('/help.tpl', 'addon/retriever/');
+ $a->page['content'] .= replace_macros($template, array(
+ '$config' => $a->get_baseurl() . '/settings/addon',
+ '$feeds' => $feeds));
+ return;
+ }
+ if ($a->argv[1]) {
+ $retriever = get_retriever($a->argv[1], local_user(), false);
+
+ if (x($_POST["id"])) {
+ $retriever = get_retriever($a->argv[1], local_user(), true);
+ $retriever["data"] = array();
+ foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt') as $setting) {
+ if (x($_POST['retriever_' . $setting])) {
+ $retriever["data"][$setting] = $_POST['retriever_' . $setting];
+ }
+ }
+ foreach ($_POST as $k=>$v) {
+ if (preg_match("/retriever-(include|exclude)-(\d+)-(element|attribute|value)/", $k, $matches)) {
+ $retriever['data'][$matches[1]][intval($matches[2])][$matches[3]] = $v;
+ }
+ }
+ // You've gotta have an element, even if it's just "*"
+ foreach ($retriever['data']['include'] as $k=>$clause) {
+ if (!$clause['element']) {
+ unset($retriever['data']['include'][$k]);
+ }
+ }
+ foreach ($retriever['data']['exclude'] as $k=>$clause) {
+ if (!$clause['element']) {
+ unset($retriever['data']['exclude'][$k]);
+ }
+ }
+ q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d",
+ DBA::escape(json_encode($retriever["data"])), intval($retriever["id"]));
+ $a->page['content'] .= "
Settings Updated";
+ if (x($_POST["retriever_retrospective"])) {
+ apply_retrospective($a, $retriever, $_POST["retriever_retrospective"]);
+ $a->page['content'] .= " and retrospectively applied to " . $_POST["apply"] . " posts";
+ }
+ $a->page['content'] .= ".
+This plugin replaces the short excerpts you normally get in RSS feeds
+with the full content of the article from the source website. You
+specify which part of the page you're interested in with a set of
+rules. When each item arrives, the plugin downloads the full page
+from the website, extracts content using the rules, and replaces the
+original article.
+
+
+There are a few reasons you may want to do this.  The source website
+might be slow or overloaded. The source website might be
+untrustworthy, in which case using Friendica to scrub the HTML is a
+good idea. You might be on a LAN that blacklists certain websites.
+It also works neatly with the mailstream plugin, allowing you to read
+a news stream comfortably without needing continuous Internet
+connectivity.
+
+
+However, setting up retriever can be quite tricky since it depends on
+the internal design of the website. That was designed to make life
+easy for the website's developers, not for you. You'll need to have
+some familiarity with HTML, and be willing to adapt when the website
+suddenly changes everything without notice.
+
+
Configuring Retriever for a feed
+
+To set up retriever for an RSS feed, go to the "Contacts" page and
+find your feed. Then click on the drop-down menu on the contact.
+Select "Retriever" to get to the retriever configuration.
+
+
+The "Include" configuration section specifies parts of the page to
+include in the article. Each row has three components:
+
+
+
An HTML tag (e.g. "div", "span", "p")
+
An attribute (usually "class" or "id")
+
A value for the attribute
+
+
+A simple case is when the article is wrapped in a "div" element:
+
+
+ ...
+ <div class="ArticleWrapper">
+ <h2>Man Bites Dog</h2>
+ <img src="mbd.jpg">
+ <p>
+ Residents of the sleepy community of Nowheresville were
+ shocked yesterday by the sight of creepy local weirdo Jim
+ McOddman assaulting innocent local dog Snufflekins with his
+ false teeth.
+ </p>
+ ...
+ </div>
+ ...
+
+
+You then specify the tag "div", attribute "class", and value
+"ArticleWrapper". Everything else in the page, such as navigation
+panels and menus and footers and so on, will be discarded. If there
+is more than one section of the page you want to include, specify each
+one on a separate row. If the matching section contains some sections
+you want to remove, specify those in the "Exclude" section in the same
+way.
+
+
+Once you've got a configuration that you think will work, you can try
+it out on some existing articles. Type a number into the
+"Retrospectively Apply" box and click "Submit". After a while
+(exactly how long depends on your system's cron configuration) the new
+articles should be available.
+
+
Techniques
+
+You can leave the attribute and value blank to include all the
+corresponding elements with the specified tag name. You can also use
+a tag name of just an asterisk ("*"), which will match any element type with the
+specified attribute regardless of the tag.
+
+
+Note that the "class" attribute is a special case. Many web page
+templates will put multiple different classes in the same element,
+separated by spaces. If you specify an attribute of "class" it will
+match an element if any of its classes matches the specified value.
+For example:
+
+
+ <div class="article breaking-news">
+
+
+In this case you can specify a value of "article", or "breaking-news".
+You can also specify "article breaking-news", but that won't match if
+the website suddenly changes to "breaking-news article", so that's not
+recommended.
+
+
+One useful trick you can try is using the website's "print" pages.
+Many news sites have print versions of all their articles. These are
+usually drastically simplified compared to the live website page.
+Sometimes this is a good way to get the whole article when it's
+normally split across multiple pages.
+
+
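+Hopefully the URL for the print page is a predictable variant of the
+normal article URL.  For example, an article URL like
+"http://news.example.com/article/12345" might have a print version at
+"http://news.example.com/print/article/12345".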
+
+To change the URL used to retrieve the page, use the "URL Pattern" and
+"URL Replace" fields. The pattern is a regular expression matching
+part of the URL to replace. In this case, you might use a pattern of
+"/article" and a replace string of "/print/article". A common pattern
+is simply a dollar sign ("$"), used to add the replace string to the end of the URL.
+
+
Background Processing
+
+Note that retrieving and processing the articles can take some time,
+so it's done in the background. Incoming articles will be marked as
+invisible while they're in the process of being downloaded. If a URL
+fails, the plugin will keep trying at progressively longer intervals
+for up to a month, in case the website is temporarily overloaded or
+the network is down.
+
+
Retrieving Images
+
+Retriever can also optionally download images and store them in the
+local Friendica instance. Just check the "Download Images" box. You
+can also download images in every item from your network, whether it's
+an RSS feed or not. Go to the "Settings" page and
+click "Plugin settings". Then check the "All
+Photos" box in the "Retriever Settings" section and click "Submit".
+
+
Configure Feeds:
+
+{{foreach $feeds as $feed}}
+{{include file='contact_template.tpl' contact=$feed}}
+{{/foreach}}
+
diff --git a/retriever/templates/rule-config.tpl b/retriever/templates/rule-config.tpl
new file mode 100644
index 00000000..228d0326
--- /dev/null
+++ b/retriever/templates/rule-config.tpl
@@ -0,0 +1,112 @@
+