From 71eb147c1346748e6e985562e8a5c8bee910bb89 Mon Sep 17 00:00:00 2001
From: Michael
Date: Tue, 18 Apr 2023 05:56:32 +0000
Subject: [PATCH 001/527] Tumblr: Import the timeline
---
tumblr/library/tumblroauth.php | 109 +++-
tumblr/templates/connector_settings.tpl | 1 +
tumblr/tumblr.php | 683 +++++++++++++++++-------
3 files changed, 601 insertions(+), 192 deletions(-)
diff --git a/tumblr/library/tumblroauth.php b/tumblr/library/tumblroauth.php
index 2c95759a..162750f7 100644
--- a/tumblr/library/tumblroauth.php
+++ b/tumblr/library/tumblroauth.php
@@ -6,35 +6,62 @@
* The first PHP Library to support OAuth for Tumblr's REST API. (Originally for Twitter, modified for Tumblr by Lucas)
*/
+use Friendica\Core\Logger;
use Friendica\DI;
use Friendica\Security\OAuth1\OAuthConsumer;
use Friendica\Security\OAuth1\OAuthRequest;
use Friendica\Security\OAuth1\Signature\OAuthSignatureMethod_HMAC_SHA1;
use Friendica\Security\OAuth1\OAuthToken;
use Friendica\Security\OAuth1\OAuthUtil;
+use GuzzleHttp\Client;
+use GuzzleHttp\Exception\RequestException;
+use GuzzleHttp\HandlerStack;
+use GuzzleHttp\Subscriber\Oauth\Oauth1;
+use Psr\Http\Message\ResponseInterface;
/**
* Tumblr OAuth class
*/
class TumblrOAuth
{
- /* Contains the last HTTP status code returned. */
- public $http_code;
+ private $consumer_key;
+ private $consumer_secret;
+ private $oauth_token;
+ private $oauth_token_secret;
- /** @var OAuthConsumer */
- private $consumer;
- /** @var \Friendica\Security\OAuth1\Signature\OAuthSignatureMethod_HMAC_SHA1 */
- private $sha1_method;
+ /** @var GuzzleHttp\Client */
+ private $client;
// API URLs
const accessTokenURL = 'https://www.tumblr.com/oauth/access_token';
const authorizeURL = 'https://www.tumblr.com/oauth/authorize';
const requestTokenURL = 'https://www.tumblr.com/oauth/request_token';
- function __construct(string $consumer_key, string $consumer_secret)
+ function __construct(string $consumer_key, string $consumer_secret, string $oauth_token = '', string $oauth_token_secret = '')
{
- $this->sha1_method = new OAuthSignatureMethod_HMAC_SHA1();
- $this->consumer = new OAuthConsumer($consumer_key, $consumer_secret);
+ $this->consumer_key = $consumer_key;
+ $this->consumer_secret = $consumer_secret;
+ $this->oauth_token = $oauth_token;
+ $this->oauth_token_secret = $oauth_token_secret;
+
+ if (empty($this->oauth_token) || empty($this->oauth_token_secret)) {
+ return;
+ }
+
+ $stack = HandlerStack::create();
+
+ $middleware = new Oauth1([
+ 'consumer_key' => $this->consumer_key,
+ 'consumer_secret' => $this->consumer_secret,
+ 'token' => $this->oauth_token,
+ 'token_secret' => $this->oauth_token_secret
+ ]);
+ $stack->push($middleware);
+
+ $this->client = new Client([
+ 'base_uri' => 'https://api.tumblr.com/v2/',
+ 'handler' => $stack
+ ]);
}
/**
@@ -46,6 +73,9 @@ class TumblrOAuth
function getRequestToken(string $oauth_callback): array
{
$request = $this->oAuthRequest(self::requestTokenURL, ['oauth_callback' => $oauth_callback]);
+ if (empty($request)) {
+ return [];
+ }
return OAuthUtil::parse_parameters($request);
}
@@ -82,6 +112,9 @@ class TumblrOAuth
}
$request = $this->oAuthRequest(self::accessTokenURL, $parameters, $token);
+ if (empty($request)) {
+ return [];
+ }
return OAuthUtil::parse_parameters($request);
}
@@ -95,14 +128,64 @@ class TumblrOAuth
*/
private function oAuthRequest(string $url, array $parameters, OAuthToken $token = null): string
{
- $request = OAuthRequest::from_consumer_and_token($this->consumer, 'GET', $url, $parameters, $token);
- $request->sign_request($this->sha1_method, $this->consumer, $token);
+ $consumer = new OAuthConsumer($this->consumer_key, $this->consumer_secret);
+ $sha1_method = new OAuthSignatureMethod_HMAC_SHA1();
+
+ $request = OAuthRequest::from_consumer_and_token($consumer, 'GET', $url, $parameters, $token);
+ $request->sign_request($sha1_method, $consumer, $token);
$curlResult = DI::httpClient()->get($request->to_url());
- $this->http_code = $curlResult->getReturnCode();
if ($curlResult->isSuccess()) {
return $curlResult->getBody();
}
return '';
}
-}
+
+ public function get(string $url, array $parameters = []): stdClass
+ {
+ if (!empty($parameters)) {
+ $url .= '?' . http_build_query($parameters);
+ }
+
+ try {
+ $response = $this->client->get($url, ['auth' => 'oauth']);
+ } catch (RequestException $exception) {
+ $response = $exception->getResponse();
+ Logger::notice('Get failed', ['code' => $exception->getCode(), 'message' => $exception->getMessage()]);
+ }
+
+ return $this->formatResponse($response);
+ }
+
+ public function post(string $url, array $parameter): stdClass
+ {
+ try {
+ $response = $this->client->post($url, ['auth' => 'oauth', 'json' => $parameter]);
+ } catch (RequestException $exception) {
+ $response = $exception->getResponse();
+ Logger::notice('Post failed', ['code' => $exception->getCode(), 'message' => $exception->getMessage()]);
+ }
+
+ return $this->formatResponse($response);
+ }
+
+ private function formatResponse(ResponseInterface $response = null): stdClass
+ {
+ if (!is_null($response)) {
+ $content = $response->getBody()->getContents();
+ if (!empty($content)) {
+ $result = json_decode($content);
+ }
+ }
+
+ if (empty($result) || empty($result->meta)) {
+ $result = new stdClass;
+ $result->meta = new stdClass;
+ $result->meta->status = 500;
+ $result->meta->msg = '';
+ $result->response = [];
+ $result->errors = [];
+ }
+ return $result;
+ }
+}
\ No newline at end of file
diff --git a/tumblr/templates/connector_settings.tpl b/tumblr/templates/connector_settings.tpl
index d28fab9d..b5069e6e 100644
--- a/tumblr/templates/connector_settings.tpl
+++ b/tumblr/templates/connector_settings.tpl
@@ -2,6 +2,7 @@
{{include file="field_checkbox.tpl" field=$enable}}
{{include file="field_checkbox.tpl" field=$bydefault}}
+{{include file="field_checkbox.tpl" field=$import}}
{{if $page_select}}
{{include file="field_select.tpl" field=$page_select}}
diff --git a/tumblr/tumblr.php b/tumblr/tumblr.php
index 8fba3cc6..18d03a6a 100644
--- a/tumblr/tumblr.php
+++ b/tumblr/tumblr.php
@@ -9,32 +9,40 @@
require_once __DIR__ . DIRECTORY_SEPARATOR . 'library' . DIRECTORY_SEPARATOR . 'tumblroauth.php';
+use Friendica\Content\PageInfo;
use Friendica\Content\Text\BBCode;
+use Friendica\Content\Text\HTML;
use Friendica\Content\Text\NPF;
+use Friendica\Core\Cache\Enum\Duration;
use Friendica\Core\Hook;
use Friendica\Core\Logger;
+use Friendica\Core\Protocol;
use Friendica\Core\Renderer;
use Friendica\Core\System;
+use Friendica\Database\DBA;
use Friendica\DI;
+use Friendica\Model\Contact;
use Friendica\Model\Item;
+use Friendica\Model\ItemURI;
use Friendica\Model\Photo;
use Friendica\Model\Post;
use Friendica\Model\Tag;
+use Friendica\Protocol\Activity;
use Friendica\Util\DateTimeFormat;
use Friendica\Util\Network;
-use GuzzleHttp\Client;
-use GuzzleHttp\Exception\RequestException;
-use GuzzleHttp\HandlerStack;
-use GuzzleHttp\Subscriber\Oauth\Oauth1;
+use Friendica\Util\Strings;
+
+define('TUMBLR_DEFAULT_POLL_INTERVAL', 10); // given in minutes
function tumblr_install()
{
- Hook::register('hook_fork', 'addon/tumblr/tumblr.php', 'tumblr_hook_fork');
- Hook::register('post_local', 'addon/tumblr/tumblr.php', 'tumblr_post_local');
- Hook::register('notifier_normal', 'addon/tumblr/tumblr.php', 'tumblr_send');
- Hook::register('jot_networks', 'addon/tumblr/tumblr.php', 'tumblr_jot_nets');
- Hook::register('connector_settings', 'addon/tumblr/tumblr.php', 'tumblr_settings');
- Hook::register('connector_settings_post', 'addon/tumblr/tumblr.php', 'tumblr_settings_post');
+ Hook::register('hook_fork', __FILE__, 'tumblr_hook_fork');
+ Hook::register('post_local', __FILE__, 'tumblr_post_local');
+ Hook::register('notifier_normal', __FILE__, 'tumblr_send');
+ Hook::register('jot_networks', __FILE__, 'tumblr_jot_nets');
+ Hook::register('connector_settings', __FILE__, 'tumblr_settings');
+ Hook::register('connector_settings_post', __FILE__, 'tumblr_settings_post');
+ Hook::register('cron' , __FILE__, 'tumblr_cron');
}
/**
@@ -53,106 +61,84 @@ function tumblr_content()
return '';
}
- if (isset(DI::args()->getArgv()[1])) {
- switch (DI::args()->getArgv()[1]) {
- case 'connect':
- $o = tumblr_connect();
- break;
+ if (!isset(DI::args()->getArgv()[1])) {
+ DI::baseUrl()->redirect('settings/connectors/tumblr');
+ }
- case 'callback':
- $o = tumblr_callback();
- break;
+ switch (DI::args()->getArgv()[1]) {
+ case 'connect':
+ $o = tumblr_connect();
+ break;
- default:
- $o = print_r(DI::args()->getArgv(), true);
- break;
- }
- } else {
- $o = tumblr_connect();
+ case 'callback':
+ $o = tumblr_callback();
+ break;
+
+ default:
+ DI::baseUrl()->redirect('settings/connectors/tumblr');
+ break;
}
return $o;
}
-function tumblr_addon_admin(string &$o)
-{
- $t = Renderer::getMarkupTemplate('admin.tpl', 'addon/tumblr/');
-
- $o = Renderer::replaceMacros($t, [
- '$submit' => DI::l10n()->t('Save Settings'),
- // name, label, value, help, [extra values]
- '$consumer_key' => ['consumer_key', DI::l10n()->t('Consumer Key'), DI::config()->get('tumblr', 'consumer_key'), ''],
- '$consumer_secret' => ['consumer_secret', DI::l10n()->t('Consumer Secret'), DI::config()->get('tumblr', 'consumer_secret'), ''],
- ]);
-}
-
-function tumblr_addon_admin_post()
-{
- DI::config()->set('tumblr', 'consumer_key', trim($_POST['consumer_key'] ?? ''));
- DI::config()->set('tumblr', 'consumer_secret', trim($_POST['consumer_secret'] ?? ''));
-}
-
function tumblr_connect()
{
- // Start a session. This is necessary to hold on to a few keys the callback script will also need
- session_start();
-
// Define the needed keys
- $consumer_key = DI::config()->get('tumblr', 'consumer_key');
+ $consumer_key = DI::config()->get('tumblr', 'consumer_key');
$consumer_secret = DI::config()->get('tumblr', 'consumer_secret');
+ if (empty($consumer_key) || empty($consumer_secret)) {
+ DI::baseUrl()->redirect('settings/connectors/tumblr');
+ }
+
// The callback URL is the script that gets called after the user authenticates with tumblr
// In this example, it would be the included callback.php
$callback_url = DI::baseUrl() . '/tumblr/callback';
- // Let's begin. First we need a Request Token. The request token is required to send the user
+ // Let's begin. First we need a Request Token. The request token is required to send the user
// to Tumblr's login page.
- // Create a new instance of the TumblrOAuth library. For this step, all we need to give the library is our
+ // Create a new instance of the TumblrOAuth library. For this step, all we need to give the library is our
// Consumer Key and Consumer Secret
$tum_oauth = new TumblrOAuth($consumer_key, $consumer_secret);
- // Ask Tumblr for a Request Token. Specify the Callback URL here too (although this should be optional)
+ // Ask Tumblr for a Request Token. Specify the Callback URL here too (although this should be optional)
$request_token = $tum_oauth->getRequestToken($callback_url);
+ if (empty($request_token)) {
+ // Give an error message
+ return DI::l10n()->t('Could not connect to Tumblr. Refresh the page or try again later.');
+ }
+
// Store the request token and Request Token Secret as out callback.php script will need this
DI::session()->set('request_token', $request_token['oauth_token']);
DI::session()->set('request_token_secret', $request_token['oauth_token_secret']);
- // Check the HTTP Code. It should be a 200 (OK), if it's anything else then something didn't work.
- switch ($tum_oauth->http_code) {
- case 200:
- // Ask Tumblr to give us a special address to their login page
- $url = $tum_oauth->getAuthorizeURL($request_token['oauth_token']);
+ // Ask Tumblr to give us a special address to their login page
+ $url = $tum_oauth->getAuthorizeURL($request_token['oauth_token']);
- // Redirect the user to the login URL given to us by Tumblr
- System::externalRedirect($url);
+ // Redirect the user to the login URL given to us by Tumblr
+ System::externalRedirect($url);
- /*
- * That's it for our side. The user is sent to a Tumblr Login page and
- * asked to authroize our app. After that, Tumblr sends the user back to
- * our Callback URL (callback.php) along with some information we need to get
- * an access token.
- */
- break;
-
- default:
- // Give an error message
- $o = 'Could not connect to Tumblr. Refresh the page or try again later.';
- }
-
- return $o;
+ /*
+ * That's it for our side. The user is sent to a Tumblr Login page and
+ * asked to authroize our app. After that, Tumblr sends the user back to
+ * our Callback URL (callback.php) along with some information we need to get
+ * an access token.
+ */
}
function tumblr_callback()
{
- // Start a session, load the library
- session_start();
-
// Define the needed keys
$consumer_key = DI::config()->get('tumblr', 'consumer_key');
$consumer_secret = DI::config()->get('tumblr', 'consumer_secret');
+ if (empty($_REQUEST['oauth_verifier']) || empty($consumer_key) || empty($consumer_secret)) {
+ DI::baseUrl()->redirect('settings/connectors/tumblr');
+ }
+
// Once the user approves your app at Tumblr, they are sent back to this script.
// This script is passed two parameters in the URL, oauth_token (our Request Token)
// and oauth_verifier (Key that we need to get Access Token).
@@ -169,11 +155,8 @@ function tumblr_callback()
DI::session()->remove('request_token');
DI::session()->remove('request_token_secret');
- // Make sure nothing went wrong.
- if (200 == $tum_oauth->http_code) {
- // good to go
- } else {
- return 'Unable to authenticate';
+ if (empty($access_token)) {
+ return DI::l10n()->t('Unable to authenticate');
}
// What's next? Now that we have an Access Token and Secret, we can make an API call.
@@ -183,6 +166,76 @@ function tumblr_callback()
DI::baseUrl()->redirect('settings/connectors/tumblr');
}
+function tumblr_addon_admin(string &$o)
+{
+ $t = Renderer::getMarkupTemplate('admin.tpl', 'addon/tumblr/');
+
+ $o = Renderer::replaceMacros($t, [
+ '$submit' => DI::l10n()->t('Save Settings'),
+ // name, label, value, help, [extra values]
+ '$consumer_key' => ['consumer_key', DI::l10n()->t('Consumer Key'), DI::config()->get('tumblr', 'consumer_key'), ''],
+ '$consumer_secret' => ['consumer_secret', DI::l10n()->t('Consumer Secret'), DI::config()->get('tumblr', 'consumer_secret'), ''],
+ ]);
+}
+
+function tumblr_addon_admin_post()
+{
+ DI::config()->set('tumblr', 'consumer_key', trim($_POST['consumer_key'] ?? ''));
+ DI::config()->set('tumblr', 'consumer_secret', trim($_POST['consumer_secret'] ?? ''));
+}
+
+function tumblr_settings(array &$data)
+{
+ if (!DI::userSession()->getLocalUserId()) {
+ return;
+ }
+
+ $enabled = DI::pConfig()->get(DI::userSession()->getLocalUserId(), 'tumblr', 'post', false);
+ $def_enabled = DI::pConfig()->get(DI::userSession()->getLocalUserId(), 'tumblr', 'post_by_default', false);
+ $import = DI::pConfig()->get(DI::userSession()->getLocalUserId(), 'tumblr', 'import', false);
+
+ $cachekey = 'tumblr-blogs-' . DI::userSession()->getLocalUserId();
+ $blogs = DI::cache()->get($cachekey);
+ if (empty($blogs)) {
+ $blogs = tumblr_get_blogs(DI::userSession()->getLocalUserId());
+ if (!empty($blogs)) {
+ DI::cache()->set($cachekey, $blogs, Duration::HALF_HOUR);
+ }
+ } elseif (empty(tumblr_connection(DI::userSession()->getLocalUserId()))) {
+ $blogs = null;
+ DI::cache()->delete($cachekey);
+ }
+
+ if (!empty($blogs)) {
+ $page = tumblr_get_page(DI::userSession()->getLocalUserId(), $blogs);
+
+ $page_select = ['tumblr_page', DI::l10n()->t('Post to page:'), $page, '', $blogs];
+ }
+
+ $t = Renderer::getMarkupTemplate('connector_settings.tpl', 'addon/tumblr/');
+ $html = Renderer::replaceMacros($t, [
+ '$l10n' => [
+ 'connect' => DI::l10n()->t('(Re-)Authenticate your tumblr page'),
+ 'noconnect' => DI::l10n()->t('You are not authenticated to tumblr'),
+ ],
+
+ '$authenticate_url' => DI::baseUrl() . '/tumblr/connect',
+
+ '$enable' => ['tumblr', DI::l10n()->t('Enable Tumblr Post Addon'), $enabled],
+ '$bydefault' => ['tumblr_bydefault', DI::l10n()->t('Post to Tumblr by default'), $def_enabled],
+ '$import' => ['tumblr_import', DI::l10n()->t('Import the remote timeline'), $import],
+ '$page_select' => $page_select ?? '',
+ ]);
+
+ $data = [
+ 'connector' => 'tumblr',
+ 'title' => DI::l10n()->t('Tumblr Export'),
+ 'image' => 'images/tumblr.png',
+ 'enabled' => $enabled,
+ 'html' => $html,
+ ];
+}
+
function tumblr_jot_nets(array &$jotnets_fields)
{
if (!DI::userSession()->getLocalUserId()) {
@@ -201,51 +254,13 @@ function tumblr_jot_nets(array &$jotnets_fields)
}
}
-function tumblr_settings(array &$data)
-{
- if (!DI::userSession()->getLocalUserId()) {
- return;
- }
-
- $enabled = DI::pConfig()->get(DI::userSession()->getLocalUserId(), 'tumblr', 'post', false);
- $def_enabled = DI::pConfig()->get(DI::userSession()->getLocalUserId(), 'tumblr', 'post_by_default', false);
-
- $blogs = tumblr_get_blogs(DI::userSession()->getLocalUserId());
- if (!empty($blogs)) {
- $page = tumblr_get_page(DI::userSession()->getLocalUserId(), $blogs);
-
- $page_select = ['tumblr_page', DI::l10n()->t('Post to page:'), $page, '', $blogs];
- }
-
- $t = Renderer::getMarkupTemplate('connector_settings.tpl', 'addon/tumblr/');
- $html = Renderer::replaceMacros($t, [
- '$l10n' => [
- 'connect' => DI::l10n()->t('(Re-)Authenticate your tumblr page'),
- 'noconnect' => DI::l10n()->t('You are not authenticated to tumblr'),
- ],
-
- '$authenticate_url' => DI::baseUrl() . '/tumblr/connect',
-
- '$enable' => ['tumblr', DI::l10n()->t('Enable Tumblr Post Addon'), $enabled],
- '$bydefault' => ['tumblr_bydefault', DI::l10n()->t('Post to Tumblr by default'), $def_enabled],
- '$page_select' => $page_select ?? '',
- ]);
-
- $data = [
- 'connector' => 'tumblr',
- 'title' => DI::l10n()->t('Tumblr Export'),
- 'image' => 'images/tumblr.png',
- 'enabled' => $enabled,
- 'html' => $html,
- ];
-}
-
function tumblr_settings_post(array &$b)
{
if (!empty($_POST['tumblr-submit'])) {
DI::pConfig()->set(DI::userSession()->getLocalUserId(), 'tumblr', 'post', intval($_POST['tumblr']));
DI::pConfig()->set(DI::userSession()->getLocalUserId(), 'tumblr', 'page', $_POST['tumblr_page']);
DI::pConfig()->set(DI::userSession()->getLocalUserId(), 'tumblr', 'post_by_default', intval($_POST['tumblr_bydefault']));
+ DI::pConfig()->set(DI::userSession()->getLocalUserId(), 'tumblr', 'import', intval($_POST['tumblr_import']));
}
}
@@ -283,9 +298,9 @@ function tumblr_post_local(array &$b)
}
$tmbl_post = intval(DI::pConfig()->get(DI::userSession()->getLocalUserId(), 'tumblr', 'post'));
-
$tmbl_enable = (($tmbl_post && !empty($_REQUEST['tumblr_enable'])) ? intval($_REQUEST['tumblr_enable']) : 0);
+ // if API is used, default to the chosen settings
if ($b['api_source'] && intval(DI::pConfig()->get(DI::userSession()->getLocalUserId(), 'tumblr', 'post_by_default'))) {
$tmbl_enable = 1;
}
@@ -395,13 +410,13 @@ function tumblr_send(array &$b)
$page = tumblr_get_page($b['uid']);
- $result = tumblr_post($connection, 'blog/' . $page . '/post', $params);
+ $result = $connection->post('blog/' . $page . '/post', $params);
- if ($result['success']) {
- Logger::info('success', ['blog' => $page, 'params' => $params]);
+ if ($result->meta->status < 400) {
+ Logger::info('Success (legacy)', ['blog' => $page, 'meta' => $result->meta, 'response' => $result->response]);
return true;
} else {
- Logger::notice('error', ['blog' => $page, 'params' => $params, 'result' => $result['data']]);
+ Logger::notice('Error posting blog (legacy)', ['blog' => $page, 'meta' => $result->meta, 'response' => $result->response, 'errors' => $result->errors, 'params' => $params]);
return false;
}
}
@@ -416,7 +431,7 @@ function tumblr_send_npf(array $post): bool
// "true" is returned, since the legacy function will fail as well.
return true;
}
-
+
$post['body'] = Post\Media::addAttachmentsToBody($post['uri-id'], $post['body']);
if (!empty($post['title'])) {
$post['body'] = '[h1]' . $post['title'] . "[/h1]\n" . $post['body'];
@@ -431,18 +446,374 @@ function tumblr_send_npf(array $post): bool
'interactability_reblog' => 'everyone'
];
- $result = tumblr_post($connection, 'blog/' . $page . '/posts', $params);
+ $result = $connection->post('blog/' . $page . '/posts', $params);
- if ($result['success']) {
- Logger::info('success', ['blog' => $page, 'params' => $params]);
+ if ($result->meta->status < 400) {
+ Logger::info('Success (NPF)', ['blog' => $page, 'meta' => $result->meta, 'response' => $result->response]);
return true;
} else {
- Logger::notice('error', ['blog' => $page, 'params' => $params, 'result' => $result['data']]);
+ Logger::notice('Error posting blog (NPF)', ['blog' => $page, 'meta' => $result->meta, 'response' => $result->response, 'errors' => $result->errors, 'params' => $params]);
return false;
}
}
-function tumblr_connection(int $uid): ?GuzzleHttp\Client
+function tumblr_cron()
+{
+ $last = DI::keyValue()->get('tumblr_last_poll');
+
+ $poll_interval = intval(DI::config()->get('tumblr', 'poll_interval'));
+ if (!$poll_interval) {
+ $poll_interval = TUMBLR_DEFAULT_POLL_INTERVAL;
+ }
+
+ if ($last) {
+ $next = $last + ($poll_interval * 60);
+ if ($next > time()) {
+ Logger::notice('poll intervall not reached');
+ return;
+ }
+ }
+ Logger::notice('cron_start');
+
+ $abandon_days = intval(DI::config()->get('system', 'account_abandon_days'));
+ if ($abandon_days < 1) {
+ $abandon_days = 0;
+ }
+
+ $abandon_limit = date(DateTimeFormat::MYSQL, time() - $abandon_days * 86400);
+
+ $pconfigs = DBA::selectToArray('pconfig', [], ['cat' => 'tumblr', 'k' => 'import', 'v' => true]);
+ foreach ($pconfigs as $pconfig) {
+ if ($abandon_days != 0) {
+ if (!DBA::exists('user', ["`uid` = ? AND `login_date` >= ?", $pconfig['uid'], $abandon_limit])) {
+ Logger::notice('abandoned account: timeline from user will not be imported', ['user' => $pconfig['uid']]);
+ continue;
+ }
+ }
+
+ Logger::notice('importing timeline - start', ['user' => $pconfig['uid']]);
+ tumblr_fetch_dashboard($pconfig['uid']);
+ Logger::notice('importing timeline - done', ['user' => $pconfig['uid']]);
+ }
+
+ Logger::notice('cron_end');
+
+ DI::keyValue()->set('tumblr_last_poll', time());
+}
+
+function tumblr_add_npf_data($html)
+{
+ $doc = new DOMDocument();
+
+ $doc->formatOutput = true;
+ @$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
+ $xpath = new DomXPath($doc);
+ $list = $xpath->query('//p[@class="npf_link"]');
+ foreach ($list as $node) {
+ $data = tumblr_get_npf_data($node);
+ if (empty($data)) {
+ continue;
+ }
+
+ tumblr_replace_with_npf($doc, $node, PageInfo::getFooterFromUrl($data['url']));
+ }
+
+ $list = $xpath->query('//figure[@data-provider="youtube"]');
+ foreach ($list as $node) {
+ $attributes = tumblr_get_attributes($node);
+ if (empty($attributes['data-url'])) {
+ continue;
+ }
+ tumblr_replace_with_npf($doc, $node, '[youtube]' . $attributes['data-url'] . '[/youtube]');
+ }
+
+ return $doc->saveHTML();
+}
+
+function tumblr_get_attributes($node): array
+{
+ $attributes = [];
+ foreach ($node->attributes as $key => $attribute) {
+ $attributes[$key] = trim($attribute->value);
+ }
+ return $attributes;
+}
+
+function tumblr_get_npf_data($node): array
+{
+ $attributes = tumblr_get_attributes($node);
+ if (empty($attributes['data-npf'])) {
+ return [];
+ }
+
+ return json_decode($attributes['data-npf'], true);
+}
+
+function tumblr_replace_with_npf($doc, $node, $replacement)
+{
+ $replace = $doc->createTextNode($replacement);
+ $node->parentNode->insertBefore($replace, $node);
+ $node->parentNode->removeChild($node);
+}
+
+function tumblr_fetch_dashboard(int $uid)
+{
+ $page = tumblr_get_page($uid);
+
+ $parameters = ['reblog_info' => false, 'notes_info' => false, 'npf' => false];
+
+ $last = DI::pConfig()->get($uid, 'tumblr', 'last_id');
+ if (!empty($last)) {
+ $parameters['since_id'] = $last;
+ }
+
+ $connection = tumblr_connection($uid);
+ $dashboard = $connection->get('user/dashboard', $parameters);
+ if ($dashboard->meta->status > 399) {
+ Logger::notice('Error fetching dashboard', ['meta' => $dashboard->meta, 'response' => $dashboard->response, 'errors' => $dashboard->errors]);
+ return [];
+ }
+
+ if (empty($dashboard->response->posts)) {
+ return;
+ }
+
+ foreach (array_reverse($dashboard->response->posts) as $post) {
+ $uri = 'tumblr::' . $post->id_string;
+
+ if ($post->id > $last) {
+ $last = $post->id;
+ }
+
+ if (Post::exists(['uri' => $uri, 'uid' => $uid]) || ($post->blog->uuid == $page)) {
+ DI::pConfig()->set($uid, 'tumblr', 'last_id', $last);
+ continue;
+ }
+
+ $item = tumblr_get_header($post, $uri, $uid);
+
+ $item = tumblr_get_content($item, $post);
+ item::insert($item);
+
+ DI::pConfig()->set($uid, 'tumblr', 'last_id', $last);
+ }
+}
+
+function tumblr_get_header(stdClass $post, string $uri, int $uid): array
+{
+ $contact = tumblr_get_contact($post->blog, $uid);
+ $item = [
+ 'network' => Protocol::TUMBLR,
+ 'uid' => $uid,
+ 'wall' => false,
+ 'uri' => $uri,
+ 'private' => Item::UNLISTED,
+ 'verb' => Activity::POST,
+ 'contact-id' => $contact['id'],
+ 'author-name' => $contact['name'],
+ 'author-link' => $contact['url'],
+ 'author-avatar' => $contact['avatar'],
+ 'plink' => $post->post_url,
+ 'created' => date(DateTimeFormat::MYSQL, $post->timestamp)
+ ];
+
+ $item['owner-name'] = $item['author-name'];
+ $item['owner-link'] = $item['author-link'];
+ $item['owner-avatar'] = $item['author-avatar'];
+
+ // @todo process $post->tags;
+
+ return $item;
+}
+
+function tumblr_get_content(array $item, stdClass $post): array
+{
+ switch ($post->type) {
+ case 'text':
+ $item['title'] = $post->title;
+ $item['body'] = HTML::toBBCode($post->body);
+ break;
+
+ case 'quote':
+ if (empty($post->text)) {
+ $body = HTML::toBBCode($post->text) . "\n";
+ } else {
+ $body = '';
+ }
+ if (!empty($post->source_title) && !empty($post->source_url)) {
+ $body .= '[url=' . $post->source_url . ']' . $post->source_title . "[/url]:\n";
+ } elseif (!empty($post->source_title)) {
+ $body .= $post->source_title . ":\n";
+ }
+ $body .= '[quote]' . HTML::toBBCode($post->source) . '[/quote]';
+ $item['body'] = $body;
+ break;
+
+ case 'link':
+ $item['body'] = HTML::toBBCode($post->description) . "\n" . PageInfo::getFooterFromUrl($post->url);
+ break;
+
+ case 'answer':
+ if (!empty($post->asking_name) && !empty($post->asking_url)) {
+ $body = '[url=' . $post->asking_url . ']' . $post->asking_name . "[/url]:\n";
+ } elseif (!empty($post->asking_name)) {
+ $body = $post->asking_name . ":\n";
+ } else {
+ $body = '';
+ }
+ $body .= '[quote]' . HTML::toBBCode($post->question) . "[/quote]\n" . HTML::toBBCode($post->answer);
+ $item['body'] = $body;
+ break;
+
+ case 'video':
+ $item['body'] = HTML::toBBCode($post->caption);
+ if (!empty($post->video_url)) {
+ $item['body'] .= "\n[video]" . $post->video_url . "[/video]\n";
+ } elseif(!empty($post->thumbnail_url)) {
+ $item['body'] .= "\n[url=" . $post->permalink_url ."][img]" . $post->thumbnail_url . "[/img][/url]\n";
+ } elseif(!empty($post->permalink_url)) {
+ $item['body'] .= "\n[url]" . $post->permalink_url ."[/url]\n";
+ } elseif(!empty($post->source_url) && !empty($post->source_title)) {
+ $item['body'] .= "\n[url=" . $post->source_url ."]" . $post->source_title . "[/url]\n";
+ } elseif(!empty($post->source_url)) {
+ $item['body'] .= "\n[url]" . $post->source_url ."[/url]\n";
+ }
+ break;
+
+ case 'audio':
+ $item['body'] = HTML::toBBCode($post->caption);
+ if(!empty($post->source_url) && !empty($post->source_title)) {
+ $item['body'] .= "\n[url=" . $post->source_url ."]" . $post->source_title . "[/url]\n";
+ } elseif(!empty($post->source_url)) {
+ $item['body'] .= "\n[url]" . $post->source_url ."[/url]\n";
+ }
+ break;
+
+ case 'photo':
+ $item['body'] = HTML::toBBCode($post->caption);
+ foreach ($post->photos as $photo) {
+ if (!empty($photo->original_size)) {
+ $item['body'] .= "\n[img]" . $photo->original_size->url . "[/img]";
+ } elseif (!empty($photo->alt_sizes)) {
+ $item['body'] .= "\n[img]" . $photo->alt_sizes[0]->url . "[/img]";
+ }
+ }
+ break;
+
+ case 'chat':
+ $item['title'] = $post->title;
+ $item['body'] = "\n[ul]";
+ foreach ($post->dialogue as $line) {
+ $item['body'] .= "\n[li]" . $line->label . " " . $line->phrase . "[/li]";
+ }
+ $item['body'] .= "[/ul]\n";
+ break;
+ }
+ return $item;
+}
+
+function tumblr_get_contact(stdClass $blog, int $uid)
+{
+ $condition = ['network' => Protocol::TUMBLR, 'uid' => $uid, 'poll' => 'tumblr::' . $blog->uuid];
+ $contact = Contact::selectFirst([], $condition);
+ if (!empty($contact) && (strtotime($contact['updated']) >= $blog->updated)) {
+ return $contact;
+ }
+ if (empty($contact)) {
+ $cid = tumblr_insert_contact($blog, $uid);
+ } else {
+ $cid = $contact['id'];
+ }
+
+ $condition['uid'] = 0;
+
+ $contact = Contact::selectFirst([], $condition);
+ if (empty($contact)) {
+ $pcid = tumblr_insert_contact($blog, 0);
+ } else {
+ $pcid = $contact['id'];
+ }
+
+ tumblr_update_contact($blog, $uid, $cid, $pcid);
+
+ return Contact::getById($cid);
+}
+
+function tumblr_insert_contact(stdClass $blog, int $uid)
+{
+ $baseurl = 'https://tumblr.com';
+ $url = $baseurl . '/' . $blog->name;
+
+ $fields = [
+ 'uid' => $uid,
+ 'network' => Protocol::TUMBLR,
+ 'poll' => 'tumblr::' . $blog->uuid,
+ 'baseurl' => $baseurl,
+ 'priority' => 1,
+ 'writable' => false, // @todo Allow interaction at a later point in time
+ 'blocked' => false,
+ 'readonly' => false,
+ 'pending' => false,
+ 'url' => $url,
+ 'nurl' => Strings::normaliseLink($url),
+ 'alias' => $blog->url,
+ 'name' => $blog->title,
+ 'nick' => $blog->name,
+ 'addr' => $blog->name . '@tumblr.com',
+ 'about' => $blog->description,
+ 'updated' => date(DateTimeFormat::MYSQL, $blog->updated)
+ ];
+ return Contact::insert($fields);
+}
+
+function tumblr_update_contact(stdClass $blog, int $uid, int $cid, int $pcid)
+{
+ $connection = tumblr_connection($uid);
+ $info = $connection->get('blog/' . $blog->uuid . '/info');
+ if ($info->meta->status > 399) {
+ Logger::notice('Error fetching dashboard', ['meta' => $info->meta, 'response' => $info->response, 'errors' => $info->errors]);
+ return;
+ }
+
+ $avatar = $info->response->blog->avatar;
+ if (!empty($avatar)) {
+ Contact::updateAvatar($cid, $avatar[0]->url);
+ }
+
+ $baseurl = 'https://tumblr.com';
+ $url = $baseurl . '/' . $info->response->blog->name;
+
+ if ($info->response->blog->followed && $info->response->blog->subscribed) {
+ $rel = Contact::FRIEND;
+ } elseif ($info->response->blog->followed && !$info->response->blog->subscribed) {
+ $rel = Contact::SHARING;
+ } elseif (!$info->response->blog->followed && $info->response->blog->subscribed) {
+ $rel = Contact::FOLLOWER;
+ } else {
+ $rel = Contact::NOTHING;
+ }
+
+ $fields = [
+ 'url' => $url,
+ 'nurl' => Strings::normaliseLink($url),
+ 'uri-id' => ItemURI::getIdByURI($url),
+ 'alias' => $info->response->blog->url,
+ 'name' => $info->response->blog->title,
+ 'nick' => $info->response->blog->name,
+ 'addr' => $info->response->blog->name . '@tumblr.com',
+ 'about' => $info->response->blog->description,
+ 'updated' => date(DateTimeFormat::MYSQL, $info->response->blog->updated),
+ 'header' => $info->response->blog->theme->header_image_focused,
+ 'rel' => $rel,
+ ];
+
+ Contact::update($fields, ['id' => $cid]);
+
+ $fields['rel'] = Contact::NOTHING;
+ Contact::update($fields, ['id' => $pcid]);
+}
+
+function tumblr_connection(int $uid): ?TumblrOAuth
{
$oauth_token = DI::pConfig()->get($uid, 'tumblr', 'oauth_token');
$oauth_token_secret = DI::pConfig()->get($uid, 'tumblr', 'oauth_token_secret');
@@ -454,25 +825,8 @@ function tumblr_connection(int $uid): ?GuzzleHttp\Client
Logger::notice('Missing data, connection is not established', ['uid' => $uid]);
return null;
}
- return tumblr_client($consumer_key, $consumer_secret, $oauth_token, $oauth_token_secret);
-}
-function tumblr_client(string $consumer_key, string $consumer_secret, string $oauth_token, string $oauth_token_secret): GuzzleHttp\Client
-{
- $stack = HandlerStack::create();
-
- $middleware = new Oauth1([
- 'consumer_key' => $consumer_key,
- 'consumer_secret' => $consumer_secret,
- 'token' => $oauth_token,
- 'token_secret' => $oauth_token_secret
- ]);
- $stack->push($middleware);
-
- return new Client([
- 'base_uri' => 'https://api.tumblr.com/v2/',
- 'handler' => $stack
- ]);
+ return new TumblrOAuth($consumer_key, $consumer_secret, $oauth_token, $oauth_token_secret);
}
function tumblr_get_page(int $uid, array $blogs = [])
@@ -503,44 +857,15 @@ function tumblr_get_blogs(int $uid)
return [];
}
- $userinfo = tumblr_get($connection, 'user/info');
- if (empty($userinfo['success'])) {
+ $userinfo = $connection->get('user/info');
+ if ($userinfo->meta->status > 299) {
+ Logger::notice('Error fetching blogs', ['meta' => $userinfo->meta, 'response' => $userinfo->response, 'errors' => $userinfo->errors]);
return [];
}
$blogs = [];
- foreach ($userinfo['data']->response->user->blogs as $blog) {
+ foreach ($userinfo->response->user->blogs as $blog) {
$blogs[$blog->uuid] = $blog->name;
}
return $blogs;
-}
-
-function tumblr_get($connection, string $url)
-{
- try {
- $res = $connection->get($url, ['auth' => 'oauth']);
-
- $success = true;
- $data = json_decode($res->getBody()->getContents());
- } catch (RequestException $exception) {
- $success = false;
- $data = [];
- Logger::notice('Request failed', ['code' => $exception->getCode(), 'message' => $exception->getMessage()]);
- }
- return ['success' => $success, 'data' => $data];
-}
-
-function tumblr_post($connection, string $url, array $parameter)
-{
- try {
- $res = $connection->post($url, ['auth' => 'oauth', 'json' => $parameter]);
-
- $success = true;
- $data = json_decode($res->getBody()->getContents());
- } catch (RequestException $exception) {
- $success = false;
- $data = json_decode($exception->getResponse()->getBody()->getContents());
- Logger::notice('Post failed', ['code' => $exception->getCode(), 'message' => $exception->getMessage()]);
- }
- return ['success' => $success, 'data' => $data];
}
\ No newline at end of file
From 9c8e7a23a6e7fe54b6d80f174bb5692fc1aab741 Mon Sep 17 00:00:00 2001
From: Michael
Date: Tue, 18 Apr 2023 21:05:31 +0000
Subject: [PATCH 002/527] Add more types
---
tumblr/tumblr.php | 50 ++++++++++++++++++++++++++++++++++++++---------
1 file changed, 41 insertions(+), 9 deletions(-)
diff --git a/tumblr/tumblr.php b/tumblr/tumblr.php
index 18d03a6a..606e73d1 100644
--- a/tumblr/tumblr.php
+++ b/tumblr/tumblr.php
@@ -501,7 +501,7 @@ function tumblr_cron()
DI::keyValue()->set('tumblr_last_poll', time());
}
-function tumblr_add_npf_data($html)
+function tumblr_add_npf_data(string $html, string $plink): string
{
$doc = new DOMDocument();
@@ -514,8 +514,18 @@ function tumblr_add_npf_data($html)
if (empty($data)) {
continue;
}
-
- tumblr_replace_with_npf($doc, $node, PageInfo::getFooterFromUrl($data['url']));
+
+ tumblr_replace_with_npf($doc, $node, tumblr_get_type_replacement($data, $plink));
+ }
+
+ $list = $xpath->query('//div[@data-npf]');
+ foreach ($list as $node) {
+ $data = tumblr_get_npf_data($node);
+ if (empty($data)) {
+ continue;
+ }
+
+ tumblr_replace_with_npf($doc, $node, tumblr_get_type_replacement($data, $plink));
}
$list = $xpath->query('//figure[@data-provider="youtube"]');
@@ -527,7 +537,29 @@ function tumblr_add_npf_data($html)
tumblr_replace_with_npf($doc, $node, '[youtube]' . $attributes['data-url'] . '[/youtube]');
}
- return $doc->saveHTML();
+ return $doc->saveHTML();
+}
+
+function tumblr_get_type_replacement(array $data, string $plink): string
+{
+ switch ($data['type']) {
+ case 'poll':
+ $body = '[p][url=' . $plink. ']'. $data['question'] . '[/url][/p][ul]';
+ foreach ($data['answers'] as $answer) {
+ $body .= '[li]' . $answer['answer_text'] . '[/li]';
+ }
+ $body .= '[/ul]';
+ break;
+
+ case 'link':
+ $body = PageInfo::getFooterFromUrl($data['url']);
+
+ default:
+ Logger::notice('Unknown type', ['type' => $data['type'], 'data' => $data, 'plink' => $plink]);
+ $body = '';
+ }
+
+ return $body;
}
function tumblr_get_attributes($node): array
@@ -539,7 +571,7 @@ function tumblr_get_attributes($node): array
return $attributes;
}
-function tumblr_get_npf_data($node): array
+function tumblr_get_npf_data(DOMNode $node): array
{
$attributes = tumblr_get_attributes($node);
if (empty($attributes['data-npf'])) {
@@ -549,7 +581,7 @@ function tumblr_get_npf_data($node): array
return json_decode($attributes['data-npf'], true);
}
-function tumblr_replace_with_npf($doc, $node, $replacement)
+function tumblr_replace_with_npf(DOMDocument $doc, DOMNode $node, string $replacement)
{
$replace = $doc->createTextNode($replacement);
$node->parentNode->insertBefore($replace, $node);
@@ -631,7 +663,7 @@ function tumblr_get_content(array $item, stdClass $post): array
switch ($post->type) {
case 'text':
$item['title'] = $post->title;
- $item['body'] = HTML::toBBCode($post->body);
+ $item['body'] = HTML::toBBCode(tumblr_add_npf_data($post->body, $post->post_url));
break;
case 'quote':
@@ -829,7 +861,7 @@ function tumblr_connection(int $uid): ?TumblrOAuth
return new TumblrOAuth($consumer_key, $consumer_secret, $oauth_token, $oauth_token_secret);
}
-function tumblr_get_page(int $uid, array $blogs = [])
+function tumblr_get_page(int $uid, array $blogs = []): string
{
$page = DI::pConfig()->get($uid, 'tumblr', 'page');
@@ -850,7 +882,7 @@ function tumblr_get_page(int $uid, array $blogs = [])
return '';
}
-function tumblr_get_blogs(int $uid)
+function tumblr_get_blogs(int $uid): array
{
$connection = tumblr_connection($uid);
if (empty($connection)) {
From 9e7f06ed4407cc177eab918cf5ec0a9c6ab80202 Mon Sep 17 00:00:00 2001
From: Michael
Date: Sat, 22 Apr 2023 10:01:09 +0000
Subject: [PATCH 003/527] Tumblr: Dashboard import and activities are working
---
tumblr/lang/C/messages.po | 52 +++++++-------
tumblr/tumblr.php | 143 ++++++++++++++++++++++++++++++++------
2 files changed, 151 insertions(+), 44 deletions(-)
diff --git a/tumblr/lang/C/messages.po b/tumblr/lang/C/messages.po
index 83434406..f3c2fc63 100644
--- a/tumblr/lang/C/messages.po
+++ b/tumblr/lang/C/messages.po
@@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: \n"
"Report-Msgid-Bugs-To: \n"
-"POT-Creation-Date: 2021-11-21 19:17-0500\n"
+"POT-Creation-Date: 2023-04-22 10:00+0000\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME \n"
"Language-Team: LANGUAGE \n"
@@ -17,54 +17,58 @@ msgstr ""
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-#: tumblr.php:39
+#: tumblr.php:60
msgid "Permission denied."
msgstr ""
-#: tumblr.php:69
+#: tumblr.php:111
+msgid "Could not connect to Tumblr. Refresh the page or try again later."
+msgstr ""
+
+#: tumblr.php:159
+msgid "Unable to authenticate"
+msgstr ""
+
+#: tumblr.php:174
msgid "Save Settings"
msgstr ""
-#: tumblr.php:71
+#: tumblr.php:176
msgid "Consumer Key"
msgstr ""
-#: tumblr.php:72
+#: tumblr.php:177
msgid "Consumer Secret"
msgstr ""
-#: tumblr.php:177
-msgid "You are now authenticated to tumblr."
-msgstr ""
-
-#: tumblr.php:178
-msgid "return to the connector page"
-msgstr ""
-
-#: tumblr.php:194
-msgid "Post to Tumblr"
-msgstr ""
-
-#: tumblr.php:225
+#: tumblr.php:212
msgid "Post to page:"
msgstr ""
-#: tumblr.php:231
+#: tumblr.php:218
msgid "(Re-)Authenticate your tumblr page"
msgstr ""
-#: tumblr.php:232
+#: tumblr.php:219
msgid "You are not authenticated to tumblr"
msgstr ""
-#: tumblr.php:237
+#: tumblr.php:224
msgid "Enable Tumblr Post Addon"
msgstr ""
-#: tumblr.php:238
+#: tumblr.php:225
msgid "Post to Tumblr by default"
msgstr ""
-#: tumblr.php:244
-msgid "Tumblr Export"
+#: tumblr.php:226
+msgid "Import the remote timeline"
+msgstr ""
+
+#: tumblr.php:232
+msgid "Tumblr Import/Export"
+msgstr ""
+
+#: tumblr.php:250
+msgid "Post to Tumblr"
msgstr ""
diff --git a/tumblr/tumblr.php b/tumblr/tumblr.php
index 606e73d1..88ae748b 100644
--- a/tumblr/tumblr.php
+++ b/tumblr/tumblr.php
@@ -229,7 +229,7 @@ function tumblr_settings(array &$data)
$data = [
'connector' => 'tumblr',
- 'title' => DI::l10n()->t('Tumblr Export'),
+ 'title' => DI::l10n()->t('Tumblr Import/Export'),
'image' => 'images/tumblr.png',
'enabled' => $enabled,
'html' => $html,
@@ -272,10 +272,22 @@ function tumblr_hook_fork(array &$b)
$post = $b['data'];
- if (
- $post['deleted'] || $post['private'] || ($post['created'] !== $post['edited']) ||
- !strstr($post['postopts'] ?? '', 'tumblr') || ($post['parent'] != $post['id'])
- ) {
+ // Editing is not supported by the addon
+ if (($post['created'] !== $post['edited']) && !$post['deleted']) {
+ DI::logger()->info('Editing is not supported by the addon');
+ $b['execute'] = false;
+ return;
+ }
+
+ if (DI::pConfig()->get($post['uid'], 'tumblr', 'import')) {
+ // Don't post if it isn't a reply to a tumblr post
+ if (($post['parent'] != $post['id']) && !Post::exists(['id' => $post['parent'], 'network' => Protocol::TUMBLR])) {
+ Logger::notice('No tumblr parent found', ['item' => $post['id']]);
+ $b['execute'] = false;
+ return;
+ }
+ } elseif (!strstr($post['postopts'] ?? '', 'tumblr') || ($post['parent'] != $post['id']) || $post['private']) {
+ DI::logger()->info('Activities are never exported when we don\'t import the tumblr timeline');
$b['execute'] = false;
return;
}
@@ -283,8 +295,6 @@ function tumblr_hook_fork(array &$b)
function tumblr_post_local(array &$b)
{
- // This can probably be changed to allow editing by pointing to a different API endpoint
-
if ($b['edit']) {
return;
}
@@ -318,15 +328,63 @@ function tumblr_post_local(array &$b)
function tumblr_send(array &$b)
{
- if ($b['deleted'] || $b['private'] || ($b['created'] !== $b['edited'])) {
- return;
- }
-
- if (!strstr($b['postopts'], 'tumblr')) {
+ if (($b['created'] !== $b['edited']) && !$b['deleted']) {
return;
}
if ($b['gravity'] != Item::GRAVITY_PARENT) {
+ Logger::debug('Got comment', ['item' => $b]);
+
+ $parent = tumblr_get_post_from_uri($b['thr-parent']);
+ if (empty($parent)) {
+ Logger::notice('No tumblr post', ['thr-parent' => $b['thr-parent']]);
+ return;
+ }
+
+ Logger::debug('Parent found', ['parent' => $parent]);
+
+ $connection = tumblr_connection($b['uid']);
+ if (empty($connection)) {
+ return;
+ }
+
+ $page = tumblr_get_page($b['uid']);
+
+ if ($b['gravity'] == Item::GRAVITY_COMMENT) {
+ Logger::notice('Commenting is not supported (yet)');
+ } else {
+ if (($b['verb'] == Activity::LIKE) && !$b['deleted']) {
+ $params = ['id' => $parent['id'], 'reblog_key' => $parent['reblog_key']];
+ $result = $connection->post('user/like', $params);
+ } elseif (($b['verb'] == Activity::LIKE) && $b['deleted']) {
+ $params = ['id' => $parent['id'], 'reblog_key' => $parent['reblog_key']];
+ $result = $connection->post('user/unlike', $params);
+ } elseif (($b['verb'] == Activity::ANNOUNCE) && !$b['deleted']) {
+ $params = ['id' => $parent['id'], 'reblog_key' => $parent['reblog_key']];
+ $result = $connection->post('blog/' . $page . '/post/reblog', $params);
+ } elseif (($b['verb'] == Activity::ANNOUNCE) && $b['deleted']) {
+ $announce = tumblr_get_post_from_uri($b['extid']);
+ if (empty($announce)) {
+ return;
+ }
+ $params = ['id' => $announce['id']];
+ $result = $connection->post('blog/' . $page . '/post/delete', $params);
+ } else {
+ // Unsupported activity
+ return;
+ }
+
+ if ($result->meta->status < 400) {
+ Logger::info('Successfully performed activity', ['verb' => $b['verb'], 'deleted' => $b['deleted'], 'meta' => $result->meta, 'response' => $result->response]);
+ if (!$b['deleted'] && !empty($result->response->id_string)) {
+ Item::update(['extid' => 'tumblr::' . $result->response->id_string], ['id' => $b['id']]);
+ }
+ } else {
+ Logger::notice('Error while performing activity', ['verb' => $b['verb'], 'deleted' => $b['deleted'], 'meta' => $result->meta, 'response' => $result->response, 'errors' => $result->errors, 'params' => $params]);
+ }
+ }
+ return;
+ } elseif ($b['private'] || !strstr($b['postopts'], 'tumblr')) {
return;
}
@@ -421,6 +479,20 @@ function tumblr_send(array &$b)
}
}
+function tumblr_get_post_from_uri(string $uri): array
+{
+ $parts = explode(':', $uri);
+ if (($parts[0] != 'tumblr') || empty($parts[2])) {
+ return [];
+ }
+
+ $post ['id'] = $parts[2];
+ $post['reblog_key'] = $parts[3] ?? '';
+
+ $post['reblog_key'] = str_replace('@t', '', $post['reblog_key']); // Temp
+ return $post;
+}
+
function tumblr_send_npf(array $post): bool
{
$page = tumblr_get_page($post['uid']);
@@ -537,6 +609,15 @@ function tumblr_add_npf_data(string $html, string $plink): string
tumblr_replace_with_npf($doc, $node, '[youtube]' . $attributes['data-url'] . '[/youtube]');
}
+ $list = $xpath->query('//figure[@data-npf]');
+ foreach ($list as $node) {
+ $data = tumblr_get_npf_data($node);
+ if (empty($data)) {
+ continue;
+ }
+ tumblr_replace_with_npf($doc, $node, tumblr_get_type_replacement($data, $plink));
+ }
+
return $doc->saveHTML();
}
@@ -552,8 +633,15 @@ function tumblr_get_type_replacement(array $data, string $plink): string
break;
case 'link':
- $body = PageInfo::getFooterFromUrl($data['url']);
+ $body = PageInfo::getFooterFromUrl(str_replace('https://href.li/?', '', $data['url']));
+ break;
+ case 'video':
+ if (!empty($data['url']) && ($data['provider'] == 'tumblr')) {
+ $body = '[video]' . $data['url'] . '[/video]';
+ break;
+ }
+
default:
Logger::notice('Unknown type', ['type' => $data['type'], 'data' => $data, 'plink' => $plink]);
$body = '';
@@ -583,6 +671,9 @@ function tumblr_get_npf_data(DOMNode $node): array
function tumblr_replace_with_npf(DOMDocument $doc, DOMNode $node, string $replacement)
{
+ if (empty($replacement)) {
+ return;
+ }
$replace = $doc->createTextNode($replacement);
$node->parentNode->insertBefore($replace, $node);
$node->parentNode->removeChild($node);
@@ -611,12 +702,14 @@ function tumblr_fetch_dashboard(int $uid)
}
foreach (array_reverse($dashboard->response->posts) as $post) {
- $uri = 'tumblr::' . $post->id_string;
+ $uri = 'tumblr::' . $post->id_string . ':' . $post->reblog_key;
if ($post->id > $last) {
$last = $post->id;
}
+ Logger::debug('Importing post', ['uid' => $uid, 'created' => date(DateTimeFormat::MYSQL, $post->timestamp), 'uri' => $uri]);
+
if (Post::exists(['uri' => $uri, 'uid' => $uid]) || ($post->blog->uuid == $page)) {
DI::pConfig()->set($uid, 'tumblr', 'last_id', $last);
continue;
@@ -625,7 +718,18 @@ function tumblr_fetch_dashboard(int $uid)
$item = tumblr_get_header($post, $uri, $uid);
$item = tumblr_get_content($item, $post);
- item::insert($item);
+
+ $id = item::insert($item);
+
+ if ($id) {
+ $stored = Post::selectFirst(['uri-id'], ['id' => $id]);
+
+ if (!empty($post->tags)) {
+ foreach ($post->tags as $tag) {
+ Tag::store($stored['uri-id'], Tag::HASHTAG, $tag);
+ }
+ }
+ }
DI::pConfig()->set($uid, 'tumblr', 'last_id', $last);
}
@@ -653,8 +757,6 @@ function tumblr_get_header(stdClass $post, string $uri, int $uid): array
$item['owner-link'] = $item['author-link'];
$item['owner-avatar'] = $item['author-avatar'];
- // @todo process $post->tags;
-
return $item;
}
@@ -782,7 +884,7 @@ function tumblr_insert_contact(stdClass $blog, int $uid)
'poll' => 'tumblr::' . $blog->uuid,
'baseurl' => $baseurl,
'priority' => 1,
- 'writable' => false, // @todo Allow interaction at a later point in time
+ 'writable' => true,
'blocked' => false,
'readonly' => false,
'pending' => false,
@@ -825,15 +927,16 @@ function tumblr_update_contact(stdClass $blog, int $uid, int $cid, int $pcid)
$rel = Contact::NOTHING;
}
+ $uri_id = ItemURI::getIdByURI($url);
$fields = [
'url' => $url,
'nurl' => Strings::normaliseLink($url),
- 'uri-id' => ItemURI::getIdByURI($url),
+ 'uri-id' => $uri_id,
'alias' => $info->response->blog->url,
'name' => $info->response->blog->title,
'nick' => $info->response->blog->name,
'addr' => $info->response->blog->name . '@tumblr.com',
- 'about' => $info->response->blog->description,
+ 'about' => BBCode::convertForUriId($uri_id, $info->response->blog->description, BBCode::CONNECTORS),
'updated' => date(DateTimeFormat::MYSQL, $info->response->blog->updated),
'header' => $info->response->blog->theme->header_image_focused,
'rel' => $rel,
From da65314df5f67b00de97f3831f1e2424d9d37438 Mon Sep 17 00:00:00 2001
From: Michael
Date: Sun, 23 Apr 2023 10:26:19 +0000
Subject: [PATCH 004/527] Restructured code, added documentation
---
tumblr/library/tumblroauth.php | 20 ++
tumblr/tumblr.php | 422 +++++++++++++++++++--------------
2 files changed, 263 insertions(+), 179 deletions(-)
diff --git a/tumblr/library/tumblroauth.php b/tumblr/library/tumblroauth.php
index 162750f7..d7438f1d 100644
--- a/tumblr/library/tumblroauth.php
+++ b/tumblr/library/tumblroauth.php
@@ -141,6 +141,13 @@ class TumblrOAuth
return '';
}
+ /**
+ * OAuth get from a given url with given parameters
+ *
+ * @param string $url
+ * @param array $parameters
+ * @return stdClass
+ */
public function get(string $url, array $parameters = []): stdClass
{
if (!empty($parameters)) {
@@ -157,6 +164,13 @@ class TumblrOAuth
return $this->formatResponse($response);
}
+ /**
+ * OAuth Post to a given url with given parameters
+ *
+ * @param string $url
+ * @param array $parameter
+ * @return stdClass
+ */
public function post(string $url, array $parameter): stdClass
{
try {
@@ -169,6 +183,12 @@ class TumblrOAuth
return $this->formatResponse($response);
}
+ /**
+ * Convert the body in the given response to a class
+ *
+ * @param ResponseInterface|null $response
+ * @return stdClass
+ */
private function formatResponse(ResponseInterface $response = null): stdClass
{
if (!is_null($response)) {
diff --git a/tumblr/tumblr.php b/tumblr/tumblr.php
index 88ae748b..6ee9dca6 100644
--- a/tumblr/tumblr.php
+++ b/tumblr/tumblr.php
@@ -264,6 +264,50 @@ function tumblr_settings_post(array &$b)
}
}
+function tumblr_cron()
+{
+ $last = DI::keyValue()->get('tumblr_last_poll');
+
+ $poll_interval = intval(DI::config()->get('tumblr', 'poll_interval'));
+ if (!$poll_interval) {
+ $poll_interval = TUMBLR_DEFAULT_POLL_INTERVAL;
+ }
+
+ if ($last) {
+ $next = $last + ($poll_interval * 60);
+ if ($next > time()) {
+ Logger::notice('poll intervall not reached');
+ return;
+ }
+ }
+ Logger::notice('cron_start');
+
+ $abandon_days = intval(DI::config()->get('system', 'account_abandon_days'));
+ if ($abandon_days < 1) {
+ $abandon_days = 0;
+ }
+
+ $abandon_limit = date(DateTimeFormat::MYSQL, time() - $abandon_days * 86400);
+
+ $pconfigs = DBA::selectToArray('pconfig', [], ['cat' => 'tumblr', 'k' => 'import', 'v' => true]);
+ foreach ($pconfigs as $pconfig) {
+ if ($abandon_days != 0) {
+ if (!DBA::exists('user', ["`uid` = ? AND `login_date` >= ?", $pconfig['uid'], $abandon_limit])) {
+ Logger::notice('abandoned account: timeline from user will not be imported', ['user' => $pconfig['uid']]);
+ continue;
+ }
+ }
+
+ Logger::notice('importing timeline - start', ['user' => $pconfig['uid']]);
+ tumblr_fetch_dashboard($pconfig['uid']);
+ Logger::notice('importing timeline - done', ['user' => $pconfig['uid']]);
+ }
+
+ Logger::notice('cron_end');
+
+ DI::keyValue()->set('tumblr_last_poll', time());
+}
+
function tumblr_hook_fork(array &$b)
{
if ($b['name'] != 'notifier_normal') {
@@ -388,10 +432,13 @@ function tumblr_send(array &$b)
return;
}
- if (tumblr_send_npf($b)) {
- return;
+ if (!tumblr_send_npf($b)) {
+ tumblr_send_legacy($b);
}
+}
+function tumblr_send_legacy(array $b)
+{
$connection = tumblr_connection($b['uid']);
if (empty($connection)) {
return;
@@ -472,27 +519,11 @@ function tumblr_send(array &$b)
if ($result->meta->status < 400) {
Logger::info('Success (legacy)', ['blog' => $page, 'meta' => $result->meta, 'response' => $result->response]);
- return true;
} else {
Logger::notice('Error posting blog (legacy)', ['blog' => $page, 'meta' => $result->meta, 'response' => $result->response, 'errors' => $result->errors, 'params' => $params]);
- return false;
}
}
-function tumblr_get_post_from_uri(string $uri): array
-{
- $parts = explode(':', $uri);
- if (($parts[0] != 'tumblr') || empty($parts[2])) {
- return [];
- }
-
- $post ['id'] = $parts[2];
- $post['reblog_key'] = $parts[3] ?? '';
-
- $post['reblog_key'] = str_replace('@t', '', $post['reblog_key']); // Temp
- return $post;
-}
-
function tumblr_send_npf(array $post): bool
{
$page = tumblr_get_page($post['uid']);
@@ -529,156 +560,26 @@ function tumblr_send_npf(array $post): bool
}
}
-function tumblr_cron()
+function tumblr_get_post_from_uri(string $uri): array
{
- $last = DI::keyValue()->get('tumblr_last_poll');
-
- $poll_interval = intval(DI::config()->get('tumblr', 'poll_interval'));
- if (!$poll_interval) {
- $poll_interval = TUMBLR_DEFAULT_POLL_INTERVAL;
- }
-
- if ($last) {
- $next = $last + ($poll_interval * 60);
- if ($next > time()) {
- Logger::notice('poll intervall not reached');
- return;
- }
- }
- Logger::notice('cron_start');
-
- $abandon_days = intval(DI::config()->get('system', 'account_abandon_days'));
- if ($abandon_days < 1) {
- $abandon_days = 0;
- }
-
- $abandon_limit = date(DateTimeFormat::MYSQL, time() - $abandon_days * 86400);
-
- $pconfigs = DBA::selectToArray('pconfig', [], ['cat' => 'tumblr', 'k' => 'import', 'v' => true]);
- foreach ($pconfigs as $pconfig) {
- if ($abandon_days != 0) {
- if (!DBA::exists('user', ["`uid` = ? AND `login_date` >= ?", $pconfig['uid'], $abandon_limit])) {
- Logger::notice('abandoned account: timeline from user will not be imported', ['user' => $pconfig['uid']]);
- continue;
- }
- }
-
- Logger::notice('importing timeline - start', ['user' => $pconfig['uid']]);
- tumblr_fetch_dashboard($pconfig['uid']);
- Logger::notice('importing timeline - done', ['user' => $pconfig['uid']]);
- }
-
- Logger::notice('cron_end');
-
- DI::keyValue()->set('tumblr_last_poll', time());
-}
-
-function tumblr_add_npf_data(string $html, string $plink): string
-{
- $doc = new DOMDocument();
-
- $doc->formatOutput = true;
- @$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
- $xpath = new DomXPath($doc);
- $list = $xpath->query('//p[@class="npf_link"]');
- foreach ($list as $node) {
- $data = tumblr_get_npf_data($node);
- if (empty($data)) {
- continue;
- }
-
- tumblr_replace_with_npf($doc, $node, tumblr_get_type_replacement($data, $plink));
- }
-
- $list = $xpath->query('//div[@data-npf]');
- foreach ($list as $node) {
- $data = tumblr_get_npf_data($node);
- if (empty($data)) {
- continue;
- }
-
- tumblr_replace_with_npf($doc, $node, tumblr_get_type_replacement($data, $plink));
- }
-
- $list = $xpath->query('//figure[@data-provider="youtube"]');
- foreach ($list as $node) {
- $attributes = tumblr_get_attributes($node);
- if (empty($attributes['data-url'])) {
- continue;
- }
- tumblr_replace_with_npf($doc, $node, '[youtube]' . $attributes['data-url'] . '[/youtube]');
- }
-
- $list = $xpath->query('//figure[@data-npf]');
- foreach ($list as $node) {
- $data = tumblr_get_npf_data($node);
- if (empty($data)) {
- continue;
- }
- tumblr_replace_with_npf($doc, $node, tumblr_get_type_replacement($data, $plink));
- }
-
- return $doc->saveHTML();
-}
-
-function tumblr_get_type_replacement(array $data, string $plink): string
-{
- switch ($data['type']) {
- case 'poll':
- $body = '[p][url=' . $plink. ']'. $data['question'] . '[/url][/p][ul]';
- foreach ($data['answers'] as $answer) {
- $body .= '[li]' . $answer['answer_text'] . '[/li]';
- }
- $body .= '[/ul]';
- break;
-
- case 'link':
- $body = PageInfo::getFooterFromUrl(str_replace('https://href.li/?', '', $data['url']));
- break;
-
- case 'video':
- if (!empty($data['url']) && ($data['provider'] == 'tumblr')) {
- $body = '[video]' . $data['url'] . '[/video]';
- break;
- }
-
- default:
- Logger::notice('Unknown type', ['type' => $data['type'], 'data' => $data, 'plink' => $plink]);
- $body = '';
- }
-
- return $body;
-}
-
-function tumblr_get_attributes($node): array
-{
- $attributes = [];
- foreach ($node->attributes as $key => $attribute) {
- $attributes[$key] = trim($attribute->value);
- }
- return $attributes;
-}
-
-function tumblr_get_npf_data(DOMNode $node): array
-{
- $attributes = tumblr_get_attributes($node);
- if (empty($attributes['data-npf'])) {
+ $parts = explode(':', $uri);
+ if (($parts[0] != 'tumblr') || empty($parts[2])) {
return [];
}
+
+ $post ['id'] = $parts[2];
+ $post['reblog_key'] = $parts[3] ?? '';
- return json_decode($attributes['data-npf'], true);
-}
-
-function tumblr_replace_with_npf(DOMDocument $doc, DOMNode $node, string $replacement)
-{
- if (empty($replacement)) {
- return;
- }
- $replace = $doc->createTextNode($replacement);
- $node->parentNode->insertBefore($replace, $node);
- $node->parentNode->removeChild($node);
+ $post['reblog_key'] = str_replace('@t', '', $post['reblog_key']); // Temp
+ return $post;
}
+/**
+ * Fetch the dashboard (timeline) for the given user
+ *
+ * @param integer $uid
+ * @return void
+ */
function tumblr_fetch_dashboard(int $uid)
{
$page = tumblr_get_page($uid);
@@ -735,6 +636,14 @@ function tumblr_fetch_dashboard(int $uid)
}
}
+/**
+ * Sets the initial data for the item array
+ *
+ * @param stdClass $post
+ * @param string $uri
+ * @param integer $uid
+ * @return array
+ */
function tumblr_get_header(stdClass $post, string $uri, int $uid): array
{
$contact = tumblr_get_contact($post->blog, $uid);
@@ -760,6 +669,13 @@ function tumblr_get_header(stdClass $post, string $uri, int $uid): array
return $item;
}
+/**
+ * Set the body according the given content type
+ *
+ * @param array $item
+ * @param stdClass $post
+ * @return array
+ */
function tumblr_get_content(array $item, stdClass $post): array
{
switch ($post->type) {
@@ -846,7 +762,120 @@ function tumblr_get_content(array $item, stdClass $post): array
return $item;
}
-function tumblr_get_contact(stdClass $blog, int $uid)
+function tumblr_add_npf_data(string $html, string $plink): string
+{
+ $doc = new DOMDocument();
+
+ $doc->formatOutput = true;
+ @$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
+ $xpath = new DomXPath($doc);
+ $list = $xpath->query('//p[@class="npf_link"]');
+ foreach ($list as $node) {
+ $data = tumblr_get_npf_data($node);
+ if (empty($data)) {
+ continue;
+ }
+
+ tumblr_replace_with_npf($doc, $node, tumblr_get_type_replacement($data, $plink));
+ }
+
+ $list = $xpath->query('//div[@data-npf]');
+ foreach ($list as $node) {
+ $data = tumblr_get_npf_data($node);
+ if (empty($data)) {
+ continue;
+ }
+
+ tumblr_replace_with_npf($doc, $node, tumblr_get_type_replacement($data, $plink));
+ }
+
+ $list = $xpath->query('//figure[@data-provider="youtube"]');
+ foreach ($list as $node) {
+ $attributes = tumblr_get_attributes($node);
+ if (empty($attributes['data-url'])) {
+ continue;
+ }
+ tumblr_replace_with_npf($doc, $node, '[youtube]' . $attributes['data-url'] . '[/youtube]');
+ }
+
+ $list = $xpath->query('//figure[@data-npf]');
+ foreach ($list as $node) {
+ $data = tumblr_get_npf_data($node);
+ if (empty($data)) {
+ continue;
+ }
+ tumblr_replace_with_npf($doc, $node, tumblr_get_type_replacement($data, $plink));
+ }
+
+ return $doc->saveHTML();
+}
+
+function tumblr_replace_with_npf(DOMDocument $doc, DOMNode $node, string $replacement)
+{
+ if (empty($replacement)) {
+ return;
+ }
+ $replace = $doc->createTextNode($replacement);
+ $node->parentNode->insertBefore($replace, $node);
+ $node->parentNode->removeChild($node);
+}
+
+function tumblr_get_npf_data(DOMNode $node): array
+{
+ $attributes = tumblr_get_attributes($node);
+ if (empty($attributes['data-npf'])) {
+ return [];
+ }
+
+ return json_decode($attributes['data-npf'], true);
+}
+
+function tumblr_get_attributes($node): array
+{
+ $attributes = [];
+ foreach ($node->attributes as $key => $attribute) {
+ $attributes[$key] = trim($attribute->value);
+ }
+ return $attributes;
+}
+
+function tumblr_get_type_replacement(array $data, string $plink): string
+{
+ switch ($data['type']) {
+ case 'poll':
+ $body = '[p][url=' . $plink. ']'. $data['question'] . '[/url][/p][ul]';
+ foreach ($data['answers'] as $answer) {
+ $body .= '[li]' . $answer['answer_text'] . '[/li]';
+ }
+ $body .= '[/ul]';
+ break;
+
+ case 'link':
+ $body = PageInfo::getFooterFromUrl(str_replace('https://href.li/?', '', $data['url']));
+ break;
+
+ case 'video':
+ if (!empty($data['url']) && ($data['provider'] == 'tumblr')) {
+ $body = '[video]' . $data['url'] . '[/video]';
+ break;
+ }
+
+ default:
+ Logger::notice('Unknown type', ['type' => $data['type'], 'data' => $data, 'plink' => $plink]);
+ $body = '';
+ }
+
+ return $body;
+}
+
+/**
+ * Get a contact array for the given blog
+ *
+ * @param stdClass $blog
+ * @param integer $uid
+ * @return array
+ */
+function tumblr_get_contact(stdClass $blog, int $uid): array
{
$condition = ['network' => Protocol::TUMBLR, 'uid' => $uid, 'poll' => 'tumblr::' . $blog->uuid];
$contact = Contact::selectFirst([], $condition);
@@ -873,6 +902,13 @@ function tumblr_get_contact(stdClass $blog, int $uid)
return Contact::getById($cid);
}
+/**
+ * Create a new contact
+ *
+ * @param stdClass $blog
+ * @param integer $uid
+ * @return void
+ */
function tumblr_insert_contact(stdClass $blog, int $uid)
{
$baseurl = 'https://tumblr.com';
@@ -900,6 +936,15 @@ function tumblr_insert_contact(stdClass $blog, int $uid)
return Contact::insert($fields);
}
+/**
+ * Updates the given contact for the given user and proviced contact ids
+ *
+ * @param stdClass $blog
+ * @param integer $uid
+ * @param integer $cid
+ * @param integer $pcid
+ * @return void
+ */
function tumblr_update_contact(stdClass $blog, int $uid, int $cid, int $pcid)
{
$connection = tumblr_connection($uid);
@@ -948,22 +993,13 @@ function tumblr_update_contact(stdClass $blog, int $uid, int $cid, int $pcid)
Contact::update($fields, ['id' => $pcid]);
}
-function tumblr_connection(int $uid): ?TumblrOAuth
-{
- $oauth_token = DI::pConfig()->get($uid, 'tumblr', 'oauth_token');
- $oauth_token_secret = DI::pConfig()->get($uid, 'tumblr', 'oauth_token_secret');
-
- $consumer_key = DI::config()->get('tumblr', 'consumer_key');
- $consumer_secret = DI::config()->get('tumblr', 'consumer_secret');
-
- if (!$consumer_key || !$consumer_secret || !$oauth_token || !$oauth_token_secret) {
- Logger::notice('Missing data, connection is not established', ['uid' => $uid]);
- return null;
- }
-
- return new TumblrOAuth($consumer_key, $consumer_secret, $oauth_token, $oauth_token_secret);
-}
-
+/**
+ * Get the default page for posting. Detects the value if not provided or has got a bad value.
+ *
+ * @param integer $uid
+ * @param array $blogs
+ * @return string
+ */
function tumblr_get_page(int $uid, array $blogs = []): string
{
$page = DI::pConfig()->get($uid, 'tumblr', 'page');
@@ -985,6 +1021,12 @@ function tumblr_get_page(int $uid, array $blogs = []): string
return '';
}
+/**
+ * Get an array of blogs for the given user
+ *
+ * @param integer $uid
+ * @return array
+ */
function tumblr_get_blogs(int $uid): array
{
$connection = tumblr_connection($uid);
@@ -1003,4 +1045,26 @@ function tumblr_get_blogs(int $uid): array
$blogs[$blog->uuid] = $blog->name;
}
return $blogs;
+}
+
+/**
+ * Creates a OAuth connection for the given user
+ *
+ * @param integer $uid
+ * @return TumblrOAuth|null
+ */
+function tumblr_connection(int $uid): ?TumblrOAuth
+{
+ $oauth_token = DI::pConfig()->get($uid, 'tumblr', 'oauth_token');
+ $oauth_token_secret = DI::pConfig()->get($uid, 'tumblr', 'oauth_token_secret');
+
+ $consumer_key = DI::config()->get('tumblr', 'consumer_key');
+ $consumer_secret = DI::config()->get('tumblr', 'consumer_secret');
+
+ if (!$consumer_key || !$consumer_secret || !$oauth_token || !$oauth_token_secret) {
+ Logger::notice('Missing data, connection is not established', ['uid' => $uid]);
+ return null;
+ }
+
+ return new TumblrOAuth($consumer_key, $consumer_secret, $oauth_token, $oauth_token_secret);
}
\ No newline at end of file
From c811f549586832e38dbefadb35c97178e29eb30f Mon Sep 17 00:00:00 2001
From: Michael
Date: Tue, 25 Apr 2023 18:33:39 +0000
Subject: [PATCH 005/527] Tumblr is now using OAuth2
---
tumblr/library/tumblroauth.php | 211 --------------------
tumblr/tumblr.php | 348 +++++++++++++++++++--------------
2 files changed, 203 insertions(+), 356 deletions(-)
delete mode 100644 tumblr/library/tumblroauth.php
diff --git a/tumblr/library/tumblroauth.php b/tumblr/library/tumblroauth.php
deleted file mode 100644
index d7438f1d..00000000
--- a/tumblr/library/tumblroauth.php
+++ /dev/null
@@ -1,211 +0,0 @@
-consumer_key = $consumer_key;
- $this->consumer_secret = $consumer_secret;
- $this->oauth_token = $oauth_token;
- $this->oauth_token_secret = $oauth_token_secret;
-
- if (empty($this->oauth_token) || empty($this->oauth_token_secret)) {
- return;
- }
-
- $stack = HandlerStack::create();
-
- $middleware = new Oauth1([
- 'consumer_key' => $this->consumer_key,
- 'consumer_secret' => $this->consumer_secret,
- 'token' => $this->oauth_token,
- 'token_secret' => $this->oauth_token_secret
- ]);
- $stack->push($middleware);
-
- $this->client = new Client([
- 'base_uri' => 'https://api.tumblr.com/v2/',
- 'handler' => $stack
- ]);
- }
-
- /**
- * Get a request_token from Tumblr
- *
- * @param string $oauth_callback
- * @return array
- */
- function getRequestToken(string $oauth_callback): array
- {
- $request = $this->oAuthRequest(self::requestTokenURL, ['oauth_callback' => $oauth_callback]);
- if (empty($request)) {
- return [];
- }
- return OAuthUtil::parse_parameters($request);
- }
-
- /**
- * Get the authorize URL
- *
- * @param string $oauth_token
- * @return string
- */
- function getAuthorizeURL(string $oauth_token): string
- {
- return self::authorizeURL . "?oauth_token={$oauth_token}";
- }
-
- /**
- * Exchange request token and secret for an access token and
- * secret, to sign API calls.
- *
- * @param string $oauth_verifier
- * @param string $request_token
- * @param string $request_token_secret
- * @return array ("oauth_token" => "the-access-token",
- * "oauth_token_secret" => "the-access-secret",
- * "user_id" => "9436992",
- * "screen_name" => "abraham")
- */
- function getAccessToken(string $oauth_verifier, string $request_token, string $request_token_secret): array
- {
- $token = new OAuthToken($request_token, $request_token_secret);
-
- $parameters = [];
- if (!empty($oauth_verifier)) {
- $parameters['oauth_verifier'] = $oauth_verifier;
- }
-
- $request = $this->oAuthRequest(self::accessTokenURL, $parameters, $token);
- if (empty($request)) {
- return [];
- }
- return OAuthUtil::parse_parameters($request);
- }
-
- /**
- * Format and sign an OAuth / API request
- *
- * @param string $url
- * @param array $parameters
- * @param OAuthToken $token $name
- * @return string
- */
- private function oAuthRequest(string $url, array $parameters, OAuthToken $token = null): string
- {
- $consumer = new OAuthConsumer($this->consumer_key, $this->consumer_secret);
- $sha1_method = new OAuthSignatureMethod_HMAC_SHA1();
-
- $request = OAuthRequest::from_consumer_and_token($consumer, 'GET', $url, $parameters, $token);
- $request->sign_request($sha1_method, $consumer, $token);
-
- $curlResult = DI::httpClient()->get($request->to_url());
- if ($curlResult->isSuccess()) {
- return $curlResult->getBody();
- }
- return '';
- }
-
- /**
- * OAuth get from a given url with given parameters
- *
- * @param string $url
- * @param array $parameters
- * @return stdClass
- */
- public function get(string $url, array $parameters = []): stdClass
- {
- if (!empty($parameters)) {
- $url .= '?' . http_build_query($parameters);
- }
-
- try {
- $response = $this->client->get($url, ['auth' => 'oauth']);
- } catch (RequestException $exception) {
- $response = $exception->getResponse();
- Logger::notice('Get failed', ['code' => $exception->getCode(), 'message' => $exception->getMessage()]);
- }
-
- return $this->formatResponse($response);
- }
-
- /**
- * OAuth Post to a given url with given parameters
- *
- * @param string $url
- * @param array $parameter
- * @return stdClass
- */
- public function post(string $url, array $parameter): stdClass
- {
- try {
- $response = $this->client->post($url, ['auth' => 'oauth', 'json' => $parameter]);
- } catch (RequestException $exception) {
- $response = $exception->getResponse();
- Logger::notice('Post failed', ['code' => $exception->getCode(), 'message' => $exception->getMessage()]);
- }
-
- return $this->formatResponse($response);
- }
-
- /**
- * Convert the body in the given response to a class
- *
- * @param ResponseInterface|null $response
- * @return stdClass
- */
- private function formatResponse(ResponseInterface $response = null): stdClass
- {
- if (!is_null($response)) {
- $content = $response->getBody()->getContents();
- if (!empty($content)) {
- $result = json_decode($content);
- }
- }
-
- if (empty($result) || empty($result->meta)) {
- $result = new stdClass;
- $result->meta = new stdClass;
- $result->meta->status = 500;
- $result->meta->msg = '';
- $result->response = [];
- $result->errors = [];
- }
- return $result;
- }
-}
\ No newline at end of file
diff --git a/tumblr/tumblr.php b/tumblr/tumblr.php
index 6ee9dca6..2efa70b3 100644
--- a/tumblr/tumblr.php
+++ b/tumblr/tumblr.php
@@ -7,8 +7,6 @@
* Author: Michael Vogel
*/
-require_once __DIR__ . DIRECTORY_SEPARATOR . 'library' . DIRECTORY_SEPARATOR . 'tumblroauth.php';
-
use Friendica\Content\PageInfo;
use Friendica\Content\Text\BBCode;
use Friendica\Content\Text\HTML;
@@ -27,10 +25,17 @@ use Friendica\Model\ItemURI;
use Friendica\Model\Photo;
use Friendica\Model\Post;
use Friendica\Model\Tag;
+use Friendica\Network\HTTPClient\Capability\ICanHandleHttpResponses;
+use Friendica\Network\HTTPClient\Client\HttpClientAccept;
+use Friendica\Network\HTTPClient\Client\HttpClientOptions;
use Friendica\Protocol\Activity;
use Friendica\Util\DateTimeFormat;
use Friendica\Util\Network;
use Friendica\Util\Strings;
+use GuzzleHttp\Client;
+use GuzzleHttp\Exception\RequestException;
+use GuzzleHttp\HandlerStack;
+use GuzzleHttp\Subscriber\Oauth\Oauth1;
define('TUMBLR_DEFAULT_POLL_INTERVAL', 10); // given in minutes
@@ -42,7 +47,7 @@ function tumblr_install()
Hook::register('jot_networks', __FILE__, 'tumblr_jot_nets');
Hook::register('connector_settings', __FILE__, 'tumblr_settings');
Hook::register('connector_settings_post', __FILE__, 'tumblr_settings_post');
- Hook::register('cron' , __FILE__, 'tumblr_cron');
+ Hook::register('cron', __FILE__, 'tumblr_cron');
}
/**
@@ -58,28 +63,28 @@ function tumblr_content()
{
if (!DI::userSession()->getLocalUserId()) {
DI::sysmsg()->addNotice(DI::l10n()->t('Permission denied.'));
- return '';
+ return;
}
- if (!isset(DI::args()->getArgv()[1])) {
- DI::baseUrl()->redirect('settings/connectors/tumblr');
- }
-
- switch (DI::args()->getArgv()[1]) {
+ switch (DI::args()->getArgv()[1] ?? '') {
case 'connect':
- $o = tumblr_connect();
+ tumblr_connect();
break;
- case 'callback':
- $o = tumblr_callback();
- break;
-
- default:
- DI::baseUrl()->redirect('settings/connectors/tumblr');
+ case 'redirect':
+ tumblr_redirect();
break;
}
+ DI::baseUrl()->redirect('settings/connectors/tumblr');
+}
- return $o;
+/**
+ * Handle the redirect back from Tumblr's OAuth2 authorization page
+ *
+ * Compares the "state" request parameter with the value that tumblr_connect()
+ * stored in the session. When both match, the "code" request parameter is
+ * handed to tumblr_get_token() for the local user.
+ *
+ * @return void
+ */
+function tumblr_redirect()
+{
+    $expected = DI::session()->get('oauth_state');
+    $received = $_REQUEST['state'] ?? '';
+
+    // The state is meant for a single round trip, so it is dropped regardless of the outcome
+    DI::session()->remove('oauth_state');
+
+    // A loose "!=" treats a missing session value and a missing request parameter as equal.
+    // So a stored, non-empty state is required and the values are compared in constant time.
+    if (!is_string($expected) || ($expected === '') || !is_string($received) || !hash_equals($expected, $received)) {
+        return;
+    }
+
+    // Request parameters can be arrays, tumblr_get_token() only accepts a string
+    $code = $_REQUEST['code'] ?? '';
+
+    tumblr_get_token(DI::userSession()->getLocalUserId(), is_string($code) ? $code : '');
}
function tumblr_connect()
@@ -89,81 +94,20 @@ function tumblr_connect()
$consumer_secret = DI::config()->get('tumblr', 'consumer_secret');
if (empty($consumer_key) || empty($consumer_secret)) {
- DI::baseUrl()->redirect('settings/connectors/tumblr');
+ return;
}
- // The callback URL is the script that gets called after the user authenticates with tumblr
- // In this example, it would be the included callback.php
- $callback_url = DI::baseUrl() . '/tumblr/callback';
+ $state = base64_encode(random_bytes(20));
+ DI::session()->set('oauth_state', $state);
- // Let's begin. First we need a Request Token. The request token is required to send the user
- // to Tumblr's login page.
+ $parameters = [
+ 'client_id' => $consumer_key,
+ 'response_type' => 'code',
+ 'scope' => 'basic write offline_access',
+ 'state' => $state
+ ];
- // Create a new instance of the TumblrOAuth library. For this step, all we need to give the library is our
- // Consumer Key and Consumer Secret
- $tum_oauth = new TumblrOAuth($consumer_key, $consumer_secret);
-
- // Ask Tumblr for a Request Token. Specify the Callback URL here too (although this should be optional)
- $request_token = $tum_oauth->getRequestToken($callback_url);
-
- if (empty($request_token)) {
- // Give an error message
- return DI::l10n()->t('Could not connect to Tumblr. Refresh the page or try again later.');
- }
-
- // Store the request token and Request Token Secret as out callback.php script will need this
- DI::session()->set('request_token', $request_token['oauth_token']);
- DI::session()->set('request_token_secret', $request_token['oauth_token_secret']);
-
- // Ask Tumblr to give us a special address to their login page
- $url = $tum_oauth->getAuthorizeURL($request_token['oauth_token']);
-
- // Redirect the user to the login URL given to us by Tumblr
- System::externalRedirect($url);
-
- /*
- * That's it for our side. The user is sent to a Tumblr Login page and
- * asked to authroize our app. After that, Tumblr sends the user back to
- * our Callback URL (callback.php) along with some information we need to get
- * an access token.
- */
-}
-
-function tumblr_callback()
-{
- // Define the needed keys
- $consumer_key = DI::config()->get('tumblr', 'consumer_key');
- $consumer_secret = DI::config()->get('tumblr', 'consumer_secret');
-
- if (empty($_REQUEST['oauth_verifier']) || empty($consumer_key) || empty($consumer_secret)) {
- DI::baseUrl()->redirect('settings/connectors/tumblr');
- }
-
- // Once the user approves your app at Tumblr, they are sent back to this script.
- // This script is passed two parameters in the URL, oauth_token (our Request Token)
- // and oauth_verifier (Key that we need to get Access Token).
- // We'll also need out Request Token Secret, which we stored in a session.
-
- // Create instance of TumblrOAuth.
- // It'll need our Consumer Key and Secret as well as our Request Token and Secret
- $tum_oauth = new TumblrOAuth($consumer_key, $consumer_secret);
-
- // Ok, let's get an Access Token. We'll need to pass along our oauth_verifier which was given to us in the URL.
- $access_token = $tum_oauth->getAccessToken($_REQUEST['oauth_verifier'], DI::session()->get('request_token'), DI::session()->get('request_token_secret'));
-
- // We're done with the Request Token and Secret so let's remove those.
- DI::session()->remove('request_token');
- DI::session()->remove('request_token_secret');
-
- if (empty($access_token)) {
- return DI::l10n()->t('Unable to authenticate');
- }
-
- // What's next? Now that we have an Access Token and Secret, we can make an API call.
- DI::pConfig()->set(DI::userSession()->getLocalUserId(), 'tumblr', 'oauth_token', $access_token['oauth_token']);
- DI::pConfig()->set(DI::userSession()->getLocalUserId(), 'tumblr', 'oauth_token_secret', $access_token['oauth_token_secret']);
-
- DI::baseUrl()->redirect('settings/connectors/tumblr');
+ System::externalRedirect('https://www.tumblr.com/oauth2/authorize?' . http_build_query($parameters));
}
function tumblr_addon_admin(string &$o)
@@ -172,7 +116,6 @@ function tumblr_addon_admin(string &$o)
$o = Renderer::replaceMacros($t, [
'$submit' => DI::l10n()->t('Save Settings'),
- // name, label, value, help, [extra values]
'$consumer_key' => ['consumer_key', DI::l10n()->t('Consumer Key'), DI::config()->get('tumblr', 'consumer_key'), ''],
'$consumer_secret' => ['consumer_secret', DI::l10n()->t('Consumer Secret'), DI::config()->get('tumblr', 'consumer_secret'), ''],
]);
@@ -201,9 +144,6 @@ function tumblr_settings(array &$data)
if (!empty($blogs)) {
DI::cache()->set($cachekey, $blogs, Duration::HALF_HOUR);
}
- } elseif (empty(tumblr_connection(DI::userSession()->getLocalUserId()))) {
- $blogs = null;
- DI::cache()->delete($cachekey);
}
if (!empty($blogs)) {
@@ -331,7 +271,7 @@ function tumblr_hook_fork(array &$b)
return;
}
} elseif (!strstr($post['postopts'] ?? '', 'tumblr') || ($post['parent'] != $post['id']) || $post['private']) {
- DI::logger()->info('Activities are never exported when we don\'t import the tumblr timeline');
+ DI::logger()->info('Activities are never exported when we don\'t import the tumblr timeline', ['uid' => $post['uid']]);
$b['execute'] = false;
return;
}
@@ -387,11 +327,6 @@ function tumblr_send(array &$b)
Logger::debug('Parent found', ['parent' => $parent]);
- $connection = tumblr_connection($b['uid']);
- if (empty($connection)) {
- return;
- }
-
$page = tumblr_get_page($b['uid']);
if ($b['gravity'] == Item::GRAVITY_COMMENT) {
@@ -399,20 +334,20 @@ function tumblr_send(array &$b)
} else {
if (($b['verb'] == Activity::LIKE) && !$b['deleted']) {
$params = ['id' => $parent['id'], 'reblog_key' => $parent['reblog_key']];
- $result = $connection->post('user/like', $params);
+ $result = tumblr_post($b['uid'], 'user/like', $params);
} elseif (($b['verb'] == Activity::LIKE) && $b['deleted']) {
$params = ['id' => $parent['id'], 'reblog_key' => $parent['reblog_key']];
- $result = $connection->post('user/unlike', $params);
+ $result = tumblr_post($b['uid'], 'user/unlike', $params);
} elseif (($b['verb'] == Activity::ANNOUNCE) && !$b['deleted']) {
$params = ['id' => $parent['id'], 'reblog_key' => $parent['reblog_key']];
- $result = $connection->post('blog/' . $page . '/post/reblog', $params);
+ $result = tumblr_post($b['uid'], 'blog/' . $page . '/post/reblog', $params);
} elseif (($b['verb'] == Activity::ANNOUNCE) && $b['deleted']) {
$announce = tumblr_get_post_from_uri($b['extid']);
if (empty($announce)) {
return;
}
$params = ['id' => $announce['id']];
- $result = $connection->post('blog/' . $page . '/post/delete', $params);
+ $result = tumblr_post($b['uid'], 'blog/' . $page . '/post/delete', $params);
} else {
// Unsupported activity
return;
@@ -439,11 +374,6 @@ function tumblr_send(array &$b)
function tumblr_send_legacy(array $b)
{
- $connection = tumblr_connection($b['uid']);
- if (empty($connection)) {
- return;
- }
-
$b['body'] = BBCode::removeAttachment($b['body']);
$title = trim($b['title']);
@@ -515,7 +445,7 @@ function tumblr_send_legacy(array $b)
$page = tumblr_get_page($b['uid']);
- $result = $connection->post('blog/' . $page . '/post', $params);
+ $result = tumblr_post($b['uid'], 'blog/' . $page . '/post', $params);
if ($result->meta->status < 400) {
Logger::info('Success (legacy)', ['blog' => $page, 'meta' => $result->meta, 'response' => $result->response]);
@@ -528,7 +458,6 @@ function tumblr_send_npf(array $post): bool
{
$page = tumblr_get_page($post['uid']);
- $connection = tumblr_connection($post['uid']);
if (empty($page)) {
Logger::notice('Missing page, post will not be send to Tumblr.', ['uid' => $post['uid'], 'page' => $page, 'id' => $post['id']]);
// "true" is returned, since the legacy function will fail as well.
@@ -549,7 +478,7 @@ function tumblr_send_npf(array $post): bool
'interactability_reblog' => 'everyone'
];
- $result = $connection->post('blog/' . $page . '/posts', $params);
+ $result = tumblr_post($post['uid'], 'blog/' . $page . '/posts', $params);
if ($result->meta->status < 400) {
Logger::info('Success (NPF)', ['blog' => $page, 'meta' => $result->meta, 'response' => $result->response]);
@@ -566,8 +495,8 @@ function tumblr_get_post_from_uri(string $uri): array
if (($parts[0] != 'tumblr') || empty($parts[2])) {
return [];
}
-
- $post ['id'] = $parts[2];
+
+ $post['id'] = $parts[2];
$post['reblog_key'] = $parts[3] ?? '';
$post['reblog_key'] = str_replace('@t', '', $post['reblog_key']); // Temp
@@ -591,8 +520,7 @@ function tumblr_fetch_dashboard(int $uid)
$parameters['since_id'] = $last;
}
- $connection = tumblr_connection($uid);
- $dashboard = $connection->get('user/dashboard', $parameters);
+ $dashboard = tumblr_get($uid, 'user/dashboard', $parameters);
if ($dashboard->meta->status > 399) {
Logger::notice('Error fetching dashboard', ['meta' => $dashboard->meta, 'response' => $dashboard->response, 'errors' => $dashboard->errors]);
return [];
@@ -719,23 +647,23 @@ function tumblr_get_content(array $item, stdClass $post): array
$item['body'] = HTML::toBBCode($post->caption);
if (!empty($post->video_url)) {
$item['body'] .= "\n[video]" . $post->video_url . "[/video]\n";
- } elseif(!empty($post->thumbnail_url)) {
- $item['body'] .= "\n[url=" . $post->permalink_url ."][img]" . $post->thumbnail_url . "[/img][/url]\n";
- } elseif(!empty($post->permalink_url)) {
- $item['body'] .= "\n[url]" . $post->permalink_url ."[/url]\n";
- } elseif(!empty($post->source_url) && !empty($post->source_title)) {
- $item['body'] .= "\n[url=" . $post->source_url ."]" . $post->source_title . "[/url]\n";
- } elseif(!empty($post->source_url)) {
- $item['body'] .= "\n[url]" . $post->source_url ."[/url]\n";
+ } elseif (!empty($post->thumbnail_url)) {
+ $item['body'] .= "\n[url=" . $post->permalink_url . "][img]" . $post->thumbnail_url . "[/img][/url]\n";
+ } elseif (!empty($post->permalink_url)) {
+ $item['body'] .= "\n[url]" . $post->permalink_url . "[/url]\n";
+ } elseif (!empty($post->source_url) && !empty($post->source_title)) {
+ $item['body'] .= "\n[url=" . $post->source_url . "]" . $post->source_title . "[/url]\n";
+ } elseif (!empty($post->source_url)) {
+ $item['body'] .= "\n[url]" . $post->source_url . "[/url]\n";
}
break;
case 'audio':
$item['body'] = HTML::toBBCode($post->caption);
- if(!empty($post->source_url) && !empty($post->source_title)) {
- $item['body'] .= "\n[url=" . $post->source_url ."]" . $post->source_title . "[/url]\n";
- } elseif(!empty($post->source_url)) {
- $item['body'] .= "\n[url]" . $post->source_url ."[/url]\n";
+ if (!empty($post->source_url) && !empty($post->source_title)) {
+ $item['body'] .= "\n[url=" . $post->source_url . "]" . $post->source_title . "[/url]\n";
+ } elseif (!empty($post->source_url)) {
+ $item['body'] .= "\n[url]" . $post->source_url . "[/url]\n";
}
break;
@@ -843,7 +771,7 @@ function tumblr_get_type_replacement(array $data, string $plink): string
{
switch ($data['type']) {
case 'poll':
- $body = '[p][url=' . $plink. ']'. $data['question'] . '[/url][/p][ul]';
+ $body = '[p][url=' . $plink . ']' . $data['question'] . '[/url][/p][ul]';
foreach ($data['answers'] as $answer) {
$body .= '[li]' . $answer['answer_text'] . '[/li]';
}
@@ -859,7 +787,7 @@ function tumblr_get_type_replacement(array $data, string $plink): string
$body = '[video]' . $data['url'] . '[/video]';
break;
}
-
+
default:
Logger::notice('Unknown type', ['type' => $data['type'], 'data' => $data, 'plink' => $plink]);
$body = '';
@@ -947,8 +875,7 @@ function tumblr_insert_contact(stdClass $blog, int $uid)
*/
function tumblr_update_contact(stdClass $blog, int $uid, int $cid, int $pcid)
{
- $connection = tumblr_connection($uid);
- $info = $connection->get('blog/' . $blog->uuid . '/info');
+ $info = tumblr_get($uid, 'blog/' . $blog->uuid . '/info');
if ($info->meta->status > 399) {
Logger::notice('Error fetching dashboard', ['meta' => $info->meta, 'response' => $info->response, 'errors' => $info->errors]);
return;
@@ -1029,12 +956,7 @@ function tumblr_get_page(int $uid, array $blogs = []): string
*/
function tumblr_get_blogs(int $uid): array
{
- $connection = tumblr_connection($uid);
- if (empty($connection)) {
- return [];
- }
-
- $userinfo = $connection->get('user/info');
+ $userinfo = tumblr_get($uid, 'user/info');
if ($userinfo->meta->status > 299) {
Logger::notice('Error fetching blogs', ['meta' => $userinfo->meta, 'response' => $userinfo->response, 'errors' => $userinfo->errors]);
return [];
@@ -1048,12 +970,131 @@ function tumblr_get_blogs(int $uid): array
}
/**
- * Creates a OAuth connection for the given user
+ * Perform an OAuth2 GET request
*
* @param integer $uid
- * @return TumblrOAuth|null
+ * @param string $url
+ * @param array $parameters
+ * @return stdClass
*/
-function tumblr_connection(int $uid): ?TumblrOAuth
+function tumblr_get(int $uid, string $url, array $parameters = []): stdClass
+{
+ // $url is a path below the API base, e.g. 'user/info'
+ $url = 'https://api.tumblr.com/v2/' . $url;
+
+ // Optional parameters are appended as a URL encoded query string
+ if (!empty($parameters)) {
+ $url .= '?' . http_build_query($parameters);
+ }
+
+ // tumblr_get_token() delivers the bearer token; it returns an empty string when no token could be obtained
+ $curlResult = DI::httpClient()->get($url, HttpClientAccept::JSON, [HttpClientOptions::HEADERS => ['Authorization' => ['Bearer ' . tumblr_get_token($uid)]]]);
+ // tumblr_format_result() guarantees that the returned object has a "meta" property
+ return tumblr_format_result($curlResult);
+}
+
+/**
+ * Perform an OAuth2 POST request
+ *
+ * The bearer token is fetched via tumblr_get_token(), the response body is
+ * converted via tumblr_format_result().
+ *
+ * @param integer $uid
+ * @param string $url        Path below "https://api.tumblr.com/v2/", e.g. 'user/like'
+ * @param array $parameters  Handed over as second argument of the HTTP client's post() call
+ * @return stdClass          Object that always has a "meta" property (see tumblr_format_result())
+ */
+function tumblr_post(int $uid, string $url, array $parameters): stdClass
+{
+ $url = 'https://api.tumblr.com/v2/' . $url;
+
+ // tumblr_get_token() returns an empty string when no token could be obtained
+ $curlResult = DI::httpClient()->post($url, $parameters, ['Authorization' => ['Bearer ' . tumblr_get_token($uid)]]);
+ return tumblr_format_result($curlResult);
+}
+
+/**
+ * Format the get/post result value
+ *
+ * The response body is decoded as JSON. When the decoded value is empty or has
+ * no "meta" entry, a replacement object with the status 500, an empty message
+ * and empty "response" and "errors" arrays is returned instead.
+ *
+ * @param ICanHandleHttpResponses $curlResult
+ * @return stdClass Decoded body or the replacement object, in both cases with a "meta" property
+ */
+function tumblr_format_result(ICanHandleHttpResponses $curlResult): stdClass
+{
+ $result = json_decode($curlResult->getBody());
+ // empty() doesn't complain when the decoded value is no object
+ if (empty($result) || empty($result->meta)) {
+ // Replacement with the same properties that the callers read: meta->status, meta->msg, response, errors
+ $result = new stdClass;
+ $result->meta = new stdClass;
+ $result->meta->status = 500;
+ $result->meta->msg = '';
+ $result->response = [];
+ $result->errors = [];
+ }
+ return $result;
+}
+
+/**
+ * Fetch the OAuth2 access token of the given user, renew it if needed
+ *
+ * A stored token that hasn't expired is returned as it is. Otherwise a new token is requested:
+ * - with the authorization code, when $code is provided,
+ * - with the stored refresh token, when there is one,
+ * - via tumblr_exchange_token() (OAuth1 to OAuth2), when neither is available.
+ * The new access token, its expiry time and the refresh token are stored in the user configuration.
+ *
+ * @param integer $uid
+ * @param string $code Authorization code from the OAuth2 redirect (optional)
+ * @return string The access token, an empty string when none could be obtained
+ */
+function tumblr_get_token(int $uid, string $code = ''): string
+{
+    $access_token  = DI::pConfig()->get($uid, 'tumblr', 'access_token');
+    $expires_at    = DI::pConfig()->get($uid, 'tumblr', 'expires_at');
+    $refresh_token = DI::pConfig()->get($uid, 'tumblr', 'refresh_token');
+
+    if (empty($code) && !empty($access_token) && ($expires_at > (time()))) {
+        Logger::debug('Got token', ['uid' => $uid, 'expires_at' => date('c', $expires_at)]);
+        return $access_token;
+    }
+
+    $consumer_key    = DI::config()->get('tumblr', 'consumer_key');
+    $consumer_secret = DI::config()->get('tumblr', 'consumer_secret');
+
+    $parameters = ['client_id' => $consumer_key, 'client_secret' => $consumer_secret];
+
+    if (empty($refresh_token) && empty($code)) {
+        $result = tumblr_exchange_token($uid);
+        if (empty($result)) {
+            Logger::info('Invalid result while exchanging token', ['uid' => $uid]);
+            return '';
+        }
+        $message = 'Updated token from OAuth1 to OAuth2';
+    } else {
+        if (!empty($code)) {
+            $parameters['code']       = $code;
+            $parameters['grant_type'] = 'authorization_code';
+        } else {
+            $parameters['refresh_token'] = $refresh_token;
+            $parameters['grant_type']    = 'refresh_token';
+        }
+
+        $curlResult = DI::httpClient()->post('https://api.tumblr.com/v2/oauth2/token', $parameters);
+        if (!$curlResult->isSuccess()) {
+            // The parameters contain the client secret and the refresh token or code, so only the grant type is logged
+            Logger::info('Error fetching token', ['uid' => $uid, 'grant_type' => $parameters['grant_type'], 'result' => $curlResult->getBody()]);
+            return '';
+        }
+
+        $result = json_decode($curlResult->getBody());
+        if (empty($result)) {
+            Logger::info('Invalid result when updating token', ['uid' => $uid]);
+            return '';
+        }
+        $message = 'Renewed token';
+    }
+
+    // Both ways can deliver a value without a usable token, e.g. an empty object or an error document
+    if (!($result instanceof stdClass) || empty($result->access_token) || !is_string($result->access_token)) {
+        Logger::info('Access token is missing in the result', ['uid' => $uid]);
+        return '';
+    }
+
+    // Without "expires_in" the token counts as expired, so that the next call renews it
+    $expires_at = time() + (int)($result->expires_in ?? 0);
+    Logger::debug($message, ['uid' => $uid, 'expires_at' => date('c', $expires_at)]);
+
+    DI::pConfig()->set($uid, 'tumblr', 'access_token', $result->access_token);
+    DI::pConfig()->set($uid, 'tumblr', 'expires_at', $expires_at);
+
+    // A stored refresh token is only replaced when the response provides a new one
+    if (!empty($result->refresh_token)) {
+        DI::pConfig()->set($uid, 'tumblr', 'refresh_token', $result->refresh_token);
+    }
+
+    return $result->access_token;
+}
+
+/**
+ * Create an OAuth2 token out of the stored OAuth1 token via the "oauth2/exchange" endpoint
+ *
+ * @param int $uid
+ * @return stdClass Decoded response; an empty object when the request failed or the body is no JSON object
+ */
+function tumblr_exchange_token(int $uid): stdClass
{
$oauth_token = DI::pConfig()->get($uid, 'tumblr', 'oauth_token');
$oauth_token_secret = DI::pConfig()->get($uid, 'tumblr', 'oauth_token_secret');
@@ -1061,10 +1102,27 @@ function tumblr_connection(int $uid): ?TumblrOAuth
$consumer_key = DI::config()->get('tumblr', 'consumer_key');
$consumer_secret = DI::config()->get('tumblr', 'consumer_secret');
- if (!$consumer_key || !$consumer_secret || !$oauth_token || !$oauth_token_secret) {
- Logger::notice('Missing data, connection is not established', ['uid' => $uid]);
- return null;
- }
+ $stack = HandlerStack::create();
- return new TumblrOAuth($consumer_key, $consumer_secret, $oauth_token, $oauth_token_secret);
-}
\ No newline at end of file
+ $middleware = new Oauth1([
+ 'consumer_key' => $consumer_key,
+ 'consumer_secret' => $consumer_secret,
+ 'token' => $oauth_token,
+ 'token_secret' => $oauth_token_secret
+ ]);
+
+ $stack->push($middleware);
+
+ try {
+ $client = new Client([
+ 'base_uri' => 'https://api.tumblr.com/v2/',
+ 'handler' => $stack
+ ]);
+ $response = $client->post('oauth2/exchange', ['auth' => 'oauth']);
+ $result = json_decode($response->getBody()->getContents());
+ return ($result instanceof stdClass) ? $result : new stdClass;
+ } catch (RequestException $exception) {
+ Logger::notice('Exchange failed', ['code' => $exception->getCode(), 'message' => $exception->getMessage()]);
+ return new stdClass;
+ }
+}
From 4c3b4dea9671a706551178f82079d09878a4f221 Mon Sep 17 00:00:00 2001
From: Michael
Date: Tue, 25 Apr 2023 18:38:18 +0000
Subject: [PATCH 006/527] Improved result check
---
tumblr/tumblr.php | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tumblr/tumblr.php b/tumblr/tumblr.php
index 2efa70b3..2aa80f3b 100644
--- a/tumblr/tumblr.php
+++ b/tumblr/tumblr.php
@@ -1050,7 +1050,7 @@ function tumblr_get_token(int $uid, string $code = ''): string
if (empty($refresh_token) && empty($code)) {
$result = tumblr_exchange_token($uid);
- if (empty($result)) {
+ if (empty($result->refresh_token)) {
Logger::info('Invalid result while exchanging token', ['uid' => $uid]);
return '';
}
From 5307ef3a58611104b305a9e6f560d1e4d20d1410 Mon Sep 17 00:00:00 2001
From: Michael
Date: Tue, 25 Apr 2023 18:47:54 +0000
Subject: [PATCH 007/527] Updated readme
---
tumblr/README.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tumblr/README.md b/tumblr/README.md
index 57ca145a..70b0157b 100644
--- a/tumblr/README.md
+++ b/tumblr/README.md
@@ -1,8 +1,8 @@
Installation
------------
-[Register](http://www.tumblr.com/oauth/apps) an application and use (your server name)/addon/tumblr/callback.php as
-callback URL
+[Register](http://www.tumblr.com/oauth/apps) an application and use (your server name)/tumblr/callback as
+callback URL and (your server name)/tumblr/redirect as OAuth2 redirect URL.
After the registration please enter the values for "Consumer Key" and "Consumer Secret" in the [administration](admin/addons/tumblr).
From 477e646fbaa6e12bff60da44aa5cca64e8d6318e Mon Sep 17 00:00:00 2001
From: Michael
Date: Tue, 25 Apr 2023 18:48:58 +0000
Subject: [PATCH 008/527] Further updated the readme
---
tumblr/README.md | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/tumblr/README.md b/tumblr/README.md
index 70b0157b..98ff2161 100644
--- a/tumblr/README.md
+++ b/tumblr/README.md
@@ -4,9 +4,4 @@ Installation
[Register](http://www.tumblr.com/oauth/apps) an application and use (your server name)/tumblr/callback as
callback URL and (your server name)/tumblr/redirect as OAuth2 redirect URL.
-After the registration please enter the values for "Consumer Key" and "Consumer Secret" in the [administration](admin/addons/tumblr).
-
-Notice
-------
-This connector is using the Tumblr-OAuth-Library:
-[https://groups.google.com/d/msg/tumblr-api/g6SeIBWvsnE/gnWqT9jFSlEJ](https://groups.google.com/d/msg/tumblr-api/g6SeIBWvsnE/gnWqT9jFSlEJ)
+After the registration please enter the values for "Consumer Key" and "Consumer Secret" in the [administration](admin/addons/tumblr).
\ No newline at end of file
From b994de33082d23fe40a0a894b86e58bac3a4acb6 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sun, 7 Jul 2019 14:45:23 +0100
Subject: [PATCH 009/527] Latest version of retriever
---
retriever/database.sql | 40 ++
retriever/retriever.php | 832 ++++++++++++++++++++++++++++
retriever/templates/extract.tpl | 32 ++
retriever/templates/fix-urls.tpl | 26 +
retriever/templates/help.tpl | 148 +++++
retriever/templates/rule-config.tpl | 112 ++++
retriever/templates/settings.tpl | 9 +
7 files changed, 1199 insertions(+)
create mode 100644 retriever/database.sql
create mode 100644 retriever/retriever.php
create mode 100644 retriever/templates/extract.tpl
create mode 100644 retriever/templates/fix-urls.tpl
create mode 100644 retriever/templates/help.tpl
create mode 100644 retriever/templates/rule-config.tpl
create mode 100644 retriever/templates/settings.tpl
diff --git a/retriever/database.sql b/retriever/database.sql
new file mode 100644
index 00000000..340e33eb
--- /dev/null
+++ b/retriever/database.sql
@@ -0,0 +1,43 @@
+-- Retrieval rules: one JSON encoded rule set ("data") per user ("uid") and contact ("contact-id")
+CREATE TABLE IF NOT EXISTS `retriever_rule` (
+ `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
+ `uid` int(11) NOT NULL,
+ `contact-id` int(11) NOT NULL,
+ `data` mediumtext NULL DEFAULT NULL,
+ PRIMARY KEY (`id`),
+ KEY `uid` (`uid`),
+ KEY `contact-id` (`contact-id`)
+) DEFAULT CHARSET=utf8 COLLATE=utf8_bin;
+
+-- Items that are linked to a resource ("resource" presumably holds a `retriever_resource`.`id` - to be confirmed)
+CREATE TABLE IF NOT EXISTS `retriever_item` (
+ `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
+ `item-uri` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL,
+ `item-uid` int(10) unsigned NOT NULL DEFAULT '0',
+ `contact-id` int(10) unsigned NOT NULL DEFAULT '0',
+ `resource` int(11) NOT NULL,
+ `finished` tinyint(1) unsigned NOT NULL DEFAULT '0',
+ KEY `resource` (`resource`),
+ KEY `finished` (`finished`),
+ KEY `item-uid` (`item-uid`),
+ KEY `all` (`item-uri`, `item-uid`, `contact-id`),
+ PRIMARY KEY (`id`)
+) DEFAULT CHARSET=utf8 COLLATE=utf8_bin;
+
+-- Resources by URL with their fetch state (tries, HTTP code, redirect) and the fetched data
+CREATE TABLE IF NOT EXISTS `retriever_resource` (
+ `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
+ `type` char(255) NULL DEFAULT NULL,
+ `binary` int(1) NOT NULL DEFAULT 0,
+ `url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL,
+ `created` timestamp NOT NULL DEFAULT now(),
+ `completed` timestamp NULL DEFAULT NULL,
+ `last-try` timestamp NULL DEFAULT NULL,
+ `num-tries` int(11) NOT NULL DEFAULT 0,
+ `data` mediumblob NULL DEFAULT NULL,
+ `http-code` smallint(1) unsigned NULL DEFAULT NULL,
+ `redirect-url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NULL DEFAULT NULL,
+ KEY `url` (`url`),
+ KEY `completed` (`completed`),
+ PRIMARY KEY (`id`)
+) DEFAULT CHARSET=utf8 COLLATE=utf8_bin;
diff --git a/retriever/retriever.php b/retriever/retriever.php
new file mode 100644
index 00000000..78fe575f
--- /dev/null
+++ b/retriever/retriever.php
@@ -0,0 +1,832 @@
+
+ * Status: Unsupported
+ */
+
+use Friendica\Core\Addon;
+use Friendica\Core\Config;
+use Friendica\Core\PConfig;
+use Friendica\Content\Text\HTML;
+use Friendica\Content\Text\BBCode;
+use Friendica\Object\Image;
+use Friendica\Util\Network;
+use Friendica\Core\L10n;
+use Friendica\Database\DBA;
+
+/**
+ * Tells whether the stored schema version equals $version.
+ *
+ * The comparison is strict on purpose: with ==, PHP compares numeric strings
+ * as numbers, so '0.10' == '0.1' is true and an installation at 0.10 would
+ * be treated as 0.1 and replay every migration.
+ *
+ * @param string $version Version string such as '0.10'.
+ * @return bool
+ */
+function retriever_dbversion_is($version) {
+	return (string)Config::get('retriever', 'dbversion') === $version;
+}
+
+/**
+ * Converts an old '|'-separated XPath spec into a list of
+ * element/attribute/value clauses.
+ *
+ * Two component forms are recognised: element[@attribute='value'] and the
+ * class test element[contains(concat(' ',normalize-space(@class),' '),' value ')].
+ * Components matching neither form are dropped.
+ *
+ * @param string $spec Old-style 'match' or 'remove' setting.
+ * @return array List of arrays with keys 'element', 'attribute', 'value'.
+ */
+function retriever_install_parse_clauses($spec) {
+	$clauses = array();
+	foreach (explode('|', $spec) as $component) {
+		$matches = array();
+		if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[@([A-Za-z][a-z0-9]*)='([^']*)'\]/", $component, $matches)) {
+			$clauses[] = array(
+				'element' => $matches[1],
+				'attribute' => $matches[2],
+				'value' => $matches[3]);
+		}
+		// The parentheses of the XPath expression are literal and must be
+		// escaped; the only capture groups are the element and the class name.
+		if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[contains\(concat\(' ',normalize-space\(@class\),' '\),' ([^ ']+) '\)\]/", $component, $matches)) {
+			$clauses[] = array(
+				'element' => $matches[1],
+				'attribute' => 'class',
+				'value' => $matches[2]);
+		}
+	}
+	return $clauses;
+}
+
+/**
+ * Registers the addon hooks and brings the retriever tables to schema
+ * version 0.12.
+ *
+ * Existing installations are migrated one version at a time, each step
+ * storing the new `dbversion`. If the version is still not 0.12 afterwards,
+ * the statements in database.sql are executed.
+ */
+function retriever_install() {
+	Addon::registerHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings');
+	Addon::registerHook('plugin_settings_post', 'addon/retriever/retriever.php', 'retriever_plugin_settings_post');
+	Addon::registerHook('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook');
+	Addon::registerHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu');
+	Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron');
+
+	// Oldest format: rules stored as pconfig entries, one category per contact.
+	$r = q("SELECT `id` FROM `pconfig` WHERE `cat` LIKE 'retriever_%%'");
+	if ((is_array($r) && count($r)) || retriever_dbversion_is('0.1')) {
+		$retrievers = array();
+		$r = q("SELECT SUBSTRING(`cat`, 10) AS `contact`, `k`, `v` FROM `pconfig` WHERE `cat` LIKE 'retriever%%'");
+		if (is_array($r)) {
+			foreach ($r as $rr) {
+				$retrievers[$rr['contact']][$rr['k']] = $rr['v'];
+			}
+		}
+		foreach ($retrievers as $k => $v) {
+			$rr = q("SELECT `uid` FROM `contact` WHERE `id` = %d", intval($k));
+			$uid = $rr[0]['uid'];
+			$v['images'] = 'on';
+			q("INSERT INTO `retriever_rule` (`uid`, `contact-id`, `data`) VALUES (%d, %d, '%s')",
+				intval($uid), intval($k), DBA::escape(json_encode($v)));
+		}
+		q("DELETE FROM `pconfig` WHERE `cat` LIKE 'retriever_%%'");
+		Config::set('retriever', 'dbversion', '0.2');
+	}
+	if (retriever_dbversion_is('0.2')) {
+		q("ALTER TABLE `retriever_resource` DROP COLUMN `retriever`");
+		Config::set('retriever', 'dbversion', '0.3');
+	}
+	if (retriever_dbversion_is('0.3')) {
+		q("ALTER TABLE `retriever_item` MODIFY COLUMN `item-uri` varchar(800) CHARACTER SET ascii NOT NULL");
+		q("ALTER TABLE `retriever_resource` MODIFY COLUMN `url` varchar(800) CHARACTER SET ascii NOT NULL");
+		Config::set('retriever', 'dbversion', '0.4');
+	}
+	if (retriever_dbversion_is('0.4')) {
+		q("ALTER TABLE `retriever_item` ADD COLUMN `finished` tinyint(1) unsigned NOT NULL DEFAULT '0'");
+		Config::set('retriever', 'dbversion', '0.5');
+	}
+	if (retriever_dbversion_is('0.5')) {
+		q('ALTER TABLE `retriever_resource` CHANGE `created` `created` timestamp NOT NULL DEFAULT now()');
+		q('ALTER TABLE `retriever_resource` CHANGE `completed` `completed` timestamp NULL DEFAULT NULL');
+		q('ALTER TABLE `retriever_resource` CHANGE `last-try` `last-try` timestamp NULL DEFAULT NULL');
+		q('ALTER TABLE `retriever_item` DROP KEY `all`');
+		q('ALTER TABLE `retriever_item` ADD KEY `all` (`item-uri`, `item-uid`, `contact-id`)');
+		Config::set('retriever', 'dbversion', '0.6');
+	}
+	if (retriever_dbversion_is('0.6')) {
+		q('ALTER TABLE `retriever_item` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin');
+		q('ALTER TABLE `retriever_item` CHANGE `item-uri` `item-uri` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL');
+		q('ALTER TABLE `retriever_resource` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin');
+		q('ALTER TABLE `retriever_resource` CHANGE `url` `url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL');
+		q('ALTER TABLE `retriever_rule` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin');
+		Config::set('retriever', 'dbversion', '0.7');
+	}
+	if (retriever_dbversion_is('0.7')) {
+		// Rewrite each rule: strip the slashes around 'pattern' and turn the
+		// XPath strings 'match'/'remove' into 'include'/'exclude' clause lists.
+		$rules = q("SELECT `id`, `data` FROM `retriever_rule`");
+		if (!is_array($rules)) {
+			$rules = array();
+		}
+		foreach ($rules as $rr) {
+			logger('retriever_install: retriever ' . $rr['id'] . ' old config ' . $rr['data'], LOGGER_DATA);
+			$data = json_decode($rr['data'], true);
+			if (!empty($data['pattern'])) {
+				$matches = array();
+				if (preg_match("/\/(.*)\//", $data['pattern'], $matches)) {
+					$data['pattern'] = $matches[1];
+				}
+			}
+			if (!empty($data['match'])) {
+				$data['include'] = retriever_install_parse_clauses($data['match']);
+				unset($data['match']);
+			}
+			if (!empty($data['remove'])) {
+				$data['exclude'] = retriever_install_parse_clauses($data['remove']);
+				unset($data['remove']);
+			}
+			q('UPDATE `retriever_rule` SET `data` = "%s" WHERE `id` = %d', DBA::escape(json_encode($data)), intval($rr['id']));
+			logger('retriever_install: retriever ' . $rr['id'] . ' new config ' . json_encode($data), LOGGER_DATA);
+		}
+		Config::set('retriever', 'dbversion', '0.8');
+	}
+	if (retriever_dbversion_is('0.8')) {
+		q("ALTER TABLE `retriever_resource` ADD COLUMN `http-code` smallint(1) unsigned NULL DEFAULT NULL");
+		Config::set('retriever', 'dbversion', '0.9');
+	}
+	if (retriever_dbversion_is('0.9')) {
+		q("ALTER TABLE `retriever_item` DROP COLUMN `parent`");
+		q("ALTER TABLE `retriever_resource` ADD COLUMN `redirect-url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NULL DEFAULT NULL");
+		Config::set('retriever', 'dbversion', '0.10');
+	}
+	if (retriever_dbversion_is('0.10')) {
+		q("ALTER TABLE `retriever_resource` MODIFY COLUMN `type` char(255) NULL DEFAULT NULL");
+		q("ALTER TABLE `retriever_resource` MODIFY COLUMN `data` mediumblob NULL DEFAULT NULL");
+		q("ALTER TABLE `retriever_rule` MODIFY COLUMN `data` mediumtext NULL DEFAULT NULL");
+		Config::set('retriever', 'dbversion', '0.11');
+	}
+	if (retriever_dbversion_is('0.11')) {
+		q("ALTER TABLE `retriever_resource` ADD INDEX `url` (`url`)");
+		q("ALTER TABLE `retriever_resource` ADD INDEX `completed` (`completed`)");
+		q("ALTER TABLE `retriever_item` ADD INDEX `finished` (`finished`)");
+		q("ALTER TABLE `retriever_item` ADD INDEX `item-uid` (`item-uid`)");
+		Config::set('retriever', 'dbversion', '0.12');
+	}
+	if (!retriever_dbversion_is('0.12')) {
+		// No migration path applied (e.g. fresh install): create the tables from the schema file.
+		$schema = file_get_contents(dirname(__FILE__).'/database.sql');
+		$statements = ($schema === false) ? array() : explode(';', $schema);
+		foreach ($statements as $statement) {
+			if (trim($statement) === '') {
+				continue;
+			}
+			q($statement);
+		}
+		Config::set('retriever', 'dbversion', '0.12');
+	}
+}
+
+/**
+ * Unregisters the five hooks of this addon, each exactly once.
+ */
+function retriever_uninstall() {
+	$hooks = array(
+		'plugin_settings' => 'retriever_plugin_settings',
+		'plugin_settings_post' => 'retriever_plugin_settings_post',
+		'post_remote' => 'retriever_post_remote_hook',
+		'contact_photo_menu' => 'retriever_contact_photo_menu',
+		'cron' => 'retriever_cron');
+	foreach ($hooks as $hook => $callback) {
+		Addon::unregisterHook($hook, 'addon/retriever/retriever.php', $callback);
+	}
+}
+
+function retriever_module() {}
+
+function retriever_cron($a, $b) {
+ // 100 is a nice sane number. Maybe this should be configurable.
+ retriever_retrieve_items(100, $a);
+ retriever_tidy();
+}
+
+$retriever_item_count = 0;
+
+/**
+ * Fetches resources that are due for an attempt, then applies resources that
+ * have already completed to retriever items still marked unfinished.
+ *
+ * @param int   $max_items Maximum number of resources to fetch; the number already
+ *                         fetched is tracked in the global $retriever_item_count.
+ * @param mixed $a         Application object, handed on to the functions that apply a resource.
+ */
+function retriever_retrieve_items($max_items, $a) {
+	global $retriever_item_count;
+
+	// Retry schedule: a resource with `num-tries` = N is due again once
+	// entry N of this list has elapsed since its `last-try`.
+	$retriever_schedule = array(array(1,'minute'),
+		array(10,'minute'),
+		array(1,'hour'),
+		array(1,'day'),
+		array(2,'day'),
+		array(1,'week'),
+		array(1,'month'));
+
+	$schedule_clauses = array();
+	foreach ($retriever_schedule as $i => $entry) {
+		$schedule_clauses[] = '(`num-tries` = ' . intval($i) . ' AND TIMESTAMPADD(' . DBA::escape($entry[1]) .
+			', ' . intval($entry[0]) . ', `last-try`) < now())';
+	}
+	// implode() takes the separator first; the array-first order was removed in PHP 8.
+	// The result is an SQL fragment built from the constants above, not a value to escape.
+	$schedule_sql = implode(' OR ', $schedule_clauses);
+
+	$retrieve_items = $max_items - $retriever_item_count;
+	logger('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items, LOGGER_DEBUG);
+	while ($retrieve_items > 0) {
+		$r = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d",
+			$schedule_sql,
+			intval($retrieve_items));
+		if (!is_array($r) || (count($r) == 0)) {
+			break;
+		}
+		logger('retriever_retrieve_items: found ' . count($r) . ' waiting resources in database', LOGGER_DEBUG);
+		foreach ($r as $rr) {
+			retrieve_resource($rr);
+			$retriever_item_count++;
+		}
+		$retrieve_items = $max_items - $retriever_item_count;
+	}
+
+	/* Look for items that are waiting even though the resource has
+	 * completed. This usually happens because we've been asked to
+	 * retrospectively apply a config change. It could also happen
+	 * due to a cron job dying or something. */
+	$r = q("SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d",
+		intval(max(0, $retrieve_items)));
+	if (!$r) {
+		$r = array();
+	}
+	logger('retriever_retrieve_items: items waiting even though resource has completed: ' . count($r), LOGGER_DEBUG);
+	foreach ($r as $rr) {
+		$resource = q("SELECT * FROM retriever_resource WHERE `id` = %d", intval($rr['resource']));
+		if (!is_array($resource) || !count($resource)) {
+			logger('retriever_retrieve_items: no resource with id ' . $rr['resource'], LOGGER_INFO);
+			continue;
+		}
+		$retriever_item = retriever_get_retriever_item($rr['item']);
+		if (!$retriever_item) {
+			logger('retriever_retrieve_items: no retriever item with id ' . $rr['item'], LOGGER_INFO);
+			continue;
+		}
+		$item = retriever_get_item($retriever_item);
+		if (!$item) {
+			logger('retriever_retrieve_items: no item ' . $retriever_item['item-uri'], LOGGER_INFO);
+			continue;
+		}
+		$retriever = get_retriever($item['contact-id'], $item['uid']);
+		if (!$retriever) {
+			// retriever_item rows carry the user id in `item-uid`; there is no `uid` column.
+			logger('retriever_retrieve_items: no retriever for item ' .
+				$retriever_item['item-uri'] . ' ' . $retriever_item['item-uid'] . ' ' . $item['contact-id'],
+				LOGGER_INFO);
+			continue;
+		}
+		retriever_apply_completed_resource_to_item($retriever, $item, $resource[0], $a);
+		q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d",
+			intval($retriever_item['id']));
+		retriever_check_item_completed($item);
+	}
+}
+
+/**
+ * Deletes old resources and the retriever items left without a resource.
+ *
+ * Completed resources are removed one week after completion, uncompleted
+ * ones three months after creation.
+ */
+function retriever_tidy() {
+	q("DELETE FROM retriever_resource WHERE completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)");
+	q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)");
+
+	$r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null");
+	// Other callers in this file treat a non-array result as a failed query; do the same before count()/foreach.
+	if (!is_array($r)) {
+		$r = array();
+	}
+	logger('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource');
+	foreach ($r as $rr) {
+		q('DELETE FROM retriever_item WHERE id = %d', intval($rr['id']));
+	}
+}
+
+/**
+ * Completes a resource whose URL is a base64 data: URL by decoding the URL itself.
+ *
+ * The resource is marked completed whether or not the URL could be parsed,
+ * and the completion handler is then run for it.
+ *
+ * @param array $resource Row from `retriever_resource`.
+ */
+function retrieve_dataurl_resource($resource) {
+	// The scheme is "data:"; the media type precedes ";base64," and the payload follows it.
+	if (!preg_match("/data:(.*);base64,(.*)/", $resource['url'], $matches)) {
+		logger('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern');
+	} else {
+		$resource['type'] = $matches[1];
+		$resource['data'] = base64url_decode($matches[2]);
+	}
+
+	// Succeed or fail, there's no point retrying
+	q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `completed` = now(), `data` = '%s', `type` = '%s' WHERE id = %d",
+		DBA::escape($resource['data']),
+		DBA::escape($resource['type']),
+		intval($resource['id']));
+	// This function has no $a parameter; obtain the app the same way retrieve_resource() does.
+	retriever_resource_completed($resource, get_app());
+}
+
+function retrieve_resource($resource) {
+ if (substr($resource['url'], 0, 5) == "data:") {
+ return retrieve_dataurl_resource($resource);
+ }
+
+ $a = get_app();
+
+ try {
+ logger('retrieve_resource: ' . ($resource['num-tries'] + 1) .
+ ' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], LOGGER_DEBUG);
+ $redirects;
+ $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-');
+ $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, array('cookiejar' => $cookiejar));
+ unlink($cookiejar);
+ $resource['data'] = $fetch_result['body'];
+ $resource['http-code'] = $a->get_curl_code();
+ $resource['type'] = $a->get_curl_content_type();
+ $resource['redirect-url'] = $fetch_result['redirect_url'];
+ logger('retrieve_resource: got code ' . $resource['http-code'] .
+ ' retrieving resource ' . $resource['id'] .
+ ' final url ' . $resource['redirect-url'], LOGGER_DEBUG);
+ } catch (Exception $e) {
+ logger('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage());
+ }
+ q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `http-code` = %d, `redirect-url` = '%s' WHERE id = %d",
+ intval($resource['http-code']),
+ DBA::escape($resource['redirect-url']),
+ intval($resource['id']));
+ if ($resource['data']) {
+ q("UPDATE `retriever_resource` SET `completed` = now(), `data` = '%s', `type` = '%s' WHERE id = %d",
+ DBA::escape($resource['data']),
+ DBA::escape($resource['type']),
+ intval($resource['id']));
+ retriever_resource_completed($resource, $a);
+ }
+}
+
+function get_retriever($contact_id, $uid, $create = false) {
+ $r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d",
+ intval($contact_id), intval($uid));
+ if (count($r)) {
+ $r[0]['data'] = json_decode($r[0]['data'], true);
+ return $r[0];
+ }
+ if ($create) {
+ q("INSERT INTO `retriever_rule` (`uid`, `contact-id`) VALUES (%d, %d)",
+ intval($uid), intval($contact_id));
+ $r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d",
+ intval($contact_id), intval($uid));
+ return $r[0];
+ }
+}
+
+function retriever_get_retriever_item($id) {
+ $retriever_items = q("SELECT * FROM `retriever_item` WHERE id = %d", intval($id));
+ if (count($retriever_items) != 1) {
+ logger('retriever_get_retriever_item: unable to find retriever_item ' . $id, LOGGER_INFO);
+ return;
+ }
+ return $retriever_items[0];
+}
+
+/**
+ * Loads the `item` row that a retriever item refers to.
+ *
+ * @param array $retriever_item Row from `retriever_item`; its `item-uri`, `item-uid`
+ *                              and `contact-id` columns identify the item.
+ * @return array|null The item row, or null unless exactly one row matches.
+ */
+function retriever_get_item($retriever_item) {
+	$items = q("SELECT * FROM `item` WHERE `uri` = '%s' AND `uid` = %d AND `contact-id` = %d",
+		DBA::escape($retriever_item['item-uri']),
+		intval($retriever_item['item-uid']),
+		intval($retriever_item['contact-id']));
+	if (!is_array($items) || (count($items) != 1)) {
+		logger('retriever_get_item: unexpected number of results ' .
+			(is_array($items) ? count($items) : 0) . ' when searching for item ' .
+			$retriever_item['item-uri'] . ' ' . $retriever_item['item-uid'] . ' ' . $retriever_item['contact-id'],
+			LOGGER_INFO);
+		return null;
+	}
+	return $items[0];
+}
+
+function retriever_item_completed($retriever_item_id, $resource, $a) {
+ logger('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url'], LOGGER_DEBUG);
+
+ $retriever_item = retriever_get_retriever_item($retriever_item_id);
+ if (!$retriever_item) {
+ return;
+ }
+ // Note: the retriever might be null. Doesn't matter.
+ $retriever = get_retriever($retriever_item['contact-id'], $retriever_item['item-uid']);
+ $item = retriever_get_item($retriever_item);
+ if (!$item) {
+ return;
+ }
+
+ retriever_apply_completed_resource_to_item($retriever, $item, $resource, $a);
+
+ q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d",
+ intval($retriever_item['id']));
+ retriever_check_item_completed($item);
+}
+
+function retriever_resource_completed($resource, $a) {
+ logger('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url'], LOGGER_DEBUG);
+ $r = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource['id']);
+ foreach ($r as $rr) {
+ retriever_item_completed($rr['id'], $resource, $a);
+ }
+}
+
+function apply_retrospective($a, $retriever, $num) {
+ $r = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d",
+ intval($retriever['contact-id']), intval($num));
+ foreach ($r as $item) {
+ q('UPDATE `item` SET `visible` = 0 WHERE `id` = %d', $item['id']);
+ q('UPDATE `thread` SET `visible` = 0 WHERE `iid` = %d', $item['id']);
+ retriever_on_item_insert($a, $retriever, $item);
+ }
+}
+
+/**
+ * Queues retrieval of the page behind an item for an enabled retriever rule.
+ *
+ * When the rule has a URL pattern, it is applied to the item's plink with
+ * preg_replace() to obtain the URL to fetch; otherwise the plink is used as is.
+ *
+ * @param mixed $a         Application object, passed to add_retriever_resource().
+ * @param array $retriever Rule row with decoded 'data'.
+ * @param array $item      The item; passed on by reference to add_retriever_item().
+ */
+function retriever_on_item_insert($a, $retriever, &$item) {
+	if (!$retriever || !$retriever['id']) {
+		logger('retriever_on_item_insert: No retriever supplied', LOGGER_INFO);
+		return;
+	}
+	// Same outcome as the former `!$x == "on"` (return when the flag is missing
+	// or falsy), without depending on operator precedence or an existing key.
+	if (empty($retriever["data"]['enable'])) {
+		return;
+	}
+	if (array_key_exists('pattern', $retriever["data"]) && $retriever["data"]['pattern']) {
+		$url = preg_replace('/' . $retriever["data"]['pattern'] . '/', $retriever["data"]['replace'], $item['plink']);
+		logger('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url, LOGGER_DATA);
+	}
+	else {
+		$url = $item['plink'];
+	}
+
+	$resource = add_retriever_resource($a, $url);
+	add_retriever_item($item, $resource);
+}
+
+/**
+ * Returns the `retriever_resource` row for a URL, inserting one if none exists.
+ *
+ * A data: URL is read immediately and stored as an already completed resource
+ * under an md5:// key derived from the URL (presumably so that it fits the
+ * 800-character `url` column); the completion handler is then run for it.
+ *
+ * @param mixed  $a      Application object, passed to retriever_resource_completed().
+ * @param string $url    URL of the resource.
+ * @param bool   $binary Stored in the `binary` column as 1 or 0.
+ * @return array|null The resource row, or null if it cannot be read back after the insert.
+ */
+function add_retriever_resource($a, $url, $binary = false) {
+	logger('add_retriever_resource: ' . $url, LOGGER_DEBUG);
+
+	$scheme = parse_url($url, PHP_URL_SCHEME);
+	if ($scheme == 'data') {
+		$fp = fopen($url, 'r');
+		$meta = stream_get_meta_data($fp);
+		$type = $meta['mediatype'];
+		$data = stream_get_contents($fp);
+		fclose($fp);
+
+		$url = 'md5://' . hash('md5', $url);
+		$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url));
+		// Only touch $r[0] once the row is known to exist.
+		if (!empty($r)) {
+			logger('add_retriever_resource: Resource ' . $url . ' already requested', LOGGER_DEBUG);
+			return $r[0];
+		}
+
+		// Log the media type just read from the stream; no resource row exists yet.
+		logger('retrieve_resource: got data URL type ' . $type, LOGGER_DEBUG);
+		q("INSERT INTO `retriever_resource` (`type`, `binary`, `url`, `completed`, `data`) " .
+			"VALUES ('%s', %d, '%s', now(), '%s')",
+			DBA::escape($type),
+			intval($binary ? 1 : 0),
+			DBA::escape($url),
+			DBA::escape($data));
+		$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url));
+		if (empty($r)) {
+			return null;
+		}
+		retriever_resource_completed($r[0], $a);
+		return $r[0];
+	}
+
+	if (strlen($url) > 800) {
+		logger('add_retriever_resource: URL is longer than 800 characters', LOGGER_INFO);
+	}
+
+	$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url));
+	if (!empty($r)) {
+		logger('add_retriever_resource: Resource ' . $url . ' already requested', LOGGER_DEBUG);
+		return $r[0];
+	}
+
+	q("INSERT INTO `retriever_resource` (`binary`, `url`) " .
+		"VALUES (%d, '%s')", intval($binary ? 1 : 0), DBA::escape($url));
+	$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url));
+	return empty($r) ? null : $r[0];
+}
+
+function add_retriever_item(&$item, $resource) {
+ logger('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG);
+
+ q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " .
+ "VALUES ('%s', %d, %d, %d)",
+ DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource["id"]));
+ $r = q("SELECT id FROM `retriever_item` WHERE " .
+ "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d ORDER BY id DESC",
+ DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id']));
+ if (!count($r)) {
+ logger("add_retriever_item: couldn't create retriever item for " .
+ $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'],
+ LOGGER_INFO);
+ return;
+ }
+ logger('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG);
+ return $r[0]['id'];
+}
+
+function retriever_get_encoding($resource) {
+ $matches = array();
+ if (preg_match('/charset=(.*)/', $resource['type'], $matches)) {
+ return trim(array_pop($matches));
+ }
+ return 'utf-8';
+}
+
+function retriever_apply_xslt_text($xslt_text, $doc) {
+ if (!$xslt_text) {
+ logger('retriever_apply_xslt_text: empty XSLT text', LOGGER_INFO);
+ return $doc;
+ }
+ $xslt_doc = new DOMDocument();
+ if (!$xslt_doc->loadXML($xslt_text)) {
+ logger('retriever_apply_xslt_text: could not load XML', LOGGER_INFO);
+ return $doc;
+ }
+ $xp = new XsltProcessor();
+ $xp->importStylesheet($xslt_doc);
+ $result = $xp->transformToDoc($doc);
+ return $result;
+}
+
+function retriever_apply_dom_filter($retriever, &$item, $resource) {
+ logger('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'], LOGGER_DEBUG);
+
+ if (!$retriever['data']['include'] && !$retriever['data']['customxslt']) {
+ return;
+ }
+ if (!$resource['data']) {
+ logger('retriever_apply_dom_filter: no text to work with', LOGGER_INFO);
+ return;
+ }
+
+ $encoding = retriever_get_encoding($resource);
+ $content = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $encoding);
+ $doc = new DOMDocument('1.0', 'UTF-8');
+ if (strpos($resource['type'], 'html') !== false) {
+ @$doc->loadHTML($content);
+ }
+ else {
+ $doc->loadXML($content);
+ }
+
+ $params = array('$spec' => $retriever['data']);
+ $extract_template = get_markup_template('extract.tpl', 'addon/retriever/');
+ $extract_xslt = replace_macros($extract_template, $params);
+ if ($retriever['data']['include']) {
+ $doc = retriever_apply_xslt_text($extract_xslt, $doc);
+ }
+ if (array_key_exists('customxslt', $retriever['data']) && $retriever['data']['customxslt']) {
+ $doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc);
+ }
+ if (!$doc) {
+ logger('retriever_apply_dom_filter: failed to apply extract XSLT template', LOGGER_INFO);
+ return;
+ }
+
+ $components = parse_url($resource['redirect-url']);
+ $rooturl = $components['scheme'] . "://" . $components['host'];
+ $dirurl = $rooturl . dirname($components['path']) . "/";
+ $params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl);
+ $fix_urls_template = get_markup_template('fix-urls.tpl', 'addon/retriever/');
+ $fix_urls_xslt = replace_macros($fix_urls_template, $params);
+ $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc);
+ if (!$doc) {
+ logger('retriever_apply_dom_filter: failed to apply fix urls XSLT template', LOGGER_INFO);
+ return;
+ }
+
+ $item['body'] = HTML::toBBCode($doc->saveHTML());
+ if (!strlen($item['body'])) {
+ logger('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', LOGGER_INFO);
+ return;
+ }
+ $item['body'] .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . ': [url=';
+ $item['body'] .= $item['plink'];
+ $item['body'] .= ']' . $item['plink'] . '[/url]';
+ q("UPDATE `item` SET `body` = '%s' WHERE `id` = %d",
+ DBA::escape($item['body']), intval($item['id']));
+}
+
+function retrieve_images(&$item, $a) {
+ $matches1 = array();
+ preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $item["body"], $matches1);
+ $matches2 = array();
+ preg_match_all("/\[img\](.*?)\[\/img\]/ism", $item["body"], $matches2);
+ $matches = array_merge($matches1[3], $matches2[1]);
+ logger('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG);
+ foreach ($matches as $url) {
+ if (strpos($url, get_app()->get_baseurl()) === FALSE) {
+ $resource = add_retriever_resource($a, $url, true);
+ if (!$resource['completed']) {
+ add_retriever_item($item, $resource);
+ }
+ else {
+ retriever_transform_images($a, $item, $resource);
+ }
+ }
+ }
+}
+
+function retriever_check_item_completed(&$item)
+{
+ $r = q('SELECT count(*) FROM retriever_item WHERE `item-uri` = "%s" ' .
+ 'AND `item-uid` = %d AND `contact-id` = %d AND `finished` = 0',
+ DBA::escape($item['uri']), intval($item['uid']),
+ intval($item['contact-id']));
+ $waiting = $r[0]['count(*)'];
+ logger('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid']
+ . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources', LOGGER_DEBUG);
+ $old_visible = $item['visible'];
+ $item['visible'] = $waiting ? 0 : 1;
+ if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) {
+ logger('retriever_check_item_completed: changing visible flag to ' . $item['visible'] . ' and invoking notifier ("edit_post", ' . $item['id'] . ')', LOGGER_DEBUG);
+ q("UPDATE `item` SET `visible` = %d WHERE `id` = %d",
+ intval($item['visible']),
+ intval($item['id']));
+ q("UPDATE `thread` SET `visible` = %d WHERE `iid` = %d",
+ intval($item['visible']),
+ intval($item['id']));
+ }
+}
+
+function retriever_apply_completed_resource_to_item($retriever, &$item, $resource, $a) {
+ logger('retriever_apply_completed_resource_to_item: retriever ' .
+ ($retriever ? $retriever['id'] : 'none') .
+ ' resource ' . $resource['url'] . ' plink ' . $item['plink'], LOGGER_DEBUG);
+ if (strpos($resource['type'], 'image') !== false) {
+ retriever_transform_images($a, $item, $resource);
+ }
+ if (!$retriever) {
+ return;
+ }
+ if ((strpos($resource['type'], 'html') !== false) ||
+ (strpos($resource['type'], 'xml') !== false)) {
+ retriever_apply_dom_filter($retriever, $item, $resource);
+ if ($retriever["data"]['images'] ) {
+ retrieve_images($item, $a);
+ }
+ }
+}
+
+/**
+ * Stores a downloaded image and points the item body at the stored copy.
+ *
+ * The image data is handed to Image::storePhoto(); every occurrence of the
+ * resource URL in the item body is replaced by the returned 'full' URL, and
+ * the new body is written to the item rows matching plink, uid and contact-id.
+ *
+ * @param mixed $a        Application object, passed to Image::storePhoto().
+ * @param array $item     The item; its 'body' is updated in place.
+ * @param array $resource Completed resource row holding the image in 'data'.
+ */
+function retriever_transform_images($a, &$item, $resource) {
+	if (!$resource["data"]) {
+		logger('retriever_transform_images: no data available for '
+			. $resource['id'] . ' ' . $resource['url'], LOGGER_INFO);
+		return;
+	}
+
+	try {
+		$photo = Image::storePhoto($a, $item['uid'], $resource['data'], $resource['url']);
+	} catch (Exception $e) {
+		logger('retriever_transform_images caught exception ' . $e->getMessage());
+		return;
+	}
+	// Without a stored URL the replacement below would blank the image URL in the body.
+	if (!is_array($photo) || empty($photo['full'])) {
+		logger('retriever_transform_images: no stored photo URL for ' . $resource['url'], LOGGER_INFO);
+		return;
+	}
+	$new_url = $photo['full'];
+	logger('retriever_transform_images: replacing ' . $resource['url'] . ' with ' .
+		$new_url . ' in item ' . $item['plink'], LOGGER_DEBUG);
+	$transformed = str_replace($resource["url"], $new_url, $item['body']);
+	if ($transformed === $item['body']) {
+		return;
+	}
+
+	$item['body'] = $transformed;
+	q("UPDATE `item` SET `body` = '%s' WHERE `plink` = '%s' AND `uid` = %d AND `contact-id` = %d",
+		DBA::escape($item['body']),
+		DBA::escape($item['plink']),
+		intval($item['uid']),
+		intval($item['contact-id']));
+}
+
+/**
+ * Renders the addon pages into $a->page['content'].
+ *
+ * With argv[1] === 'help' the help page listing the user's feed contacts is
+ * shown. With any other argv[1] (a contact id) the rule editor for that
+ * contact is shown; if the form was posted, the rule is saved first and
+ * optionally re-applied to recent posts.
+ *
+ * @param object $a Application object (argv, page, get_baseurl() are used).
+ */
+function retriever_content($a) {
+	if (!local_user()) {
+		$a->page['content'] .= "Please log in
";
+		return;
+	}
+	if ($a->argv[1] === 'help') {
+		$feeds = q("SELECT `id`, `name`, `thumb` FROM contact WHERE `uid` = %d AND `network` = 'feed'",
+			local_user());
+		foreach ($feeds as $k=>$v) {
+			$feeds[$k]['url'] = $a->get_baseurl() . '/retriever/' . $v['id'];
+		}
+		$template = get_markup_template('/help.tpl', 'addon/retriever/');
+		$a->page['content'] .= replace_macros($template, array(
+			'$config' => $a->get_baseurl() . '/settings/addon',
+			'$feeds' => $feeds));
+		return;
+	}
+	if ($a->argv[1]) {
+		$retriever = get_retriever($a->argv[1], local_user(), false);
+
+		if (x($_POST["id"])) {
+			$retriever = get_retriever($a->argv[1], local_user(), true);
+			$retriever["data"] = array();
+			foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt') as $setting) {
+				if (x($_POST['retriever_' . $setting])) {
+					$retriever["data"][$setting] = $_POST['retriever_' . $setting];
+				}
+			}
+			foreach ($_POST as $k=>$v) {
+				if (preg_match("/retriever-(include|exclude)-(\d+)-(element|attribute|value)/", $k, $matches)) {
+					$retriever['data'][$matches[1]][intval($matches[2])][$matches[3]] = $v;
+				}
+			}
+			// You've gotta have an element, even if it's just "*"
+			// Either list is absent when no clause of that kind was posted.
+			foreach (array('include', 'exclude') as $kind) {
+				if (empty($retriever['data'][$kind]) || !is_array($retriever['data'][$kind])) {
+					continue;
+				}
+				foreach ($retriever['data'][$kind] as $k=>$clause) {
+					if (empty($clause['element'])) {
+						unset($retriever['data'][$kind][$k]);
+					}
+				}
+			}
+			q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d",
+				DBA::escape(json_encode($retriever["data"])), intval($retriever["id"]));
+			$a->page['content'] .= "Settings Updated";
+			if (x($_POST["retriever_retrospective"])) {
+				apply_retrospective($a, $retriever, $_POST["retriever_retrospective"]);
+				// Report the posted count as an integer: the form has no "apply" field, and raw
+				// POST data must not be echoed into the page.
+				$a->page['content'] .= " and retrospectively applied to " . intval($_POST["retriever_retrospective"]) . " posts";
+			}
+			$a->page['content'] .= ".
";
+		}
+
+		$template = get_markup_template('/rule-config.tpl', 'addon/retriever/');
+		$a->page['content'] .= replace_macros($template, array(
+			'$enable' => array(
+				'retriever_enable',
+				L10n::t('Enabled'),
+				$retriever['data']['enable']),
+			'$pattern' => array(
+				'retriever_pattern',
+				L10n::t('URL Pattern'),
+				$retriever["data"]['pattern'],
+				L10n::t('Regular expression matching part of the URL to replace')),
+			'$replace' => array(
+				'retriever_replace',
+				L10n::t('URL Replace'),
+				$retriever["data"]['replace'],
+				L10n::t('Text to replace matching part of above regular expression')),
+			'$images' => array(
+				'retriever_images',
+				L10n::t('Download Images'),
+				$retriever['data']['images']),
+			'$retrospective' => array(
+				'retriever_retrospective',
+				L10n::t('Retrospectively Apply'),
+				'0',
+				L10n::t('Reapply the rules to this number of posts')),
+			'$customxslt' => array(
+				'retriever_customxslt',
+				L10n::t('Custom XSLT'),
+				$retriever['data']['customxslt'],
+				L10n::t("When standard rules aren't enough, apply custom XSLT to the article")),
+			'$title' => L10n::t('Retrieve Feed Content'),
+			'$help' => $a->get_baseurl() . '/retriever/help',
+			'$help_t' => L10n::t('Get Help'),
+			'$submit_t' => L10n::t('Submit'),
+			'$submit' => L10n::t('Save Settings'),
+			'$id' => ($retriever["id"] ? $retriever["id"] : "create"),
+			'$tag_t' => L10n::t('Tag'),
+			'$attribute_t' => L10n::t('Attribute'),
+			'$value_t' => L10n::t('Value'),
+			'$add_t' => L10n::t('Add'),
+			'$remove_t' => L10n::t('Remove'),
+			'$include_t' => L10n::t('Include'),
+			'$include' => $retriever['data']['include'],
+			'$exclude_t' => L10n::t('Exclude'),
+			'$exclude' => $retriever["data"]['exclude']));
+		return;
+	}
+}
+
+function retriever_contact_photo_menu($a, &$args) {
+ if (!$args) {
+ return;
+ }
+ if ($args["contact"]["network"] == "feed") {
+ $args["menu"][ 'retriever' ] = array(L10n::t('Retriever'), $a->get_baseurl() . '/retriever/' . $args["contact"]['id']);
+ }
+}
+
+function retriever_post_remote_hook(&$a, &$item) {
+ logger('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG);
+
+ $retriever = get_retriever($item['contact-id'], $item["uid"], false);
+ if ($retriever) {
+ retriever_on_item_insert($a, $retriever, $item);
+ }
+ else {
+ if (PConfig::get($item["uid"], 'retriever', 'oembed')) {
+ // Convert to HTML and back to take advantage of bbcode's resolution of oembeds.
+ $body = HTML::toBBCode(BBCode::convert($item['body']));
+ if ($body) {
+ $item['body'] = $body;
+ }
+ }
+ if (PConfig::get($item["uid"], 'retriever', 'all_photos')) {
+ retrieve_images($item, $a);
+ }
+ }
+ retriever_check_item_completed($item);
+}
+
+function retriever_plugin_settings(&$a,&$s) {
+ $all_photos = PConfig::get(local_user(), 'retriever', 'all_photos');
+ $oembed = PConfig::get(local_user(), 'retriever', 'oembed');
+ $template = get_markup_template('/settings.tpl', 'addon/retriever/');
+ $s .= replace_macros($template, array(
+ '$allphotos' => array(
+ 'retriever_all_photos',
+ L10n::t('All Photos'),
+ $all_photos,
+ L10n::t('Check this to retrieve photos for all posts')),
+ '$oembed' => array(
+ 'retriever_oembed',
+ L10n::t('Resolve OEmbed'),
+ $oembed,
+ L10n::t('Check this to attempt to retrieve embedded content for all posts - useful e.g. for Facebook posts')),
+ '$submit' => L10n::t('Save Settings'),
+ '$title' => L10n::t('Retriever Settings'),
+ '$help' => $a->get_baseurl() . '/retriever/help'));
+}
+
+function retriever_plugin_settings_post($a,$post) {
+ if ($_POST['retriever_all_photos']) {
+ PConfig::set(local_user(), 'retriever', 'all_photos', $_POST['retriever_all_photos']);
+ }
+ else {
+ PConfig::del(local_user(), 'retriever', 'all_photos');
+ }
+ if ($_POST['retriever_oembed']) {
+ PConfig::set(local_user(), 'retriever', 'oembed', $_POST['retriever_oembed']);
+ }
+ else {
+ PConfig::del(local_user(), 'retriever', 'oembed');
+ }
+}
diff --git a/retriever/templates/extract.tpl b/retriever/templates/extract.tpl
new file mode 100644
index 00000000..f24a860d
--- /dev/null
+++ b/retriever/templates/extract.tpl
@@ -0,0 +1,32 @@
+
+
+
+
+
+
+{{function clause_xpath}}
+{{if !$clause.attribute}}
+{{$clause.element}}{{elseif $clause.attribute == 'class'}}
+{{$clause.element}}[contains(concat(' ', normalize-space(@class), ' '), '{{$clause.value}}')]{{else}}
+{{$clause.element}}[@{{$clause.attribute}}='{{$clause.value}}']{{/if}}
+{{/function}}
+
+{{foreach $spec.include as $clause}}
+
+
+
+
+
+{{/foreach}}
+
+{{foreach $spec.exclude as $clause}}
+
+{{/foreach}}
+
+
+
+
+
+
+
+
diff --git a/retriever/templates/fix-urls.tpl b/retriever/templates/fix-urls.tpl
new file mode 100644
index 00000000..248d4770
--- /dev/null
+++ b/retriever/templates/fix-urls.tpl
@@ -0,0 +1,26 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/retriever/templates/help.tpl b/retriever/templates/help.tpl
new file mode 100644
index 00000000..10b421d0
--- /dev/null
+++ b/retriever/templates/help.tpl
@@ -0,0 +1,148 @@
+Retriever Plugin Help
+
+This plugin replaces the short excerpts you normally get in RSS feeds
+with the full content of the article from the source website. You
+specify which part of the page you're interested in with a set of
+rules. When each item arrives, the plugin downloads the full page
+from the website, extracts content using the rules, and replaces the
+original article.
+
+
+There are a few reasons you may want to do this. The source website
+might be slow or overloaded. The source website might be
+untrustworthy, in which case using Friendica to scrub the HTML is a
+good idea. You might be on a LAN that blacklists certain websites.
+It also works neatly with the mailstream plugin, allowing you to read
+a news stream comfortably without needing continuous Internet
+connectivity.
+
+
+However, setting up retriever can be quite tricky since it depends on
+the internal design of the website. That was designed to make life
+easy for the website's developers, not for you. You'll need to have
+some familiarity with HTML, and be willing to adapt when the website
+suddenly changes everything without notice.
+
+Configuring Retriever for a feed
+
+To set up retriever for an RSS feed, go to the "Contacts" page and
+find your feed. Then click on the drop-down menu on the contact.
+Select "Retriever" to get to the retriever configuration.
+
+
+The "Include" configuration section specifies parts of the page to
+include in the article. Each row has three components:
+
+
+An HTML tag (e.g. "div", "span", "p")
+An attribute (usually "class" or "id")
+A value for the attribute
+
+
+A simple case is when the article is wrapped in a "div" element:
+
+
+ ...
+ <div class="ArticleWrapper">
+ <h2>Man Bites Dog</h2>
+ <img src="mbd.jpg">
+ <p>
+ Residents of the sleepy community of Nowheresville were
+ shocked yesterday by the sight of creepy local weirdo Jim
+ McOddman assaulting innocent local dog Snufflekins with his
+ false teeth.
+ </p>
+ ...
+ </div>
+ ...
+
+
+You then specify the tag "div", attribute "class", and value
+"ArticleWrapper". Everything else in the page, such as navigation
+panels and menus and footers and so on, will be discarded. If there
+is more than one section of the page you want to include, specify each
+one on a separate row. If the matching section contains some sections
+you want to remove, specify those in the "Exclude" section in the same
+way.
+
+
+Once you've got a configuration that you think will work, you can try
+it out on some existing articles. Type a number into the
+"Retrospectively Apply" box and click "Submit". After a while
+(exactly how long depends on your system's cron configuration) the new
+articles should be available.
+
+Techniques
+
+You can leave the attribute and value blank to include all the
+corresponding elements with the specified tag name. You can also use
+a tag name of just an asterisk ("*"), which will match any element type with the
+specified attribute regardless of the tag.
+
+
+Note that the "class" attribute is a special case. Many web page
+templates will put multiple different classes in the same element,
+separated by spaces. If you specify an attribute of "class" it will
+match an element if any of its classes matches the specified value.
+For example:
+
+
+ <div class="article breaking-news">
+
+
+In this case you can specify a value of "article", or "breaking-news".
+You can also specify "article breaking-news", but that won't match if
+the website suddenly changes to "breaking-news article", so that's not
+recommended.
+
+
+One useful trick you can try is using the website's "print" pages.
+Many news sites have print versions of all their articles. These are
+usually drastically simplified compared to the live website page.
+Sometimes this is a good way to get the whole article when it's
+normally split across multiple pages.
+
+
+Hopefully the URL for the print page is a predictable variant of the
+normal article URL. For example, an article URL like:
+
+
+ http://www.newssite.com/article-8636.html
+
+
+...might have a print version at:
+
+
+ http://www.newssite.com/print/article-8636.html
+
+
+To change the URL used to retrieve the page, use the "URL Pattern" and
+"URL Replace" fields. The pattern is a regular expression matching
+part of the URL to replace. In this case, you might use a pattern of
+"/article" and a replace string of "/print/article". A common pattern
+is simply a dollar sign ("$"), used to add the replace string to the end of the URL.
+
+Background Processing
+
+Note that retrieving and processing the articles can take some time,
+so it's done in the background. Incoming articles will be marked as
+invisible while they're in the process of being downloaded. If a URL
+fails, the plugin will keep trying at progressively longer intervals
+for up to a month, in case the website is temporarily overloaded or
+the network is down.
+
+Retrieving Images
+
+Retriever can also optionally download images and store them in the
+local Friendica instance. Just check the "Download Images" box. You
+can also download images in every item from your network, whether it's
+an RSS feed or not. Go to the "Settings" page and
+click "Plugin settings". Then check the "All
+Photos" box in the "Retriever Settings" section and click "Save Settings".
+
+Configure Feeds:
+
+{{foreach $feeds as $feed}}
+{{include file='contact_template.tpl' contact=$feed}}
+{{/foreach}}
+
diff --git a/retriever/templates/rule-config.tpl b/retriever/templates/rule-config.tpl
new file mode 100644
index 00000000..228d0326
--- /dev/null
+++ b/retriever/templates/rule-config.tpl
@@ -0,0 +1,112 @@
+
diff --git a/retriever/templates/settings.tpl b/retriever/templates/settings.tpl
new file mode 100644
index 00000000..8bfe8db0
--- /dev/null
+++ b/retriever/templates/settings.tpl
@@ -0,0 +1,9 @@
+
+
{{$title}}
+
+ Get Help
+
+{{include file="field_checkbox.tpl" field=$allphotos}}
+{{include file="field_checkbox.tpl" field=$oembed}}
+
+
From f453c15259e5fecf151a987ac84c58126653e793 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sat, 20 Jul 2019 10:44:38 +0100
Subject: [PATCH 010/527] Fixes for retriever
---
retriever/retriever.php | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 78fe575f..5f2b855a 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -512,7 +512,8 @@ function retriever_apply_xslt_text($xslt_text, $doc) {
function retriever_apply_dom_filter($retriever, &$item, $resource) {
logger('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'], LOGGER_DEBUG);
- if (!$retriever['data']['include'] && !$retriever['data']['customxslt']) {
+ if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) {
+ logger('retriever_apply_dom_filter: no include and no customxslt', LOGGER_INFO);
return;
}
if (!$resource['data']) {
@@ -564,8 +565,8 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
$item['body'] .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . ': [url=';
$item['body'] .= $item['plink'];
$item['body'] .= ']' . $item['plink'] . '[/url]';
- q("UPDATE `item` SET `body` = '%s' WHERE `id` = %d",
- DBA::escape($item['body']), intval($item['id']));
+ DBA::update('item', ['body' => $item['body']], ['id' => $item['id']]);
+ DBA::update('item-content', ['body' => $item['body']], ['uri' => $item['uri']]);
}
function retrieve_images(&$item, $a) {
@@ -642,9 +643,9 @@ function retriever_transform_images($a, &$item, $resource) {
logger('retriever_transform_images caught exception ' . $e->getMessage());
return;
}
- foreach ($photo as $k => $v)
- {
- logger('@@@ photo key ' . $k);
+ if (!array_key_exists('full', $photo)) {
+ logger('retriever_transform_images: no replacement URL for image ' . $resource['url']);
+ return;
}
$new_url = $photo['full'];
logger('retriever_transform_images: replacing ' . $resource['url'] . ' with ' .
From ae3fa6cea2d8e2a480958b68deaf2323d45d24ac Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sat, 20 Jul 2019 14:37:57 +0100
Subject: [PATCH 011/527] more fixes
---
retriever/retriever.php | 119 ++++++++++++++++++++--------------------
1 file changed, 60 insertions(+), 59 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 5f2b855a..18351f1e 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -10,6 +10,7 @@
use Friendica\Core\Addon;
use Friendica\Core\Config;
use Friendica\Core\PConfig;
+use Friendica\Core\Logger;
use Friendica\Content\Text\HTML;
use Friendica\Content\Text\BBCode;
use Friendica\Object\Image;
@@ -73,7 +74,7 @@ function retriever_install() {
if (Config::get('retriever', 'dbversion') == '0.7') {
$r = q("SELECT `id`, `data` FROM `retriever_rule`");
foreach ($r as $rr) {
- logger('retriever_install: retriever ' . $rr['id'] . ' old config ' . $rr['data'], LOGGER_DATA);
+ Logger::log('retriever_install: retriever ' . $rr['id'] . ' old config ' . $rr['data'], Logger::DATA);
$data = json_decode($rr['data'], true);
if ($data['pattern']) {
$matches = array();
@@ -122,7 +123,7 @@ function retriever_install() {
unset($data['remove']);
}
$r = q('UPDATE `retriever_rule` SET `data` = "%s" WHERE `id` = %d', DBA::escape(json_encode($data)), $rr['id']);
- logger('retriever_install: retriever ' . $rr['id'] . ' new config ' . json_encode($data), LOGGER_DATA);
+ Logger::log('retriever_install: retriever ' . $rr['id'] . ' new config ' . json_encode($data), Logger::DATA);
}
Config::set('retriever', 'dbversion', '0.8');
}
@@ -199,7 +200,7 @@ function retriever_retrieve_items($max_items, $a) {
}
$retrieve_items = $max_items - $retriever_item_count;
- logger('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items, LOGGER_DEBUG);
+ Logger::log('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items, Logger::DEBUG);
do {
$r = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d",
DBA::escape(implode($schedule_clauses, ' OR ')),
@@ -210,7 +211,7 @@ function retriever_retrieve_items($max_items, $a) {
if (count($r) == 0) {
break;
}
- logger('retriever_retrieve_items: found ' . count($r) . ' waiting resources in database', LOGGER_DEBUG);
+ Logger::log('retriever_retrieve_items: found ' . count($r) . ' waiting resources in database', Logger::DEBUG);
foreach ($r as $rr) {
retrieve_resource($rr);
$retriever_item_count++;
@@ -228,24 +229,24 @@ function retriever_retrieve_items($max_items, $a) {
if (!$r) {
$r = array();
}
- logger('retriever_retrieve_items: items waiting even though resource has completed: ' . count($r), LOGGER_DEBUG);
+ Logger::log('retriever_retrieve_items: items waiting even though resource has completed: ' . count($r), Logger::DEBUG);
foreach ($r as $rr) {
$resource = q("SELECT * FROM retriever_resource WHERE `id` = %d", $rr['resource']);
$retriever_item = retriever_get_retriever_item($rr['item']);
if (!$retriever_item) {
- logger('retriever_retrieve_items: no retriever item with id ' . $rr['item'], LOGGER_INFO);
+ Logger::log('retriever_retrieve_items: no retriever item with id ' . $rr['item'], Logger::INFO);
continue;
}
$item = retriever_get_item($retriever_item);
if (!$item) {
- logger('retriever_retrieve_items: no item ' . $retriever_item['item-uri'], LOGGER_INFO);
+ Logger::log('retriever_retrieve_items: no item ' . $retriever_item['item-uri'], Logger::INFO);
continue;
}
$retriever = get_retriever($item['contact-id'], $item['uid']);
if (!$retriever) {
- logger('retriever_retrieve_items: no retriever for item ' .
+ Logger::log('retriever_retrieve_items: no retriever for item ' .
$retriever_item['item-uri'] . ' ' . $retriever_item['uid'] . ' ' . $item['contact-id'],
- LOGGER_INFO);
+ Logger::INFO);
continue;
}
retriever_apply_completed_resource_to_item($retriever, $item, $resource[0], $a);
@@ -260,7 +261,7 @@ function retriever_tidy() {
q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)");
$r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null");
- logger('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource');
+ Logger::log('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource');
foreach ($r as $rr) {
q('DELETE FROM retriever_item WHERE id = %d', intval($rr['id']));
}
@@ -268,7 +269,7 @@ function retriever_tidy() {
function retrieve_dataurl_resource($resource) {
if (!preg_match("/date:(.*);base64,(.*)/", $resource['url'], $matches)) {
- logger('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern');
+ Logger::log('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern');
} else {
$resource['type'] = $matches[1];
$resource['data'] = base64url_decode($matches[2]);
@@ -290,21 +291,21 @@ function retrieve_resource($resource) {
$a = get_app();
try {
- logger('retrieve_resource: ' . ($resource['num-tries'] + 1) .
- ' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], LOGGER_DEBUG);
+ Logger::log('retrieve_resource: ' . ($resource['num-tries'] + 1) .
+ ' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], Logger::DEBUG);
$redirects;
$cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-');
$fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, array('cookiejar' => $cookiejar));
unlink($cookiejar);
- $resource['data'] = $fetch_result['body'];
- $resource['http-code'] = $a->get_curl_code();
- $resource['type'] = $a->get_curl_content_type();
- $resource['redirect-url'] = $fetch_result['redirect_url'];
- logger('retrieve_resource: got code ' . $resource['http-code'] .
+ $resource['data'] = $fetch_result->getBody();
+ $resource['http-code'] = $fetch_result->getReturnCode();
+ $resource['type'] = $fetch_result->getContentType();
+ $resource['redirect-url'] = $fetch_result->getRedirectUrl();
+ Logger::log('retrieve_resource: got code ' . $resource['http-code'] .
' retrieving resource ' . $resource['id'] .
- ' final url ' . $resource['redirect-url'], LOGGER_DEBUG);
+ ' final url ' . $resource['redirect-url'], Logger::DEBUG);
} catch (Exception $e) {
- logger('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage());
+ Logger::log('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage());
}
q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `http-code` = %d, `redirect-url` = '%s' WHERE id = %d",
intval($resource['http-code']),
@@ -338,7 +339,7 @@ function get_retriever($contact_id, $uid, $create = false) {
function retriever_get_retriever_item($id) {
$retriever_items = q("SELECT * FROM `retriever_item` WHERE id = %d", intval($id));
if (count($retriever_items) != 1) {
- logger('retriever_get_retriever_item: unable to find retriever_item ' . $id, LOGGER_INFO);
+ Logger::log('retriever_get_retriever_item: unable to find retriever_item ' . $id, Logger::INFO);
return;
}
return $retriever_items[0];
@@ -350,15 +351,15 @@ function retriever_get_item($retriever_item) {
intval($retriever_item['item-uid']),
intval($retriever_item['contact-id']));
if (count($items) != 1) {
- logger('retriever_get_item: unexpected number of results ' .
- count($items) . " when searching for item $uri $uid $cid", LOGGER_INFO);
+ Logger::log('retriever_get_item: unexpected number of results ' .
+ count($items) . " when searching for item $uri $uid $cid", Logger::INFO);
return;
}
return $items[0];
}
function retriever_item_completed($retriever_item_id, $resource, $a) {
- logger('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url'], LOGGER_DEBUG);
+ Logger::log('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url'], Logger::DEBUG);
$retriever_item = retriever_get_retriever_item($retriever_item_id);
if (!$retriever_item) {
@@ -379,7 +380,7 @@ function retriever_item_completed($retriever_item_id, $resource, $a) {
}
function retriever_resource_completed($resource, $a) {
- logger('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url'], LOGGER_DEBUG);
+ Logger::log('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url'], Logger::DEBUG);
$r = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource['id']);
foreach ($r as $rr) {
retriever_item_completed($rr['id'], $resource, $a);
@@ -398,7 +399,7 @@ function apply_retrospective($a, $retriever, $num) {
function retriever_on_item_insert($a, $retriever, &$item) {
if (!$retriever || !$retriever['id']) {
- logger('retriever_on_item_insert: No retriever supplied', LOGGER_INFO);
+ Logger::log('retriever_on_item_insert: No retriever supplied', Logger::INFO);
return;
}
if (!$retriever["data"]['enable'] == "on") {
@@ -406,7 +407,7 @@ function retriever_on_item_insert($a, $retriever, &$item) {
}
if (array_key_exists('pattern', $retriever["data"]) && $retriever["data"]['pattern']) {
$url = preg_replace('/' . $retriever["data"]['pattern'] . '/', $retriever["data"]['replace'], $item['plink']);
- logger('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url, LOGGER_DATA);
+ Logger::log('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url, Logger::DATA);
}
else {
$url = $item['plink'];
@@ -417,7 +418,7 @@ function retriever_on_item_insert($a, $retriever, &$item) {
}
function add_retriever_resource($a, $url, $binary = false) {
- logger('add_retriever_resource: ' . $url, LOGGER_DEBUG);
+ Logger::log('add_retriever_resource: ' . $url, Logger::DEBUG);
$scheme = parse_url($url, PHP_URL_SCHEME);
if ($scheme == 'data') {
@@ -431,11 +432,11 @@ function add_retriever_resource($a, $url, $binary = false) {
$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url));
$resource = $r[0];
if (count($r)) {
- logger('add_retriever_resource: Resource ' . $url . ' already requested', LOGGER_DEBUG);
+ Logger::log('add_retriever_resource: Resource ' . $url . ' already requested', Logger::DEBUG);
return $resource;
}
- logger('retrieve_resource: got data URL type ' . $resource['type'], LOGGER_DEBUG);
+ Logger::log('retrieve_resource: got data URL type ' . $resource['type'], Logger::DEBUG);
q("INSERT INTO `retriever_resource` (`type`, `binary`, `url`, `completed`, `data`) " .
"VALUES ('%s', %d, '%s', now(), '%s')",
DBA::escape($type),
@@ -451,12 +452,12 @@ function add_retriever_resource($a, $url, $binary = false) {
}
if (strlen($url) > 800) {
- logger('add_retriever_resource: URL is longer than 800 characters', LOGGER_INFO);
+ Logger::log('add_retriever_resource: URL is longer than 800 characters', Logger::INFO);
}
$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url));
if (count($r)) {
- logger('add_retriever_resource: Resource ' . $url . ' already requested', LOGGER_DEBUG);
+ Logger::log('add_retriever_resource: Resource ' . $url . ' already requested', Logger::DEBUG);
return $r[0];
}
@@ -467,7 +468,7 @@ function add_retriever_resource($a, $url, $binary = false) {
}
function add_retriever_item(&$item, $resource) {
- logger('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG);
+ Logger::log('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG);
q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " .
"VALUES ('%s', %d, %d, %d)",
@@ -476,12 +477,12 @@ function add_retriever_item(&$item, $resource) {
"`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d ORDER BY id DESC",
DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id']));
if (!count($r)) {
- logger("add_retriever_item: couldn't create retriever item for " .
+ Logger::log("add_retriever_item: couldn't create retriever item for " .
$item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'],
- LOGGER_INFO);
+ Logger::INFO);
return;
}
- logger('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG);
+ Logger::log('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG);
return $r[0]['id'];
}
@@ -495,12 +496,12 @@ function retriever_get_encoding($resource) {
function retriever_apply_xslt_text($xslt_text, $doc) {
if (!$xslt_text) {
- logger('retriever_apply_xslt_text: empty XSLT text', LOGGER_INFO);
+ Logger::log('retriever_apply_xslt_text: empty XSLT text', Logger::INFO);
return $doc;
}
$xslt_doc = new DOMDocument();
if (!$xslt_doc->loadXML($xslt_text)) {
- logger('retriever_apply_xslt_text: could not load XML', LOGGER_INFO);
+ Logger::log('retriever_apply_xslt_text: could not load XML', Logger::INFO);
return $doc;
}
$xp = new XsltProcessor();
@@ -510,14 +511,14 @@ function retriever_apply_xslt_text($xslt_text, $doc) {
}
function retriever_apply_dom_filter($retriever, &$item, $resource) {
- logger('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'], LOGGER_DEBUG);
+ Logger::log('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'], Logger::DEBUG);
if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) {
- logger('retriever_apply_dom_filter: no include and no customxslt', LOGGER_INFO);
+ Logger::log('retriever_apply_dom_filter: no include and no customxslt', Logger::INFO);
return;
}
if (!$resource['data']) {
- logger('retriever_apply_dom_filter: no text to work with', LOGGER_INFO);
+ Logger::log('retriever_apply_dom_filter: no text to work with', Logger::INFO);
return;
}
@@ -541,7 +542,7 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
$doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc);
}
if (!$doc) {
- logger('retriever_apply_dom_filter: failed to apply extract XSLT template', LOGGER_INFO);
+ Logger::log('retriever_apply_dom_filter: failed to apply extract XSLT template', Logger::INFO);
return;
}
@@ -553,13 +554,13 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
$fix_urls_xslt = replace_macros($fix_urls_template, $params);
$doc = retriever_apply_xslt_text($fix_urls_xslt, $doc);
if (!$doc) {
- logger('retriever_apply_dom_filter: failed to apply fix urls XSLT template', LOGGER_INFO);
+ Logger::log('retriever_apply_dom_filter: failed to apply fix urls XSLT template', Logger::INFO);
return;
}
$item['body'] = HTML::toBBCode($doc->saveHTML());
if (!strlen($item['body'])) {
- logger('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', LOGGER_INFO);
+ Logger::log('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', Logger::INFO);
return;
}
$item['body'] .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . ': [url=';
@@ -575,9 +576,9 @@ function retrieve_images(&$item, $a) {
$matches2 = array();
preg_match_all("/\[img\](.*?)\[\/img\]/ism", $item["body"], $matches2);
$matches = array_merge($matches1[3], $matches2[1]);
- logger('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG);
+ Logger::log('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG);
foreach ($matches as $url) {
- if (strpos($url, get_app()->get_baseurl()) === FALSE) {
+ if (strpos($url, get_app()->getBaseUrl()) === FALSE) {
$resource = add_retriever_resource($a, $url, true);
if (!$resource['completed']) {
add_retriever_item($item, $resource);
@@ -596,12 +597,12 @@ function retriever_check_item_completed(&$item)
DBA::escape($item['uri']), intval($item['uid']),
intval($item['contact-id']));
$waiting = $r[0]['count(*)'];
- logger('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid']
- . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources', LOGGER_DEBUG);
+ Logger::log('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid']
+ . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources', Logger::DEBUG);
$old_visible = $item['visible'];
$item['visible'] = $waiting ? 0 : 1;
if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) {
- logger('retriever_check_item_completed: changing visible flag to ' . $item['visible'] . ' and invoking notifier ("edit_post", ' . $item['id'] . ')', LOGGER_DEBUG);
+ Logger::log('retriever_check_item_completed: changing visible flag to ' . $item['visible'] . ' and invoking notifier ("edit_post", ' . $item['id'] . ')', Logger::DEBUG);
q("UPDATE `item` SET `visible` = %d WHERE `id` = %d",
intval($item['visible']),
intval($item['id']));
@@ -612,9 +613,9 @@ function retriever_check_item_completed(&$item)
}
function retriever_apply_completed_resource_to_item($retriever, &$item, $resource, $a) {
- logger('retriever_apply_completed_resource_to_item: retriever ' .
+ Logger::log('retriever_apply_completed_resource_to_item: retriever ' .
($retriever ? $retriever['id'] : 'none') .
- ' resource ' . $resource['url'] . ' plink ' . $item['plink'], LOGGER_DEBUG);
+ ' resource ' . $resource['url'] . ' plink ' . $item['plink'], Logger::DEBUG);
if (strpos($resource['type'], 'image') !== false) {
retriever_transform_images($a, $item, $resource);
}
@@ -632,24 +633,24 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc
function retriever_transform_images($a, &$item, $resource) {
if (!$resource["data"]) {
- logger('retriever_transform_images: no data available for '
- . $resource['id'] . ' ' . $resource['url'], LOGGER_INFO);
+ Logger::log('retriever_transform_images: no data available for '
+ . $resource['id'] . ' ' . $resource['url'], Logger::INFO);
return;
}
try {
$photo = Image::storePhoto($a, $item['uid'], $resource['data'], $resource['url']);
} catch (Exception $e) {
- logger('retriever_transform_images caught exception ' . $e->getMessage());
+ Logger::log('retriever_transform_images caught exception ' . $e->getMessage());
return;
}
if (!array_key_exists('full', $photo)) {
- logger('retriever_transform_images: no replacement URL for image ' . $resource['url']);
+ Logger::log('retriever_transform_images: no replacement URL for image ' . $resource['url']);
return;
}
$new_url = $photo['full'];
- logger('retriever_transform_images: replacing ' . $resource['url'] . ' with ' .
- $new_url . ' in item ' . $item['plink'], LOGGER_DEBUG);
+ Logger::log('retriever_transform_images: replacing ' . $resource['url'] . ' with ' .
+ $new_url . ' in item ' . $item['plink'], Logger::DEBUG);
$transformed = str_replace($resource["url"], $new_url, $item['body']);
if ($transformed === $item['body']) {
return;
@@ -672,7 +673,7 @@ function retriever_content($a) {
$feeds = q("SELECT `id`, `name`, `thumb` FROM contact WHERE `uid` = %d AND `network` = 'feed'",
local_user());
foreach ($feeds as $k=>$v) {
- $feeds[$k]['url'] = $a->get_baseurl() . '/retriever/' . $v['id'];
+ $feeds[$k]['url'] = $a->getBaseUrl() . '/retriever/' . $v['id'];
}
$template = get_markup_template('/help.tpl', 'addon/retriever/');
$a->page['content'] .= replace_macros($template, array(
@@ -776,7 +777,7 @@ function retriever_contact_photo_menu($a, &$args) {
}
function retriever_post_remote_hook(&$a, &$item) {
- logger('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG);
+ Logger::log('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG);
$retriever = get_retriever($item['contact-id'], $item["uid"], false);
if ($retriever) {
From 738d1ab5880698a82d0d330ffb785d12b74e6541 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sat, 20 Jul 2019 14:45:10 +0100
Subject: [PATCH 012/527] more fixes
---
retriever/retriever.php | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 18351f1e..3072a743 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -11,6 +11,7 @@ use Friendica\Core\Addon;
use Friendica\Core\Config;
use Friendica\Core\PConfig;
use Friendica\Core\Logger;
+use Friendica\Core\Renderer;
use Friendica\Content\Text\HTML;
use Friendica\Content\Text\BBCode;
use Friendica\Object\Image;
@@ -533,8 +534,8 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
}
$params = array('$spec' => $retriever['data']);
- $extract_template = get_markup_template('extract.tpl', 'addon/retriever/');
- $extract_xslt = replace_macros($extract_template, $params);
+ $extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/');
+ $extract_xslt = Renderer::replaceMacros($extract_template, $params);
if ($retriever['data']['include']) {
$doc = retriever_apply_xslt_text($extract_xslt, $doc);
}
@@ -550,8 +551,8 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
$rooturl = $components['scheme'] . "://" . $components['host'];
$dirurl = $rooturl . dirname($components['path']) . "/";
$params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl);
- $fix_urls_template = get_markup_template('fix-urls.tpl', 'addon/retriever/');
- $fix_urls_xslt = replace_macros($fix_urls_template, $params);
+ $fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/');
+ $fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params);
$doc = retriever_apply_xslt_text($fix_urls_xslt, $doc);
if (!$doc) {
Logger::log('retriever_apply_dom_filter: failed to apply fix urls XSLT template', Logger::INFO);
@@ -675,8 +676,8 @@ function retriever_content($a) {
foreach ($feeds as $k=>$v) {
$feeds[$k]['url'] = $a->getBaseUrl() . '/retriever/' . $v['id'];
}
- $template = get_markup_template('/help.tpl', 'addon/retriever/');
- $a->page['content'] .= replace_macros($template, array(
+ $template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/');
+ $a->page['content'] .= Renderer::replaceMacros($template, array(
'$config' => $a->get_baseurl() . '/settings/addon',
'$feeds' => $feeds));
return;
@@ -718,8 +719,8 @@ function retriever_content($a) {
$a->page['content'] .= ".
";
}
- $template = get_markup_template('/rule-config.tpl', 'addon/retriever/');
- $a->page['content'] .= replace_macros($template, array(
+ $template = Renderer::getMarkupTemplate('/rule-config.tpl', 'addon/retriever/');
+ $a->page['content'] .= Renderer::replaceMacros($template, array(
'$enable' => array(
'retriever_enable',
L10n::t('Enabled'),
@@ -801,8 +802,8 @@ function retriever_post_remote_hook(&$a, &$item) {
function retriever_plugin_settings(&$a,&$s) {
$all_photos = PConfig::get(local_user(), 'retriever', 'all_photos');
$oembed = PConfig::get(local_user(), 'retriever', 'oembed');
- $template = get_markup_template('/settings.tpl', 'addon/retriever/');
- $s .= replace_macros($template, array(
+ $template = Renderer::getMarkupTemplate('/settings.tpl', 'addon/retriever/');
+ $s .= Renderer::replaceMacros($template, array(
'$allphotos' => array(
'retriever_all_photos',
L10n::t('All Photos'),
From c9f4ad7405370a7655f77dc1c0dac3392cca9b77 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sun, 21 Jul 2019 18:27:14 +0100
Subject: [PATCH 013/527] now working retriever
---
retriever/retriever.php | 147 +++++++---------------------------------
1 file changed, 23 insertions(+), 124 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 3072a743..97f29694 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -18,6 +18,7 @@ use Friendica\Object\Image;
use Friendica\Util\Network;
use Friendica\Core\L10n;
use Friendica\Database\DBA;
+use Friendica\Model\ItemURI;
function retriever_install() {
Addon::registerHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings');
@@ -27,116 +28,6 @@ function retriever_install() {
Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron');
$r = q("SELECT `id` FROM `pconfig` WHERE `cat` LIKE 'retriever_%%'");
- if (count($r) || (Config::get('retriever', 'dbversion') == '0.1')) {
- $retrievers = array();
- $r = q("SELECT SUBSTRING(`cat`, 10) AS `contact`, `k`, `v` FROM `pconfig` WHERE `cat` LIKE 'retriever%%'");
- foreach ($r as $rr) {
- $retrievers[$rr['contact']][$rr['k']] = $rr['v'];
- }
- foreach ($retrievers as $k => $v) {
- $rr = q("SELECT `uid` FROM `contact` WHERE `id` = %d", intval($k));
- $uid = $rr[0]['uid'];
- $v['images'] = 'on';
- q("INSERT INTO `retriever_rule` (`uid`, `contact-id`, `data`) VALUES (%d, %d, '%s')",
- intval($uid), intval($k), DBA::escape(json_encode($v)));
- }
- q("DELETE FROM `pconfig` WHERE `cat` LIKE 'retriever_%%'");
- Config::set('retriever', 'dbversion', '0.2');
- }
- if (Config::get('retriever', 'dbversion') == '0.2') {
- q("ALTER TABLE `retriever_resource` DROP COLUMN `retriever`");
- Config::set('retriever', 'dbversion', '0.3');
- }
- if (Config::get('retriever', 'dbversion') == '0.3') {
- q("ALTER TABLE `retriever_item` MODIFY COLUMN `item-uri` varchar(800) CHARACTER SET ascii NOT NULL");
- q("ALTER TABLE `retriever_resource` MODIFY COLUMN `url` varchar(800) CHARACTER SET ascii NOT NULL");
- Config::set('retriever', 'dbversion', '0.4');
- }
- if (Config::get('retriever', 'dbversion') == '0.4') {
- q("ALTER TABLE `retriever_item` ADD COLUMN `finished` tinyint(1) unsigned NOT NULL DEFAULT '0'");
- Config::set('retriever', 'dbversion', '0.5');
- }
- if (Config::get('retriever', 'dbversion') == '0.5') {
- q('ALTER TABLE `retriever_resource` CHANGE `created` `created` timestamp NOT NULL DEFAULT now()');
- q('ALTER TABLE `retriever_resource` CHANGE `completed` `completed` timestamp NULL DEFAULT NULL');
- q('ALTER TABLE `retriever_resource` CHANGE `last-try` `last-try` timestamp NULL DEFAULT NULL');
- q('ALTER TABLE `retriever_item` DROP KEY `all`');
- q('ALTER TABLE `retriever_item` ADD KEY `all` (`item-uri`, `item-uid`, `contact-id`)');
- Config::set('retriever', 'dbversion', '0.6');
- }
- if (Config::get('retriever', 'dbversion') == '0.6') {
- q('ALTER TABLE `retriever_item` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin');
- q('ALTER TABLE `retriever_item` CHANGE `item-uri` `item-uri` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL');
- q('ALTER TABLE `retriever_resource` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin');
- q('ALTER TABLE `retriever_resource` CHANGE `url` `url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL');
- q('ALTER TABLE `retriever_rule` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin');
- Config::set('retriever', 'dbversion', '0.7');
- }
- if (Config::get('retriever', 'dbversion') == '0.7') {
- $r = q("SELECT `id`, `data` FROM `retriever_rule`");
- foreach ($r as $rr) {
- Logger::log('retriever_install: retriever ' . $rr['id'] . ' old config ' . $rr['data'], Logger::DATA);
- $data = json_decode($rr['data'], true);
- if ($data['pattern']) {
- $matches = array();
- if (preg_match("/\/(.*)\//", $data['pattern'], $matches)) {
- $data['pattern'] = $matches[1];
- }
- }
- if ($data['match']) {
- $include = array();
- foreach (explode('|', $data['match']) as $component) {
- $matches = array();
- if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[@([A-Za-z][a-z0-9]*)='([^']*)'\]/", $component, $matches)) {
- $include[] = array(
- 'element' => $matches[1],
- 'attribute' => $matches[2],
- 'value' => $matches[3]);
- }
- if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[contains(concat(' ',normalize-space(@class),' '),' ([^ ']+) ')]/", $component, $matches)) {
- $include[] = array(
- 'element' => $matches[1],
- 'attribute' => $matches[2],
- 'value' => $matches[3]);
- }
- }
- $data['include'] = $include;
- unset($data['match']);
- }
- if ($data['remove']) {
- $exclude = array();
- foreach (explode('|', $data['remove']) as $component) {
- $matches = array();
- if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[@([A-Za-z][a-z0-9]*)='([^']*)'\]/", $component, $matches)) {
- $exclude[] = array(
- 'element' => $matches[1],
- 'attribute' => $matches[2],
- 'value' => $matches[3]);
- }
- if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[contains(concat(' ',normalize-space(@class),' '),' ([^ ']+) ')]/", $component, $matches)) {
- $exclude[] = array(
- 'element' => $matches[1],
- 'attribute' => $matches[2],
- 'value' => $matches[3]);
- }
- }
- $data['exclude'] = $exclude;
- unset($data['remove']);
- }
- $r = q('UPDATE `retriever_rule` SET `data` = "%s" WHERE `id` = %d', DBA::escape(json_encode($data)), $rr['id']);
- Logger::log('retriever_install: retriever ' . $rr['id'] . ' new config ' . json_encode($data), Logger::DATA);
- }
- Config::set('retriever', 'dbversion', '0.8');
- }
- if (Config::get('retriever', 'dbversion') == '0.8') {
- q("ALTER TABLE `retriever_resource` ADD COLUMN `http-code` smallint(1) unsigned NULL DEFAULT NULL");
- Config::set('retriever', 'dbversion', '0.9');
- }
- if (Config::get('retriever', 'dbversion') == '0.9') {
- q("ALTER TABLE `retriever_item` DROP COLUMN `parent`");
- q("ALTER TABLE `retriever_resource` ADD COLUMN `redirect-url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NULL DEFAULT NULL");
- Config::set('retriever', 'dbversion', '0.10');
- }
if (Config::get('retriever', 'dbversion') == '0.10') {
q("ALTER TABLE `retriever_resource` MODIFY COLUMN `type` char(255) NULL DEFAULT NULL");
q("ALTER TABLE `retriever_resource` MODIFY COLUMN `data` mediumblob NULL DEFAULT NULL");
@@ -347,6 +238,7 @@ function retriever_get_retriever_item($id) {
}
function retriever_get_item($retriever_item) {
+ // @@@ Need to replace this with Item::selectFirst
$items = q("SELECT * FROM `item` WHERE `uri` = '%s' AND `uid` = %d AND `contact-id` = %d",
DBA::escape($retriever_item['item-uri']),
intval($retriever_item['item-uid']),
@@ -537,9 +429,11 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
$extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/');
$extract_xslt = Renderer::replaceMacros($extract_template, $params);
if ($retriever['data']['include']) {
+ Logger::log('retriever_apply_dom_filter: applying include/exclude template \"' . $extract_xslt . '\"', Logger::DEBUG);
$doc = retriever_apply_xslt_text($extract_xslt, $doc);
}
if (array_key_exists('customxslt', $retriever['data']) && $retriever['data']['customxslt']) {
+ Logger::log('retriever_apply_dom_filter: applying custom XSLT \"' . $retriever['data']['customxslt'] . '\"', Logger::DEBUG);
$doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc);
}
if (!$doc) {
@@ -559,16 +453,21 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
return;
}
- $item['body'] = HTML::toBBCode($doc->saveHTML());
- if (!strlen($item['body'])) {
+ $body = HTML::toBBCode($doc->saveHTML());
+ if (!strlen($body)) {
Logger::log('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', Logger::INFO);
return;
}
- $item['body'] .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . ': [url=';
- $item['body'] .= $item['plink'];
- $item['body'] .= ']' . $item['plink'] . '[/url]';
- DBA::update('item', ['body' => $item['body']], ['id' => $item['id']]);
- DBA::update('item-content', ['body' => $item['body']], ['uri' => $item['uri']]);
+ $body .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . ': [url=';
+ $body .= $item['plink'];
+ $body .= ']' . $item['plink'] . '[/url]';
+
+ $uri_id = ItemURI::getIdByURI($item['uri']);
+ //@@@ remove this
+ $item['body'] = $body;
+ Logger::log('retriever_apply_dom_filter: XSLT result \"' . $body . '\"', Logger::DATA);
+ DBA::update('item', ['body' => $body], ['id' => $item['id']]);
+ DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]);
}
function retrieve_images(&$item, $a) {
@@ -678,18 +577,18 @@ function retriever_content($a) {
}
$template = Renderer::getMarkupTemplate('/help.tpl', 'addon/retriever/');
$a->page['content'] .= Renderer::replaceMacros($template, array(
- '$config' => $a->get_baseurl() . '/settings/addon',
+ '$config' => $a->getBaseUrl() . '/settings/addon',
'$feeds' => $feeds));
return;
}
if ($a->argv[1]) {
$retriever = get_retriever($a->argv[1], local_user(), false);
- if (x($_POST["id"])) {
+ if (!empty($_POST["id"])) {
$retriever = get_retriever($a->argv[1], local_user(), true);
$retriever["data"] = array();
foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt') as $setting) {
- if (x($_POST['retriever_' . $setting])) {
+ if (!empty($_POST['retriever_' . $setting])) {
$retriever["data"][$setting] = $_POST['retriever_' . $setting];
}
}
@@ -712,7 +611,7 @@ function retriever_content($a) {
q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d",
DBA::escape(json_encode($retriever["data"])), intval($retriever["id"]));
$a->page['content'] .= "Settings Updated";
- if (x($_POST["retriever_retrospective"])) {
+ if (!empty($_POST["retriever_retrospective"])) {
apply_retrospective($a, $retriever, $_POST["retriever_retrospective"]);
$a->page['content'] .= " and retrospectively applied to " . $_POST["apply"] . " posts";
}
@@ -750,7 +649,7 @@ function retriever_content($a) {
$retriever['data']['customxslt'],
L10n::t("When standard rules aren't enough, apply custom XSLT to the article")),
'$title' => L10n::t('Retrieve Feed Content'),
- '$help' => $a->get_baseurl() . '/retriever/help',
+ '$help' => $a->getBaseUrl() . '/retriever/help',
'$help_t' => L10n::t('Get Help'),
'$submit_t' => L10n::t('Submit'),
'$submit' => L10n::t('Save Settings'),
@@ -773,7 +672,7 @@ function retriever_contact_photo_menu($a, &$args) {
return;
}
if ($args["contact"]["network"] == "feed") {
- $args["menu"][ 'retriever' ] = array(L10n::t('Retriever'), $a->get_baseurl() . '/retriever/' . $args["contact"]['id']);
+ $args["menu"][ 'retriever' ] = array(L10n::t('Retriever'), $a->getBaseUrl() . '/retriever/' . $args["contact"]['id']);
}
}
@@ -816,7 +715,7 @@ function retriever_plugin_settings(&$a,&$s) {
L10n::t('Check this to attempt to retrieve embedded content for all posts - useful e.g. for Facebook posts')),
'$submit' => L10n::t('Save Settings'),
'$title' => L10n::t('Retriever Settings'),
- '$help' => $a->get_baseurl() . '/retriever/help'));
+ '$help' => $a->getBaseUrl() . '/retriever/help'));
}
function retriever_plugin_settings_post($a,$post) {
From 8b6a9c017ad13496a4adcf926fff0bc3cc9907ba Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sun, 21 Jul 2019 18:27:55 +0100
Subject: [PATCH 014/527] beginnings of persistent cookiejar support
---
retriever/retriever.php | 5 +++++
retriever/templates/rule-config.tpl | 1 +
2 files changed, 6 insertions(+)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 97f29694..78a79a0e 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -643,6 +643,11 @@ function retriever_content($a) {
L10n::t('Retrospectively Apply'),
'0',
L10n::t('Reapply the rules to this number of posts')),
+ '$cookies' => array(
+ 'retriever_cookies',
+ L10n::t('Cookies'),
+ $retriever['data']['cookies'],
+ L10n::t("Persistent cookies for this feed. Netscape cookie file format.")),
'$customxslt' => array(
'retriever_customxslt',
L10n::t('Custom XSLT'),
diff --git a/retriever/templates/rule-config.tpl b/retriever/templates/rule-config.tpl
index 228d0326..847d9c3f 100644
--- a/retriever/templates/rule-config.tpl
+++ b/retriever/templates/rule-config.tpl
@@ -106,6 +106,7 @@ function retriever_remove_row(id, number)
+{{include file="field_textarea.tpl" field=$cookies}}
{{include file="field_textarea.tpl" field=$customxslt}}
From eb61f8f09acb85f84d00669e92516c824303d309 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sun, 21 Jul 2019 19:32:30 +0100
Subject: [PATCH 015/527] More preparation for persistent cookies
---
retriever/retriever.php | 17 +++++++++++------
retriever/templates/rule-config.tpl | 19 ++++++++++++++++++-
2 files changed, 29 insertions(+), 7 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 78a79a0e..bb3460a1 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -587,7 +587,7 @@ function retriever_content($a) {
if (!empty($_POST["id"])) {
$retriever = get_retriever($a->argv[1], local_user(), true);
$retriever["data"] = array();
- foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt') as $setting) {
+ foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) {
if (!empty($_POST['retriever_' . $setting])) {
$retriever["data"][$setting] = $_POST['retriever_' . $setting];
}
@@ -643,11 +643,16 @@ function retriever_content($a) {
L10n::t('Retrospectively Apply'),
'0',
L10n::t('Reapply the rules to this number of posts')),
- '$cookies' => array(
- 'retriever_cookies',
- L10n::t('Cookies'),
- $retriever['data']['cookies'],
- L10n::t("Persistent cookies for this feed. Netscape cookie file format.")),
+ 'storecookies' => array(
+ 'retriever_storecookies',
+ L10n::t('Store cookies'),
+ $retriever['data']['storecookies'],
+ L10n::t("Preserve cookie data across fetches.")),
+ '$cookiedata' => array(
+ 'retriever_cookiedata',
+ L10n::t('Cookie Data'),
+ $retriever['data']['cookiedata'],
+ L10n::t("Latest cookie data for this feed. Netscape cookie file format.")),
'$customxslt' => array(
'retriever_customxslt',
L10n::t('Custom XSLT'),
diff --git a/retriever/templates/rule-config.tpl b/retriever/templates/rule-config.tpl
index 847d9c3f..9061d1ff 100644
--- a/retriever/templates/rule-config.tpl
+++ b/retriever/templates/rule-config.tpl
@@ -40,6 +40,22 @@ function retriever_remove_row(id, number)
var row = document.getElementById(id + '-' + number);
tbody.removeChild(row);
}
+
+function retriever_toggle_cookiedata_block()
+{
+ var div = document.querySelector("#id_retriever_cookiedata").parentNode;
+ if (document.querySelector("#id_retriever_storecookies").checked) {
+ div.style.display = "block";
+ }
+ else {
+ div.style.display = "none";
+ }
+}
+
+document.addEventListener('DOMContentLoaded', function() {
+ retriever_toggle_cookiedata_block();
+ document.querySelector("#id_retriever_storecookies").addEventListener('change', retriever_toggle_cookiedata_block, false);
+}, false);
{{$title}}
{{$help_t}}
@@ -106,8 +122,9 @@ function retriever_remove_row(id, number)
-{{include file="field_textarea.tpl" field=$cookies}}
{{include file="field_textarea.tpl" field=$customxslt}}
+{{include file="field_checkbox.tpl" field=$storecookies}}
+{{include file="field_textarea.tpl" field=$cookiedata}}
From df7ea6c3755fca9d1db13151f7608f116c50e6fa Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Wed, 24 Jul 2019 06:48:23 +0100
Subject: [PATCH 016/527] tentative database work
---
retriever/database.sql | 1 +
retriever/retriever.php | 4 ++++
2 files changed, 5 insertions(+)
diff --git a/retriever/database.sql b/retriever/database.sql
index 340e33eb..2a0db966 100644
--- a/retriever/database.sql
+++ b/retriever/database.sql
@@ -24,6 +24,7 @@ CREATE TABLE IF NOT EXISTS `retriever_item` (
CREATE TABLE IF NOT EXISTS `retriever_resource` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
+ `contact-id` int(10) unsigned NOT NULL DEFAULT '0',
`type` char(255) NULL DEFAULT NULL,
`binary` int(1) NOT NULL DEFAULT 0,
`url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL,
diff --git a/retriever/retriever.php b/retriever/retriever.php
index bb3460a1..adf9681e 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -41,6 +41,10 @@ function retriever_install() {
q("ALTER TABLE `retriever_item` ADD INDEX `item-uid` (`item-uid`)");
Config::set('retriever', 'dbversion', '0.12');
}
+ /* if (Config::get('retriever', 'dbversion') == '0.12') { */
+ /* q("ALTER TABLE `retriever_resource` ADD COLUMN `contact-id` int(10) unsigned NULL AFTER `id`"); */
+ /* Config::set('retriever', 'dbversion', '0.13'); */
+ /* } */
if (Config::get('retriever', 'dbversion') != '0.12') {
$schema = file_get_contents(dirname(__file__).'/database.sql');
$arr = explode(';', $schema);
From 034ed5fcd665bb3c784a247136cbef56c08e4955 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Fri, 26 Jul 2019 05:49:53 +0100
Subject: [PATCH 017/527] fix
---
retriever/retriever.php | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index adf9681e..65471be9 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -189,9 +189,9 @@ function retrieve_resource($resource) {
try {
Logger::log('retrieve_resource: ' . ($resource['num-tries'] + 1) .
' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], Logger::DEBUG);
- $redirects;
+ $redirects = 0;
$cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-');
- $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, array('cookiejar' => $cookiejar));
+ $fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar);
unlink($cookiejar);
$resource['data'] = $fetch_result->getBody();
$resource['http-code'] = $fetch_result->getReturnCode();
From 10f7be958b59ce1a958adc69de4ce1f186384779 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sun, 15 Sep 2019 09:26:25 +0100
Subject: [PATCH 018/527] fixed a bug and commented on another
---
retriever/retriever.php | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 65471be9..c70f906e 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -367,6 +367,15 @@ function add_retriever_resource($a, $url, $binary = false) {
function add_retriever_item(&$item, $resource) {
Logger::log('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG);
+ $r = q("SELECT COUNT(*) FROM `retriever_item` WHERE " .
+ "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d",
+ DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id']));
+ if ($r[0]['COUNT(*)'] > 0) {
+ Logger::log("add_retriever_item: retriever item already present for " .
+ $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'],
+ Logger::INFO);
+ return;
+ }
q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " .
"VALUES ('%s', %d, %d, %d)",
DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource["id"]));
@@ -536,6 +545,7 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc
}
function retriever_transform_images($a, &$item, $resource) {
+ return; //@@@ not working
if (!$resource["data"]) {
Logger::log('retriever_transform_images: no data available for '
. $resource['id'] . ' ' . $resource['url'], Logger::INFO);
From 61e925630dce4eee5e6616f05a01a3df24535122 Mon Sep 17 00:00:00 2001
From: Administrator
Date: Sun, 22 Sep 2019 11:47:30 +0200
Subject: [PATCH 019/527] this is working OK
---
retriever/database.sql | 1 +
retriever/retriever.php | 387 +++++++++++++++++++++++++---------------
2 files changed, 247 insertions(+), 141 deletions(-)
diff --git a/retriever/database.sql b/retriever/database.sql
index 2a0db966..a29135e7 100644
--- a/retriever/database.sql
+++ b/retriever/database.sql
@@ -24,6 +24,7 @@ CREATE TABLE IF NOT EXISTS `retriever_item` (
CREATE TABLE IF NOT EXISTS `retriever_resource` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
+ `item-uid` int(10) unsigned NOT NULL DEFAULT '0',
`contact-id` int(10) unsigned NOT NULL DEFAULT '0',
`type` char(255) NULL DEFAULT NULL,
`binary` int(1) NOT NULL DEFAULT 0,
diff --git a/retriever/retriever.php b/retriever/retriever.php
index c70f906e..5644952a 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -12,13 +12,16 @@ use Friendica\Core\Config;
use Friendica\Core\PConfig;
use Friendica\Core\Logger;
use Friendica\Core\Renderer;
+use Friendica\Core\System;
use Friendica\Content\Text\HTML;
use Friendica\Content\Text\BBCode;
+use Friendica\Model\Photo;
use Friendica\Object\Image;
use Friendica\Util\Network;
use Friendica\Core\L10n;
use Friendica\Database\DBA;
use Friendica\Model\ItemURI;
+use Friendica\Model\Item;
function retriever_install() {
Addon::registerHook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings');
@@ -41,17 +44,18 @@ function retriever_install() {
q("ALTER TABLE `retriever_item` ADD INDEX `item-uid` (`item-uid`)");
Config::set('retriever', 'dbversion', '0.12');
}
- /* if (Config::get('retriever', 'dbversion') == '0.12') { */
- /* q("ALTER TABLE `retriever_resource` ADD COLUMN `contact-id` int(10) unsigned NULL AFTER `id`"); */
- /* Config::set('retriever', 'dbversion', '0.13'); */
- /* } */
- if (Config::get('retriever', 'dbversion') != '0.12') {
+ if (Config::get('retriever', 'dbversion') == '0.12') {
+ q("ALTER TABLE `retriever_resource` ADD COLUMN `contact-id` int(10) unsigned NOT NULL DEFAULT '0' AFTER `id`");
+ q("ALTER TABLE `retriever_resource` ADD COLUMN `item-uid` int(10) unsigned NOT NULL DEFAULT '0' AFTER `id`");
+ Config::set('retriever', 'dbversion', '0.13');
+ }
+ if (Config::get('retriever', 'dbversion') != '0.13') {
$schema = file_get_contents(dirname(__file__).'/database.sql');
$arr = explode(';', $schema);
foreach ($arr as $a) {
$r = q($a);
}
- Config::set('retriever', 'dbversion', '0.12');
+ Config::set('retriever', 'dbversion', '0.13');
}
}
@@ -68,7 +72,11 @@ function retriever_uninstall() {
function retriever_module() {}
function retriever_cron($a, $b) {
- // 100 is a nice sane number. Maybe this should be configurable.
+ // 100 is a nice sane number. Maybe this should be configurable. @@@
+
+ // Do this first, otherwise it can interfere with retreiver_retrieve_items
+ retriever_clean_up_completed_resources(100, $a);
+
retriever_retrieve_items(100, $a);
retriever_tidy();
}
@@ -76,6 +84,7 @@ function retriever_cron($a, $b) {
$retriever_item_count = 0;
function retriever_retrieve_items($max_items, $a) {
+ Logger::log('@@@ retriever_retrieve_items', Logger::INFO);
global $retriever_item_count;
$retriever_schedule = array(array(1,'minute'),
@@ -98,56 +107,61 @@ function retriever_retrieve_items($max_items, $a) {
$retrieve_items = $max_items - $retriever_item_count;
Logger::log('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items, Logger::DEBUG);
do {
- $r = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d",
+ Logger::log('@@@ retriever_retrieve_items loop max ' . $max_items . ' count ' . $retriever_item_count, Logger::INFO);
+ Logger::log("@@@ SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR " . implode($schedule_clauses, ' OR ') . ") ORDER BY `last-try` ASC LIMIT " . $retrieve_items, Logger::INFO);
+ $retriever_resources = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d",
DBA::escape(implode($schedule_clauses, ' OR ')),
intval($retrieve_items));
- if (!is_array($r)) {
+ if (!is_array($retriever_resources)) {
break;
}
- if (count($r) == 0) {
+ if (count($retriever_resources) == 0) {
break;
}
- Logger::log('retriever_retrieve_items: found ' . count($r) . ' waiting resources in database', Logger::DEBUG);
- foreach ($r as $rr) {
- retrieve_resource($rr);
+ Logger::log('retriever_retrieve_items: found ' . count($retriever_resources) . ' waiting resources in database', Logger::DEBUG);
+ foreach ($retriever_resources as $retriever_resource) {
+ Logger::log('@@@ need to get the retriever config here cid ' . $retriever_resource['contact-id'] . ' uid ' . $retriever_resource['item-uid'], Logger::INFO);
+ retrieve_resource($retriever_resource);
$retriever_item_count++;
}
$retrieve_items = $max_items - $retriever_item_count;
}
while ($retrieve_items > 0);
+ // @@@ todo: when items add further items (i.e. images), do the new images go round this loop again?
+ Logger::log('@@@ retriever_retrieve_items: finished retrieving items', Logger::INFO);
+}
- /* Look for items that are waiting even though the resource has
- * completed. This usually happens because we've been asked to
- * retrospectively apply a config change. It could also happen
- * due to a cron job dying or something. */
+/* Look for items that are waiting even though the resource has
+ * completed. This usually happens because we've been asked to
+ * retrospectively apply a config change. It could also happen due to
+ * a cron job dying or something. */
+function retriever_clean_up_completed_resources($max_items, $a) {
$r = q("SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d",
- intval($retrieve_items));
+ intval($max_items));
if (!$r) {
$r = array();
}
- Logger::log('retriever_retrieve_items: items waiting even though resource has completed: ' . count($r), Logger::DEBUG);
+ Logger::log('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . count($r), Logger::DEBUG);
foreach ($r as $rr) {
$resource = q("SELECT * FROM retriever_resource WHERE `id` = %d", $rr['resource']);
$retriever_item = retriever_get_retriever_item($rr['item']);
- if (!$retriever_item) {
- Logger::log('retriever_retrieve_items: no retriever item with id ' . $rr['item'], Logger::INFO);
+ if (!DBA::isResult($retriever_item)) {
+ Logger::log('retriever_clean_up_completed_resources: no retriever item with id ' . $rr['item'], Logger::WARNING);
continue;
}
$item = retriever_get_item($retriever_item);
if (!$item) {
- Logger::log('retriever_retrieve_items: no item ' . $retriever_item['item-uri'], Logger::INFO);
+ Logger::log('retriever_clean_up_completed_resources: no item ' . $retriever_item['item-uri'], Logger::WARNING);
continue;
}
- $retriever = get_retriever($item['contact-id'], $item['uid']);
- if (!$retriever) {
- Logger::log('retriever_retrieve_items: no retriever for item ' .
- $retriever_item['item-uri'] . ' ' . $retriever_item['uid'] . ' ' . $item['contact-id'],
- Logger::INFO);
+ $retriever_rule = get_retriever_rule($retriever_item['contact-id'], $item['uid']);
+ if (!$retriever_rule) {
+ Logger::log('retriever_clean_up_completed_resources: no retriever for uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['uid'] . ' ' . $retriever_item['contact-id'], Logger::WARNING);
continue;
}
- retriever_apply_completed_resource_to_item($retriever, $item, $resource[0], $a);
- q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d",
- intval($retriever_item['id']));
+ Logger::log('@@@ retriever_clean_up_completed_resources: about to retriever_apply_completed_resource_to_item', Logger::INFO);
+ retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource[0], $a);
+ q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", intval($retriever_item['id']));
retriever_check_item_completed($item);
}
}
@@ -157,7 +171,7 @@ function retriever_tidy() {
q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)");
$r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null");
- Logger::log('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource');
+ Logger::log('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource', Logger::INFO);
foreach ($r as $rr) {
q('DELETE FROM retriever_item WHERE id = %d', intval($rr['id']));
}
@@ -165,7 +179,7 @@ function retriever_tidy() {
function retrieve_dataurl_resource($resource) {
if (!preg_match("/date:(.*);base64,(.*)/", $resource['url'], $matches)) {
- Logger::log('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern');
+ Logger::log('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern', Logger::INFO);
} else {
$resource['type'] = $matches[1];
$resource['data'] = base64url_decode($matches[2]);
@@ -180,28 +194,36 @@ function retrieve_dataurl_resource($resource) {
}
function retrieve_resource($resource) {
+ Logger::log('@@@ retrieve_resource: url ' . $resource['url'] . ' uid ' . $resource['item-uid'] . ' cid ' . $resource['contact-id'], Logger::INFO);
+
if (substr($resource['url'], 0, 5) == "data:") {
return retrieve_dataurl_resource($resource);
}
$a = get_app();
+ $retriever_rule = get_retriever_rule($resource['contact-id'], $resource['item-uid']);
+
try {
- Logger::log('retrieve_resource: ' . ($resource['num-tries'] + 1) .
- ' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], Logger::DEBUG);
+ Logger::log('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], Logger::DEBUG);
$redirects = 0;
$cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-');
+ if ($retriever_rule['storecookies']) {
+ file_put_contents($cookiejar, $retriever_rule['cookiedata']);
+ }
$fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar);
+ if ($retriever_rule['storecookies']) {
+ $retriever_rule['cookiedata'] = file_get_contents($cookiejar);
+ //@@@ do the store here
+ }
unlink($cookiejar);
$resource['data'] = $fetch_result->getBody();
$resource['http-code'] = $fetch_result->getReturnCode();
$resource['type'] = $fetch_result->getContentType();
$resource['redirect-url'] = $fetch_result->getRedirectUrl();
- Logger::log('retrieve_resource: got code ' . $resource['http-code'] .
- ' retrieving resource ' . $resource['id'] .
- ' final url ' . $resource['redirect-url'], Logger::DEBUG);
+ Logger::log('retrieve_resource: got code ' . $resource['http-code'] . ' retrieving resource ' . $resource['id'] . ' final url ' . $resource['redirect-url'], Logger::DEBUG);
} catch (Exception $e) {
- Logger::log('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage());
+ Logger::log('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage(), Logger::INFO);
}
q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `http-code` = %d, `redirect-url` = '%s' WHERE id = %d",
intval($resource['http-code']),
@@ -214,13 +236,17 @@ function retrieve_resource($resource) {
intval($resource['id']));
retriever_resource_completed($resource, $a);
}
+ Logger::log('@@@ retrieve_resource finished: ' . $resource['url'], Logger::INFO);
}
-function get_retriever($contact_id, $uid, $create = false) {
+function get_retriever_rule($contact_id, $uid, $create = false) {
+ Logger::log('@@@ get_retriever_rule ' . "SELECT * FROM `retriever_rule` WHERE `contact-id` = " . intval($contact_id) . " AND `uid` = " . intval($uid), Logger::INFO);
$r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d",
intval($contact_id), intval($uid));
+ Logger::log('@@@ get_retriever_rule count is ' . count($r), Logger::INFO);
if (count($r)) {
$r[0]['data'] = json_decode($r[0]['data'], true);
+ Logger::log('@@@ get_retriever_rule returning an actual thing', Logger::INFO);
return $r[0];
}
if ($create) {
@@ -233,43 +259,62 @@ function get_retriever($contact_id, $uid, $create = false) {
}
function retriever_get_retriever_item($id) {
- $retriever_items = q("SELECT * FROM `retriever_item` WHERE id = %d", intval($id));
- if (count($retriever_items) != 1) {
- Logger::log('retriever_get_retriever_item: unable to find retriever_item ' . $id, Logger::INFO);
- return;
+ return DBA::selectFirst('retriever_item', [], ['id' => intval($id)]);
+}
+
+function retriever_class_of_item($item) { //@@@
+ if (!$item) {
+ return 'false';
}
- return $retriever_items[0];
+ if (array_key_exists('finished', $item)) {
+ Logger::log('@@@ oh no this is a bad thing', Logger::INFO);
+ return 'retriever_item';
+ }
+ if (array_key_exists('moderated', $item)) {
+ return 'friendica_item';
+ }
+ return 'unknown';
+}
+
+function mat_test($item) { //@@@
+ return 'mat_test';
}
function retriever_get_item($retriever_item) {
- // @@@ Need to replace this with Item::selectFirst
- $items = q("SELECT * FROM `item` WHERE `uri` = '%s' AND `uid` = %d AND `contact-id` = %d",
- DBA::escape($retriever_item['item-uri']),
- intval($retriever_item['item-uid']),
- intval($retriever_item['contact-id']));
- if (count($items) != 1) {
- Logger::log('retriever_get_item: unexpected number of results ' .
- count($items) . " when searching for item $uri $uid $cid", Logger::INFO);
- return;
+ // @@@ add contact id as a search term
+ Logger::log('@@@ retriever_get_item uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['item-uid'] . ' cid ' . $retriever_item['contact-id'], Logger::INFO);
+ try {//@@@ not necessary
+ $item = Item::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid'])]);
+ Logger::log('@@@ 1 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
+ if (!DBA::isResult($item)) {
+ Logger::log('retriever_get_item: no item found for uri ' . $retriever_item['item-uri'], Logger::INFO);
+ return;
+ }
+ Logger::log('@@@ retriever_get_item: yay item found for uri ' . $retriever_item['item-uri'] . ' guid ' . $item['guid'] . ' plink ' . $item['plink'], Logger::INFO);
+ return $item;
+ } catch (Exception $e) {
+ Logger::log('retriever_get_item: exception ' . $e->getMessage(), Logger::INFO);
}
- return $items[0];
}
function retriever_item_completed($retriever_item_id, $resource, $a) {
Logger::log('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url'], Logger::DEBUG);
$retriever_item = retriever_get_retriever_item($retriever_item_id);
- if (!$retriever_item) {
+ if (!DBA::isResult($retriever_item)) {
+ Logger::log('retriever_item_completed: no retriever item with id ' . $retriever_item_id, Logger::INFO);
+ return;
+ }
+ $item = retriever_get_item($retriever_item);
+ Logger::log('@@@ 2 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
+ if (!$item) {
+ Logger::log('retriever_item_completed: no item ' . $retriever_item['item-uri'], Logger::INFO);
return;
}
// Note: the retriever might be null. Doesn't matter.
- $retriever = get_retriever($retriever_item['contact-id'], $retriever_item['item-uid']);
- $item = retriever_get_item($retriever_item);
- if (!$item) {
- return;
- }
+ $retriever_rule = get_retriever_rule($retriever_item['contact-id'], $retriever_item['item-uid']);
- retriever_apply_completed_resource_to_item($retriever, $item, $resource, $a);
+ retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a);
q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d",
intval($retriever_item['id']));
@@ -288,18 +333,24 @@ function apply_retrospective($a, $retriever, $num) {
$r = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d",
intval($retriever['contact-id']), intval($num));
foreach ($r as $item) {
+ Logger::log('@@@ 3 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); //@@@ already know this is wrong
q('UPDATE `item` SET `visible` = 0 WHERE `id` = %d', $item['id']);
q('UPDATE `thread` SET `visible` = 0 WHERE `iid` = %d', $item['id']);
retriever_on_item_insert($a, $retriever, $item);
}
}
+//@@@ make this trigger a retriever immediately somehow
+//@@@ need a lock to say something is doing something
function retriever_on_item_insert($a, $retriever, &$item) {
+ Logger::log('@@@ 4 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
+ Logger::log('@@@ retriever_on_item_insert start ' . $item['plink'], Logger::INFO);
if (!$retriever || !$retriever['id']) {
Logger::log('retriever_on_item_insert: No retriever supplied', Logger::INFO);
return;
}
if (!$retriever["data"]['enable'] == "on") {
+ Logger::log('@@@ retriever_on_item_insert: Disabled', Logger::INFO);
return;
}
if (array_key_exists('pattern', $retriever["data"]) && $retriever["data"]['pattern']) {
@@ -310,12 +361,13 @@ function retriever_on_item_insert($a, $retriever, &$item) {
$url = $item['plink'];
}
- $resource = add_retriever_resource($a, $url);
+ Logger::log('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id'], Logger::DEBUG);
+ $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id']);
$retriever_item_id = add_retriever_item($item, $resource);
}
-function add_retriever_resource($a, $url, $binary = false) {
- Logger::log('add_retriever_resource: ' . $url, Logger::DEBUG);
+function add_retriever_resource($a, $url, $uid, $cid, $binary = false) {
+ Logger::log('add_retriever_resource: url ' . $url . ' uid ' . $uid . ' contact-id ' . $cid, Logger::DEBUG);
$scheme = parse_url($url, PHP_URL_SCHEME);
if ($scheme == 'data') {
@@ -326,7 +378,7 @@ function add_retriever_resource($a, $url, $binary = false) {
fclose($fp);
$url = 'md5://' . hash('md5', $url);
- $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url));
+ $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid));
$resource = $r[0];
if (count($r)) {
Logger::log('add_retriever_resource: Resource ' . $url . ' already requested', Logger::DEBUG);
@@ -334,8 +386,10 @@ function add_retriever_resource($a, $url, $binary = false) {
}
Logger::log('retrieve_resource: got data URL type ' . $resource['type'], Logger::DEBUG);
- q("INSERT INTO `retriever_resource` (`type`, `binary`, `url`, `completed`, `data`) " .
- "VALUES ('%s', %d, '%s', now(), '%s')",
+ q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `type`, `binary`, `url`, `completed`, `data`) " .
+ "VALUES (%d, %d, '%s', %d, '%s', now(), '%s')",
+ intval($uid),
+ intval($cid),
DBA::escape($type),
intval($binary ? 1 : 0),
DBA::escape($url),
@@ -349,31 +403,30 @@ function add_retriever_resource($a, $url, $binary = false) {
}
if (strlen($url) > 800) {
- Logger::log('add_retriever_resource: URL is longer than 800 characters', Logger::INFO);
+ Logger::log('add_retriever_resource: URL is longer than 800 characters', Logger::WARNING);
}
- $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url));
+ $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid));
if (count($r)) {
- Logger::log('add_retriever_resource: Resource ' . $url . ' already requested', Logger::DEBUG);
+ Logger::log('add_retriever_resource: Resource ' . $url . ' uid ' . $uid . ' cid ' . $cid . ' already requested', Logger::DEBUG);
return $r[0];
}
- q("INSERT INTO `retriever_resource` (`binary`, `url`) " .
- "VALUES (%d, '%s')", intval($binary ? 1 : 0), DBA::escape($url));
+ q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `binary`, `url`) " .
+ "VALUES (%d, %d, %d, '%s')", intval($uid), intval($cid), intval($binary ? 1 : 0), DBA::escape($url));
$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url));
return $r[0];
}
function add_retriever_item(&$item, $resource) {
+ Logger::log('@@@ 5 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG);
Logger::log('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG);
$r = q("SELECT COUNT(*) FROM `retriever_item` WHERE " .
"`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d",
DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id']));
if ($r[0]['COUNT(*)'] > 0) {
- Logger::log("add_retriever_item: retriever item already present for " .
- $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'],
- Logger::INFO);
+ Logger::log("add_retriever_item: retriever item already present for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::INFO);
return;
}
q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " .
@@ -383,9 +436,7 @@ function add_retriever_item(&$item, $resource) {
"`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d ORDER BY id DESC",
DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id']));
if (!count($r)) {
- Logger::log("add_retriever_item: couldn't create retriever item for " .
- $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'],
- Logger::INFO);
+ Logger::log("add_retriever_item: couldn't create retriever item for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::INFO);
return;
}
Logger::log('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG);
@@ -416,7 +467,9 @@ function retriever_apply_xslt_text($xslt_text, $doc) {
return $result;
}
+//@@@ is that an item or a resource_item? I really want an item here so I can update it
function retriever_apply_dom_filter($retriever, &$item, $resource) {
+ Logger::log('@@@ 6 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG);
Logger::log('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'], Logger::DEBUG);
if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) {
@@ -454,18 +507,23 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
return;
}
+ Logger::log('@@@ retriever_apply_dom_filter: 1', Logger::INFO);
$components = parse_url($resource['redirect-url']);
$rooturl = $components['scheme'] . "://" . $components['host'];
$dirurl = $rooturl . dirname($components['path']) . "/";
+ Logger::log('@@@ retriever_apply_dom_filter: 2', Logger::INFO);
$params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl);
$fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/');
$fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params);
+ Logger::log('@@@ retriever_apply_dom_filter: 3', Logger::INFO);
$doc = retriever_apply_xslt_text($fix_urls_xslt, $doc);
+ Logger::log('@@@ retriever_apply_dom_filter: 4', Logger::INFO);
if (!$doc) {
Logger::log('retriever_apply_dom_filter: failed to apply fix urls XSLT template', Logger::INFO);
return;
}
+ Logger::log('@@@ retriever_apply_dom_filter: 5', Logger::INFO);
$body = HTML::toBBCode($doc->saveHTML());
if (!strlen($body)) {
Logger::log('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', Logger::INFO);
@@ -475,47 +533,66 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
$body .= $item['plink'];
$body .= ']' . $item['plink'] . '[/url]';
- $uri_id = ItemURI::getIdByURI($item['uri']);
- //@@@ remove this
- $item['body'] = $body;
+ Logger::log('@@@ retriever_apply_dom_filter: 6', Logger::INFO);
+ $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
+ Logger::log('@@@ retriever_apply_dom_filter: item id is ' . $item['id'] . ' uri id is ' . $uri_id, Logger::INFO);
Logger::log('retriever_apply_dom_filter: XSLT result \"' . $body . '\"', Logger::DATA);
- DBA::update('item', ['body' => $body], ['id' => $item['id']]);
- DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]);
+ DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that?
+ //@@@ probably Item::updateContent
}
function retrieve_images(&$item, $a) {
+ $blah_item_class = retriever_class_of_item($item) . ' ' . mat_test($item);
+ Logger::log('@@@ 7 item class is ' . $blah_item_class, Logger::DEBUG);
+
+ $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
+
+ $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]);
+ $body = $content['body'];
+ if (!strlen($body)) {
+ Logger::log('retrieve_images: no body for uri-id ' . $uri_id, Logger::WARNING);
+ return;
+ }
+
+ Logger::log('@@@ retrieve_images start looking in body "' . $body . '"', Logger::INFO);
$matches1 = array();
- preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $item["body"], $matches1);
+ preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $body, $matches1);
$matches2 = array();
- preg_match_all("/\[img\](.*?)\[\/img\]/ism", $item["body"], $matches2);
+ preg_match_all("/\[img\](.*?)\[\/img\]/ism", $body, $matches2);
$matches = array_merge($matches1[3], $matches2[1]);
Logger::log('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG);
foreach ($matches as $url) {
+ Logger::log('@@@ retrieve_images: url ' . $url, Logger::DEBUG);
if (strpos($url, get_app()->getBaseUrl()) === FALSE) {
- $resource = add_retriever_resource($a, $url, true);
+ Logger::log('@@@ retrieve_images: it is from somewhere else', Logger::DEBUG);
+ Logger::log('@@@ retrieve_images: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id'], Logger::DEBUG);
+ $resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id'], true);
if (!$resource['completed']) {
+ Logger::log('@@@ retrieve_images: do not have it yet, get it later', Logger::DEBUG);
add_retriever_item($item, $resource);
}
else {
+ Logger::log('@@@ retrieve_images: got it already, transform', Logger::DEBUG);
retriever_transform_images($a, $item, $resource);
}
}
}
+ Logger::log('@@@ retrieve_images end', Logger::INFO);
}
function retriever_check_item_completed(&$item)
{
+ Logger::log('@@@ 9 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG);
$r = q('SELECT count(*) FROM retriever_item WHERE `item-uri` = "%s" ' .
'AND `item-uid` = %d AND `contact-id` = %d AND `finished` = 0',
DBA::escape($item['uri']), intval($item['uid']),
intval($item['contact-id']));
$waiting = $r[0]['count(*)'];
- Logger::log('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid']
- . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources', Logger::DEBUG);
+ Logger::log('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources', Logger::DEBUG);
$old_visible = $item['visible'];
$item['visible'] = $waiting ? 0 : 1;
if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) {
- Logger::log('retriever_check_item_completed: changing visible flag to ' . $item['visible'] . ' and invoking notifier ("edit_post", ' . $item['id'] . ')', Logger::DEBUG);
+ Logger::log('retriever_check_item_completed: changing visible flag to ' . $item['visible'], Logger::DEBUG);
q("UPDATE `item` SET `visible` = %d WHERE `id` = %d",
intval($item['visible']),
intval($item['id']));
@@ -526,10 +603,10 @@ function retriever_check_item_completed(&$item)
}
function retriever_apply_completed_resource_to_item($retriever, &$item, $resource, $a) {
- Logger::log('retriever_apply_completed_resource_to_item: retriever ' .
- ($retriever ? $retriever['id'] : 'none') .
- ' resource ' . $resource['url'] . ' plink ' . $item['plink'], Logger::DEBUG);
+ Logger::log('@@@ 10 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG);
+ Logger::log('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . $item['plink'], Logger::DEBUG);
if (strpos($resource['type'], 'image') !== false) {
+ Logger::log('@@@ retriever_apply_completed_resource_to_item this is an image must transform', Logger::INFO);
retriever_transform_images($a, $item, $resource);
}
if (!$retriever) {
@@ -544,38 +621,61 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc
}
}
+//@@@ todo: change all Logger::log to Logger::info etc
+//@@@ todo: what is this reference for? document if needed delete if not
function retriever_transform_images($a, &$item, $resource) {
- return; //@@@ not working
+ Logger::log('@@@ 11 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG);
+ Logger::log('@@@ retriever_transform_images', Logger::INFO);
if (!$resource["data"]) {
- Logger::log('retriever_transform_images: no data available for '
- . $resource['id'] . ' ' . $resource['url'], Logger::INFO);
+ Logger::log('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url'], Logger::INFO);
return;
}
- try {
- $photo = Image::storePhoto($a, $item['uid'], $resource['data'], $resource['url']);
+ $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
+
+ try { //@@@ probably can get rid of this try/catch
+ $data = $resource['data'];
+ $type = $resource['type'];
+ $uid = $item['uid'];
+ $cid = $item['contact-id'];
+ $rid = Photo::newResource();
+ $path = parse_url($resource['url'], PHP_URL_PATH);
+ $parts = pathinfo($path);
+ $filename = $parts['filename'] . (array_key_exists('extension', $parts) ? '.' . $parts['extension'] : '');
+ Logger::log('@@@ retriever_transform_images url ' . $resource['url'] . ' path ' . $path . ' filename ' . $parts['filename'], Logger::INFO);
+ $album = 'Wall Photos';
+ $scale = 0;
+ $desc = ''; // TODO: store alt text with resource when it's requested so we can fill this in
+ Logger::log('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . $uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . $album . ' scale ' . $scale . ' desc ' . $desc, Logger::DEBUG);
+ Logger::log('@@@ retriever_transform_images before new Image', Logger::INFO);
+ $image = new Image($data, $type);
+ Logger::log('@@@ retriever_transform_images after new Image', Logger::INFO);
+ Logger::log('@@@ retriever_transform_images before Photo::store', Logger::INFO);
+ $photo = Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc);
+ Logger::log('@@@ retriever_transform_images after Photo::store', Logger::INFO);
+ $new_url = System::baseUrl() . '/photo/' . $rid . '-0.' . $image->getExt();
+ Logger::log('@@@ retriever_transform_images new url ' . $new_url . ' rid ' . $rid . ' ext ' . $image->getExt(), Logger::INFO);
+ if (!strlen($new_url)) {
+ Logger::log('retriever_transform_images: no replacement URL for image ' . $resource['url'], Logger::WARNING);
+ return;
+ }
+
+ $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]);
+ $body = $content['body'];
+ Logger::log('@@@ retriever_transform_images: found body for uri id ' . $uri_id . ': ' . $body, Logger::INFO);
+
+ Logger::log('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . $item['uri'], Logger::DEBUG);
+ Logger::log('@@@ retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in body ' . $body, Logger::DEBUG);
+ $body = str_replace($resource["url"], $new_url, $body);
+
+ Logger::log('@@@ retriever_transform_images: result \"' . $body . '\"', Logger::INFO);
+ DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that?
+ //@@@ probably Item::updateContent
+ //@@ actually no, Item::update
} catch (Exception $e) {
- Logger::log('retriever_transform_images caught exception ' . $e->getMessage());
+ Logger::log('retriever_transform_images caught exception ' . $e->getMessage(), Logger::INFO);
return;
}
- if (!array_key_exists('full', $photo)) {
- Logger::log('retriever_transform_images: no replacement URL for image ' . $resource['url']);
- return;
- }
- $new_url = $photo['full'];
- Logger::log('retriever_transform_images: replacing ' . $resource['url'] . ' with ' .
- $new_url . ' in item ' . $item['plink'], Logger::DEBUG);
- $transformed = str_replace($resource["url"], $new_url, $item['body']);
- if ($transformed === $item['body']) {
- return;
- }
-
- $item['body'] = $transformed;
- q("UPDATE `item` SET `body` = '%s' WHERE `plink` = '%s' AND `uid` = %d AND `contact-id` = %d",
- DBA::escape($item['body']),
- DBA::escape($item['plink']),
- intval($item['uid']),
- intval($item['contact-id']));
}
function retriever_content($a) {
@@ -596,37 +696,37 @@ function retriever_content($a) {
return;
}
if ($a->argv[1]) {
- $retriever = get_retriever($a->argv[1], local_user(), false);
+ $retriever_rule = get_retriever_rule($a->argv[1], local_user(), false);
if (!empty($_POST["id"])) {
- $retriever = get_retriever($a->argv[1], local_user(), true);
- $retriever["data"] = array();
+ $retriever_rule = get_retriever_rule($a->argv[1], local_user(), true);
+ $retriever_rule["data"] = array();
foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) {
if (!empty($_POST['retriever_' . $setting])) {
- $retriever["data"][$setting] = $_POST['retriever_' . $setting];
+ $retriever_rule["data"][$setting] = $_POST['retriever_' . $setting];
}
}
foreach ($_POST as $k=>$v) {
if (preg_match("/retriever-(include|exclude)-(\d+)-(element|attribute|value)/", $k, $matches)) {
- $retriever['data'][$matches[1]][intval($matches[2])][$matches[3]] = $v;
+ $retriever_rule['data'][$matches[1]][intval($matches[2])][$matches[3]] = $v;
}
}
// You've gotta have an element, even if it's just "*"
- foreach ($retriever['data']['include'] as $k=>$clause) {
+ foreach ($retriever_rule['data']['include'] as $k=>$clause) {
if (!$clause['element']) {
- unset($retriever['data']['include'][$k]);
+ unset($retriever_rule['data']['include'][$k]);
}
}
- foreach ($retriever['data']['exclude'] as $k=>$clause) {
+ foreach ($retriever_rule['data']['exclude'] as $k=>$clause) {
if (!$clause['element']) {
- unset($retriever['data']['exclude'][$k]);
+ unset($retriever_rule['data']['exclude'][$k]);
}
}
q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d",
- DBA::escape(json_encode($retriever["data"])), intval($retriever["id"]));
+ DBA::escape(json_encode($retriever_rule["data"])), intval($retriever_rule["id"]));
$a->page['content'] .= "Settings Updated";
if (!empty($_POST["retriever_retrospective"])) {
- apply_retrospective($a, $retriever, $_POST["retriever_retrospective"]);
+ apply_retrospective($a, $retriever_rule, $_POST["retriever_retrospective"]);
$a->page['content'] .= " and retrospectively applied to " . $_POST["apply"] . " posts";
}
$a->page['content'] .= ".
";
@@ -637,21 +737,21 @@ function retriever_content($a) {
'$enable' => array(
'retriever_enable',
L10n::t('Enabled'),
- $retriever['data']['enable']),
+ $retriever_rule['data']['enable']),
'$pattern' => array(
'retriever_pattern',
L10n::t('URL Pattern'),
- $retriever["data"]['pattern'],
+ $retriever_rule["data"]['pattern'],
L10n::t('Regular expression matching part of the URL to replace')),
'$replace' => array(
'retriever_replace',
L10n::t('URL Replace'),
- $retriever["data"]['replace'],
+ $retriever_rule["data"]['replace'],
L10n::t('Text to replace matching part of above regular expression')),
'$images' => array(
'retriever_images',
L10n::t('Download Images'),
- $retriever['data']['images']),
+ $retriever_rule['data']['images']),
'$retrospective' => array(
'retriever_retrospective',
L10n::t('Retrospectively Apply'),
@@ -660,33 +760,33 @@ function retriever_content($a) {
'storecookies' => array(
'retriever_storecookies',
L10n::t('Store cookies'),
- $retriever['data']['storecookies'],
+ $retriever_rule['data']['storecookies'],
L10n::t("Preserve cookie data across fetches.")),
'$cookiedata' => array(
'retriever_cookiedata',
L10n::t('Cookie Data'),
- $retriever['data']['cookiedata'],
+ $retriever_rule['data']['cookiedata'],
L10n::t("Latest cookie data for this feed. Netscape cookie file format.")),
'$customxslt' => array(
'retriever_customxslt',
L10n::t('Custom XSLT'),
- $retriever['data']['customxslt'],
+ $retriever_rule['data']['customxslt'],
L10n::t("When standard rules aren't enough, apply custom XSLT to the article")),
'$title' => L10n::t('Retrieve Feed Content'),
'$help' => $a->getBaseUrl() . '/retriever/help',
'$help_t' => L10n::t('Get Help'),
'$submit_t' => L10n::t('Submit'),
'$submit' => L10n::t('Save Settings'),
- '$id' => ($retriever["id"] ? $retriever["id"] : "create"),
+ '$id' => ($retriever_rule["id"] ? $retriever_rule["id"] : "create"),
'$tag_t' => L10n::t('Tag'),
'$attribute_t' => L10n::t('Attribute'),
'$value_t' => L10n::t('Value'),
'$add_t' => L10n::t('Add'),
'$remove_t' => L10n::t('Remove'),
'$include_t' => L10n::t('Include'),
- '$include' => $retriever['data']['include'],
+ '$include' => $retriever_rule['data']['include'],
'$exclude_t' => L10n::t('Exclude'),
- '$exclude' => $retriever["data"]['exclude']));
+ '$exclude' => $retriever_rule["data"]['exclude']));
return;
}
}
@@ -701,18 +801,23 @@ function retriever_contact_photo_menu($a, &$args) {
}
function retriever_post_remote_hook(&$a, &$item) {
+ Logger::log('@@@ 12 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
Logger::log('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG);
- $retriever = get_retriever($item['contact-id'], $item["uid"], false);
- if ($retriever) {
- retriever_on_item_insert($a, $retriever, $item);
+ $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
+ $retriever_rule = get_retriever_rule($item['contact-id'], $item["uid"], false);
+ if ($retriever_rule) {
+ retriever_on_item_insert($a, $retriever_rule, $item);
}
else {
if (PConfig::get($item["uid"], 'retriever', 'oembed')) {
// Convert to HTML and back to take advantage of bbcode's resolution of oembeds.
- $body = HTML::toBBCode(BBCode::convert($item['body']));
+ $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]);
+ $body = HTML::toBBCode(BBCode::convert($content['body']));
+ Logger::log('@@@ retriever_post_remote_hook item uri-id ' . $uri_id . ' body "' . $item['body'] . '" item content body "' . $body . '"', Logger::DEBUG);
if ($body) {
$item['body'] = $body;
+ DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that?
}
}
if (PConfig::get($item["uid"], 'retriever', 'all_photos')) {
From ef6709d861bc9ba710bef507d7d298d39a58fd6c Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sun, 22 Sep 2019 17:05:23 +0200
Subject: [PATCH 020/527] Improvement
---
retriever/retriever.php | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 5644952a..704bff34 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -650,6 +650,10 @@ function retriever_transform_images($a, &$item, $resource) {
Logger::log('@@@ retriever_transform_images before new Image', Logger::INFO);
$image = new Image($data, $type);
Logger::log('@@@ retriever_transform_images after new Image', Logger::INFO);
+ if (!$image->isValid()) {
+ Logger::log('retriever_transform_images: invalid image found at URL ' . $resource['url'] ' for item ' . $item['id'], Logger::WARNING);
+ return;
+ }
Logger::log('@@@ retriever_transform_images before Photo::store', Logger::INFO);
$photo = Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc);
Logger::log('@@@ retriever_transform_images after Photo::store', Logger::INFO);
From 615992810a8cfff2e7213f8bd7d2d9dcc259831a Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sun, 22 Sep 2019 19:55:07 +0200
Subject: [PATCH 021/527] Change logging functions
---
retriever/retriever.php | 210 ++++++++++++++++++++--------------------
1 file changed, 105 insertions(+), 105 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 704bff34..ac6b321a 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -84,7 +84,7 @@ function retriever_cron($a, $b) {
$retriever_item_count = 0;
function retriever_retrieve_items($max_items, $a) {
- Logger::log('@@@ retriever_retrieve_items', Logger::INFO);
+ Logger::info('@@@ retriever_retrieve_items');
global $retriever_item_count;
$retriever_schedule = array(array(1,'minute'),
@@ -105,10 +105,10 @@ function retriever_retrieve_items($max_items, $a) {
}
$retrieve_items = $max_items - $retriever_item_count;
- Logger::log('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items, Logger::DEBUG);
+ Logger::debug('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items);
do {
- Logger::log('@@@ retriever_retrieve_items loop max ' . $max_items . ' count ' . $retriever_item_count, Logger::INFO);
- Logger::log("@@@ SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR " . implode($schedule_clauses, ' OR ') . ") ORDER BY `last-try` ASC LIMIT " . $retrieve_items, Logger::INFO);
+ Logger::info('@@@ retriever_retrieve_items loop max ' . $max_items . ' count ' . $retriever_item_count);
+ Logger::info("@@@ SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR " . implode($schedule_clauses, ' OR ') . ") ORDER BY `last-try` ASC LIMIT " . $retrieve_items);
$retriever_resources = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d",
DBA::escape(implode($schedule_clauses, ' OR ')),
intval($retrieve_items));
@@ -118,9 +118,9 @@ function retriever_retrieve_items($max_items, $a) {
if (count($retriever_resources) == 0) {
break;
}
- Logger::log('retriever_retrieve_items: found ' . count($retriever_resources) . ' waiting resources in database', Logger::DEBUG);
+ Logger::debug('retriever_retrieve_items: found ' . count($retriever_resources) . ' waiting resources in database');
foreach ($retriever_resources as $retriever_resource) {
- Logger::log('@@@ need to get the retriever config here cid ' . $retriever_resource['contact-id'] . ' uid ' . $retriever_resource['item-uid'], Logger::INFO);
+ Logger::info('@@@ need to get the retriever config here cid ' . $retriever_resource['contact-id'] . ' uid ' . $retriever_resource['item-uid']);
retrieve_resource($retriever_resource);
$retriever_item_count++;
}
@@ -128,7 +128,7 @@ function retriever_retrieve_items($max_items, $a) {
}
while ($retrieve_items > 0);
// @@@ todo: when items add further items (i.e. images), do the new images go round this loop again?
- Logger::log('@@@ retriever_retrieve_items: finished retrieving items', Logger::INFO);
+ Logger::info('@@@ retriever_retrieve_items: finished retrieving items');
}
/* Look for items that are waiting even though the resource has
@@ -141,25 +141,25 @@ function retriever_clean_up_completed_resources($max_items, $a) {
if (!$r) {
$r = array();
}
- Logger::log('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . count($r), Logger::DEBUG);
+ Logger::debug('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . count($r));
foreach ($r as $rr) {
$resource = q("SELECT * FROM retriever_resource WHERE `id` = %d", $rr['resource']);
$retriever_item = retriever_get_retriever_item($rr['item']);
if (!DBA::isResult($retriever_item)) {
- Logger::log('retriever_clean_up_completed_resources: no retriever item with id ' . $rr['item'], Logger::WARNING);
+ Logger::warning('retriever_clean_up_completed_resources: no retriever item with id ' . $rr['item']);
continue;
}
$item = retriever_get_item($retriever_item);
if (!$item) {
- Logger::log('retriever_clean_up_completed_resources: no item ' . $retriever_item['item-uri'], Logger::WARNING);
+ Logger::warning('retriever_clean_up_completed_resources: no item ' . $retriever_item['item-uri']);
continue;
}
$retriever_rule = get_retriever_rule($retriever_item['contact-id'], $item['uid']);
if (!$retriever_rule) {
- Logger::log('retriever_clean_up_completed_resources: no retriever for uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['uid'] . ' ' . $retriever_item['contact-id'], Logger::WARNING);
+ Logger::warning('retriever_clean_up_completed_resources: no retriever for uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['uid'] . ' ' . $retriever_item['contact-id']);
continue;
}
- Logger::log('@@@ retriever_clean_up_completed_resources: about to retriever_apply_completed_resource_to_item', Logger::INFO);
+ Logger::info('@@@ retriever_clean_up_completed_resources: about to retriever_apply_completed_resource_to_item');
retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource[0], $a);
q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", intval($retriever_item['id']));
retriever_check_item_completed($item);
@@ -171,7 +171,7 @@ function retriever_tidy() {
q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)");
$r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null");
- Logger::log('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource', Logger::INFO);
+ Logger::info('retriever_tidy: found ' . count($r) . ' retriever_items with no retriever_resource');
foreach ($r as $rr) {
q('DELETE FROM retriever_item WHERE id = %d', intval($rr['id']));
}
@@ -179,7 +179,7 @@ function retriever_tidy() {
function retrieve_dataurl_resource($resource) {
if (!preg_match("/date:(.*);base64,(.*)/", $resource['url'], $matches)) {
- Logger::log('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern', Logger::INFO);
+ Logger::info('retrieve_dataurl_resource: ' . $resource['id'] . ' does not match pattern');
} else {
$resource['type'] = $matches[1];
$resource['data'] = base64url_decode($matches[2]);
@@ -194,7 +194,7 @@ function retrieve_dataurl_resource($resource) {
}
function retrieve_resource($resource) {
- Logger::log('@@@ retrieve_resource: url ' . $resource['url'] . ' uid ' . $resource['item-uid'] . ' cid ' . $resource['contact-id'], Logger::INFO);
+ Logger::info('@@@ retrieve_resource: url ' . $resource['url'] . ' uid ' . $resource['item-uid'] . ' cid ' . $resource['contact-id']);
if (substr($resource['url'], 0, 5) == "data:") {
return retrieve_dataurl_resource($resource);
@@ -205,14 +205,14 @@ function retrieve_resource($resource) {
$retriever_rule = get_retriever_rule($resource['contact-id'], $resource['item-uid']);
try {
- Logger::log('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], Logger::DEBUG);
+ Logger::debug('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url']);
$redirects = 0;
$cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-');
- if ($retriever_rule['storecookies']) {
+ if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) {
file_put_contents($cookiejar, $retriever_rule['cookiedata']);
}
$fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar);
- if ($retriever_rule['storecookies']) {
+ if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) {
$retriever_rule['cookiedata'] = file_get_contents($cookiejar);
//@@@ do the store here
}
@@ -221,9 +221,9 @@ function retrieve_resource($resource) {
$resource['http-code'] = $fetch_result->getReturnCode();
$resource['type'] = $fetch_result->getContentType();
$resource['redirect-url'] = $fetch_result->getRedirectUrl();
- Logger::log('retrieve_resource: got code ' . $resource['http-code'] . ' retrieving resource ' . $resource['id'] . ' final url ' . $resource['redirect-url'], Logger::DEBUG);
+ Logger::debug('retrieve_resource: got code ' . $resource['http-code'] . ' retrieving resource ' . $resource['id'] . ' final url ' . $resource['redirect-url']);
} catch (Exception $e) {
- Logger::log('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage(), Logger::INFO);
+ Logger::info('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage());
}
q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `http-code` = %d, `redirect-url` = '%s' WHERE id = %d",
intval($resource['http-code']),
@@ -236,17 +236,17 @@ function retrieve_resource($resource) {
intval($resource['id']));
retriever_resource_completed($resource, $a);
}
- Logger::log('@@@ retrieve_resource finished: ' . $resource['url'], Logger::INFO);
+ Logger::info('@@@ retrieve_resource finished: ' . $resource['url']);
}
function get_retriever_rule($contact_id, $uid, $create = false) {
- Logger::log('@@@ get_retriever_rule ' . "SELECT * FROM `retriever_rule` WHERE `contact-id` = " . intval($contact_id) . " AND `uid` = " . intval($uid), Logger::INFO);
+ Logger::info('@@@ get_retriever_rule ' . "SELECT * FROM `retriever_rule` WHERE `contact-id` = " . intval($contact_id) . " AND `uid` = " . intval($uid));
$r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d",
intval($contact_id), intval($uid));
- Logger::log('@@@ get_retriever_rule count is ' . count($r), Logger::INFO);
+ Logger::info('@@@ get_retriever_rule count is ' . count($r));
if (count($r)) {
$r[0]['data'] = json_decode($r[0]['data'], true);
- Logger::log('@@@ get_retriever_rule returning an actual thing', Logger::INFO);
+ Logger::info('@@@ get_retriever_rule returning an actual thing');
return $r[0];
}
if ($create) {
@@ -267,7 +267,7 @@ function retriever_class_of_item($item) { //@@@
return 'false';
}
if (array_key_exists('finished', $item)) {
- Logger::log('@@@ oh no this is a bad thing', Logger::INFO);
+ Logger::info('@@@ oh no this is a bad thing');
return 'retriever_item';
}
if (array_key_exists('moderated', $item)) {
@@ -282,33 +282,33 @@ function mat_test($item) { //@@@
function retriever_get_item($retriever_item) {
// @@@ add contact id as a search term
- Logger::log('@@@ retriever_get_item uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['item-uid'] . ' cid ' . $retriever_item['contact-id'], Logger::INFO);
+ Logger::info('@@@ retriever_get_item uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['item-uid'] . ' cid ' . $retriever_item['contact-id']);
try {//@@@ not necessary
$item = Item::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid'])]);
Logger::log('@@@ 1 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
if (!DBA::isResult($item)) {
- Logger::log('retriever_get_item: no item found for uri ' . $retriever_item['item-uri'], Logger::INFO);
+ Logger::log('retriever_get_item: no item found for uri ' . $retriever_item['item-uri']);
return;
}
- Logger::log('@@@ retriever_get_item: yay item found for uri ' . $retriever_item['item-uri'] . ' guid ' . $item['guid'] . ' plink ' . $item['plink'], Logger::INFO);
+ Logger::info('@@@ retriever_get_item: yay item found for uri ' . $retriever_item['item-uri'] . ' guid ' . $item['guid'] . ' plink ' . $item['plink']);
return $item;
} catch (Exception $e) {
- Logger::log('retriever_get_item: exception ' . $e->getMessage(), Logger::INFO);
+ Logger::info('retriever_get_item: exception ' . $e->getMessage());
}
}
function retriever_item_completed($retriever_item_id, $resource, $a) {
- Logger::log('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url'], Logger::DEBUG);
+ Logger::debug('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url']);
$retriever_item = retriever_get_retriever_item($retriever_item_id);
if (!DBA::isResult($retriever_item)) {
- Logger::log('retriever_item_completed: no retriever item with id ' . $retriever_item_id, Logger::INFO);
+ Logger::info('retriever_item_completed: no retriever item with id ' . $retriever_item_id);
return;
}
$item = retriever_get_item($retriever_item);
Logger::log('@@@ 2 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
if (!$item) {
- Logger::log('retriever_item_completed: no item ' . $retriever_item['item-uri'], Logger::INFO);
+ Logger::log('retriever_item_completed: no item ' . $retriever_item['item-uri']);
return;
}
// Note: the retriever might be null. Doesn't matter.
@@ -322,7 +322,7 @@ function retriever_item_completed($retriever_item_id, $resource, $a) {
}
function retriever_resource_completed($resource, $a) {
- Logger::log('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url'], Logger::DEBUG);
+ Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url']);
$r = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource['id']);
foreach ($r as $rr) {
retriever_item_completed($rr['id'], $resource, $a);
@@ -343,31 +343,31 @@ function apply_retrospective($a, $retriever, $num) {
//@@@ make this trigger a retriever immediately somehow
//@@@ need a lock to say something is doing something
function retriever_on_item_insert($a, $retriever, &$item) {
- Logger::log('@@@ 4 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
- Logger::log('@@@ retriever_on_item_insert start ' . $item['plink'], Logger::INFO);
+ Logger::info('@@@ 4 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
+ Logger::info('@@@ retriever_on_item_insert start ' . $item['plink']);
if (!$retriever || !$retriever['id']) {
- Logger::log('retriever_on_item_insert: No retriever supplied', Logger::INFO);
+ Logger::info('retriever_on_item_insert: No retriever supplied');
return;
}
if (!$retriever["data"]['enable'] == "on") {
- Logger::log('@@@ retriever_on_item_insert: Disabled', Logger::INFO);
+ Logger::info('@@@ retriever_on_item_insert: Disabled');
return;
}
if (array_key_exists('pattern', $retriever["data"]) && $retriever["data"]['pattern']) {
$url = preg_replace('/' . $retriever["data"]['pattern'] . '/', $retriever["data"]['replace'], $item['plink']);
- Logger::log('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url, Logger::DATA);
+ Logger::debug('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url);
}
else {
$url = $item['plink'];
}
- Logger::log('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id'], Logger::DEBUG);
+ Logger::debug('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id']);
$resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id']);
$retriever_item_id = add_retriever_item($item, $resource);
}
function add_retriever_resource($a, $url, $uid, $cid, $binary = false) {
- Logger::log('add_retriever_resource: url ' . $url . ' uid ' . $uid . ' contact-id ' . $cid, Logger::DEBUG);
+ Logger::debug('add_retriever_resource: url ' . $url . ' uid ' . $uid . ' contact-id ' . $cid);
$scheme = parse_url($url, PHP_URL_SCHEME);
if ($scheme == 'data') {
@@ -381,11 +381,11 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) {
$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid));
$resource = $r[0];
if (count($r)) {
- Logger::log('add_retriever_resource: Resource ' . $url . ' already requested', Logger::DEBUG);
+ Logger::debug('add_retriever_resource: Resource ' . $url . ' already requested');
return $resource;
}
- Logger::log('retrieve_resource: got data URL type ' . $resource['type'], Logger::DEBUG);
+ Logger::debug('retrieve_resource: got data URL type ' . $resource['type']);
q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `type`, `binary`, `url`, `completed`, `data`) " .
"VALUES (%d, %d, '%s', %d, '%s', now(), '%s')",
intval($uid),
@@ -403,12 +403,12 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) {
}
if (strlen($url) > 800) {
- Logger::log('add_retriever_resource: URL is longer than 800 characters', Logger::WARNING);
+ Logger::warning('add_retriever_resource: URL is longer than 800 characters');
}
$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid));
if (count($r)) {
- Logger::log('add_retriever_resource: Resource ' . $url . ' uid ' . $uid . ' cid ' . $cid . ' already requested', Logger::DEBUG);
+ Logger::debug('add_retriever_resource: Resource ' . $url . ' uid ' . $uid . ' cid ' . $cid . ' already requested');
return $r[0];
}
@@ -419,14 +419,14 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) {
}
function add_retriever_item(&$item, $resource) {
- Logger::log('@@@ 5 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG);
- Logger::log('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG);
+ Logger::debug('@@@ 5 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
+ Logger::debug('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
$r = q("SELECT COUNT(*) FROM `retriever_item` WHERE " .
"`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d",
DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id']));
if ($r[0]['COUNT(*)'] > 0) {
- Logger::log("add_retriever_item: retriever item already present for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::INFO);
+ Logger::info("add_retriever_item: retriever item already present for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
return;
}
q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " .
@@ -436,10 +436,10 @@ function add_retriever_item(&$item, $resource) {
"`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d ORDER BY id DESC",
DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id']));
if (!count($r)) {
- Logger::log("add_retriever_item: couldn't create retriever item for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::INFO);
+ Logger::info("add_retriever_item: couldn't create retriever item for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
return;
}
- Logger::log('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG);
+ Logger::debug('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
return $r[0]['id'];
}
@@ -453,12 +453,12 @@ function retriever_get_encoding($resource) {
function retriever_apply_xslt_text($xslt_text, $doc) {
if (!$xslt_text) {
- Logger::log('retriever_apply_xslt_text: empty XSLT text', Logger::INFO);
+ Logger::info('retriever_apply_xslt_text: empty XSLT text');
return $doc;
}
$xslt_doc = new DOMDocument();
if (!$xslt_doc->loadXML($xslt_text)) {
- Logger::log('retriever_apply_xslt_text: could not load XML', Logger::INFO);
+ Logger::info('retriever_apply_xslt_text: could not load XML');
return $doc;
}
$xp = new XsltProcessor();
@@ -469,15 +469,15 @@ function retriever_apply_xslt_text($xslt_text, $doc) {
//@@@ is that an item or a resource_item? I really want an item here so I can update it
function retriever_apply_dom_filter($retriever, &$item, $resource) {
- Logger::log('@@@ 6 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG);
- Logger::log('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'], Logger::DEBUG);
+ Logger::debug('@@@ 6 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
+ Logger::debug('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id']);
if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) {
- Logger::log('retriever_apply_dom_filter: no include and no customxslt', Logger::INFO);
+ Logger::info('retriever_apply_dom_filter: no include and no customxslt');
return;
}
if (!$resource['data']) {
- Logger::log('retriever_apply_dom_filter: no text to work with', Logger::INFO);
+ Logger::info('retriever_apply_dom_filter: no text to work with');
return;
}
@@ -495,104 +495,104 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
$extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/');
$extract_xslt = Renderer::replaceMacros($extract_template, $params);
if ($retriever['data']['include']) {
- Logger::log('retriever_apply_dom_filter: applying include/exclude template \"' . $extract_xslt . '\"', Logger::DEBUG);
+ Logger::debug('retriever_apply_dom_filter: applying include/exclude template \"' . $extract_xslt . '\"');
$doc = retriever_apply_xslt_text($extract_xslt, $doc);
}
if (array_key_exists('customxslt', $retriever['data']) && $retriever['data']['customxslt']) {
- Logger::log('retriever_apply_dom_filter: applying custom XSLT \"' . $retriever['data']['customxslt'] . '\"', Logger::DEBUG);
+ Logger::debug('retriever_apply_dom_filter: applying custom XSLT \"' . $retriever['data']['customxslt'] . '\"');
$doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc);
}
if (!$doc) {
- Logger::log('retriever_apply_dom_filter: failed to apply extract XSLT template', Logger::INFO);
+ Logger::info('retriever_apply_dom_filter: failed to apply extract XSLT template');
return;
}
- Logger::log('@@@ retriever_apply_dom_filter: 1', Logger::INFO);
+ Logger::info('@@@ retriever_apply_dom_filter: 1');
$components = parse_url($resource['redirect-url']);
$rooturl = $components['scheme'] . "://" . $components['host'];
$dirurl = $rooturl . dirname($components['path']) . "/";
- Logger::log('@@@ retriever_apply_dom_filter: 2', Logger::INFO);
+ Logger::info('@@@ retriever_apply_dom_filter: 2');
$params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl);
$fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/');
$fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params);
- Logger::log('@@@ retriever_apply_dom_filter: 3', Logger::INFO);
+ Logger::info('@@@ retriever_apply_dom_filter: 3');
$doc = retriever_apply_xslt_text($fix_urls_xslt, $doc);
- Logger::log('@@@ retriever_apply_dom_filter: 4', Logger::INFO);
+ Logger::info('@@@ retriever_apply_dom_filter: 4');
if (!$doc) {
- Logger::log('retriever_apply_dom_filter: failed to apply fix urls XSLT template', Logger::INFO);
+ Logger::info('retriever_apply_dom_filter: failed to apply fix urls XSLT template');
return;
}
- Logger::log('@@@ retriever_apply_dom_filter: 5', Logger::INFO);
+ Logger::info('@@@ retriever_apply_dom_filter: 5');
$body = HTML::toBBCode($doc->saveHTML());
if (!strlen($body)) {
- Logger::log('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', Logger::INFO);
+ Logger::info('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty');
return;
}
$body .= "\n\n" . L10n::t('Retrieved') . ' ' . date("Y-m-d") . ': [url=';
$body .= $item['plink'];
$body .= ']' . $item['plink'] . '[/url]';
- Logger::log('@@@ retriever_apply_dom_filter: 6', Logger::INFO);
+ Logger::info('@@@ retriever_apply_dom_filter: 6');
$uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
- Logger::log('@@@ retriever_apply_dom_filter: item id is ' . $item['id'] . ' uri id is ' . $uri_id, Logger::INFO);
- Logger::log('retriever_apply_dom_filter: XSLT result \"' . $body . '\"', Logger::DATA);
+ Logger::info('@@@ retriever_apply_dom_filter: item id is ' . $item['id'] . ' uri id is ' . $uri_id);
+ Logger::debug('retriever_apply_dom_filter: XSLT result \"' . $body . '\"');
DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that?
//@@@ probably Item::updateContent
}
function retrieve_images(&$item, $a) {
$blah_item_class = retriever_class_of_item($item) . ' ' . mat_test($item);
- Logger::log('@@@ 7 item class is ' . $blah_item_class, Logger::DEBUG);
+ Logger::debug('@@@ 7 item class is ' . $blah_item_class);
$uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
$content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]);
$body = $content['body'];
if (!strlen($body)) {
- Logger::log('retrieve_images: no body for uri-id ' . $uri_id, Logger::WARNING);
+ Logger::warning('retrieve_images: no body for uri-id ' . $uri_id);
return;
}
- Logger::log('@@@ retrieve_images start looking in body "' . $body . '"', Logger::INFO);
+ Logger::info('@@@ retrieve_images start looking in body "' . $body . '"');
$matches1 = array();
preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $body, $matches1);
$matches2 = array();
preg_match_all("/\[img\](.*?)\[\/img\]/ism", $body, $matches2);
$matches = array_merge($matches1[3], $matches2[1]);
- Logger::log('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG);
+ Logger::debug('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
foreach ($matches as $url) {
- Logger::log('@@@ retrieve_images: url ' . $url, Logger::DEBUG);
+ Logger::debug('@@@ retrieve_images: url ' . $url);
if (strpos($url, get_app()->getBaseUrl()) === FALSE) {
- Logger::log('@@@ retrieve_images: it is from somewhere else', Logger::DEBUG);
- Logger::log('@@@ retrieve_images: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id'], Logger::DEBUG);
+ Logger::debug('@@@ retrieve_images: it is from somewhere else');
+ Logger::debug('@@@ retrieve_images: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id']);
$resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id'], true);
if (!$resource['completed']) {
- Logger::log('@@@ retrieve_images: do not have it yet, get it later', Logger::DEBUG);
+ Logger::debug('@@@ retrieve_images: do not have it yet, get it later');
add_retriever_item($item, $resource);
}
else {
- Logger::log('@@@ retrieve_images: got it already, transform', Logger::DEBUG);
+ Logger::debug('@@@ retrieve_images: got it already, transform');
retriever_transform_images($a, $item, $resource);
}
}
}
- Logger::log('@@@ retrieve_images end', Logger::INFO);
+ Logger::info('@@@ retrieve_images end');
}
function retriever_check_item_completed(&$item)
{
- Logger::log('@@@ 9 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG);
+ Logger::debug('@@@ 9 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
$r = q('SELECT count(*) FROM retriever_item WHERE `item-uri` = "%s" ' .
'AND `item-uid` = %d AND `contact-id` = %d AND `finished` = 0',
DBA::escape($item['uri']), intval($item['uid']),
intval($item['contact-id']));
$waiting = $r[0]['count(*)'];
- Logger::log('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources', Logger::DEBUG);
+ Logger::debug('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources');
$old_visible = $item['visible'];
$item['visible'] = $waiting ? 0 : 1;
if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) {
- Logger::log('retriever_check_item_completed: changing visible flag to ' . $item['visible'], Logger::DEBUG);
+ Logger::debug('retriever_check_item_completed: changing visible flag to ' . $item['visible']);
q("UPDATE `item` SET `visible` = %d WHERE `id` = %d",
intval($item['visible']),
intval($item['id']));
@@ -603,10 +603,10 @@ function retriever_check_item_completed(&$item)
}
function retriever_apply_completed_resource_to_item($retriever, &$item, $resource, $a) {
- Logger::log('@@@ 10 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG);
- Logger::log('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . $item['plink'], Logger::DEBUG);
+ Logger::debug('@@@ 10 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
+ Logger::debug('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . $item['plink']);
if (strpos($resource['type'], 'image') !== false) {
- Logger::log('@@@ retriever_apply_completed_resource_to_item this is an image must transform', Logger::INFO);
+ Logger::info('@@@ retriever_apply_completed_resource_to_item this is an image must transform');
retriever_transform_images($a, $item, $resource);
}
if (!$retriever) {
@@ -621,13 +621,13 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc
}
}
-//@@@ todo: change all Logger::log to Logger::info etc
+//@@@ todo: change all Logger::info t etc
//@@@ todo: what is this reference for? document if needed delete if not
function retriever_transform_images($a, &$item, $resource) {
- Logger::log('@@@ 11 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item), Logger::DEBUG);
- Logger::log('@@@ retriever_transform_images', Logger::INFO);
+ Logger::debug('@@@ 11 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
+ Logger::info('@@@ retriever_transform_images');
if (!$resource["data"]) {
- Logger::log('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url'], Logger::INFO);
+ Logger::info('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url']);
return;
}
@@ -642,42 +642,42 @@ function retriever_transform_images($a, &$item, $resource) {
$path = parse_url($resource['url'], PHP_URL_PATH);
$parts = pathinfo($path);
$filename = $parts['filename'] . (array_key_exists('extension', $parts) ? '.' . $parts['extension'] : '');
- Logger::log('@@@ retriever_transform_images url ' . $resource['url'] . ' path ' . $path . ' filename ' . $parts['filename'], Logger::INFO);
+ Logger::info('@@@ retriever_transform_images url ' . $resource['url'] . ' path ' . $path . ' filename ' . $parts['filename']);
$album = 'Wall Photos';
$scale = 0;
$desc = ''; // TODO: store alt text with resource when it's requested so we can fill this in
- Logger::log('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . $uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . $album . ' scale ' . $scale . ' desc ' . $desc, Logger::DEBUG);
- Logger::log('@@@ retriever_transform_images before new Image', Logger::INFO);
+ Logger::debug('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . $uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . $album . ' scale ' . $scale . ' desc ' . $desc);
+ Logger::info('@@@ retriever_transform_images before new Image');
$image = new Image($data, $type);
- Logger::log('@@@ retriever_transform_images after new Image', Logger::INFO);
+ Logger::info('@@@ retriever_transform_images after new Image');
if (!$image->isValid()) {
- Logger::log('retriever_transform_images: invalid image found at URL ' . $resource['url'] ' for item ' . $item['id'], Logger::WARNING);
+ Logger::warning('retriever_transform_images: invalid image found at URL ' . $resource['url'] . ' for item ' . $item['id']);
return;
}
- Logger::log('@@@ retriever_transform_images before Photo::store', Logger::INFO);
+ Logger::info('@@@ retriever_transform_images before Photo::store');
$photo = Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc);
- Logger::log('@@@ retriever_transform_images after Photo::store', Logger::INFO);
+ Logger::info('@@@ retriever_transform_images after Photo::store');
$new_url = System::baseUrl() . '/photo/' . $rid . '-0.' . $image->getExt();
- Logger::log('@@@ retriever_transform_images new url ' . $new_url . ' rid ' . $rid . ' ext ' . $image->getExt(), Logger::INFO);
+ Logger::info('@@@ retriever_transform_images new url ' . $new_url . ' rid ' . $rid . ' ext ' . $image->getExt());
if (!strlen($new_url)) {
- Logger::log('retriever_transform_images: no replacement URL for image ' . $resource['url'], Logger::WARNING);
+ Logger::warning('retriever_transform_images: no replacement URL for image ' . $resource['url']);
return;
}
$content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]);
$body = $content['body'];
- Logger::log('@@@ retriever_transform_images: found body for uri id ' . $uri_id . ': ' . $body, Logger::INFO);
+ Logger::info('@@@ retriever_transform_images: found body for uri id ' . $uri_id . ': ' . $body);
- Logger::log('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . $item['uri'], Logger::DEBUG);
- Logger::log('@@@ retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in body ' . $body, Logger::DEBUG);
+ Logger::debug('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . $item['uri']);
+ Logger::debug('@@@ retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in body ' . $body);
$body = str_replace($resource["url"], $new_url, $body);
- Logger::log('@@@ retriever_transform_images: result \"' . $body . '\"', Logger::INFO);
+ Logger::info('@@@ retriever_transform_images: result \"' . $body . '\"');
DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that?
//@@@ probably Item::updateContent
//@@ actually no, Item::update
} catch (Exception $e) {
- Logger::log('retriever_transform_images caught exception ' . $e->getMessage(), Logger::INFO);
+ Logger::info('retriever_transform_images caught exception ' . $e->getMessage());
return;
}
}
@@ -805,8 +805,8 @@ function retriever_contact_photo_menu($a, &$args) {
}
function retriever_post_remote_hook(&$a, &$item) {
- Logger::log('@@@ 12 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
- Logger::log('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], Logger::DEBUG);
+ Logger::info('@@@ 12 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
+ Logger::info('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
$uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
$retriever_rule = get_retriever_rule($item['contact-id'], $item["uid"], false);
@@ -818,7 +818,7 @@ function retriever_post_remote_hook(&$a, &$item) {
// Convert to HTML and back to take advantage of bbcode's resolution of oembeds.
$content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]);
$body = HTML::toBBCode(BBCode::convert($content['body']));
- Logger::log('@@@ retriever_post_remote_hook item uri-id ' . $uri_id . ' body "' . $item['body'] . '" item content body "' . $body . '"', Logger::DEBUG);
+ Logger::debug('@@@ retriever_post_remote_hook item uri-id ' . $uri_id . ' body "' . $item['body'] . '" item content body "' . $body . '"');
if ($body) {
$item['body'] = $body;
DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that?
From c5fb49455226f776ca16a99e9655d1fce0683b69 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Fri, 27 Sep 2019 22:05:00 +0200
Subject: [PATCH 022/527] retriever stuff
---
retriever/retriever.php | 10 +++-------
1 file changed, 3 insertions(+), 7 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index ac6b321a..56852e45 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -30,7 +30,6 @@ function retriever_install() {
Addon::registerHook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu');
Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron');
- $r = q("SELECT `id` FROM `pconfig` WHERE `cat` LIKE 'retriever_%%'");
if (Config::get('retriever', 'dbversion') == '0.10') {
q("ALTER TABLE `retriever_resource` MODIFY COLUMN `type` char(255) NULL DEFAULT NULL");
q("ALTER TABLE `retriever_resource` MODIFY COLUMN `data` mediumblob NULL DEFAULT NULL");
@@ -537,8 +536,7 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
$uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
Logger::info('@@@ retriever_apply_dom_filter: item id is ' . $item['id'] . ' uri id is ' . $uri_id);
Logger::debug('retriever_apply_dom_filter: XSLT result \"' . $body . '\"');
- DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that?
- //@@@ probably Item::updateContent
+ Item::update(['body' => $body], ['uri-id' => $uri_id]);
}
function retrieve_images(&$item, $a) {
@@ -673,9 +671,7 @@ function retriever_transform_images($a, &$item, $resource) {
$body = str_replace($resource["url"], $new_url, $body);
Logger::info('@@@ retriever_transform_images: result \"' . $body . '\"');
- DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that?
- //@@@ probably Item::updateContent
- //@@ actually no, Item::update
+ Item::update(['body' => $body], ['uri-id' => $uri_id]);
} catch (Exception $e) {
Logger::info('retriever_transform_images caught exception ' . $e->getMessage());
return;
@@ -821,7 +817,7 @@ function retriever_post_remote_hook(&$a, &$item) {
Logger::debug('@@@ retriever_post_remote_hook item uri-id ' . $uri_id . ' body "' . $item['body'] . '" item content body "' . $body . '"');
if ($body) {
$item['body'] = $body;
- DBA::update('item-content', ['body' => $body], ['uri-id' => $uri_id]); //@@@ isn't there a better interface to that?
+ Item::update(['body' => $body], ['uri-id' => $uri_id]);
}
}
if (PConfig::get($item["uid"], 'retriever', 'all_photos')) {
From 4dc51d8f0557fbc2bdd94b0c2985036fa03a6ed3 Mon Sep 17 00:00:00 2001
From: Administrator
Date: Fri, 27 Sep 2019 21:02:52 +0200
Subject: [PATCH 023/527] Fix retriever database problems
---
retriever/database.sql | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/retriever/database.sql b/retriever/database.sql
index a29135e7..68480cfd 100644
--- a/retriever/database.sql
+++ b/retriever/database.sql
@@ -36,7 +36,7 @@ CREATE TABLE IF NOT EXISTS `retriever_resource` (
`data` mediumblob NULL DEFAULT NULL,
`http-code` smallint(1) unsigned NULL DEFAULT NULL,
`redirect-url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NULL DEFAULT NULL,
- KEY `retriever_resource` ADD INDEX `url` (`url`),
- KEY `retriever_resource` ADD INDEX `completed` (`completed`),
+ KEY `url` (`url`),
+ KEY `completed` (`completed`),
PRIMARY KEY (`id`)
) DEFAULT CHARSET=utf8 COLLATE=utf8_bin
From 0963f0da4a2975d8731093cdcd384fca81ae7880 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Fri, 27 Sep 2019 22:05:22 +0200
Subject: [PATCH 024/527] more retriever stuff
---
retriever/retriever.php | 34 +++++++++++++++++++++++++---------
1 file changed, 25 insertions(+), 9 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 56852e45..3b557d80 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -4,7 +4,6 @@
* Description: Follow the permalink of RSS/Atom feed items and replace the summary with the full content.
* Version: 1.1
* Author: Matthew Exon
- * Status: Unsupported
*/
use Friendica\Core\Addon;
@@ -53,6 +52,7 @@ function retriever_install() {
$arr = explode(';', $schema);
foreach ($arr as $a) {
$r = q($a);
+ //@@@ check for errors
}
Config::set('retriever', 'dbversion', '0.13');
}
@@ -332,7 +332,6 @@ function apply_retrospective($a, $retriever, $num) {
$r = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d",
intval($retriever['contact-id']), intval($num));
foreach ($r as $item) {
- Logger::log('@@@ 3 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item)); //@@@ already know this is wrong
q('UPDATE `item` SET `visible` = 0 WHERE `id` = %d', $item['id']);
q('UPDATE `thread` SET `visible` = 0 WHERE `iid` = %d', $item['id']);
retriever_on_item_insert($a, $retriever, $item);
@@ -343,7 +342,10 @@ function apply_retrospective($a, $retriever, $num) {
//@@@ need a lock to say something is doing something
function retriever_on_item_insert($a, $retriever, &$item) {
Logger::info('@@@ 4 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
- Logger::info('@@@ retriever_on_item_insert start ' . $item['plink']);
+ foreach ($item as $key => $value) {
+ Logger::info("@@@ $key => $value");
+ }
+ Logger::info('@@@ retriever_on_item_insert start ' . ' plink ' . $item['plink']);
if (!$retriever || !$retriever['id']) {
Logger::info('retriever_on_item_insert: No retriever supplied');
return;
@@ -352,15 +354,29 @@ function retriever_on_item_insert($a, $retriever, &$item) {
Logger::info('@@@ retriever_on_item_insert: Disabled');
return;
}
- if (array_key_exists('pattern', $retriever["data"]) && $retriever["data"]['pattern']) {
- $url = preg_replace('/' . $retriever["data"]['pattern'] . '/', $retriever["data"]['replace'], $item['plink']);
- Logger::debug('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url);
- }
- else {
+ if (array_key_exists('plink', $item)) {
$url = $item['plink'];
}
+ else {
+ if (!array_key_exists('uri_id', $item)) {
+ Logger::warning('retriever_on_item_insert: item ' . ' has no plink and no uri-id');
+ // @@@ find an identifier and put it in warning
+ Logger::warning('@@@ retriever_on_item_insert: item has: ' . print_r($item, true));
+ foreach ($item as $key => $value) {
+ Logger::warning("@@@ $key => $value");
+ }
+ return;
+ }
+ $content = DBA::selectFirst('item-content', [], ['uri-id' => $item['uri_id']]);
+ $url = $content['plink'];
+ }
- Logger::debug('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id']);
+ if (array_key_exists('pattern', $retriever["data"]) && $retriever["data"]['pattern']) {
+ $url = preg_replace('/' . $retriever["data"]['pattern'] . '/', $retriever["data"]['replace'], $url);
+ Logger::debug('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url);
+ }
+
+ Logger::debug('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id'] . ' url ' . $url);
$resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id']);
$retriever_item_id = add_retriever_item($item, $resource);
}
From 04e57e4334d97764b67393ecfe75c48fc75afb8f Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sun, 29 Sep 2019 17:01:46 +0200
Subject: [PATCH 025/527] Fix bugs in retriever retrospective stuff
---
retriever/retriever.php | 100 ++++++++++++++++----------------
retriever/templates/extract.tpl | 18 ++----
2 files changed, 55 insertions(+), 63 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 3b557d80..4d701276 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -30,16 +30,16 @@ function retriever_install() {
Addon::registerHook('cron', 'addon/retriever/retriever.php', 'retriever_cron');
if (Config::get('retriever', 'dbversion') == '0.10') {
- q("ALTER TABLE `retriever_resource` MODIFY COLUMN `type` char(255) NULL DEFAULT NULL");
- q("ALTER TABLE `retriever_resource` MODIFY COLUMN `data` mediumblob NULL DEFAULT NULL");
- q("ALTER TABLE `retriever_rule` MODIFY COLUMN `data` mediumtext NULL DEFAULT NULL");
+ q('ALTER TABLE `retriever_resource` MODIFY COLUMN `type` char(255) NULL DEFAULT NULL');
+ q('ALTER TABLE `retriever_resource` MODIFY COLUMN `data` mediumblob NULL DEFAULT NULL');
+ q('ALTER TABLE `retriever_rule` MODIFY COLUMN `data` mediumtext NULL DEFAULT NULL');
Config::set('retriever', 'dbversion', '0.11');
}
if (Config::get('retriever', 'dbversion') == '0.11') {
- q("ALTER TABLE `retriever_resource` ADD INDEX `url` (`url`)");
- q("ALTER TABLE `retriever_resource` ADD INDEX `completed` (`completed`)");
- q("ALTER TABLE `retriever_item` ADD INDEX `finished` (`finished`)");
- q("ALTER TABLE `retriever_item` ADD INDEX `item-uid` (`item-uid`)");
+ q('ALTER TABLE `retriever_resource` ADD INDEX `url` (`url`)');
+ q('ALTER TABLE `retriever_resource` ADD INDEX `completed` (`completed`)');
+ q('ALTER TABLE `retriever_item` ADD INDEX `finished` (`finished`)');
+ q('ALTER TABLE `retriever_item` ADD INDEX `item-uid` (`item-uid`)');
Config::set('retriever', 'dbversion', '0.12');
}
if (Config::get('retriever', 'dbversion') == '0.12') {
@@ -206,16 +206,19 @@ function retrieve_resource($resource) {
try {
Logger::debug('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url']);
$redirects = 0;
- $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-');
+ $cookiejar = '';
if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) {
+ $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-');
file_put_contents($cookiejar, $retriever_rule['cookiedata']);
}
$fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar);
if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) {
$retriever_rule['cookiedata'] = file_get_contents($cookiejar);
- //@@@ do the store here
+ Logger::debug('@@@ retriever_resource update cookie ' . json_encode($retriever_rule['data'] . ' id ' . $retriever_rule['id']));
+ q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d",
+ DBA::escape(json_encode($retriever_rule['data'])), intval($retriever_rule["id"]));
+ unlink($cookiejar);
}
- unlink($cookiejar);
$resource['data'] = $fetch_result->getBody();
$resource['http-code'] = $fetch_result->getReturnCode();
$resource['type'] = $fetch_result->getContentType();
@@ -323,17 +326,25 @@ function retriever_item_completed($retriever_item_id, $resource, $a) {
function retriever_resource_completed($resource, $a) {
Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url']);
$r = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource['id']);
- foreach ($r as $rr) {
- retriever_item_completed($rr['id'], $resource, $a);
+ foreach (DBA::select('retriever_item', ['id'], ['resource' => intval($resource['id'])]) as $retriever_item) {
+ Logger::debug('@@@ retriever_resource_completed got item id ' . $retriever_item['id']);
+ retriever_item_completed($retriever_item['id'], $resource, $a);
}
}
function apply_retrospective($a, $retriever, $num) {
+ Logger::info('@@@ apply_retrospective');
$r = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d",
intval($retriever['contact-id']), intval($num));
foreach ($r as $item) {
+ Logger::info('@@@ apply_retrospective item ' . $item['id']);
q('UPDATE `item` SET `visible` = 0 WHERE `id` = %d', $item['id']);
q('UPDATE `thread` SET `visible` = 0 WHERE `iid` = %d', $item['id']);
+ foreach (DBA::select('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]) as $retriever_item) {
+ Logger::info('@@@ about to delete retriever_item id ' . $retriever_item['id'] . ' uri ' . $item['uri'] . ' uid ' . $item['uid'] . ' contact ' . $item['contact-id']);
+ DBA::delete('retriever_resource', ['id' => $retriever_item['resource']]);
+ DBA::delete('retriever_item', ['id' => $retriever_item['id']]);
+ }
retriever_on_item_insert($a, $retriever, $item);
}
}
@@ -341,39 +352,31 @@ function apply_retrospective($a, $retriever, $num) {
//@@@ make this trigger a retriever immediately somehow
//@@@ need a lock to say something is doing something
function retriever_on_item_insert($a, $retriever, &$item) {
- Logger::info('@@@ 4 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
- foreach ($item as $key => $value) {
- Logger::info("@@@ $key => $value");
- }
- Logger::info('@@@ retriever_on_item_insert start ' . ' plink ' . $item['plink']);
+ Logger::info('@@@ retriever_on_item_insert start plink ' . $item['plink'] . ' id ' . $item['id']);
if (!$retriever || !$retriever['id']) {
Logger::info('retriever_on_item_insert: No retriever supplied');
return;
}
- if (!$retriever["data"]['enable'] == "on") {
+ if (!$retriever['data']['enable'] == "on") {
Logger::info('@@@ retriever_on_item_insert: Disabled');
return;
}
- if (array_key_exists('plink', $item)) {
+ if (array_key_exists('plink', $item) && strlen($item['plink'])) {
$url = $item['plink'];
}
else {
- if (!array_key_exists('uri_id', $item)) {
- Logger::warning('retriever_on_item_insert: item ' . ' has no plink and no uri-id');
- // @@@ find an identifier and put it in warning
- Logger::warning('@@@ retriever_on_item_insert: item has: ' . print_r($item, true));
- foreach ($item as $key => $value) {
- Logger::warning("@@@ $key => $value");
- }
+ if (!array_key_exists('uri-id', $item)) {
+ Logger::warning('retriever_on_item_insert: item ' . $item['id'] . ' has no plink and no uri-id');
return;
}
- $content = DBA::selectFirst('item-content', [], ['uri-id' => $item['uri_id']]);
+ $content = DBA::selectFirst('item-content', [], ['uri-id' => $item['uri-id']]);
$url = $content['plink'];
}
- if (array_key_exists('pattern', $retriever["data"]) && $retriever["data"]['pattern']) {
- $url = preg_replace('/' . $retriever["data"]['pattern'] . '/', $retriever["data"]['replace'], $url);
- Logger::debug('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url);
+ if (array_key_exists('pattern', $retriever['data']) && $retriever['data']['pattern']) {
+ $orig_url = $url;
+ $url = preg_replace('/' . $retriever['data']['pattern'] . '/', $retriever['data']['replace'], $orig_url);
+ Logger::debug('retriever_on_item_insert: Changed ' . $orig_url . ' to ' . $url);
}
Logger::debug('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id'] . ' url ' . $url);
@@ -476,15 +479,14 @@ function retriever_apply_xslt_text($xslt_text, $doc) {
Logger::info('retriever_apply_xslt_text: could not load XML');
return $doc;
}
+ Logger::debug('@@@ retriever_apply_xslt_text: ' . $xslt_text);
$xp = new XsltProcessor();
$xp->importStylesheet($xslt_doc);
$result = $xp->transformToDoc($doc);
return $result;
}
-//@@@ is that an item or a resource_item? I really want an item here so I can update it
function retriever_apply_dom_filter($retriever, &$item, $resource) {
- Logger::debug('@@@ 6 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
Logger::debug('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id']);
if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) {
@@ -496,6 +498,7 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
return;
}
+ //@@@ break this bit into separate function
$encoding = retriever_get_encoding($resource);
$content = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $encoding);
$doc = new DOMDocument('1.0', 'UTF-8');
@@ -522,23 +525,19 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
return;
}
- Logger::info('@@@ retriever_apply_dom_filter: 1');
+ //@@@ break this bit into separate function
$components = parse_url($resource['redirect-url']);
$rooturl = $components['scheme'] . "://" . $components['host'];
$dirurl = $rooturl . dirname($components['path']) . "/";
- Logger::info('@@@ retriever_apply_dom_filter: 2');
$params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl);
$fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/');
$fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params);
- Logger::info('@@@ retriever_apply_dom_filter: 3');
$doc = retriever_apply_xslt_text($fix_urls_xslt, $doc);
- Logger::info('@@@ retriever_apply_dom_filter: 4');
if (!$doc) {
Logger::info('retriever_apply_dom_filter: failed to apply fix urls XSLT template');
return;
}
- Logger::info('@@@ retriever_apply_dom_filter: 5');
$body = HTML::toBBCode($doc->saveHTML());
if (!strlen($body)) {
Logger::info('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty');
@@ -548,9 +547,7 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
$body .= $item['plink'];
$body .= ']' . $item['plink'] . '[/url]';
- Logger::info('@@@ retriever_apply_dom_filter: 6');
- $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
- Logger::info('@@@ retriever_apply_dom_filter: item id is ' . $item['id'] . ' uri id is ' . $uri_id);
+ $uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself? Consider using item['id'] instead
Logger::debug('retriever_apply_dom_filter: XSLT result \"' . $body . '\"');
Item::update(['body' => $body], ['uri-id' => $uri_id]);
}
@@ -629,7 +626,7 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc
if ((strpos($resource['type'], 'html') !== false) ||
(strpos($resource['type'], 'xml') !== false)) {
retriever_apply_dom_filter($retriever, $item, $resource);
- if ($retriever["data"]['images'] ) {
+ if ($retriever['data']['images'] ) {
retrieve_images($item, $a);
}
}
@@ -640,7 +637,7 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc
function retriever_transform_images($a, &$item, $resource) {
Logger::debug('@@@ 11 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
Logger::info('@@@ retriever_transform_images');
- if (!$resource["data"]) {
+ if (!$resource['data']) {
Logger::info('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url']);
return;
}
@@ -716,10 +713,13 @@ function retriever_content($a) {
if (!empty($_POST["id"])) {
$retriever_rule = get_retriever_rule($a->argv[1], local_user(), true);
- $retriever_rule["data"] = array();
+ $retriever_rule['data'] = array();
foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) {
- if (!empty($_POST['retriever_' . $setting])) {
- $retriever_rule["data"][$setting] = $_POST['retriever_' . $setting];
+ if (empty($_POST['retriever_' . $setting])) {
+ $retriever_rule['data'][$setting] = NULL;
+ }
+ else {
+ $retriever_rule['data'][$setting] = $_POST['retriever_' . $setting];
}
}
foreach ($_POST as $k=>$v) {
@@ -739,11 +739,11 @@ function retriever_content($a) {
}
}
q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d",
- DBA::escape(json_encode($retriever_rule["data"])), intval($retriever_rule["id"]));
+ DBA::escape(json_encode($retriever_rule['data'])), intval($retriever_rule["id"]));
$a->page['content'] .= "Settings Updated";
if (!empty($_POST["retriever_retrospective"])) {
apply_retrospective($a, $retriever_rule, $_POST["retriever_retrospective"]);
- $a->page['content'] .= " and retrospectively applied to " . $_POST["apply"] . " posts";
+ $a->page['content'] .= " and retrospectively applied to " . $_POST["retriever_retrospective"] . " posts";
}
$a->page['content'] .= ".
";
}
@@ -757,12 +757,12 @@ function retriever_content($a) {
'$pattern' => array(
'retriever_pattern',
L10n::t('URL Pattern'),
- $retriever_rule["data"]['pattern'],
+ $retriever_rule['data']['pattern'],
L10n::t('Regular expression matching part of the URL to replace')),
'$replace' => array(
'retriever_replace',
L10n::t('URL Replace'),
- $retriever_rule["data"]['replace'],
+ $retriever_rule['data']['replace'],
L10n::t('Text to replace matching part of above regular expression')),
'$images' => array(
'retriever_images',
@@ -802,7 +802,7 @@ function retriever_content($a) {
'$include_t' => L10n::t('Include'),
'$include' => $retriever_rule['data']['include'],
'$exclude_t' => L10n::t('Exclude'),
- '$exclude' => $retriever_rule["data"]['exclude']));
+ '$exclude' => $retriever_rule['data']['exclude']));
return;
}
}
diff --git a/retriever/templates/extract.tpl b/retriever/templates/extract.tpl
index f24a860d..ca67f683 100644
--- a/retriever/templates/extract.tpl
+++ b/retriever/templates/extract.tpl
@@ -3,25 +3,17 @@
-
-{{function clause_xpath}}
-{{if !$clause.attribute}}
-{{$clause.element}}{{elseif $clause.attribute == 'class'}}
-{{$clause.element}}[contains(concat(' ', normalize-space(@class), ' '), '{{$clause.value}}')]{{else}}
-{{$clause.element}}[@{{$clause.attribute}}='{{$clause.value}}']{{/if}}
-{{/function}}
-
+{{function clause_xpath}}{{if !$clause.attribute}}{{$clause.element}}{{elseif $clause.attribute == 'class'}}{{$clause.element}}[contains(concat(' ', normalize-space(@class), ' '), '{{$clause.value}}')]{{else}}{{$clause.element}}[@{{$clause.attribute}}='{{$clause.value}}']{{/if}}{{/function}}
{{foreach $spec.include as $clause}}
+
-
-{{/foreach}}
-
+ {{/foreach}}
{{foreach $spec.exclude as $clause}}
-
-{{/foreach}}
+
+ {{/foreach}}
From 0c9db8383af59932929bd4cafabd429fe0bff635 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sun, 29 Sep 2019 17:04:34 +0200
Subject: [PATCH 026/527] fakerei2
---
retriever/retriever.php | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 4d701276..294fba67 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -51,8 +51,10 @@ function retriever_install() {
$schema = file_get_contents(dirname(__file__).'/database.sql');
$arr = explode(';', $schema);
foreach ($arr as $a) {
- $r = q($a);
- //@@@ check for errors
+ if (!DBA::e($a)) {
+ Logger::warning('Unable to create database table: ' . DBA::errorMessage());
+ return;
+ }
}
Config::set('retriever', 'dbversion', '0.13');
}
@@ -142,7 +144,8 @@ function retriever_clean_up_completed_resources($max_items, $a) {
}
Logger::debug('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . count($r));
foreach ($r as $rr) {
- $resource = q("SELECT * FROM retriever_resource WHERE `id` = %d", $rr['resource']);
+ $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]);
+ Logger::info('@@@ retriever_clean_up_completed_resources did alternate thing resource type ' . $resource['type']);
$retriever_item = retriever_get_retriever_item($rr['item']);
if (!DBA::isResult($retriever_item)) {
Logger::warning('retriever_clean_up_completed_resources: no retriever item with id ' . $rr['item']);
@@ -159,7 +162,7 @@ function retriever_clean_up_completed_resources($max_items, $a) {
continue;
}
Logger::info('@@@ retriever_clean_up_completed_resources: about to retriever_apply_completed_resource_to_item');
- retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource[0], $a);
+ retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a);
q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", intval($retriever_item['id']));
retriever_check_item_completed($item);
}
From 043c5157073333127b3a9966c507cfdcb09161da Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sun, 29 Sep 2019 17:09:11 +0200
Subject: [PATCH 027/527] more dba stuff
---
retriever/retriever.php | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 294fba67..bbe138b9 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -144,8 +144,6 @@ function retriever_clean_up_completed_resources($max_items, $a) {
}
Logger::debug('retriever_clean_up_completed_resources: items waiting even though resource has completed: ' . count($r));
foreach ($r as $rr) {
- $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]);
- Logger::info('@@@ retriever_clean_up_completed_resources did alternate thing resource type ' . $resource['type']);
$retriever_item = retriever_get_retriever_item($rr['item']);
if (!DBA::isResult($retriever_item)) {
Logger::warning('retriever_clean_up_completed_resources: no retriever item with id ' . $rr['item']);
@@ -161,7 +159,7 @@ function retriever_clean_up_completed_resources($max_items, $a) {
Logger::warning('retriever_clean_up_completed_resources: no retriever for uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['uid'] . ' ' . $retriever_item['contact-id']);
continue;
}
- Logger::info('@@@ retriever_clean_up_completed_resources: about to retriever_apply_completed_resource_to_item');
+ $resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]);
retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a);
q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", intval($retriever_item['id']));
retriever_check_item_completed($item);
From 3906813dcfc9682f3f73d82a04fca2b1b9c3e4f9 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sun, 29 Sep 2019 20:59:14 +0200
Subject: [PATCH 028/527] fixed image regex
---
retriever/retriever.php | 70 +++++++++++++++++++++++++++++------------
1 file changed, 50 insertions(+), 20 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index bbe138b9..20ab1ee8 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -47,7 +47,10 @@ function retriever_install() {
q("ALTER TABLE `retriever_resource` ADD COLUMN `item-uid` int(10) unsigned NOT NULL DEFAULT '0' AFTER `id`");
Config::set('retriever', 'dbversion', '0.13');
}
- if (Config::get('retriever', 'dbversion') != '0.13') {
+ if (Config::get('retriever', 'dbversion') == '0.13') {
+ Config::set('retriever', 'downloads_per_cron', '100');
+ }
+ if (Config::get('retriever', 'dbversion') != '0.14') {
$schema = file_get_contents(dirname(__file__).'/database.sql');
$arr = explode(';', $schema);
foreach ($arr as $a) {
@@ -56,7 +59,8 @@ function retriever_install() {
return;
}
}
- Config::set('retriever', 'dbversion', '0.13');
+ Config::set('retriever', 'downloads_per_cron', '100');
+ Config::set('retriever', 'dbversion', '0.14');
}
}
@@ -72,20 +76,37 @@ function retriever_uninstall() {
function retriever_module() {}
+function retriever_addon_admin(&$a, &$o) {
+ $downloads_per_cron = Config::get('retriever', 'downloads_per_cron');
+ $template = Renderer::getMarkupTemplate('admin.tpl', 'addon/retriever/');
+ $config = ['downloads_per_cron',
+ L10n::t('Downloads per Cron'),
+ $downloads_per_cron,
+ L10n::t('Maximum number of downloads to attempt during each run of the cron job.')];
+ $o .= Renderer::replaceMacros($template, [
+ '$downloads_per_cron' => $config,
+ '$submit' => L10n::t('Save Settings')]);
+}
+
+function retriever_addon_admin_post ($a) {
+ if (!empty($_POST['downloads_per_cron'])) {
+ Config::set('retriever', 'downloads_per_cron', $_POST['downloads_per_cron']);
+ }
+}
+
function retriever_cron($a, $b) {
- // 100 is a nice sane number. Maybe this should be configurable. @@@
+ $downloads_per_cron = Config::get('retriever', 'downloads_per_cron');
- // Do this first, otherwise it can interfere with retreiver_retrieve_items
- retriever_clean_up_completed_resources(100, $a);
+ // Do this first, otherwise it can interfere with retriever_retrieve_items
+ retriever_clean_up_completed_resources($downloads_per_cron, $a);
- retriever_retrieve_items(100, $a);
+ retriever_retrieve_items($downloads_per_cron, $a);
retriever_tidy();
}
$retriever_item_count = 0;
function retriever_retrieve_items($max_items, $a) {
- Logger::info('@@@ retriever_retrieve_items');
global $retriever_item_count;
$retriever_schedule = array(array(1,'minute'),
@@ -108,8 +129,7 @@ function retriever_retrieve_items($max_items, $a) {
$retrieve_items = $max_items - $retriever_item_count;
Logger::debug('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items);
do {
- Logger::info('@@@ retriever_retrieve_items loop max ' . $max_items . ' count ' . $retriever_item_count);
- Logger::info("@@@ SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR " . implode($schedule_clauses, ' OR ') . ") ORDER BY `last-try` ASC LIMIT " . $retrieve_items);
+ // TODO: figure out how to do this with DBA module
$retriever_resources = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d",
DBA::escape(implode($schedule_clauses, ' OR ')),
intval($retrieve_items));
@@ -121,7 +141,6 @@ function retriever_retrieve_items($max_items, $a) {
}
Logger::debug('retriever_retrieve_items: found ' . count($retriever_resources) . ' waiting resources in database');
foreach ($retriever_resources as $retriever_resource) {
- Logger::info('@@@ need to get the retriever config here cid ' . $retriever_resource['contact-id'] . ' uid ' . $retriever_resource['item-uid']);
retrieve_resource($retriever_resource);
$retriever_item_count++;
}
@@ -129,7 +148,7 @@ function retriever_retrieve_items($max_items, $a) {
}
while ($retrieve_items > 0);
// @@@ todo: when items add further items (i.e. images), do the new images go round this loop again?
- Logger::info('@@@ retriever_retrieve_items: finished retrieving items');
+ Logger::debug('retriever_retrieve_items: finished retrieving items');
}
/* Look for items that are waiting even though the resource has
@@ -137,7 +156,8 @@ function retriever_retrieve_items($max_items, $a) {
* retrospectively apply a config change. It could also happen due to
* a cron job dying or something. */
function retriever_clean_up_completed_resources($max_items, $a) {
- $r = q("SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d",
+ // TODO: figure out how to do this with DBA module
+ $r = q('SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d',
intval($max_items));
if (!$r) {
$r = array();
@@ -161,6 +181,7 @@ function retriever_clean_up_completed_resources($max_items, $a) {
}
$resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]);
retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a);
+ //@@@ next one to do
q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", intval($retriever_item['id']));
retriever_check_item_completed($item);
}
@@ -208,8 +229,10 @@ function retrieve_resource($resource) {
Logger::debug('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url']);
$redirects = 0;
$cookiejar = '';
+ Logger::debug('@@@ retrieve_resource storecookies ' . $retriever_rule['storecookies']);
if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) {
$cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-');
+ Logger::debug('@@@ retrieve_resource cookie file ' . $cookiejar . ' content ' . $retriever_rule['cookiedata']);
file_put_contents($cookiejar, $retriever_rule['cookiedata']);
}
$fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar);
@@ -218,7 +241,7 @@ function retrieve_resource($resource) {
Logger::debug('@@@ retriever_resource update cookie ' . json_encode($retriever_rule['data'] . ' id ' . $retriever_rule['id']));
q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d",
DBA::escape(json_encode($retriever_rule['data'])), intval($retriever_rule["id"]));
- unlink($cookiejar);
+ /* unlink($cookiejar); */ //@@@
}
$resource['data'] = $fetch_result->getBody();
$resource['http-code'] = $fetch_result->getReturnCode();
@@ -350,8 +373,8 @@ function apply_retrospective($a, $retriever, $num) {
}
}
-//@@@ make this trigger a retriever immediately somehow
-//@@@ need a lock to say something is doing something
+// TODO: Currently this waits until the next cron before actually downloading. Should do it immediately.
+// TODO: This queries then inserts. It should use some kind of lock to avoid requesting the same resource twice.
function retriever_on_item_insert($a, $retriever, &$item) {
Logger::info('@@@ retriever_on_item_insert start plink ' . $item['plink'] . ' id ' . $item['id']);
if (!$retriever || !$retriever['id']) {
@@ -397,6 +420,7 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) {
fclose($fp);
$url = 'md5://' . hash('md5', $url);
+ //@@@ fix this
$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid));
$resource = $r[0];
if (count($r)) {
@@ -405,6 +429,7 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) {
}
Logger::debug('retrieve_resource: got data URL type ' . $resource['type']);
+ //@@@ fix this
q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `type`, `binary`, `url`, `completed`, `data`) " .
"VALUES (%d, %d, '%s', %d, '%s', now(), '%s')",
intval($uid),
@@ -425,6 +450,7 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) {
Logger::warning('add_retriever_resource: URL is longer than 800 characters');
}
+ //@@@ fix this
$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid));
if (count($r)) {
Logger::debug('add_retriever_resource: Resource ' . $url . ' uid ' . $uid . ' cid ' . $cid . ' already requested');
@@ -554,24 +580,29 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
}
function retrieve_images(&$item, $a) {
+ // Note that $item doesn't necessarily contain all the fields you would expect, in particular 'id'
$blah_item_class = retriever_class_of_item($item) . ' ' . mat_test($item);
Logger::debug('@@@ 7 item class is ' . $blah_item_class);
+ Logger::debug('@@@ retrieve_images start item '. $item['id'] . ' uri ' . $item['uri'] . ' uri id ' . $item['uri-id'] . ' plink ' . $item['plink'] . ' guid ' . $item['guid']);
$uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
- $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]);
+ $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $uri_id]);
$body = $content['body'];
if (!strlen($body)) {
Logger::warning('retrieve_images: no body for uri-id ' . $uri_id);
return;
}
- Logger::info('@@@ retrieve_images start looking in body "' . $body . '"');
+ Logger::info('@@@ retrieve_images looking in body "' . $body . '"');
+ // I suspect that matches1 and matches2 are not used any more?
$matches1 = array();
preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $body, $matches1);
$matches2 = array();
preg_match_all("/\[img\](.*?)\[\/img\]/ism", $body, $matches2);
- $matches = array_merge($matches1[3], $matches2[1]);
+ $matches3 = array();
+ preg_match_all("/\[img\=([^\]]*)\]([^[]*)\[\/img\]/ism", $body, $matches3);
+ $matches = array_merge($matches1[3], $matches2[1], $matches3[1]);
Logger::debug('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
foreach ($matches as $url) {
Logger::debug('@@@ retrieve_images: url ' . $url);
@@ -615,7 +646,6 @@ function retriever_check_item_completed(&$item)
}
function retriever_apply_completed_resource_to_item($retriever, &$item, $resource, $a) {
- Logger::debug('@@@ 10 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
Logger::debug('retriever_apply_completed_resource_to_item: retriever ' . ($retriever ? $retriever['id'] : 'none') . ' resource ' . $resource['url'] . ' plink ' . $item['plink']);
if (strpos($resource['type'], 'image') !== false) {
Logger::info('@@@ retriever_apply_completed_resource_to_item this is an image must transform');
@@ -676,7 +706,7 @@ function retriever_transform_images($a, &$item, $resource) {
return;
}
- $content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]);
+ $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $uri_id]);
$body = $content['body'];
Logger::info('@@@ retriever_transform_images: found body for uri id ' . $uri_id . ': ' . $body);
From dbd00503aaaaef8d4c3100ae1d6af51601c54857 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Sun, 29 Sep 2019 22:05:49 +0200
Subject: [PATCH 029/527] Stuff in retriever
---
retriever/retriever.php | 27 ++++++++-------------------
1 file changed, 8 insertions(+), 19 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index 20ab1ee8..f43504e6 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -147,14 +147,10 @@ function retriever_retrieve_items($max_items, $a) {
$retrieve_items = $max_items - $retriever_item_count;
}
while ($retrieve_items > 0);
- // @@@ todo: when items add further items (i.e. images), do the new images go round this loop again?
Logger::debug('retriever_retrieve_items: finished retrieving items');
}
-/* Look for items that are waiting even though the resource has
- * completed. This usually happens because we've been asked to
- * retrospectively apply a config change. It could also happen due to
- * a cron job dying or something. */
+// Look for items that are waiting even though the resource has completed. This shouldn't happen, but is worth cleaning up if it does.
function retriever_clean_up_completed_resources($max_items, $a) {
// TODO: figure out how to do this with DBA module
$r = q('SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d',
@@ -181,13 +177,14 @@ function retriever_clean_up_completed_resources($max_items, $a) {
}
$resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]);
retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a);
- //@@@ next one to do
- q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", intval($retriever_item['id']));
+ Logger::info('@@@ retriever_clean_up_completed_resources tried to update id ' . $retriver_item['id'] . ' to finished, better check that it really worked!');
+ DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished']);
retriever_check_item_completed($item);
}
}
function retriever_tidy() {
+ // TODO: figure out how to do this with DBA module
q("DELETE FROM retriever_resource WHERE completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)");
q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)");
@@ -581,8 +578,6 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
function retrieve_images(&$item, $a) {
// Note that $item doesn't necessarily contain all the fields you would expect, in particular 'id'
- $blah_item_class = retriever_class_of_item($item) . ' ' . mat_test($item);
- Logger::debug('@@@ 7 item class is ' . $blah_item_class);
Logger::debug('@@@ retrieve_images start item '. $item['id'] . ' uri ' . $item['uri'] . ' uri id ' . $item['uri-id'] . ' plink ' . $item['plink'] . ' guid ' . $item['guid']);
$uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
@@ -595,27 +590,21 @@ function retrieve_images(&$item, $a) {
}
Logger::info('@@@ retrieve_images looking in body "' . $body . '"');
- // I suspect that matches1 and matches2 are not used any more?
- $matches1 = array();
- preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $body, $matches1);
- $matches2 = array();
- preg_match_all("/\[img\](.*?)\[\/img\]/ism", $body, $matches2);
- $matches3 = array();
- preg_match_all("/\[img\=([^\]]*)\]([^[]*)\[\/img\]/ism", $body, $matches3);
+ // I suspect that the first two are not used any more?
+ preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $item["body"], $matches1);
+ preg_match_all("/\[img\](.*?)\[\/img\]/ism", $item["body"], $matches2);
+ preg_match_all("/\[img\=([^\]]*)\]([^[]*)\[\/img\]/ism", $item["body"], $matches3);
$matches = array_merge($matches1[3], $matches2[1], $matches3[1]);
Logger::debug('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
foreach ($matches as $url) {
Logger::debug('@@@ retrieve_images: url ' . $url);
if (strpos($url, get_app()->getBaseUrl()) === FALSE) {
- Logger::debug('@@@ retrieve_images: it is from somewhere else');
Logger::debug('@@@ retrieve_images: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id']);
$resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id'], true);
if (!$resource['completed']) {
- Logger::debug('@@@ retrieve_images: do not have it yet, get it later');
add_retriever_item($item, $resource);
}
else {
- Logger::debug('@@@ retrieve_images: got it already, transform');
retriever_transform_images($a, $item, $resource);
}
}
From be68a4aa3c53b08731b92d66b64979b322be2a34 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Mon, 30 Sep 2019 08:25:00 +0200
Subject: [PATCH 030/527] update version number
---
retriever/retriever.php | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index f43504e6..fcd45b46 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -1,8 +1,8 @@
*/
From 2ba05cc80cb81f8d625ec44b4e0167f62afaa2cb Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Mon, 30 Sep 2019 08:25:16 +0200
Subject: [PATCH 031/527] configurable number of requests
---
retriever/templates/admin.tpl | 8 ++++++++
1 file changed, 8 insertions(+)
create mode 100644 retriever/templates/admin.tpl
diff --git a/retriever/templates/admin.tpl b/retriever/templates/admin.tpl
new file mode 100644
index 00000000..b5a35961
--- /dev/null
+++ b/retriever/templates/admin.tpl
@@ -0,0 +1,8 @@
+{{*
+ * AUTOMATICALLY GENERATED TEMPLATE
+ * DO NOT EDIT THIS FILE, CHANGES WILL BE OVERWRITTEN
+ *
+ *}}
+{{include file="field_input.tpl" field=$downloads_per_cron}}
+
+
From 42314b667063d2dca23376d138ef7cb27874c3ce Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Mon, 30 Sep 2019 20:52:05 +0200
Subject: [PATCH 032/527] Add phototrack and publicise
---
phototrack/database.sql | 23 ++
phototrack/phototrack.php | 258 ++++++++++++++++++++
publicise/publicise.php | 431 ++++++++++++++++++++++++++++++++++
publicise/templates/admin.tpl | 39 +++
4 files changed, 751 insertions(+)
create mode 100644 phototrack/database.sql
create mode 100644 phototrack/phototrack.php
create mode 100644 publicise/publicise.php
create mode 100644 publicise/templates/admin.tpl
diff --git a/phototrack/database.sql b/phototrack/database.sql
new file mode 100644
index 00000000..f1b58f6b
--- /dev/null
+++ b/phototrack/database.sql
@@ -0,0 +1,23 @@
+CREATE TABLE IF NOT EXISTS `phototrack_photo_use` (
+ /* One row per place where a locally stored photo is referenced: the photo's resource-id plus the table, field and row that mention it. `checked` is refreshed every time the reference is seen again. Keep comments in this file free of semicolons because phototrack_install() splits the file on that character. */
+ `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
+ `resource-id` char(64) NOT NULL,
+ `table` char(64) NOT NULL,
+ `field` char(64) NOT NULL,
+ `row-id` int(11) NOT NULL,
+ `checked` timestamp NOT NULL DEFAULT now(),
+ PRIMARY KEY (`id`),
+ INDEX `resource-id` (`resource-id`),
+ INDEX `row` (`table`,`field`,`row-id`)
+) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin;
+
+CREATE TABLE IF NOT EXISTS `phototrack_row_check` (
+ /* One row per scanned database row, recording when it was last searched for photo references. */
+ `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
+ `table` char(64) NOT NULL,
+ `row-id` int(11) NOT NULL,
+ `checked` timestamp NOT NULL DEFAULT now(),
+ PRIMARY KEY (`id`),
+ INDEX `row` (`table`,`row-id`),
+ INDEX `checked` (`checked`)
+) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin;
+
+SELECT TRUE
+/* NOTE(review): presumably this trailing statement exists so that the last fragment produced by splitting on the statement separator is not empty - confirm before removing */
diff --git a/phototrack/phototrack.php b/phototrack/phototrack.php
new file mode 100644
index 00000000..8b909f5d
--- /dev/null
+++ b/phototrack/phototrack.php
@@ -0,0 +1,258 @@
+
+ */
+
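+/*
+ * List of tables and the fields that are checked:
+ *
+ * contact: photo thumb micro about
+ * fcontact: photo
+ * fsuggest: photo
+ * gcontact: photo about
+ * item: body
+ * item-content: body
+ * mail: from-photo
+ * notify: photo
+ * profile: photo thumb about
+ *
+ * Note: mail, notify and profile are listed above but are not yet handled by
+ * phototrack_check_row() or scanned by phototrack_cron().
+ */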
+
+use Friendica\Core\Addon;
+use Friendica\Core\Config;
+use Friendica\Core\Logger;
+use Friendica\Object\Image;
+use Friendica\Database\DBA;
+
+// Fallback for the number of rows fetched per table in one search pass
+defined('PHOTOTRACK_DEFAULT_BATCH_SIZE') || define('PHOTOTRACK_DEFAULT_BATCH_SIZE', 1000);
+// Fallback for the time in *minutes* between searches for photo uses
+defined('PHOTOTRACK_DEFAULT_SEARCH_INTERVAL') || define('PHOTOTRACK_DEFAULT_SEARCH_INTERVAL', 10);
+
+/**
+ * Registers the add-on's hooks and, if the stored schema version is not
+ * '0.1', creates the database tables from database.sql.
+ *
+ * The schema file is split on ';' and executed one statement at a time. The
+ * 'dbversion' config value is only written once every statement succeeded,
+ * so a failed run is attempted again on the next install.
+ */
+function phototrack_install() {
+	Addon::registerHook('post_local_end', 'addon/phototrack/phototrack.php', 'phototrack_post_local_end');
+	Addon::registerHook('post_remote_end', 'addon/phototrack/phototrack.php', 'phototrack_post_remote_end');
+	Addon::registerHook('notifier_end', 'addon/phototrack/phototrack.php', 'phototrack_notifier_end');
+	Addon::registerHook('cron', 'addon/phototrack/phototrack.php', 'phototrack_cron');
+
+	if (Config::get('phototrack', 'dbversion') == '0.1') {
+		return;
+	}
+
+	$schema = file_get_contents(dirname(__FILE__) . '/database.sql');
+	if ($schema === false) {
+		Logger::warning('Unable to create database table: cannot read database.sql');
+		return;
+	}
+
+	foreach (explode(';', $schema) as $statement) {
+		// Splitting on ';' can leave fragments holding only whitespace; there is nothing to execute in those
+		if (trim($statement) === '') {
+			continue;
+		}
+		if (!DBA::e($statement)) {
+			Logger::warning('Unable to create database table: ' . DBA::errorMessage());
+			return;
+		}
+	}
+	Config::set('phototrack', 'dbversion', '0.1');
+}
+
+/**
+ * Unregisters every hook that phototrack_install() registered.
+ */
+function phototrack_uninstall() {
+	$hooks = array(
+		'post_local_end' => 'phototrack_post_local_end',
+		'post_remote_end' => 'phototrack_post_remote_end',
+		'notifier_end' => 'phototrack_notifier_end',
+		'cron' => 'phototrack_cron',
+	);
+	foreach ($hooks as $hook => $callback) {
+		Addon::unregisterHook($hook, 'addon/phototrack/phototrack.php', $callback);
+	}
+}
+
+// Intentionally empty.
+// NOTE(review): presumably this only has to exist so that Friendica treats the add-on as providing a module - confirm.
+function phototrack_module() {}
+
+/**
+ * Records that a row has just been searched for photo references.
+ *
+ * Updates the timestamp of the existing phototrack_row_check entry, or
+ * inserts a new entry when the row has never been checked before.
+ *
+ * @param string $table name of the table the row belongs to
+ * @param int    $id    id of the row within that table
+ */
+function phototrack_finished_row($table, $id) {
+	$existing = DBA::selectFirst('phototrack_row_check', ['id'], ['table' => $table, 'row-id' => $id]);
+	// Values go through q()'s placeholders instead of being interpolated into the SQL text
+	if (!is_bool($existing)) {
+		q("UPDATE phototrack_row_check SET checked = NOW() WHERE `table` = '%s' AND `row-id` = %d",
+			DBA::escape($table), intval($id));
+	}
+	else {
+		q("INSERT INTO phototrack_row_check (`table`, `row-id`, `checked`) VALUES ('%s', %d, NOW())",
+			DBA::escape($table), intval($id));
+	}
+}
+
+/**
+ * Records that a photo is referenced by a field of a database row.
+ *
+ * The photo reference is reduced to a bare resource id by removing any
+ * supported image extension and a trailing "-N" suffix. Nothing is recorded
+ * unless the result is 32 characters long and exists in the photo table.
+ *
+ * @param string $photo photo reference, e.g. the part of a URL after '/photo/'
+ * @param string $table table containing the reference
+ * @param string $field field containing the reference
+ * @param int    $id    id of the row containing the reference
+ */
+function phototrack_photo_use($photo, $table, $field, $id) {
+	Logger::debug('@@@ phototrack_photo_use ' . $photo);
+	foreach (Image::supportedTypes() as $extension) {
+		$photo = str_replace(".$extension", '', $photo);
+	}
+	// Drop a trailing "-N" suffix (a single character after a dash)
+	if (substr($photo, -2, 1) == '-') {
+		$photo = substr($photo, 0, -2);
+	}
+	if (strlen($photo) != 32) {
+		return;
+	}
+	$r = q("SELECT `resource-id` FROM `photo` WHERE `resource-id` = '%s' LIMIT 1", DBA::escape($photo));
+	// q() may return a non-array on failure, so test for "no usable rows" rather than calling count()
+	if (!is_array($r) || !count($r)) {
+		return;
+	}
+	$rid = $r[0]['resource-id'];
+	$existing = q("SELECT id FROM phototrack_photo_use WHERE `resource-id` = '%s' AND `table` = '%s' AND `field` = '%s' AND `row-id` = %d",
+		DBA::escape($rid), DBA::escape($table), DBA::escape($field), intval($id));
+	if (is_array($existing) && count($existing)) {
+		q("UPDATE phototrack_photo_use SET checked = NOW() WHERE `resource-id` = '%s' AND `table` = '%s' AND `field` = '%s' AND `row-id` = %d",
+			DBA::escape($rid), DBA::escape($table), DBA::escape($field), intval($id));
+	}
+	else {
+		q("INSERT INTO phototrack_photo_use (`resource-id`, `table`, `field`, `row-id`, `checked`) VALUES ('%s', '%s', '%s', %d, NOW())",
+			DBA::escape($rid), DBA::escape($table), DBA::escape($field), intval($id));
+	}
+}
+
+/**
+ * Checks whether a URL points at a locally stored photo and, if so, records
+ * the use of that photo.
+ *
+ * Only URLs that start with '/photo/', optionally preceded by this site's
+ * base URL, are considered. Anything else is ignored.
+ *
+ * @param object $a     application object providing getBaseURL()
+ * @param string $table table the URL was found in
+ * @param string $field field the URL was found in
+ * @param int    $id    id of the row the URL was found in
+ * @param string $url   the URL to examine
+ */
+function phototrack_check_field_url($a, $table, $field, $id, $url) {
+	Logger::info('@@@ phototrack_check_field_url table ' . $table . ' field ' . $field . ' id ' . $id . ' url ' . $url);
+	if (!is_string($url) || $url === '') {
+		return;
+	}
+	$baseurl = $a->getBaseURL();
+	// Only strip the base URL when it really is a prefix; cutting strlen($baseurl)
+	// characters off a URL that merely contains it somewhere would leave garbage
+	if (($baseurl != '') && (strpos($url, $baseurl) === 0)) {
+		$url = substr($url, strlen($baseurl));
+		Logger::info('@@@ phototrack_check_field_url funny url stuff ' . $url . ' base ' . $baseurl);
+	}
+	// Likewise '/photo/' has to be at the start for the remainder to be a resource id
+	if (strpos($url, '/photo/') === 0) {
+		$rid = substr($url, strlen('/photo/'));
+		Logger::info('@@@ phototrack_check_field_url rid ' . $rid);
+		phototrack_photo_use($rid, $table, $field, $id);
+	}
+}
+
+/**
+ * Searches a BBCode field for image tags and checks every image URL found.
+ *
+ * Recognised forms are [img]URL[/img], [img=WIDTHxHEIGHT]URL[/img] and
+ * [img=URL]description[/img].
+ *
+ * @param object $a     application object, passed through to the URL check
+ * @param string $table table the value was read from
+ * @param string $field field the value was read from
+ * @param int    $id    id of the row the value was read from
+ * @param string $value BBCode text to search
+ */
+function phototrack_check_field_bbcode($a, $table, $field, $id, $value) {
+	$value = (string)$value;
+	$urls = array();
+
+	$matches = array();
+	if (preg_match_all("/\[img(\=([0-9]*)x([0-9]*))?\](.*?)\[\/img\]/ism", $value, $matches)) {
+		$urls = array_merge($urls, $matches[4]);
+	}
+	// Form with the URL inside the opening tag. For [img=WIDTHxHEIGHT] this captures
+	// the dimensions instead, which phototrack_check_field_url() simply ignores.
+	$matches = array();
+	if (preg_match_all("/\[img\=([^\]]*)\]([^[]*)\[\/img\]/ism", $value, $matches)) {
+		$urls = array_merge($urls, $matches[1]);
+	}
+
+	foreach (array_unique($urls) as $url) {
+		phototrack_check_field_url($a, $table, $field, $id, $url);
+	}
+}
+
+// Hook: a locally created item was stored; scan it for photo references
+function phototrack_post_local_end(&$a, &$item) {
+	foreach (array('item', 'item-content') as $table) {
+		phototrack_check_row($a, $table, $item);
+	}
+}
+
+// Hook: a remotely received item was stored; scan it for photo references
+function phototrack_post_remote_end(&$a, &$item) {
+	foreach (array('item', 'item-content') as $table) {
+		phototrack_check_row($a, $table, $item);
+	}
+}
+
+// Hook registered for 'notifier_end'. Currently does nothing; it has to exist
+// because phototrack_install() registers it.
+function phototrack_notifier_end($item) {
+}
+
+/**
+ * Searches one database row for photo references and marks it as checked.
+ *
+ * Which fields are examined, and whether each holds BBCode or a plain URL,
+ * depends on the table. Rows of unknown tables are only marked as checked.
+ *
+ * @param object $a     application object, passed through to the field checks
+ * @param string $table name of the table the row came from
+ * @param array  $row   the row; must contain 'id' and the fields to examine
+ */
+function phototrack_check_row($a, $table, $row) {
+	$fields_by_table = array(
+		'item' => array('body' => 'bbcode'),
+		'item-content' => array('body' => 'bbcode'),
+		'contact' => array(
+			'photo' => 'url',
+			'thumb' => 'url',
+			'micro' => 'url',
+			'about' => 'bbcode'),
+		'fcontact' => array('photo' => 'url'),
+		'fsuggest' => array('photo' => 'url'),
+		'gcontact' => array(
+			'photo' => 'url',
+			'about' => 'bbcode'),
+	);
+	$fields = isset($fields_by_table[$table]) ? $fields_by_table[$table] : array();
+
+	foreach ($fields as $field => $type) {
+		if ($type == 'bbcode') {
+			phototrack_check_field_bbcode($a, $table, $field, $row['id'], $row[$field]);
+		}
+		elseif ($type == 'url') {
+			phototrack_check_field_url($a, $table, $field, $row['id'], $row[$field]);
+		}
+	}
+	phototrack_finished_row($table, $row['id']);
+}
+
+/**
+ * Returns the number of rows to process per table in one search pass.
+ *
+ * The configured value is converted to an integer because callers place it
+ * directly into a LIMIT clause. Anything that is not a positive integer
+ * falls back to PHOTOTRACK_DEFAULT_BATCH_SIZE.
+ *
+ * @return int
+ */
+function phototrack_batch_size() {
+	$batch_size = intval(Config::get('phototrack', 'batch_size'));
+	if ($batch_size > 0) {
+		return $batch_size;
+	}
+	return PHOTOTRACK_DEFAULT_BATCH_SIZE;
+}
+
+/**
+ * Searches one batch of rows of a table for photo references.
+ *
+ * Rows are selected when they have never been checked or were last checked
+ * more than a month ago.
+ *
+ * @param object $a     application object, passed through to the row check
+ * @param string $table table to search; it is placed into the SQL text as-is,
+ *                      so it must be a fixed name and never user input
+ * @return int number of rows that still need checking after this batch
+ *             (0 when the count query fails)
+ */
+function phototrack_search_table($a, $table) {
+	$batch_size = intval(phototrack_batch_size());
+	$unchecked = "FROM `$table` LEFT OUTER JOIN phototrack_row_check ON ( phototrack_row_check.`table` = '$table' AND phototrack_row_check.`row-id` = `$table`.id ) WHERE ( ( phototrack_row_check.checked IS NULL ) OR ( phototrack_row_check.checked < DATE_SUB(NOW(), INTERVAL 1 MONTH) ) )";
+
+	$rows = q("SELECT `$table`.* $unchecked ORDER BY phototrack_row_check.checked LIMIT $batch_size");
+	// q() does not always return an array, e.g. when the query fails
+	if (!is_array($rows)) {
+		Logger::warning('phototrack: unable to search table ' . $table);
+		$rows = array();
+	}
+	foreach ($rows as $row) {
+		phototrack_check_row($a, $table, $row);
+	}
+
+	$r = q("SELECT COUNT(*) $unchecked");
+	$remaining = (is_array($r) && isset($r[0]['COUNT(*)'])) ? intval($r[0]['COUNT(*)']) : 0;
+	Logger::info('phototrack: searched ' . count($rows) . ' rows in table ' . $table . ', ' . $remaining . ' still remaining to search');
+	return $remaining;
+}
+
+/**
+ * Decides whether the cron hook should run a search now.
+ *
+ * A search runs immediately while the backlog recorded by the previous run
+ * exceeds ten batches. Otherwise it runs only once the configured search
+ * interval (in minutes) has passed since the last search.
+ *
+ * @return bool true when a search should run
+ */
+function phototrack_cron_time() {
+	$backlog_limit = 10 * phototrack_batch_size();
+	if (Config::get('phototrack', 'remaining_items') > $backlog_limit) {
+		Logger::debug('phototrack: more than ' . $backlog_limit . ' items remaining');
+		return true;
+	}
+
+	$last = Config::get('phototrack', 'last_search');
+	$search_interval = intval(Config::get('phototrack', 'search_interval')) ?: PHOTOTRACK_DEFAULT_SEARCH_INTERVAL;
+
+	// Without a recorded last search there is nothing to wait for
+	$too_soon = $last && (($last + ($search_interval * 60)) > time());
+	if ($too_soon) {
+		Logger::debug('phototrack: search interval not reached');
+		return false;
+	}
+	return true;
+}
+
+/**
+ * Cron hook: searches one batch of every tracked table for photo references
+ * and, once nothing is left to search, removes unused photos.
+ */
+function phototrack_cron($a, $b) {
+	if (!phototrack_cron_time()) {
+		return;
+	}
+	Config::set('phototrack', 'last_search', time());
+
+	$tables = array('item', 'item-content', 'contact', 'fcontact', 'fsuggest', 'gcontact');
+	$remaining = 0;
+	foreach ($tables as $table) {
+		$remaining += phototrack_search_table($a, $table);
+	}
+
+	Config::set('phototrack', 'remaining_items', $remaining);
+	if ($remaining === 0) {
+		phototrack_tidy();
+	}
+}
+
+/**
+ * Deletes photos that nothing refers to, then expires old use records.
+ *
+ * Candidates are photos created more than two months ago that have no row
+ * in phototrack_photo_use. At most 1000 are deleted per call.
+ */
+function phototrack_tidy() {
+	q('CREATE TABLE IF NOT EXISTS `phototrack-temp` (`resource-id` char(255) not null)');
+	// The table survives if an earlier run died before the DROP below; empty it so ids are not listed twice
+	q('DELETE FROM `phototrack-temp`');
+	q('INSERT INTO `phototrack-temp` SELECT DISTINCT(`resource-id`) FROM photo WHERE photo.`created` < DATE_SUB(NOW(), INTERVAL 2 MONTH)');
+	$rows = q('SELECT `phototrack-temp`.`resource-id` FROM `phototrack-temp` LEFT OUTER JOIN phototrack_photo_use ON (`phototrack-temp`.`resource-id` = phototrack_photo_use.`resource-id`) WHERE phototrack_photo_use.id IS NULL limit 1000');
+	// q() does not always return an array, e.g. when the query fails
+	if (!is_array($rows)) {
+		$rows = array();
+	}
+	foreach ($rows as $row) {
+		Logger::debug('phototrack: remove photo ' . $row['resource-id']);
+		q("DELETE FROM photo WHERE `resource-id` = '%s'", DBA::escape($row['resource-id']));
+	}
+	q('DROP TABLE `phototrack-temp`');
+	Logger::info('phototrack_tidy: deleted ' . count($rows) . ' photos');
+
+	// Use records must outlive the re-check interval (rows are re-checked after one
+	// month in phototrack_search_table). Expiring them sooner would make photos that
+	// are still referenced look unused and get them deleted by the next tidy run.
+	$rows = q('SELECT id FROM phototrack_photo_use WHERE checked < DATE_SUB(NOW(), INTERVAL 2 MONTH)');
+	if (!is_array($rows)) {
+		$rows = array();
+	}
+	foreach ($rows as $row) {
+		q('DELETE FROM phototrack_photo_use WHERE id = %d', intval($row['id']));
+	}
+	Logger::info('phototrack_tidy: deleted ' . count($rows) . ' phototrack_photo_use rows');
+}
diff --git a/publicise/publicise.php b/publicise/publicise.php
new file mode 100644
index 00000000..d27eefd4
--- /dev/null
+++ b/publicise/publicise.php
@@ -0,0 +1,431 @@
+
+ */
+
+use Friendica\Core\Addon;
+use Friendica\Core\Logger;
+use Friendica\Core\Renderer;
+use Friendica\Core\L10n;
+use Friendica\Database\DBA;
+
+// Registers the hook that runs when a remote item arrives
+function publicise_install() {
+	$addon_file = 'addon/publicise/publicise.php';
+	Addon::registerHook('post_remote', $addon_file, 'publicise_post_remote_hook');
+}
+
+// Unregisters the add-on's hooks, including 'post_remote_end', which publicise_install() does not register
+function publicise_uninstall() {
+	$hooks = array(
+		'post_remote' => 'publicise_post_remote_hook',
+		'post_remote_end' => 'publicise_post_remote_end_hook',
+	);
+	foreach ($hooks as $hook => $callback) {
+		Addon::unregisterHook($hook, 'addon/publicise/publicise.php', $callback);
+	}
+}
+
+function publicise_get_contacts() {
+ $query = <<$v) {
+ $enabled = ($v['reason'] === 'publicise') ? 1 : NULL;
+ $expire = 30;
+ $comments = 1;
+ $url = $v['url'];
+ if ($enabled) {
+ $r = q('SELECT * FROM `user` WHERE `uid` = %d', intval($v['uid']));
+ $expire = $r[0]['expire'];
+ $url = $a->get_baseurl() . '/profile/' . $v['nick'];
+ if ($r[0]['page-flags'] == PAGE_SOAPBOX) {
+ $comments = NULL;
+ }
+ if ($r[0]['account_expired']) {
+ $enabled = NULL;
+ }
+ }
+ $contacts[$k]['enabled'] = array('publicise-enabled-' . $v['id'], NULL, $enabled);
+ $contacts[$k]['comments'] = array('publicise-comments-' . $v['id'], NULL, $comments);
+ $contacts[$k]['expire'] = $expire;
+ $contacts[$k]['url'] = $url;
+ }
+ $template = Renderer::getMarkupTemplate('admin.tpl', 'addon/publicise/');
+ $o .= Renderer::replaceMacros($template, array(
+ '$feeds' => $contacts,
+ '$feed_t' => L10n::t('Feed'),
+ '$publicised_t' => L10n::t('Publicised'),
+ '$comments_t' => L10n::t('Allow Comments/Likes'),
+ '$expire_t' => L10n::t('Expire Articles After (Days)'),
+ '$submit_t' => L10n::t('Submit')));
+}
+
+// Escapes a value and wraps it in single quotes for use as an SQL string literal
+function publicise_make_string($in) {
+	$escaped = DBA::escape($in);
+	return sprintf("'%s'", $escaped);
+}
+
+/**
+ * Converts a value to an integer for direct use in an SQL statement.
+ *
+ * Always returns the converted integer, never the original input: a value
+ * such as '5; DROP TABLE user' has a non-zero integer value but must not
+ * reach the query unchanged.
+ *
+ * @param mixed $in value to convert
+ * @return int 0 when the value has no integer interpretation
+ */
+function publicise_make_int($in) {
+	return intval($in);
+}
+
+/**
+ * Creates the user account under which a feed contact is publicised.
+ *
+ * Most account settings (password, email, timezone, language, theme, ...)
+ * are copied from the owning user. Fresh key pairs are generated for the
+ * new account.
+ *
+ * @param array $owner    user row of the account that owns the feed contact
+ * @param array $contact  contact row of the feed; needs 'nick' and 'name'
+ * @param mixed $comments truthy to create a PAGE_COMMUNITY account, otherwise
+ *                        a PAGE_SOAPBOX account is created
+ * @param int   $expire   value stored in the new user's 'expire' column
+ * @return array|null the new user row, or null on failure
+ */
+function publicise_create_user($owner, $contact, $comments = null, $expire = 0) {
+
+	$nick = $contact['nick'];
+	if (!$nick) {
+		notice(sprintf(L10n::t("Can't publicise feed \"%s\" because it doesn't have a nickname"), $contact['name']) . EOL);
+		return;
+	}
+	Logger::info('Publicise: create user, beginning key generation...');
+	$res = openssl_pkey_new(array(
+		'digest_alg' => 'sha1',
+		'private_key_bits' => 4096,
+		'encrypt_key' => false ));
+	$sres = openssl_pkey_new(array(
+		'digest_alg' => 'sha1',
+		'private_key_bits' => 512,
+		'encrypt_key' => false ));
+	if (!$res || !$sres) {
+		Logger::warning('Publicise: key generation failed');
+		return;
+	}
+	$prvkey = '';
+	openssl_pkey_export($res, $prvkey);
+	$pkey = openssl_pkey_get_details($res);
+	$pubkey = $pkey["key"];
+	$sprvkey = '';
+	openssl_pkey_export($sres, $sprvkey);
+	$spkey = openssl_pkey_get_details($sres);
+	$spubkey = $spkey["key"];
+	$guid = generate_user_guid();
+
+	// The column is called 'notify-flags'; 'notifyflags' is still accepted in case a caller passes it
+	$notify_flags = 0;
+	if (isset($owner['notify-flags'])) {
+		$notify_flags = $owner['notify-flags'];
+	}
+	elseif (isset($owner['notifyflags'])) {
+		$notify_flags = $owner['notifyflags'];
+	}
+
+	$newuser = array(
+		'guid' => publicise_make_string($guid),
+		'username' => publicise_make_string($contact['name']),
+		'password' => publicise_make_string($owner['password']),
+		'nickname' => publicise_make_string($nick),
+		'email' => publicise_make_string($owner['email']),
+		'openid' => publicise_make_string($owner['openid']),
+		'timezone' => publicise_make_string($owner['timezone']),
+		'language' => publicise_make_string($owner['language']),
+		'register_date' => publicise_make_string(datetime_convert()),
+		'default-location' => publicise_make_string($owner['default-location']),
+		'allow_location' => publicise_make_string($owner['allow_location']),
+		'theme' => publicise_make_string($owner['theme']),
+		'pubkey' => publicise_make_string($pubkey),
+		'prvkey' => publicise_make_string($prvkey),
+		'spubkey' => publicise_make_string($spubkey),
+		'sprvkey' => publicise_make_string($sprvkey),
+		'verified' => intval($owner['verified']),
+		'blocked' => 0,
+		'blockwall' => 1,
+		'hidewall' => 0,
+		'blocktags' => 0,
+		'notify-flags' => intval($notify_flags),
+		'page-flags' => intval($comments ? PAGE_COMMUNITY : PAGE_SOAPBOX),
+		'expire' => intval($expire),
+	);
+	// Do not dump $newuser here: it holds the password hash and both private keys
+	Logger::debug('Publicise: creating user ' . $nick . ' guid ' . $guid);
+	$r = q("INSERT INTO `user` (`"
+		. implode("`, `", array_keys($newuser))
+		. "`) VALUES ("
+		. implode(", ", array_values($newuser))
+		. ")" );
+	if (!$r) {
+		Logger::warning('Publicise: create user failed');
+		return;
+	}
+	$r = q('SELECT * FROM `user` WHERE `guid` = "%s"', DBA::escape($guid));
+	if (!is_array($r) || count($r) != 1) {
+		Logger::warning('Publicise: unexpected number of uids returned');
+		return;
+	}
+	Logger::debug('Publicise: created user ID ' . $r[0]['uid']);
+	return $r[0];
+}
+
+/**
+ * Makes sure the publicised user has a "self" contact and returns its id.
+ *
+ * An existing self contact is reused. Otherwise one is created from the feed
+ * contact's name, nickname and photos, with URLs pointing at this site. If
+ * creation fails, the user row is deleted again.
+ *
+ * @param object $a       application object providing getBaseURL()
+ * @param array  $contact contact row of the feed being publicised
+ * @param int    $uid     id of the user the self contact belongs to
+ * @return int|null id of the self contact, or null on failure
+ */
+function publicise_create_self_contact($a, $contact, $uid) {
+	$uid = intval($uid);
+
+	$existing = q("SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1", $uid);
+	if (is_array($existing) && count($existing)) {
+		$newcontact = $existing[0];
+		Logger::debug('Publicise: self contact already exists for user ' . $uid . ' id ' . $newcontact['id']);
+	} else {
+		$baseurl = $a->getBaseURL();
+		$nick = $contact['nick'];
+		$newcontact = array(
+			'uid' => $uid,
+			'created' => publicise_make_string(datetime_convert()),
+			'self' => 1,
+			'name' => publicise_make_string($contact['name']),
+			'nick' => publicise_make_string($nick),
+			'photo' => publicise_make_string($contact['photo']),
+			'thumb' => publicise_make_string($contact['thumb']),
+			'micro' => publicise_make_string($contact['micro']),
+			'blocked' => 0,
+			'pending' => 0,
+			'url' => publicise_make_string($baseurl . '/profile/' . $nick),
+			'nurl' => publicise_make_string($baseurl . '/profile/' . $nick),
+			'request' => publicise_make_string($baseurl . '/dfrn_request/' . $nick),
+			'notify' => publicise_make_string($baseurl . '/dfrn_notify/' . $nick),
+			'poll' => publicise_make_string($baseurl . '/dfrn_poll/' . $nick),
+			'confirm' => publicise_make_string($baseurl . '/dfrn_confirm/' . $nick),
+			'poco' => publicise_make_string($baseurl . '/poco/' . $nick),
+			'uri-date' => publicise_make_string(datetime_convert()),
+			'avatar-date' => publicise_make_string(datetime_convert()),
+			'closeness' => 0,
+		);
+		Logger::debug('Publicise: create contact ' . print_r($newcontact, true));
+		q("INSERT INTO `contact` (`"
+			. implode("`, `", array_keys($newcontact))
+			. "`) VALUES ("
+			. implode(", ", array_values($newcontact))
+			. ")" );
+		// Read the row back: this both verifies the insert and yields the new id
+		$results = q("SELECT `id` FROM `contact` WHERE `uid` = %d AND `self` = 1", $uid);
+		if (!is_array($results) || count($results) != 1) {
+			Logger::warning('Publicise: create self contact failed, will delete uid ' . $uid);
+			q("DELETE FROM `user` WHERE `uid` = %d", $uid);
+			return;
+		}
+		$newcontact = $results[0];
+		Logger::debug('Publicise: created self contact for user ' . $uid . ' id ' . $newcontact['id']);
+	}
+	Logger::debug('Publicise: self contact for ' . $uid . ' nick ' . $contact['nick'] . ' is ' . $newcontact['id']);
+	return $newcontact['id'];
+}
+
+/**
+ * Creates the default profile of a publicised user from the feed contact.
+ *
+ * @param array $contact contact row of the feed; supplies name, photos and url
+ * @param int   $uid     id of the user the profile belongs to
+ * @return int|null id of the user's default profile, or null when there is
+ *                  not exactly one default profile afterwards
+ */
+function publicise_create_profile($contact, $uid) {
+	$uid = intval($uid);
+	$newprofile = array(
+		'uid' => $uid,
+		'profile-name' => publicise_make_string('default'),
+		'is-default' => 1,
+		'name' => publicise_make_string($contact['name']),
+		'photo' => publicise_make_string($contact['photo']),
+		'thumb' => publicise_make_string($contact['thumb']),
+		'homepage' => publicise_make_string($contact['url']),
+		'publish' => 1,
+		'net-publish' => 1,
+	);
+	Logger::debug('Publicise: create profile ' . print_r($newprofile, true));
+	$r = q("INSERT INTO `profile` (`"
+		. implode("`, `", array_keys($newprofile))
+		. "`) VALUES ("
+		. implode(", ", array_values($newprofile))
+		. ")" );
+	if (!$r) {
+		// Deliberately not returning here: the lookup below decides the outcome
+		Logger::warning('Publicise: create profile failed');
+	}
+	$profiles = q('SELECT `id` FROM `profile` WHERE `uid` = %d AND `is-default` = 1', $uid);
+	if (!is_array($profiles) || count($profiles) != 1) {
+		Logger::warning('Publicise: create profile produced unexpected number of results');
+		return;
+	}
+	Logger::debug('Publicise: created profile ' . $profiles[0]['id']);
+	return $profiles[0]['id'];
+}
+
+/**
+ * Create the dedicated user, its self contact and its default profile for a feed.
+ *
+ * Each step that fails shows a notice and deletes the rows created by the
+ * earlier steps (the user, and the self contact when the profile step fails).
+ *
+ * @param object $a       App instance, passed through to publicise_create_self_contact().
+ * @param array  $contact Feed contact row; `name` is used in the notices.
+ * @param array  $owner   Row of the user that currently owns the feed contact.
+ *
+ * @return array|null The row returned by publicise_create_user(), or null on any failure.
+ */
+function publicise_set_up_user($a, $contact, $owner) {
+	$user = publicise_create_user($owner, $contact);
+	if (!$user) {
+		notice(sprintf(t("Failed to create user for feed \"%s\""), $contact['name']) . EOL);
+		return;
+	}
+
+	$self_contact = publicise_create_self_contact($a, $contact, $user['uid']);
+	if (!$self_contact) {
+		notice(sprintf(t("Failed to create self contact for user \"%s\""), $contact['name']) . EOL);
+		Logger::warning("Publicise: unable to create self contact, deleting user " . $user['uid']);
+		q('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid']));
+		return;
+	}
+
+	$profile = publicise_create_profile($contact, $user['uid']);
+	if (!$profile) {
+		notice(sprintf(t("Failed to create profile for user \"%s\""), $contact['name']) . EOL);
+		// The uid lives in $user; there is no local $uid in this function.
+		Logger::warning("Publicise: unable to create profile, deleting user {$user['uid']} contact $self_contact");
+		q('DELETE FROM `user` WHERE `uid` = %d', intval($user['uid']));
+		q('DELETE FROM `contact` WHERE `id` = %d', intval($self_contact));
+		return;
+	}
+
+	return $user;
+}
+
+/**
+ * Move a feed contact, its items and its retriever rule to a dedicated account.
+ *
+ * If a matching account that was earlier depublicised still exists (same
+ * nickname and email, expiry date set, community or soapbox page flags) it is
+ * re-activated; otherwise a new one is set up via publicise_set_up_user().
+ *
+ * @param object $a       App instance, passed through to publicise_set_up_user().
+ * @param array  $contact Feed contact row; `uid`, `reason` and `hidden` are updated in place.
+ * @param array  $owner   Current owner's user row; replaced in place by the dedicated user's row.
+ *
+ * @return bool|null True on success; null when the caller is not a site admin
+ *                   or the dedicated user could not be created.
+ */
+function publicise($a, &$contact, &$owner) {
+	Logger::info('@@@ Publicise: publicise');
+	if (!is_site_admin()) {
+		notice(t("Only admin users can publicise feeds"));
+		Logger::warning('Publicise: non-admin tried to publicise');
+		return;
+	}
+
+	// Check if we're changing our mind about a feed we earlier depublicised
+	Logger::info('@@@ Publicise: ' . 'SELECT * FROM `user` WHERE `account_expires_on` != "0000-00-00 00:00:00" AND `nickname` = "' . $contact['nick'] . '" AND `email` = "' . $owner['email'] . '" AND `page-flags` in (' . intval(PAGE_COMMUNITY) . ', ' . intval(PAGE_SOAPBOX) . ')');
+	$existing = q('SELECT * FROM `user` WHERE `account_expires_on` != "0000-00-00 00:00:00" AND `nickname` = "%s" AND `email` = "%s" AND `page-flags` in (%d, %d)',
+		DBA::escape($contact['nick']), DBA::escape($owner['email']), intval(PAGE_COMMUNITY), intval(PAGE_SOAPBOX));
+	if (count($existing) == 1) {
+		Logger::info('@@@ Publicise: there is existing');
+		$owner = $existing[0];
+		// Undo the expiry flags and re-publish the default profile.
+		q('UPDATE `user` SET `account_expires_on` = "0000-00-00 00:00:00", `account_removed` = 0, `account_expired` = 0 WHERE `uid` = %d', intval($owner['uid']));
+		q('UPDATE `profile` SET `publish` = 1, `net-publish` = 1 WHERE `uid` = %d AND `is-default` = 1', intval($owner['uid']));
+		Logger::debug('Publicise: recycled previous user ' . $owner['uid']);
+	}
+	else {
+		Logger::info('@@@ Publicise: there is not existing');
+		$owner = publicise_set_up_user($a, $contact, $owner);
+		if (!$owner) {
+			return;
+		}
+		Logger::debug("Publicise: created new user " . $owner['uid']);
+	}
+	Logger::info('Publicise: new contact user is ' . $owner['uid']);
+
+	$r = q("UPDATE `contact` SET `uid` = %d, `reason` = 'publicise', `hidden` = 1 WHERE id = %d", intval($owner['uid']), intval($contact['id']));
+	if (!$r) {
+		// The dedicated user is held in $owner; no $user variable exists here.
+		Logger::warning('Publicise: update contact failed, user is probably in a bad state ' . $owner['uid']);
+	}
+	// Mirror the database change in the caller's copy of the contact row.
+	$contact['uid'] = $owner['uid'];
+	$contact['reason'] = 'publicise';
+	$contact['hidden'] = 1;
+	q("UPDATE `item` SET `uid` = %d, type = 'wall', wall = 1, private = 0 WHERE `contact-id` = %d",
+		intval($owner['uid']), intval($contact['id']));
+	Logger::debug('Publicise: moved items from contact ' . $contact['id'] . ' to uid ' . $owner['uid']);
+
+	// Update the retriever config
+	q("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d",
+		intval($owner['uid']), intval($contact['id']));
+
+	info(sprintf(t("Moved feed \"%s\" to dedicated account"), $contact['name']) . EOL);
+	return true;
+}
+
+/**
+ * Fetch the self contact row of a user.
+ *
+ * @param mixed $uid User id; cast with intval() for the query.
+ *
+ * @return array|null The contact row when exactly one self contact exists,
+ *                    otherwise null (after logging a warning).
+ */
+function publicise_self_contact($uid) {
+	$selves = q('SELECT * FROM `contact` WHERE `uid` = %d AND `self` = 1', intval($uid));
+	if (count($selves) == 1) {
+		return $selves[0];
+	}
+	Logger::warning('Publicise: unexpected number of self contacts for user ' . $uid);
+	return;
+}
+
+/**
+ * Retire the dedicated account of a publicised feed.
+ *
+ * When the logged-in user has a contact pointing at the feed account, the
+ * feed contact, its items, the feed user's photos and the retriever rule are
+ * handed over to the logged-in user. In every case the dedicated account is
+ * flagged removed/expired and its default profile is unpublished.
+ *
+ * @param object $a       App instance (not used in this function).
+ * @param array  $contact Feed contact row; `id` and `name` are read.
+ * @param array  $user    Row of the dedicated user; `uid` is read.
+ *
+ * @return void
+ */
+function depublicise($a, $contact, $user) {
+	// NOTE(review): legacy include path - confirm it still exists in the target Friendica version.
+	require_once('include/Contact.php');
+
+	if (!is_site_admin()) {
+		// Translated like the matching notice in publicise().
+		notice(t("Only admin users can depublicise feeds"));
+		Logger::warning('Publicise: non-admin tried to depublicise');
+		return;
+	}
+
+	Logger::debug('Publicise: about to depublicise contact ' . $contact['id'] . ' user ' . $user['uid']);
+
+	$self_contact = publicise_self_contact($user['uid']);
+
+	// If the local_user() is subscribed to the feed, take ownership
+	// of the feed and all its items and photos.  Otherwise they will
+	// be deleted when the account expires.
+	// publicise_self_contact() returns null when there is not exactly one
+	// self contact; in that case no subscription can be matched by URL.
+	$subscriptions = [];
+	if ($self_contact) {
+		$subscriptions = q('SELECT * FROM `contact` WHERE `uid` = %d AND `url` = "%s"',
+			intval(local_user()), DBA::escape($self_contact['url']));
+	}
+	if (!empty($subscriptions)) {
+		// Delete the contact to the feed user and any
+		// copies of its items.  These will be replaced by the originals,
+		// which will be brought back into the local_user's feed along
+		// with the feed contact itself.
+		foreach ($subscriptions as $my_contact) {
+			q('DELETE FROM `item` WHERE `contact-id` = %d', intval($my_contact['id']));
+			q('DELETE FROM `contact` WHERE `id` = %d', intval($my_contact['id']));
+		}
+
+		// Move the feed contact to local_user.  Existing items stay
+		// attached to the original feed contact, but must have their uid
+		// updated.  Also update the fields we scribbled over in
+		// publicise_post_remote_hook.
+		q('UPDATE `contact` SET `uid` = %d, `reason` = "", hidden = 0 WHERE id = %d',
+			intval(local_user()), intval($contact['id']));
+		q('UPDATE `item` SET `uid` = %d, `wall` = 0, `type` = "remote", `private` = 2 WHERE `contact-id` = %d',
+			intval(local_user()), intval($contact['id']));
+
+		// Take ownership of any photos created by the feed user
+		q('UPDATE `photo` SET `uid` = %d WHERE `uid` = %d',
+			intval(local_user()), intval($user['uid']));
+
+		// Update the retriever config so the rule follows the contact to
+		// its new owner, the logged-in user.
+		q("UPDATE `retriever_rule` SET `uid` = %d WHERE `contact-id` = %d",
+			intval(local_user()), intval($contact['id']));
+	}
+
+	// Set the account to removed and expired right now.  It will be cleaned up by cron after 3 days, giving a chance to change your mind
+	q('UPDATE `user` SET `account_removed` = 1, `account_expired` = 1, `account_expires_on` = UTC_TIMESTAMP() WHERE `uid` = %d',
+		intval($user['uid']));
+	q('UPDATE `profile` SET `publish` = 0, `net-publish` = 0 WHERE `uid` = %d AND `is-default` = 1', intval($user['uid']));
+
+	info(sprintf(t("Removed dedicated account for feed \"%s\""), $contact['name']) . EOL);
+}
+
+/**
+ * Apply the submitted admin form to every contact from publicise_get_contacts().
+ *
+ * For each contact the `publicise-enabled-<id>` field decides whether the
+ * feed is (de)publicised; for enabled feeds `publicise-expire-<id>` and
+ * `publicise-comments-<id>` update the dedicated user's expiry and page flags.
+ * Fields missing from the POST body are treated as unset, as happens for
+ * unchecked checkboxes.
+ *
+ * @param object $a App instance, passed through to publicise()/depublicise().
+ *
+ * @return void
+ */
+function publicise_addon_admin_post ($a) {
+	Logger::info('@@@ publicise_addon_admin_post');
+	if (!is_site_admin()) {
+		Logger::warning('Publicise: non-admin tried to do admin post');
+		return;
+	}
+
+	foreach (publicise_get_contacts() as $contact) {
+		$id = $contact['id'];
+		Logger::info('@@@ publicise_addon_admin_post contact ' . $id . ' ' . $contact['name']);
+		$user = publicise_get_user($contact['uid']);
+
+		if (empty($_POST['publicise-enabled-' . $id])) {
+			if ($contact['reason'] === 'publicise') {
+				Logger::info('@@@ depublicise');
+				depublicise($a, $contact, $user);
+			}
+			continue;
+		}
+
+		if ($contact['reason'] !== 'publicise') {
+			Logger::info('@@@ publicise');
+			// publicise() replaces $user with the dedicated user's row.
+			if (!publicise($a, $contact, $user)) {
+				Logger::warning('Publicise: failed to publicise contact ' . $id);
+				continue;
+			}
+		}
+
+		$expire = $_POST['publicise-expire-' . $id] ?? null;
+		if ($expire != $user['expire']) {
+			q('UPDATE `user` SET `expire` = %d WHERE `uid` = %d',
+				intval($expire), intval($user['uid']));
+		}
+
+		// Comments enabled -> community page with sharing contacts;
+		// comments disabled -> soapbox page with follower contacts.
+		$comments_enabled = !empty($_POST['publicise-comments-' . $id]);
+		$page_flags = $comments_enabled ? PAGE_COMMUNITY : PAGE_SOAPBOX;
+		$relation = $comments_enabled ? CONTACT_IS_SHARING : CONTACT_IS_FOLLOWER;
+		if ($user['page-flags'] != $page_flags) {
+			q('UPDATE `user` SET `page-flags` = %d WHERE `uid` = %d',
+				intval($page_flags), intval($user['uid']));
+			q('UPDATE `contact` SET `rel` = %d WHERE `uid` = %d AND `network` = "dfrn"',
+				intval($relation), intval($user['uid']));
+		}
+	}
+}
+
+/**
+ * Hook: mark an incoming item as a public wall post when its contact is publicised.
+ *
+ * Items whose contact does not have `reason` = 'publicise' are left untouched.
+ *
+ * @param object $a    App instance (not used in this function).
+ * @param array  $item Item being stored; `type`, `wall` and `private` are overwritten in place.
+ *
+ * @return void
+ */
+function publicise_post_remote_hook(&$a, &$item) {
+	$publicised = q("SELECT `uid` FROM `contact` WHERE `id` = %d AND `reason` = 'publicise'", intval($item['contact-id']));
+	if ($publicised) {
+		Logger::debug('Publicise: moving to wall: ' . $item['uid'] . ' ' . $item['contact-id'] . ' ' . $item['uri']);
+		$item['type'] = 'wall';
+		$item['wall'] = 1;
+		$item['private'] = 0;
+	}
+}
+
diff --git a/publicise/templates/admin.tpl b/publicise/templates/admin.tpl
new file mode 100644
index 00000000..b10c3546
--- /dev/null
+++ b/publicise/templates/admin.tpl
@@ -0,0 +1,39 @@
+{{*
+ * AUTOMATICALLY GENERATED TEMPLATE
+ * DO NOT EDIT THIS FILE, CHANGES WILL BE OVERWRITTEN
+ *
+ *}}
+
From dbfc24d51fc531530b7f4c446d1fee833d0ac998 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Mon, 30 Sep 2019 20:52:51 +0200
Subject: [PATCH 033/527] retriever tweaks
---
retriever/retriever.php | 13 ++++---------
1 file changed, 4 insertions(+), 9 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index fcd45b46..f495578b 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -308,9 +308,8 @@ function retriever_get_item($retriever_item) {
Logger::info('@@@ retriever_get_item uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['item-uid'] . ' cid ' . $retriever_item['contact-id']);
try {//@@@ not necessary
$item = Item::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid'])]);
- Logger::log('@@@ 1 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
if (!DBA::isResult($item)) {
- Logger::log('retriever_get_item: no item found for uri ' . $retriever_item['item-uri']);
+ Logger::warning('retriever_get_item: no item found for uri ' . $retriever_item['item-uri']);
return;
}
Logger::info('@@@ retriever_get_item: yay item found for uri ' . $retriever_item['item-uri'] . ' guid ' . $item['guid'] . ' plink ' . $item['plink']);
@@ -329,9 +328,9 @@ function retriever_item_completed($retriever_item_id, $resource, $a) {
return;
}
$item = retriever_get_item($retriever_item);
- Logger::log('@@@ 2 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
+ Logger::info('@@@ 2 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
if (!$item) {
- Logger::log('retriever_item_completed: no item ' . $retriever_item['item-uri']);
+ Logger::warning('retriever_item_completed: no item ' . $retriever_item['item-uri']);
return;
}
// Note: the retriever might be null. Doesn't matter.
@@ -348,21 +347,17 @@ function retriever_resource_completed($resource, $a) {
Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url']);
$r = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource['id']);
foreach (DBA::select('retriever_item', ['id'], ['resource' => intval($resource['id'])]) as $retriever_item) {
- Logger::debug('@@@ retriever_resource_completed got item id ' . $retriever_item['id']);
retriever_item_completed($retriever_item['id'], $resource, $a);
}
}
function apply_retrospective($a, $retriever, $num) {
- Logger::info('@@@ apply_retrospective');
$r = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d",
intval($retriever['contact-id']), intval($num));
foreach ($r as $item) {
- Logger::info('@@@ apply_retrospective item ' . $item['id']);
q('UPDATE `item` SET `visible` = 0 WHERE `id` = %d', $item['id']);
q('UPDATE `thread` SET `visible` = 0 WHERE `iid` = %d', $item['id']);
foreach (DBA::select('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]) as $retriever_item) {
- Logger::info('@@@ about to delete retriever_item id ' . $retriever_item['id'] . ' uri ' . $item['uri'] . ' uid ' . $item['uid'] . ' contact ' . $item['contact-id']);
DBA::delete('retriever_resource', ['id' => $retriever_item['resource']]);
DBA::delete('retriever_item', ['id' => $retriever_item['id']]);
}
@@ -378,7 +373,7 @@ function retriever_on_item_insert($a, $retriever, &$item) {
Logger::info('retriever_on_item_insert: No retriever supplied');
return;
}
- if (!$retriever['data']['enable'] == "on") {
+ if (!array_key_exists('enable', $retriever['data']) || !$retriever['data']['enable'] == "on") {
Logger::info('@@@ retriever_on_item_insert: Disabled');
return;
}
From 2d8e13d53d388e9f98718e48199d44f8716fe265 Mon Sep 17 00:00:00 2001
From: Matthew Exon
Date: Wed, 2 Oct 2019 07:19:59 +0200
Subject: [PATCH 034/527] extensive refactoring
---
retriever/retriever.php | 305 +++++++++++++---------------
retriever/templates/rule-config.tpl | 32 ++-
2 files changed, 164 insertions(+), 173 deletions(-)
diff --git a/retriever/retriever.php b/retriever/retriever.php
index f495578b..6ace5e98 100644
--- a/retriever/retriever.php
+++ b/retriever/retriever.php
@@ -177,14 +177,14 @@ function retriever_clean_up_completed_resources($max_items, $a) {
}
$resource = DBA::selectFirst('retriever_resource', [], ['id' => intval($rr['resource'])]);
retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a);
- Logger::info('@@@ retriever_clean_up_completed_resources tried to update id ' . $retriver_item['id'] . ' to finished, better check that it really worked!');
+ Logger::info('@@@ retriever_clean_up_completed_resources tried to update id ' . $retriever_item['id'] . ' to finished, better check that it really worked!');
DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished']);
retriever_check_item_completed($item);
}
}
function retriever_tidy() {
- // TODO: figure out how to do this with DBA module
+ // TODO: figure out how to do this with DBA module @@@ it is possible
q("DELETE FROM retriever_resource WHERE completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)");
q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)");
@@ -212,8 +212,6 @@ function retrieve_dataurl_resource($resource) {
}
function retrieve_resource($resource) {
- Logger::info('@@@ retrieve_resource: url ' . $resource['url'] . ' uid ' . $resource['item-uid'] . ' cid ' . $resource['contact-id']);
-
if (substr($resource['url'], 0, 5) == "data:") {
return retrieve_dataurl_resource($resource);
}
@@ -221,24 +219,22 @@ function retrieve_resource($resource) {
$a = get_app();
$retriever_rule = get_retriever_rule($resource['contact-id'], $resource['item-uid']);
+ $rule_data = $retriever_rule['data'];
try {
Logger::debug('retrieve_resource: ' . ($resource['num-tries'] + 1) . ' attempt at resource ' . $resource['id'] . ' ' . $resource['url']);
$redirects = 0;
$cookiejar = '';
- Logger::debug('@@@ retrieve_resource storecookies ' . $retriever_rule['storecookies']);
- if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) {
+ if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) {
$cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-');
- Logger::debug('@@@ retrieve_resource cookie file ' . $cookiejar . ' content ' . $retriever_rule['cookiedata']);
- file_put_contents($cookiejar, $retriever_rule['cookiedata']);
+ file_put_contents($cookiejar, $rule_data['cookiedata']);
}
$fetch_result = Network::fetchUrlFull($resource['url'], $resource['binary'], $redirects, '', $cookiejar);
- if (array_key_exists('storecookies', $retriever_rule) && $retriever_rule['storecookies']) {
- $retriever_rule['cookiedata'] = file_get_contents($cookiejar);
- Logger::debug('@@@ retriever_resource update cookie ' . json_encode($retriever_rule['data'] . ' id ' . $retriever_rule['id']));
- q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d",
- DBA::escape(json_encode($retriever_rule['data'])), intval($retriever_rule["id"]));
- /* unlink($cookiejar); */ //@@@
+ if (array_key_exists('storecookies', $rule_data) && $rule_data['storecookies']) {
+ $retriever_rule['data']['cookiedata'] = file_get_contents($cookiejar);
+ DBA::update('retriever_rule', ['data' => json_encode($retriever_rule['data'])], ['id' => intval($retriever_rule["id"])]);
+ //@@@ check the update worked
+ unlink($cookiejar);
}
$resource['data'] = $fetch_result->getBody();
$resource['http-code'] = $fetch_result->getReturnCode();
@@ -248,36 +244,33 @@ function retrieve_resource($resource) {
} catch (Exception $e) {
Logger::info('retrieve_resource: unable to retrieve ' . $resource['url'] . ' - ' . $e->getMessage());
}
+ // TODO: figure out how to do this with DBA module
q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `http-code` = %d, `redirect-url` = '%s' WHERE id = %d",
intval($resource['http-code']),
DBA::escape($resource['redirect-url']),
intval($resource['id']));
if ($resource['data']) {
+ // TODO: figure out how to do this with DBA module
q("UPDATE `retriever_resource` SET `completed` = now(), `data` = '%s', `type` = '%s' WHERE id = %d",
DBA::escape($resource['data']),
DBA::escape($resource['type']),
intval($resource['id']));
retriever_resource_completed($resource, $a);
}
- Logger::info('@@@ retrieve_resource finished: ' . $resource['url']);
}
function get_retriever_rule($contact_id, $uid, $create = false) {
- Logger::info('@@@ get_retriever_rule ' . "SELECT * FROM `retriever_rule` WHERE `contact-id` = " . intval($contact_id) . " AND `uid` = " . intval($uid));
- $r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d",
- intval($contact_id), intval($uid));
- Logger::info('@@@ get_retriever_rule count is ' . count($r));
- if (count($r)) {
- $r[0]['data'] = json_decode($r[0]['data'], true);
+ $retriever_rule = DBA::selectFirst('retriever_rule', [], ['contact-id' => intval($contact_id), 'uid' => intval($uid)]);
+ //@@@ check that this worked
+ if ($retriever_rule) {
+ $retriever_rule['data'] = json_decode($retriever_rule['data'], true);
Logger::info('@@@ get_retriever_rule returning an actual thing');
- return $r[0];
+ return $retriever_rule;
}
if ($create) {
- q("INSERT INTO `retriever_rule` (`uid`, `contact-id`) VALUES (%d, %d)",
- intval($uid), intval($contact_id));
- $r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d",
- intval($contact_id), intval($uid));
- return $r[0];
+ DBA::insert('retriever_rule', ['uid' => intval($uid), 'contact-id' => intval($contact_id)]);
+ //@@@ check that this worked
+ return DBA::selectFirst('retriever_rule', [], ['contact-id' => intval($contact_id), 'uid' => intval($uid)]);
}
}
@@ -285,38 +278,13 @@ function retriever_get_retriever_item($id) {
return DBA::selectFirst('retriever_item', [], ['id' => intval($id)]);
}
-function retriever_class_of_item($item) { //@@@
- if (!$item) {
- return 'false';
- }
- if (array_key_exists('finished', $item)) {
- Logger::info('@@@ oh no this is a bad thing');
- return 'retriever_item';
- }
- if (array_key_exists('moderated', $item)) {
- return 'friendica_item';
- }
- return 'unknown';
-}
-
-function mat_test($item) { //@@@
- return 'mat_test';
-}
-
function retriever_get_item($retriever_item) {
- // @@@ add contact id as a search term
- Logger::info('@@@ retriever_get_item uri ' . $retriever_item['item-uri'] . ' uid ' . $retriever_item['item-uid'] . ' cid ' . $retriever_item['contact-id']);
- try {//@@@ not necessary
- $item = Item::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid'])]);
- if (!DBA::isResult($item)) {
- Logger::warning('retriever_get_item: no item found for uri ' . $retriever_item['item-uri']);
- return;
- }
- Logger::info('@@@ retriever_get_item: yay item found for uri ' . $retriever_item['item-uri'] . ' guid ' . $item['guid'] . ' plink ' . $item['plink']);
- return $item;
- } catch (Exception $e) {
- Logger::info('retriever_get_item: exception ' . $e->getMessage());
+ $item = Item::selectFirst([], ['uri' => $retriever_item['item-uri'], 'uid' => intval($retriever_item['item-uid']), 'contact-id' => intval($retriever_item['contact-id'])]);
+ if (!DBA::isResult($item)) {
+ Logger::warning('retriever_get_item: no item found for uri ' . $retriever_item['item-uri']);
+ return;
}
+ return $item;
}
function retriever_item_completed($retriever_item_id, $resource, $a) {
@@ -328,7 +296,6 @@ function retriever_item_completed($retriever_item_id, $resource, $a) {
return;
}
$item = retriever_get_item($retriever_item);
- Logger::info('@@@ 2 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
if (!$item) {
Logger::warning('retriever_item_completed: no item ' . $retriever_item['item-uri']);
return;
@@ -338,25 +305,23 @@ function retriever_item_completed($retriever_item_id, $resource, $a) {
retriever_apply_completed_resource_to_item($retriever_rule, $item, $resource, $a);
- q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d",
- intval($retriever_item['id']));
+ DBA::update('retriever_item', ['finished' => 1], ['id' => intval($retriever_item['id'])], ['finished']);
retriever_check_item_completed($item);
}
function retriever_resource_completed($resource, $a) {
Logger::debug('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url']);
- $r = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource['id']);
foreach (DBA::select('retriever_item', ['id'], ['resource' => intval($resource['id'])]) as $retriever_item) {
retriever_item_completed($retriever_item['id'], $resource, $a);
}
}
function apply_retrospective($a, $retriever, $num) {
- $r = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d",
- intval($retriever['contact-id']), intval($num));
- foreach ($r as $item) {
- q('UPDATE `item` SET `visible` = 0 WHERE `id` = %d', $item['id']);
- q('UPDATE `thread` SET `visible` = 0 WHERE `iid` = %d', $item['id']);
+ Logger::debug('@@@ apply_retrospective');
+ foreach (Item::select([], ['contact-id' => intval($retriever['contact-id'])], ['order' => ['received' => true], 'limit' => $num]) as $item) {
+ Logger::debug('@@@ apply_retrospective got item id ' . $item['id'] . ' uri ' . $item['uri']);
+ Item::update(['visible' => 0], ['id' => intval($item['id'])]);
+ //@@@ check that this works
foreach (DBA::select('retriever_item', [], ['item-uri' => $item['uri'], 'item-uid' => $item['uid'], 'contact-id' => $item['contact-id']]) as $retriever_item) {
DBA::delete('retriever_resource', ['id' => $retriever_item['resource']]);
DBA::delete('retriever_item', ['id' => $retriever_item['id']]);
@@ -368,13 +333,11 @@ function apply_retrospective($a, $retriever, $num) {
// TODO: Currently this waits until the next cron before actually downloading. Should do it immediately.
// TODO: This queries then inserts. It should use some kind of lock to avoid requesting the same resource twice.
function retriever_on_item_insert($a, $retriever, &$item) {
- Logger::info('@@@ retriever_on_item_insert start plink ' . $item['plink'] . ' id ' . $item['id']);
if (!$retriever || !$retriever['id']) {
Logger::info('retriever_on_item_insert: No retriever supplied');
return;
}
if (!array_key_exists('enable', $retriever['data']) || !$retriever['data']['enable'] == "on") {
- Logger::info('@@@ retriever_on_item_insert: Disabled');
return;
}
if (array_key_exists('plink', $item) && strlen($item['plink'])) {
@@ -389,13 +352,12 @@ function retriever_on_item_insert($a, $retriever, &$item) {
$url = $content['plink'];
}
- if (array_key_exists('pattern', $retriever['data']) && $retriever['data']['pattern']) {
+ if ($retriever['data']['modurl']) {
$orig_url = $url;
$url = preg_replace('/' . $retriever['data']['pattern'] . '/', $retriever['data']['replace'], $orig_url);
Logger::debug('retriever_on_item_insert: Changed ' . $orig_url . ' to ' . $url);
}
- Logger::debug('@@@ retriever_on_item_insert: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id'] . ' url ' . $url);
$resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id']);
$retriever_item_id = add_retriever_item($item, $resource);
}
@@ -412,16 +374,15 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) {
fclose($fp);
$url = 'md5://' . hash('md5', $url);
- //@@@ fix this
- $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s' AND `item-uid` = %d AND `contact-id` = %d", DBA::escape($url), intval($uid), intval($cid));
- $resource = $r[0];
- if (count($r)) {
+ if (DBA::selectFirst('retriever_resource', [], ['url' => $url, 'item-uid' => intval($uid), 'contact-id' => intval($cid)])) {
+ //@@@ test that this really happens - it should sometimes
Logger::debug('add_retriever_resource: Resource ' . $url . ' already requested');
return $resource;
}
Logger::debug('retrieve_resource: got data URL type ' . $resource['type']);
- //@@@ fix this
+ // TODO: figure out how to do this with DBA module
+ // @@@ DBA::update('workerqueue', ['executed' => DateTimeFormat::utcNow()], ['pid' => $mypid, 'done' => false]);
q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `type`, `binary`, `url`, `completed`, `data`) " .
"VALUES (%d, %d, '%s', %d, '%s', now(), '%s')",
intval($uid),
@@ -430,6 +391,7 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) {
intval($binary ? 1 : 0),
DBA::escape($url),
DBA::escape($data));
+ //@@@ fix this
$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url));
$resource = $r[0];
if (count($r)) {
@@ -449,16 +411,18 @@ function add_retriever_resource($a, $url, $uid, $cid, $binary = false) {
return $r[0];
}
+ //@@@ fix this
q("INSERT INTO `retriever_resource` (`item-uid`, `contact-id`, `binary`, `url`) " .
"VALUES (%d, %d, %d, '%s')", intval($uid), intval($cid), intval($binary ? 1 : 0), DBA::escape($url));
+ //@@@ fix this
$r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", DBA::escape($url));
return $r[0];
}
function add_retriever_item(&$item, $resource) {
- Logger::debug('@@@ 5 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
Logger::debug('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
+ //@@@ can use selectFirst
$r = q("SELECT COUNT(*) FROM `retriever_item` WHERE " .
"`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d",
DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id']));
@@ -466,9 +430,11 @@ function add_retriever_item(&$item, $resource) {
Logger::info("add_retriever_item: retriever item already present for " . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
return;
}
+ //@@@ fix this
q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " .
"VALUES ('%s', %d, %d, %d)",
DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource["id"]));
+ //@@@ fix this
$r = q("SELECT id FROM `retriever_item` WHERE " .
"`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d ORDER BY id DESC",
DBA::escape($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id']));
@@ -505,8 +471,10 @@ function retriever_apply_xslt_text($xslt_text, $doc) {
return $result;
}
+//@@@ I think this is supposed to update the $item, but it doesn't
function retriever_apply_dom_filter($retriever, &$item, $resource) {
- Logger::debug('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id']);
+ //@@@ check if id and uri-id are there //@@@ uri-id definitely is not
+ Logger::debug('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'] . ' uri-id ' . $item['uri-id']);
if (!array_key_exists('include', $retriever['data']) && !array_key_exists('customxslt', $retriever['data'])) {
Logger::info('retriever_apply_dom_filter: no include and no customxslt');
@@ -517,41 +485,15 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
return;
}
- //@@@ break this bit into separate function
- $encoding = retriever_get_encoding($resource);
- $content = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $encoding);
- $doc = new DOMDocument('1.0', 'UTF-8');
- if (strpos($resource['type'], 'html') !== false) {
- @$doc->loadHTML($content);
- }
- else {
- $doc->loadXML($content);
- }
+ $doc = retriever_load_into_dom($resource);
- $params = array('$spec' => $retriever['data']);
- $extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/');
- $extract_xslt = Renderer::replaceMacros($extract_template, $params);
- if ($retriever['data']['include']) {
- Logger::debug('retriever_apply_dom_filter: applying include/exclude template \"' . $extract_xslt . '\"');
- $doc = retriever_apply_xslt_text($extract_xslt, $doc);
- }
- if (array_key_exists('customxslt', $retriever['data']) && $retriever['data']['customxslt']) {
- Logger::debug('retriever_apply_dom_filter: applying custom XSLT \"' . $retriever['data']['customxslt'] . '\"');
- $doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc);
- }
+ $doc = retriever_extract($doc, $retriever);
if (!$doc) {
Logger::info('retriever_apply_dom_filter: failed to apply extract XSLT template');
return;
}
- //@@@ break this bit into separate function
- $components = parse_url($resource['redirect-url']);
- $rooturl = $components['scheme'] . "://" . $components['host'];
- $dirurl = $rooturl . dirname($components['path']) . "/";
- $params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl);
- $fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/');
- $fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params);
- $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc);
+ $doc = retriever_globalise_urls($doc, $resource);
if (!$doc) {
Logger::info('retriever_apply_dom_filter: failed to apply fix urls XSLT template');
return;
@@ -571,10 +513,56 @@ function retriever_apply_dom_filter($retriever, &$item, $resource) {
Item::update(['body' => $body], ['uri-id' => $uri_id]);
}
+/**
+ * Parse a retrieved resource's body into a DOMDocument.
+ *
+ * The body is converted to HTML entities from the encoding reported by
+ * retriever_get_encoding(). Resources whose `type` contains "html" go through
+ * the HTML loader with its errors suppressed; everything else is loaded as XML.
+ *
+ * @param array $resource Retriever resource; `data` and `type` are read.
+ *
+ * @return DOMDocument The document object, whether or not loading succeeded.
+ */
+function retriever_load_into_dom($resource) {
+	Logger::info('@@@ retriever_load_into_dom start');
+	$charset = retriever_get_encoding($resource);
+	$markup = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $charset);
+	$dom = new DOMDocument('1.0', 'UTF-8');
+	$is_html = strpos($resource['type'], 'html') !== false;
+	if (!$is_html) {
+		$dom->loadXML($markup);
+	}
+	else {
+		@$dom->loadHTML($markup);
+	}
+	Logger::info('@@@ retriever_load_into_dom end');
+	return $dom;
+}
+
+/**
+ * Apply a retriever rule's extraction steps to a document.
+ *
+ * Runs the include/exclude template (extract.tpl) when the rule has a
+ * non-empty `include`, then the rule's custom XSLT when one is configured.
+ * Either key may be absent from the rule data.
+ *
+ * @param mixed $doc       Document as produced by retriever_load_into_dom().
+ * @param array $retriever Retriever rule; only its `data` array is read.
+ *
+ * @return mixed Result of the last applied transformation, or $doc unchanged when none applies.
+ */
+function retriever_extract($doc, $retriever) {
+	Logger::info('@@@ retriever_extract start');
+	$rule_data = $retriever['data'];
+
+	if (!empty($rule_data['include'])) {
+		// Only render the template when it is actually going to be applied.
+		$extract_template = Renderer::getMarkupTemplate('extract.tpl', 'addon/retriever/');
+		$extract_xslt = Renderer::replaceMacros($extract_template, ['$spec' => $rule_data]);
+		Logger::debug('retriever_extract: applying include/exclude template "' . $extract_xslt . '"');
+		$doc = retriever_apply_xslt_text($extract_xslt, $doc);
+	}
+
+	// Skip the custom step when the previous one left no document to work on;
+	// the falsy value is returned as-is for the caller to check.
+	if ($doc && !empty($rule_data['customxslt'])) {
+		Logger::debug('retriever_extract: applying custom XSLT "' . $rule_data['customxslt'] . '"');
+		$doc = retriever_apply_xslt_text($rule_data['customxslt'], $doc);
+	}
+
+	Logger::info('@@@ retriever_extract end');
+	return $doc;
+}
+
+/**
+ * Rewrite the document's URLs against the resource's final (redirect) URL.
+ *
+ * Derives a root URL (scheme, host and port) and a directory URL (root plus
+ * the path up to and including its last slash) and passes both to the
+ * fix-urls.tpl XSLT template. When the redirect URL has no scheme or host,
+ * the document is returned untouched.
+ *
+ * @param mixed $doc      Document to transform.
+ * @param array $resource Retriever resource; `redirect-url` is read.
+ *
+ * @return mixed Result of retriever_apply_xslt_text(), or $doc unchanged when no base URL is available.
+ */
+function retriever_globalise_urls($doc, $resource) {
+	Logger::info('@@@ retriever_globalise_urls start');
+	$redirect_url = $resource['redirect-url'] ?? '';
+	$components = parse_url($redirect_url);
+	// parse_url() returns false on malformed input and omits absent parts.
+	if (!is_array($components) || empty($components['scheme']) || empty($components['host'])) {
+		Logger::warning('retriever_globalise_urls: cannot derive a base URL from "' . $redirect_url . '"');
+		return $doc;
+	}
+
+	$rooturl = $components['scheme'] . "://" . $components['host'];
+	if (isset($components['port'])) {
+		$rooturl .= ':' . $components['port'];
+	}
+
+	// Keep everything up to the last slash, so "/index.html" gives "/" and
+	// "/a/b/" stays "/a/b/" (dirname() yields "//" and "/a/" for these).
+	$path = $components['path'] ?? '/';
+	$last_slash = strrpos($path, '/');
+	$dirurl = $rooturl . ($last_slash === false ? '/' : substr($path, 0, $last_slash + 1));
+
+	$params = ['$dirurl' => $dirurl, '$rooturl' => $rooturl];
+	$fix_urls_template = Renderer::getMarkupTemplate('fix-urls.tpl', 'addon/retriever/');
+	$fix_urls_xslt = Renderer::replaceMacros($fix_urls_template, $params);
+	$doc = retriever_apply_xslt_text($fix_urls_xslt, $doc);
+	Logger::info('@@@ retriever_globalise_urls end');
+	return $doc;
+}
+
function retrieve_images(&$item, $a) {
// Note that $item doesn't necessarily contain all the fields you would expect, in particular 'id'
+ //@@@ does it contain uri-id? //@@@ it definitely does not
- Logger::debug('@@@ retrieve_images start item '. $item['id'] . ' uri ' . $item['uri'] . ' uri id ' . $item['uri-id'] . ' plink ' . $item['plink'] . ' guid ' . $item['guid']);
+ // 'uri-id' is guarded like 'id' because $item does not necessarily carry either field at this point
+ Logger::debug('@@@ retrieve_images start item id '. (array_key_exists('id', $item) ? $item['id'] : 'undef') . ' uri ' . $item['uri'] . ' uri id ' . (array_key_exists('uri-id', $item) ? $item['uri-id'] : 'undef') . ' plink ' . $item['plink'] . ' guid ' . $item['guid']);
$uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
$content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $uri_id]);
@@ -584,7 +572,6 @@ function retrieve_images(&$item, $a) {
return;
}
- Logger::info('@@@ retrieve_images looking in body "' . $body . '"');
// I suspect that the first two are not used any more?
preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $item["body"], $matches1);
preg_match_all("/\[img\](.*?)\[\/img\]/ism", $item["body"], $matches2);
@@ -592,9 +579,7 @@ function retrieve_images(&$item, $a) {
$matches = array_merge($matches1[3], $matches2[1], $matches3[1]);
Logger::debug('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
foreach ($matches as $url) {
- Logger::debug('@@@ retrieve_images: url ' . $url);
if (strpos($url, get_app()->getBaseUrl()) === FALSE) {
- Logger::debug('@@@ retrieve_images: about to add_retriever_resource uid ' . $item['uid'] . ' cid ' . $item['contact-id']);
$resource = add_retriever_resource($a, $url, $item['uid'], $item['contact-id'], true);
if (!$resource['completed']) {
add_retriever_item($item, $resource);
@@ -604,12 +589,11 @@ function retrieve_images(&$item, $a) {
}
}
}
- Logger::info('@@@ retrieve_images end');
}
function retriever_check_item_completed(&$item)
{
- Logger::debug('@@@ 9 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
+ // TODO: figure out how to do this with DBA module
$r = q('SELECT count(*) FROM retriever_item WHERE `item-uri` = "%s" ' .
'AND `item-uid` = %d AND `contact-id` = %d AND `finished` = 0',
DBA::escape($item['uri']), intval($item['uid']),
@@ -620,12 +604,7 @@ function retriever_check_item_completed(&$item)
$item['visible'] = $waiting ? 0 : 1;
if (array_key_exists('id', $item) && ($item['id'] > 0) && ($old_visible != $item['visible'])) {
Logger::debug('retriever_check_item_completed: changing visible flag to ' . $item['visible']);
- q("UPDATE `item` SET `visible` = %d WHERE `id` = %d",
- intval($item['visible']),
- intval($item['id']));
- q("UPDATE `thread` SET `visible` = %d WHERE `iid` = %d",
- intval($item['visible']),
- intval($item['id']));
+ // Persist the recomputed flag (0 while resources are pending, 1 once complete), not a constant 0
+ Item::update(['visible' => $item['visible']], ['id' => intval($item['id'])]);
}
}
@@ -647,11 +626,8 @@ function retriever_apply_completed_resource_to_item($retriever, &$item, $resourc
}
}
-//@@@ todo: change all Logger::info t etc
//@@@ todo: what is this reference for? document if needed delete if not
function retriever_transform_images($a, &$item, $resource) {
- Logger::debug('@@@ 11 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
- Logger::info('@@@ retriever_transform_images');
if (!$resource['data']) {
Logger::info('retriever_transform_images: no data available for ' . $resource['id'] . ' ' . $resource['url']);
return;
@@ -659,51 +635,37 @@ function retriever_transform_images($a, &$item, $resource) {
$uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
- try { //@@@ probably can get rid of this try/catch
- $data = $resource['data'];
- $type = $resource['type'];
- $uid = $item['uid'];
- $cid = $item['contact-id'];
- $rid = Photo::newResource();
- $path = parse_url($resource['url'], PHP_URL_PATH);
- $parts = pathinfo($path);
- $filename = $parts['filename'] . (array_key_exists('extension', $parts) ? '.' . $parts['extension'] : '');
- Logger::info('@@@ retriever_transform_images url ' . $resource['url'] . ' path ' . $path . ' filename ' . $parts['filename']);
- $album = 'Wall Photos';
- $scale = 0;
- $desc = ''; // TODO: store alt text with resource when it's requested so we can fill this in
- Logger::debug('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . $uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . $album . ' scale ' . $scale . ' desc ' . $desc);
- Logger::info('@@@ retriever_transform_images before new Image');
- $image = new Image($data, $type);
- Logger::info('@@@ retriever_transform_images after new Image');
- if (!$image->isValid()) {
- Logger::warning('retriever_transform_images: invalid image found at URL ' . $resource['url'] . ' for item ' . $item['id']);
- return;
- }
- Logger::info('@@@ retriever_transform_images before Photo::store');
- $photo = Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc);
- Logger::info('@@@ retriever_transform_images after Photo::store');
- $new_url = System::baseUrl() . '/photo/' . $rid . '-0.' . $image->getExt();
- Logger::info('@@@ retriever_transform_images new url ' . $new_url . ' rid ' . $rid . ' ext ' . $image->getExt());
- if (!strlen($new_url)) {
- Logger::warning('retriever_transform_images: no replacement URL for image ' . $resource['url']);
- return;
- }
-
- $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $uri_id]);
- $body = $content['body'];
- Logger::info('@@@ retriever_transform_images: found body for uri id ' . $uri_id . ': ' . $body);
-
- Logger::debug('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . $item['uri']);
- Logger::debug('@@@ retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in body ' . $body);
- $body = str_replace($resource["url"], $new_url, $body);
-
- Logger::info('@@@ retriever_transform_images: result \"' . $body . '\"');
- Item::update(['body' => $body], ['uri-id' => $uri_id]);
- } catch (Exception $e) {
- Logger::info('retriever_transform_images caught exception ' . $e->getMessage());
+ $data = $resource['data'];
+ $type = $resource['type'];
+ $uid = $item['uid'];
+ $cid = $item['contact-id'];
+ $rid = Photo::newResource();
+ $path = parse_url($resource['url'], PHP_URL_PATH);
+ $parts = pathinfo($path);
+ $filename = $parts['filename'] . (array_key_exists('extension', $parts) ? '.' . $parts['extension'] : '');
+ $album = 'Wall Photos';
+ $scale = 0;
+ $desc = ''; // TODO: store alt text with resource when it's requested so we can fill this in
+ Logger::debug('retriever_transform_images storing ' . strlen($data) . ' bytes type ' . $type . ': uid ' . $uid . ' cid ' . $cid . ' rid ' . $rid . ' filename ' . $filename . ' album ' . $album . ' scale ' . $scale . ' desc ' . $desc);
+ $image = new Image($data, $type);
+ if (!$image->isValid()) {
+ Logger::warning('retriever_transform_images: invalid image found at URL ' . $resource['url'] . ' for item ' . $item['id']);
return;
}
+ $photo = Photo::store($image, $uid, $cid, $rid, $filename, $album, 0, 0, "", "", "", "", $desc);
+ $new_url = System::baseUrl() . '/photo/' . $rid . '-0.' . $image->getExt();
+ if (!strlen($new_url)) {
+ Logger::warning('retriever_transform_images: no replacement URL for image ' . $resource['url']);
+ return;
+ }
+
+ $content = DBA::selectFirst('item-content', ['body'], ['uri-id' => $uri_id]);
+ $body = $content['body'];
+
+ Logger::debug('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . $new_url . ' in item ' . $item['uri']);
+ $body = str_replace($resource["url"], $new_url, $body);
+
+ Item::update(['body' => $body], ['uri-id' => $uri_id]);
}
function retriever_content($a) {
@@ -712,6 +674,7 @@ function retriever_content($a) {
return;
}
if ($a->argv[1] === 'help') {
+ //@@@ fix me
$feeds = q("SELECT `id`, `name`, `thumb` FROM contact WHERE `uid` = %d AND `network` = 'feed'",
local_user());
foreach ($feeds as $k=>$v) {
@@ -729,7 +692,7 @@ function retriever_content($a) {
if (!empty($_POST["id"])) {
$retriever_rule = get_retriever_rule($a->argv[1], local_user(), true);
$retriever_rule['data'] = array();
- foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) {
+ foreach (array('modurl', 'pattern', 'replace', 'enable', 'images', 'customxslt', 'storecookies', 'cookiedata') as $setting) {
if (empty($_POST['retriever_' . $setting])) {
$retriever_rule['data'][$setting] = NULL;
}
@@ -753,6 +716,7 @@ function retriever_content($a) {
unset($retriever_rule['data']['exclude'][$k]);
}
}
+ //@@@ fix me
q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d",
DBA::escape(json_encode($retriever_rule['data'])), intval($retriever_rule["id"]));
$a->page['content'] .= "Settings Updated";
@@ -769,6 +733,11 @@ function retriever_content($a) {
'retriever_enable',
L10n::t('Enabled'),
$retriever_rule['data']['enable']),
+ '$modurl' => array(
+ 'retriever_modurl',
+ L10n::t('Modify URL'),
+ $retriever_rule['data']['modurl'],
+ L10n::t("Modify each article's URL with regular expressions before retrieving.")),
'$pattern' => array(
'retriever_pattern',
L10n::t('URL Pattern'),
@@ -832,7 +801,8 @@ function retriever_contact_photo_menu($a, &$args) {
}
function retriever_post_remote_hook(&$a, &$item) {
- Logger::info('@@@ 12 item class is ' . retriever_class_of_item($item) . ' ' . mat_test($item));
+ // Note that $item doesn't necessarily contain all the fields you would expect, in particular 'id'
+
Logger::info('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id']);
$uri_id = ItemURI::getIdByURI($item['uri']); //@@@ why can't I get this from the item itself?
@@ -845,14 +815,13 @@ function retriever_post_remote_hook(&$a, &$item) {
// Convert to HTML and back to take advantage of bbcode's resolution of oembeds.
$content = DBA::selectFirst('item-content', [], ['uri-id' => $uri_id]);
$body = HTML::toBBCode(BBCode::convert($content['body']));
- Logger::debug('@@@ retriever_post_remote_hook item uri-id ' . $uri_id . ' body "' . $item['body'] . '" item content body "' . $body . '"');
if ($body) {
$item['body'] = $body;
Item::update(['body' => $body], ['uri-id' => $uri_id]);
}
}
if (PConfig::get($item["uid"], 'retriever', 'all_photos')) {
- retrieve_images($item, $a);
+ retrieve_images($item, $a); //@@@ backwards
}
}
retriever_check_item_completed($item);
diff --git a/retriever/templates/rule-config.tpl b/retriever/templates/rule-config.tpl
index 9061d1ff..171054de 100644
--- a/retriever/templates/rule-config.tpl
+++ b/retriever/templates/rule-config.tpl
@@ -41,6 +41,25 @@ function retriever_remove_row(id, number)
tbody.removeChild(row);
}
+function retriever_toggle_url_block()
+{
+ var pattern = document.querySelector("#id_retriever_pattern").parentNode;
+ if (document.querySelector("#id_retriever_modurl").checked) {
+ pattern.style.display = "block";
+ }
+ else {
+ pattern.style.display = "none";
+ }
+
+ var replace = document.querySelector("#id_retriever_replace").parentNode;
+ if (document.querySelector("#id_retriever_modurl").checked) {
+ replace.style.display = "block";
+ }
+ else {
+ replace.style.display = "none";
+ }
+}
+
function retriever_toggle_cookiedata_block()
{
var div = document.querySelector("#id_retriever_cookiedata").parentNode;
@@ -53,6 +72,8 @@ function retriever_toggle_cookiedata_block()
}
document.addEventListener('DOMContentLoaded', function() {
+ retriever_toggle_url_block();
+ document.querySelector("#id_retriever_modurl").addEventListener('change', retriever_toggle_url_block, false);
retriever_toggle_cookiedata_block();
document.querySelector("#id_retriever_storecookies").addEventListener('change', retriever_toggle_cookiedata_block, false);
}, false);
@@ -62,10 +83,6 @@ document.addEventListener('DOMContentLoaded', function() {