Pagecache for frequently fetched pages

This commit is contained in:
Michael 2022-09-06 06:04:41 +00:00
parent 934a3a6721
commit 6eb9dff807
11 changed files with 151 additions and 26 deletions

View file

@ -1,6 +1,6 @@
-- ------------------------------------------
-- Friendica 2022.09-rc (Giant Rhubarb)
-- DB_UPDATE_VERSION 1482
-- DB_UPDATE_VERSION 1483
-- ------------------------------------------
@ -1007,6 +1007,17 @@ CREATE TABLE IF NOT EXISTS `openwebauth-token` (
FOREIGN KEY (`uid`) REFERENCES `user` (`uid`) ON UPDATE RESTRICT ON DELETE CASCADE
) DEFAULT COLLATE utf8mb4_general_ci COMMENT='Store OpenWebAuth token to verify contacts';
--
-- TABLE pagecache
--
CREATE TABLE IF NOT EXISTS `pagecache` (
`page` varbinary(255) NOT NULL COMMENT 'Page',
`content` mediumtext COMMENT 'Page content',
`fetched` datetime COMMENT 'date when the page had been fetched',
PRIMARY KEY(`page`),
INDEX `fetched` (`fetched`)
) DEFAULT COLLATE utf8mb4_general_ci COMMENT='Stores temporary data';
--
-- TABLE parsed_url
--

View file

@ -47,6 +47,7 @@ Database Tables
| [notify-threads](help/database/db_notify-threads) | |
| [oembed](help/database/db_oembed) | cache for OEmbed queries |
| [openwebauth-token](help/database/db_openwebauth-token) | Store OpenWebAuth token to verify contacts |
| [pagecache](help/database/db_pagecache) | Stores temporary data |
| [parsed_url](help/database/db_parsed_url) | cache for 'parse_url' queries |
| [pconfig](help/database/db_pconfig) | personal (per user) configuration storage |
| [permissionset](help/database/db_permissionset) | |

View file

@ -0,0 +1,24 @@
Table pagecache
===========
Stores temporary data
Fields
------
| Field | Description | Type | Null | Key | Default | Extra |
| ------- | ----------------------------------- | -------------- | ---- | --- | ------- | ----- |
| page | Page | varbinary(255) | NO | PRI | NULL | |
| content | Page content | mediumtext | YES | | NULL | |
| fetched | date when the page had been fetched | datetime | YES | | NULL | |
Indexes
------------
| Name | Fields |
| ------- | ------- |
| PRIMARY | page |
| fetched | fetched |
Return to [database documentation](help/database)

View file

@ -3321,7 +3321,7 @@ class Contact
continue;
}
$contact = self::getByURL($url, false, ['id', 'updated']);
if (empty($contact['id'])) {
if (empty($contact['id']) && Network::isValidHttpUrl($url)) {
Worker::add(PRIORITY_LOW, 'AddContact', 0, $url);
++$added;
} elseif ($contact['updated'] < DateTimeFormat::utc('now -7 days')) {

View file

@ -21,8 +21,10 @@
namespace Friendica\Model;
use Friendica\Core\Logger;
use Friendica\Database\Database;
use Friendica\Database\DBA;
use Friendica\DI;
class ItemURI
{
@ -35,14 +37,16 @@ class ItemURI
*/
public static function insert(array $fields)
{
$fields = DI::dbaDefinition()->truncateFieldsForTable('item-uri', $fields);
if (!DBA::exists('item-uri', ['uri' => $fields['uri']])) {
DBA::insert('item-uri', $fields, Database::INSERT_UPDATE);
DBA::insert('item-uri', $fields, Database::INSERT_IGNORE);
}
$itemuri = DBA::selectFirst('item-uri', ['id', 'guid'], ['uri' => $fields['uri']]);
if (!DBA::isResult($itemuri)) {
// This shouldn't happen
Logger::warning('Item-uri not found', $fields);
return null;
}
@ -77,22 +81,4 @@ class ItemURI
return $itemuri['id'] ?? 0;
}
/**
 * Looks up the item-uri id that belongs to a given guid.
 *
 * @param string $guid
 * @return integer item-uri id, or 0 when no row matches the guid
 * @throws \Exception
 */
public static function getIdByGUID(string $guid): int
{
	$row = DBA::selectFirst('item-uri', ['id'], ['guid' => $guid]);

	// No matching row: report 0 instead of an id.
	return DBA::isResult($row) ? $row['id'] : 0;
}
}

View file

@ -31,6 +31,7 @@ use Friendica\Model\Item;
use Friendica\Model\Post;
use Friendica\Network\HTTPException;
use Friendica\Protocol\ActivityPub;
use Friendica\Protocol\ActivityPub\PageCache;
use Friendica\Util\HTTPSignature;
use Friendica\Util\Network;
use Friendica\Util\Strings;
@ -50,6 +51,13 @@ class Objects extends BaseModule
DI::baseUrl()->redirect(str_replace('objects/', 'display/', DI::args()->getQueryString()));
}
$data = PageCache::fetch($_SERVER['REQUEST_URI']);
if (!empty($data)) {
header('Access-Control-Allow-Origin: *');
System::jsonExit($data, 'application/activity+json');
}
$itemuri = DBA::selectFirst('item-uri', ['id'], ['guid' => $this->parameters['guid']]);
if (DBA::isResult($itemuri)) {
@ -127,6 +135,10 @@ class Objects extends BaseModule
throw new HTTPException\NotFoundException();
}
if (in_array($item['private'], [Item::PUBLIC, Item::UNLISTED])) {
PageCache::add($_SERVER['REQUEST_URI'], $data);
}
// Relaxed CORS header for public items
header('Access-Control-Allow-Origin: *');

View file

@ -38,6 +38,7 @@ use Friendica\Model\Verb;
use Friendica\Module\BaseSettings;
use Friendica\Network\HTTPException;
use Friendica\Protocol\Activity;
use Friendica\Util\Network;
use Friendica\Util\Temporal;
use Friendica\Worker\Delivery;
@ -373,7 +374,7 @@ class Account extends BaseSettings
// or the handle of the account, therefore we check for either
// "http" or "@" to be present in the string.
// All other fields from the row will be ignored
if ((strpos($csvRow[0], '@') !== false) || in_array(parse_url($csvRow[0], PHP_URL_SCHEME), ['http', 'https'])) {
if ((strpos($csvRow[0], '@') !== false) || Network::isValidHttpUrl($csvRow[0])) {
Worker::add(PRIORITY_MEDIUM, 'AddContact', local_user(), $csvRow[0]);
} else {
Logger::notice('Invalid account', ['url' => $csvRow[0]]);

View file

@ -0,0 +1,73 @@
<?php
/**
* @copyright Copyright (C) 2010-2022, the Friendica project
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
namespace Friendica\Protocol\ActivityPub;
use Friendica\Core\Logger;
use Friendica\Database\Database;
use Friendica\Database\DBA;
use Friendica\DI;
use Friendica\Util\DateTimeFormat;
/**
 * Database-backed cache for frequently fetched pages.
 *
 * Entries are stored in the `pagecache` table, keyed by the page identifier.
 * Both reading and writing are only active while the `system.pagecache`
 * config value is enabled.
 */
class PageCache
{
	/**
	 * Relative time expression of the expiry threshold: entries whose
	 * `fetched` timestamp is older than this are treated as expired.
	 */
	const EXPIRY = 'now - 5 minutes';

	/**
	 * Size in bytes of the `page` key column (varbinary(255)).
	 */
	const MAX_PAGE_LENGTH = 255;

	/**
	 * Add content to the page cache
	 *
	 * Does nothing when the page cache is disabled. Expired entries are purged
	 * on every call. Pages whose key does not fit into the key column are not
	 * stored.
	 *
	 * @param string $page    Key of the page (the caller passes the request URI)
	 * @param mixed  $content Content to cache, stored in serialized form
	 * @return void
	 */
	public static function add(string $page, $content)
	{
		if (!DI::config()->get('system', 'pagecache')) {
			return;
		}

		// Housekeeping: drop everything that hasn't been fetched recently.
		DBA::delete('pagecache', ["`fetched` < ?", DateTimeFormat::utc(self::EXPIRY)]);

		// A key longer than the column can never be looked up again: depending on
		// the SQL mode the insert either fails or stores a truncated key, which
		// could additionally overwrite an unrelated entry. Skip such pages.
		if (strlen($page) > self::MAX_PAGE_LENGTH) {
			Logger::debug('Page key too long, not cached', ['page' => $page]);
			return;
		}

		DBA::insert('pagecache', ['page' => $page, 'content' => serialize($content), 'fetched' => DateTimeFormat::utcNow()], Database::INSERT_UPDATE);

		Logger::debug('Page added', ['page' => $page]);
	}

	/**
	 * Fetch data from the page cache
	 *
	 * A successful fetch refreshes the `fetched` timestamp of the entry.
	 *
	 * @param string $page Key of the page
	 * @return mixed The cached content, null when the cache is disabled or no
	 *               valid (non-empty, non-expired) entry exists
	 */
	public static function fetch(string $page)
	{
		// Entries are only purged in add(), which is a no-op while the cache is
		// disabled. Without this check leftover rows would be served forever.
		if (!DI::config()->get('system', 'pagecache')) {
			return null;
		}

		$pagecache = DBA::selectFirst('pagecache', [], ['page' => $page]);
		if (empty($pagecache['content'])) {
			return null;
		}

		// Enforce the expiry on read as well: purging only happens as a side
		// effect of add(), so an expired row can still be present here.
		if (empty($pagecache['fetched']) || ($pagecache['fetched'] < DateTimeFormat::utc(self::EXPIRY))) {
			return null;
		}

		// NOTE(review): refreshing the timestamp on every hit means an entry that
		// is fetched at least once per expiry interval is never regenerated -
		// confirm that this is the intended behaviour.
		DBA::update('pagecache', ['fetched' => DateTimeFormat::utcNow()], ['page' => $page]);

		Logger::debug('Page fetched', ['page' => $page]);

		// NOTE(review): unserialize() may instantiate arbitrary classes. The
		// content is written by add() only, but consider passing
		// ['allowed_classes' => false] if only arrays/scalars are ever cached.
		return unserialize($pagecache['content']);
	}
}

View file

@ -43,6 +43,7 @@ class OptimizeTables
DBA::e("OPTIMIZE TABLE `cache`");
DBA::e("OPTIMIZE TABLE `locks`");
DBA::e("OPTIMIZE TABLE `oembed`");
DBA::e("OPTIMIZE TABLE `pagecache`");
DBA::e("OPTIMIZE TABLE `parsed_url`");
DBA::e("OPTIMIZE TABLE `session`");

View file

@ -55,7 +55,7 @@
use Friendica\Database\DBA;
if (!defined('DB_UPDATE_VERSION')) {
define('DB_UPDATE_VERSION', 1482);
define('DB_UPDATE_VERSION', 1483);
}
return [
@ -795,7 +795,7 @@ return [
"conversation" => ["type" => "varbinary(383)", "comment" => ""],
"type" => ["type" => "varchar(64)", "comment" => "Type of the activity"],
"object-type" => ["type" => "varchar(64)", "comment" => "Type of the object activity"],
"object-object-type" => ["type" => "varchar(64)", "comment" => "Type of the object's object activity"],
"object-object-type" => ["type" => "varchar(64)", "comment" => "Type of the object's object activity"],
"received" => ["type" => "datetime", "comment" => "Receiving date"],
"activity" => ["type" => "mediumtext", "comment" => "The JSON activity"],
"signer" => ["type" => "varchar(255)", "comment" => ""],
@ -1051,6 +1051,18 @@ return [
"uid" => ["uid"],
]
],
"pagecache" => [
"comment" => "Stores temporary data",
"fields" => [
"page" => ["type" => "varbinary(255)", "not null" => "1", "primary" => "1", "comment" => "Page"],
"content" => ["type" => "mediumtext", "comment" => "Page content"],
"fetched" => ["type" => "datetime", "comment" => "date when the page had been fetched"],
],
"indexes" => [
"PRIMARY" => ["page"],
"fetched" => ["fetched"],
],
],
"parsed_url" => [
"comment" => "cache for 'parse_url' queries",
"fields" => [

View file

@ -478,6 +478,10 @@ return [
// Don't show smilies.
'no_smilies' => false,
// pagecache (Boolean)
// Cache frequently fetched pages.
'pagecache' => false,
// paranoia (Boolean)
// Log out users if their IP address changed.
'paranoia' => false,
@ -643,7 +647,7 @@ return [
'worker_load_exponent' => 3,
// worker_processes_cooldown (Integer)
// Maximum number pro processes that causes a cooldown before each worker function call.
// Maximum number of processes that causes a cooldown before each worker function call.
'worker_processes_cooldown' => 0,
// worker_multiple_fetch (Boolean)