forked from friendica/friendica-addons
Blockbot: New user agents, storage for possible bot user agents
This commit is contained in:
parent
d4abc9bac8
commit
033705ac4b
|
@ -13,6 +13,7 @@ use Friendica\DI;
|
|||
use Jaybizzle\CrawlerDetect\CrawlerDetect;
|
||||
use Friendica\Core\Logger;
|
||||
use Friendica\Core\Renderer;
|
||||
use Friendica\Core\System;
|
||||
use Friendica\Network\HTTPException\ForbiddenException;
|
||||
|
||||
require_once __DIR__ . DIRECTORY_SEPARATOR . 'vendor' . DIRECTORY_SEPARATOR . 'autoload.php';
|
||||
|
@ -87,7 +88,11 @@ function blockbot_init_1()
|
|||
'Facebot', 'Googlebot-Video/', 'msnbot/', 'Offline Explorer/', 'YandexNews/', 'msnbot-media/',
|
||||
'EmailWolf', 'Download Demon/', 'FeedFetcher-Google;', 'WebCopier', '+ONB_Bot_Btrix',
|
||||
'scoopit-crawler/', 'ia_archiver', 'Quora-Bot/', 'WebwikiBot/', 'FullStoryBot/',
|
||||
'wpbot/', 'SearchExpress', 'DuckDuckBot/', 'Google Web Preview',
|
||||
'wpbot/', 'SearchExpress', 'DuckDuckBot/', 'Google Web Preview', 'Amazonbot/',
|
||||
'ImagesiftBot;', 'webtech/', 'Bloglines/', 'Netcraft Web Server Survey', 'Spawning-AI',
|
||||
'NLUX_IAHarvester/', 'bots.retroverse.social', 'RSSingBot', 'Chrome-Lighthouse',
|
||||
't3versionsBot/', 'scaninfo@paloaltonetworks.com', 'intelx.io_bot', 'Google-Read-Aloud',
|
||||
'bot Mozilla',
|
||||
];
|
||||
|
||||
if (DI::config()->get('blockbot', 'block_gab')) {
|
||||
|
@ -102,7 +107,7 @@ function blockbot_init_1()
|
|||
'FedditLemmyverseCrawler/', 'lemmy-explorer-crawler/', 'URIports Validator',
|
||||
'rss-is-dead.lol web bot;', 'fedistatsCrawler/', 'W3C_CSS_Validator_JFouffa/',
|
||||
'IABot/', 'Slackbot 1', 'BeeperBot/', 'Matrix-Media-Repo/', 'P3P Validator',
|
||||
'KeybaseBot;',
|
||||
'KeybaseBot;', 'Observatory/', 'CSSCheck/', 'FeedBurner/', 'rss-is-dead.lol feed bot;'
|
||||
];
|
||||
|
||||
if (!DI::config()->get('blockbot', 'good_crawlers')) {
|
||||
|
@ -126,7 +131,8 @@ function blockbot_init_1()
|
|||
|
||||
// HTTP Libraries
|
||||
$http_libraries = ['ReactorNetty/', 'GuzzleHttp/', 'Embed PHP library', 'python-urllib3/',
|
||||
'EventMachine HttpClient', 'HTMLParser/'
|
||||
'EventMachine HttpClient', 'HTMLParser/', 'node-fetch', 'fasthttp', 'python-httpx/',
|
||||
'Fuzz Faster U Fool', 'gvfs/', 'Embarcadero URI Client/', 'grub-client'
|
||||
];
|
||||
|
||||
if (!DI::config()->get('blockbot', 'http_libraries')) {
|
||||
|
@ -165,6 +171,7 @@ function blockbot_init_1()
|
|||
'camo-rs asset proxy', 'gotosocial/', 'incestoma ', 'SpaceCowboys Android RSS Reader',
|
||||
'NewsBlur Feed Finder', 'Lemmy/', 'enby-town/', 'rss2tg bot;', '; HTTrack ',
|
||||
'MbinBot', 'kbinBot', 'Pixelfed/', 'NewsBlur Feed Fetcher', 'NewsBlur Page Fetcher',
|
||||
'facebookexternalua', 'FreshRSS/', 'BookWyrm/', 'Reeder/',
|
||||
];
|
||||
|
||||
if (blockbot_match($agents)) {
|
||||
|
@ -172,10 +179,28 @@ function blockbot_init_1()
|
|||
return;
|
||||
}
|
||||
|
||||
blockbot_save('blocked-bot', $_SERVER['HTTP_USER_AGENT']);
|
||||
|
||||
logger::notice('Blocked bot', $logdata);
|
||||
throw new ForbiddenException('Bots are not allowed. If you consider this a mistake, create an issue at https://github.com/friendica/friendica');
|
||||
}
|
||||
|
||||
/**
 * Counts how often a user agent was seen, using a DBA key/value store.
 *
 * The store is a file named $database inside the directory returned by
 * System::getTempPath(). The user agent is the key, the value is its running count.
 * This is best-effort bookkeeping: nothing is recorded when the DBA extension is
 * missing, the user agent is empty, or the store cannot be opened.
 *
 * @param string $database  File name of the store
 * @param string $userAgent User agent string to count
 *
 * @return void
 */
function blockbot_save($database, $userAgent)
{
	if (!function_exists('dba_open')) {
		return;
	}

	// An absent or empty user agent is not a usable key.
	$userAgent = (string)$userAgent;
	if ($userAgent === '') {
		return;
	}

	$path = System::getTempPath() . '/' . $database;

	// 'c': read/write access, creating the store if needed; 'l': lock via a .lck file.
	$resource = dba_open($path, 'cl');
	if ($resource === false) {
		// Counting must never break the request, so report the problem and carry on.
		Logger::notice('Bot database could not be opened', ['path' => $path]);
		return;
	}

	try {
		$count = dba_fetch($userAgent, $resource);
		if ($count === false) {
			dba_insert($userAgent, '1', $resource);
		} else {
			// DBA stores strings, so convert explicitly in both directions.
			dba_replace($userAgent, (string)((int)$count + 1), $resource);
		}
	} finally {
		// Always release the handle (and its lock), even if a DBA call throws.
		dba_close($resource);
	}
}
|
||||
|
||||
function blockbot_match(array $agents)
|
||||
{
|
||||
foreach ($agents as $agent) {
|
||||
|
|
Loading…
Reference in a new issue