Blockbot: New user agents, storage for possible bot user agents

This commit is contained in:
Michael 2024-04-16 05:00:23 +00:00
parent d4abc9bac8
commit 741c142a05
1 changed files with 28 additions and 3 deletions

View File

@ -13,6 +13,7 @@ use Friendica\DI;
use Jaybizzle\CrawlerDetect\CrawlerDetect;
use Friendica\Core\Logger;
use Friendica\Core\Renderer;
use Friendica\Core\System;
use Friendica\Network\HTTPException\ForbiddenException;
require_once __DIR__ . DIRECTORY_SEPARATOR . 'vendor' . DIRECTORY_SEPARATOR . 'autoload.php';
@ -87,7 +88,11 @@ function blockbot_init_1()
'Facebot', 'Googlebot-Video/', 'msnbot/', 'Offline Explorer/', 'YandexNews/', 'msnbot-media/',
'EmailWolf', 'Download Demon/', 'FeedFetcher-Google;', 'WebCopier', '+ONB_Bot_Btrix',
'scoopit-crawler/', 'ia_archiver', 'Quora-Bot/', 'WebwikiBot/', 'FullStoryBot/',
'wpbot/', 'SearchExpress', 'DuckDuckBot/', 'Google Web Preview',
'wpbot/', 'SearchExpress', 'DuckDuckBot/', 'Google Web Preview', 'Amazonbot/',
'ImagesiftBot;', 'webtech/', 'Bloglines/', 'Netcraft Web Server Survey', 'Spawning-AI',
'NLUX_IAHarvester/', 'bots.retroverse.social', 'RSSingBot', 'Chrome-Lighthouse',
't3versionsBot/', 'scaninfo@paloaltonetworks.com', 'intelx.io_bot', 'Google-Read-Aloud',
'bot Mozilla',
];
if (DI::config()->get('blockbot', 'block_gab')) {
@ -102,7 +107,7 @@ function blockbot_init_1()
'FedditLemmyverseCrawler/', 'lemmy-explorer-crawler/', 'URIports Validator',
'rss-is-dead.lol web bot;', 'fedistatsCrawler/', 'W3C_CSS_Validator_JFouffa/',
'IABot/', 'Slackbot 1', 'BeeperBot/', 'Matrix-Media-Repo/', 'P3P Validator',
'KeybaseBot;',
'KeybaseBot;', 'Observatory/', 'CSSCheck/', 'FeedBurner/', 'rss-is-dead.lol feed bot;'
];
if (!DI::config()->get('blockbot', 'good_crawlers')) {
@ -126,7 +131,8 @@ function blockbot_init_1()
// HTTP Libraries
$http_libraries = ['ReactorNetty/', 'GuzzleHttp/', 'Embed PHP library', 'python-urllib3/',
'EventMachine HttpClient', 'HTMLParser/'
'EventMachine HttpClient', 'HTMLParser/', 'node-fetch', 'fasthttp', 'python-httpx/',
'Fuzz Faster U Fool', 'gvfs/', 'Embarcadero URI Client/', 'grub-client'
];
if (!DI::config()->get('blockbot', 'http_libraries')) {
@ -165,6 +171,7 @@ function blockbot_init_1()
'camo-rs asset proxy', 'gotosocial/', 'incestoma ', 'SpaceCowboys Android RSS Reader',
'NewsBlur Feed Finder', 'Lemmy/', 'enby-town/', 'rss2tg bot;', '; HTTrack ',
'MbinBot', 'kbinBot', 'Pixelfed/', 'NewsBlur Feed Fetcher', 'NewsBlur Page Fetcher',
'facebookexternalua', 'FreshRSS/', 'BookWyrm/', 'Reeder/', 'microblogpub/',
];
if (blockbot_match($agents)) {
@ -172,10 +179,28 @@ function blockbot_init_1()
return;
}
blockbot_save('blocked-bot', $_SERVER['HTTP_USER_AGENT']);
logger::notice('Blocked bot', $logdata);
throw new ForbiddenException('Bots are not allowed. If you consider this a mistake, create an issue at https://github.com/friendica/friendica');
}
/**
 * Count one sighting of a user agent in a DBA key/value store.
 *
 * The store is a file named $database inside the system temp path. Keys are
 * user agent strings, values are the number of times the agent was recorded.
 * This is a best-effort statistic: when the dba extension is missing, the user
 * agent is empty, or the store cannot be opened, the call silently does nothing
 * so that the caller's own flow is never interrupted.
 *
 * @param string $database  File name of the store inside the temp path
 * @param string $userAgent User agent string to count
 *
 * @return void
 */
function blockbot_save($database, $userAgent)
{
    if (!function_exists('dba_open')) {
        return;
    }

    // An empty key carries no information and is not a usable database key.
    if ($userAgent === null || $userAgent === '') {
        return;
    }

    // 'c': read/write access, creating the store when it does not exist yet.
    // 'l': lock the store through a separate lock file.
    $resource = dba_open(System::getTempPath() . '/' . $database, 'cl');
    if ($resource === false) {
        // dba_open() fails e.g. without a usable handler or with an unwritable
        // temp path. Handing `false` to the other dba_* calls would raise a
        // TypeError on PHP 8, so stop here instead.
        return;
    }

    // dba_fetch() yields the stored value as a string, or false for a new key.
    $stored = dba_fetch($userAgent, $resource);
    $count  = ($stored === false) ? 1 : (int)$stored + 1;

    // dba_replace() inserts the key when it is missing and overwrites it
    // otherwise. The value is passed as a string, as the dba API expects.
    dba_replace($userAgent, (string)$count, $resource);

    dba_close($resource);
}
function blockbot_match(array $agents)
{
foreach ($agents as $agent) {