Compare commits

..

1 commit

Author SHA1 Message Date
Michael a5500c41a5 Blockbot: Reworked user agent parsing 2024-05-03 02:53:20 +00:00

View file

@@ -171,12 +171,12 @@ function blockbot_save($database, $userAgent)
return;
}
$ressource = dba_open(System::getTempPath() . '/' . $database, 'cl');
$result = dba_fetch($userAgent, $ressource);
$resource = dba_open(System::getTempPath() . '/' . $database, 'cl');
$result = dba_fetch($userAgent, $resource);
if ($result === false) {
dba_insert($userAgent, true, $ressource);
dba_insert($userAgent, true, $resource);
}
dba_close($ressource);
dba_close($resource);
}
function blockbot_check_login_attempt(string $url, array $logdata)
@@ -238,10 +238,11 @@ function blockbot_is_crawler(array $parts): bool
'webwikibot', 'woobot', 'project-resonance', 'mtrobot', 'webprosbot', 'youbot',
'queryseekerspider', 'scanning for research', 'semrushbot', 'senutobot', 'spawning-ai',
'statista.com publication finder crawler', 'turnitin', 'who.is bot', 'zaldamosearchbot',
'nuzzel', 'boardreader blog indexer', 'hatena-favicon', 'nbertaupete95',
'nuzzel', 'boardreader blog indexer', 'hatena-favicon', 'nbertaupete95', 'scrapy',
"electronic frontier foundation's do not track verifier", 'synapse', 'trendsmapresolver',
'pinterestbot', 'um-ln', 'slack-imgproxy', 'diffbot', 'dataforseobot', 'bw', 'bitlybot',
'twingly recon-klondike', 'imagesiftbot',
'twingly recon-klondike', 'imagesiftbot', 'google', 'rogerbot', 'yahoocachesystem',
'vkshare', 'appid: s~virustotalcloud', 'clickagy intelligence bot v2',
];
foreach ($parts as $part) {
@@ -276,8 +277,10 @@ function blockbot_is_searchbot(array $parts): bool
'coccocbot-image', 'discobot', 'google-inspectiontool', 'netcraftsurveyagent',
'tineye-bot', 'tineye-bot-live', 'bingpreview', 'ask jeeves', 'adsbot-google', "msnbot-media ",
'googlebot-image', 'googlebot-news', 'googlebot-video', 'msnbot-media', 'yahoo! slurp china',
'inoreader.com-like feedfetcher-google', 'google-amphtml', 'duckduckbot',
'googleassociationservice', 'yandexwebmaster', 'yacybot', 'duckduckbot-https',
'inoreader.com-like feedfetcher-google', 'google-amphtml', 'duckduckbot', 'coccocbot-web',
'googleassociationservice', 'yandexwebmaster', 'yacybot', 'duckduckbot-https', 'yandexmobilebot',
'mail.ru_bot/fast', 'yandeximages', 'mail.ru_bot/img', 'ia_archiver', 'yandexblogs',
'yandexaccessibilitybot', 'yandeximageresizer', 'mail.ru_bot', 'yeti', 'obot', 'baiduspider-render',
];
foreach ($parts as $part) {
@@ -342,7 +345,7 @@ function blockbot_is_monitor(array $parts): bool
{
$agents = [
'alexa site audit', 'catchpoint', 'google page speed insights', 'checkhost',
'poduptime', 'chrome-lighthouse', 'zabbix',
'poduptime', 'chrome-lighthouse', 'zabbix', 'cloudflare-alwaysonline',
];
foreach ($parts as $part) {
@@ -483,7 +486,7 @@ function blockbot_is_service_agent(array $parts): bool
'chrome privacy preserving prefetch proxy', 'http compression test', 'microsoftpreview',
'pocketimagecache', 'wordpress', 'skypeuripreview preview', 'wordpress.com', 'discordbot',
'summalybot', 'livelapbot', 'whatsapp', 'facebot', 'skypeuripreview',
'plasmatrap image proxy server',
'plasmatrap image proxy server', 'grammarly',
];
foreach ($parts as $part) {
@@ -519,6 +522,7 @@ function blockbot_is_http_library(array $parts): bool
'cpp-httplib', 'fuzz faster u fool v1.3.1-dev', 'fuzz faster u fool v1.5.0-dev',
'go http package', 'go-resty', 'http.rb', 'ivre-masscan', 'java1.0.21.0',
'jsdom', 'python-urllib3', 'reactornetty', 'req', 'restsharp', 'ruby-rdf-distiller',
'pycurl',
];
foreach ($parts as $part) {