diff --git a/blockbot/blockbot.php b/blockbot/blockbot.php new file mode 100644 index 000000000..30ecc3a6e --- /dev/null +++ b/blockbot/blockbot.php @@ -0,0 +1,32 @@ + + * + */ + +use Friendica\App; +use Friendica\Core\Hook; +use Friendica\Core\System; +use Jaybizzle\CrawlerDetect\CrawlerDetect; + +require_once __DIR__ . DIRECTORY_SEPARATOR . 'vendor' . DIRECTORY_SEPARATOR . 'autoload.php'; + +function blockbot_install() { + Hook::register('init_1', __FILE__, 'blockbot_init_1'); +} + + +function blockbot_uninstall() { + Hook::unregister('init_1', __FILE__, 'blockbot_init_1'); +} + +function blockbot_init_1(App $a) { + $crawlerDetect = new CrawlerDetect(); + + if ($crawlerDetect->isCrawler()) { + System::httpExit(403, 'Bots are not allowed'); + } +} diff --git a/blockbot/composer.json b/blockbot/composer.json new file mode 100644 index 000000000..f13a2d173 --- /dev/null +++ b/blockbot/composer.json @@ -0,0 +1,24 @@ +{ + "name": "friendica-addons/blockbot", + "description": "Blocking bots based on detecting bots/crawlers/spiders via the user agent and http_from header.", + "type": "friendica-addon", + "authors": [ + { + "name": "Philipp Holzer", + "email": "admin@philipp.info", + "homepage": "https://friendica.philipp.info/profile/nupplaphil", + "role": "Developer" + } + ], + "require": { + "php": ">=5.6.0", + "jaybizzle/crawler-detect": "1.*" + }, + "license": "BSD-3-Clause", + "minimum-stability": "stable", + "config": { + "optimize-autoloader": true, + "autoloader-suffix": "BlockBotAddon", + "preferred-install": "dist" + } +} diff --git a/blockbot/composer.lock b/blockbot/composer.lock new file mode 100644 index 000000000..26b021b11 --- /dev/null +++ b/blockbot/composer.lock @@ -0,0 +1,69 @@ +{ + "_readme": [ + "This file locks the dependencies of your project to a known state", + "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", + "This file is @generated automatically" + ], + "content-hash": 
"814fd867d00e99f84d12304e8e244aae", + "packages": [ + { + "name": "jaybizzle/crawler-detect", + "version": "v1.2.80", + "source": { + "type": "git", + "url": "https://github.com/JayBizzle/Crawler-Detect.git", + "reference": "af6a36e6d69670df3f0a3ed8e21d4b8cc67a7847" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/JayBizzle/Crawler-Detect/zipball/af6a36e6d69670df3f0a3ed8e21d4b8cc67a7847", + "reference": "af6a36e6d69670df3f0a3ed8e21d4b8cc67a7847", + "shasum": "" + }, + "require": { + "php": ">=5.3.0" + }, + "require-dev": { + "phpunit/phpunit": "^4.8|^5.5|^6.5", + "satooshi/php-coveralls": "1.*" + }, + "type": "library", + "autoload": { + "psr-4": { + "Jaybizzle\\CrawlerDetect\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Mark Beech", + "email": "m@rkbee.ch", + "role": "Developer" + } + ], + "description": "CrawlerDetect is a PHP class for detecting bots/crawlers/spiders via the user agent", + "homepage": "https://github.com/JayBizzle/Crawler-Detect/", + "keywords": [ + "crawler", + "crawler detect", + "crawler detector", + "crawlerdetect", + "php crawler detect" + ], + "time": "2019-04-05T19:52:02+00:00" + } + ], + "packages-dev": [], + "aliases": [], + "minimum-stability": "stable", + "stability-flags": [], + "prefer-stable": false, + "prefer-lowest": false, + "platform": { + "php": ">=5.6.0" + }, + "platform-dev": [] +} diff --git a/blockbot/vendor/autoload.php b/blockbot/vendor/autoload.php new file mode 100644 index 000000000..d31013473 --- /dev/null +++ b/blockbot/vendor/autoload.php @@ -0,0 +1,7 @@ + + * Jordi Boggiano + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Composer\Autoload; + +/** + * ClassLoader implements a PSR-0, PSR-4 and classmap class loader. 
+ * + * $loader = new \Composer\Autoload\ClassLoader(); + * + * // register classes with namespaces + * $loader->add('Symfony\Component', __DIR__.'/component'); + * $loader->add('Symfony', __DIR__.'/framework'); + * + * // activate the autoloader + * $loader->register(); + * + * // to enable searching the include path (eg. for PEAR packages) + * $loader->setUseIncludePath(true); + * + * In this example, if you try to use a class in the Symfony\Component + * namespace or one of its children (Symfony\Component\Console for instance), + * the autoloader will first look for the class under the component/ + * directory, and it will then fallback to the framework/ directory if not + * found before giving up. + * + * This class is loosely based on the Symfony UniversalClassLoader. + * + * @author Fabien Potencier + * @author Jordi Boggiano + * @see http://www.php-fig.org/psr/psr-0/ + * @see http://www.php-fig.org/psr/psr-4/ + */ +class ClassLoader +{ + // PSR-4 + private $prefixLengthsPsr4 = array(); + private $prefixDirsPsr4 = array(); + private $fallbackDirsPsr4 = array(); + + // PSR-0 + private $prefixesPsr0 = array(); + private $fallbackDirsPsr0 = array(); + + private $useIncludePath = false; + private $classMap = array(); + private $classMapAuthoritative = false; + private $missingClasses = array(); + private $apcuPrefix; + + public function getPrefixes() + { + if (!empty($this->prefixesPsr0)) { + return call_user_func_array('array_merge', $this->prefixesPsr0); + } + + return array(); + } + + public function getPrefixesPsr4() + { + return $this->prefixDirsPsr4; + } + + public function getFallbackDirs() + { + return $this->fallbackDirsPsr0; + } + + public function getFallbackDirsPsr4() + { + return $this->fallbackDirsPsr4; + } + + public function getClassMap() + { + return $this->classMap; + } + + /** + * @param array $classMap Class to filename map + */ + public function addClassMap(array $classMap) + { + if ($this->classMap) { + $this->classMap = 
array_merge($this->classMap, $classMap); + } else { + $this->classMap = $classMap; + } + } + + /** + * Registers a set of PSR-0 directories for a given prefix, either + * appending or prepending to the ones previously set for this prefix. + * + * @param string $prefix The prefix + * @param array|string $paths The PSR-0 root directories + * @param bool $prepend Whether to prepend the directories + */ + public function add($prefix, $paths, $prepend = false) + { + if (!$prefix) { + if ($prepend) { + $this->fallbackDirsPsr0 = array_merge( + (array) $paths, + $this->fallbackDirsPsr0 + ); + } else { + $this->fallbackDirsPsr0 = array_merge( + $this->fallbackDirsPsr0, + (array) $paths + ); + } + + return; + } + + $first = $prefix[0]; + if (!isset($this->prefixesPsr0[$first][$prefix])) { + $this->prefixesPsr0[$first][$prefix] = (array) $paths; + + return; + } + if ($prepend) { + $this->prefixesPsr0[$first][$prefix] = array_merge( + (array) $paths, + $this->prefixesPsr0[$first][$prefix] + ); + } else { + $this->prefixesPsr0[$first][$prefix] = array_merge( + $this->prefixesPsr0[$first][$prefix], + (array) $paths + ); + } + } + + /** + * Registers a set of PSR-4 directories for a given namespace, either + * appending or prepending to the ones previously set for this namespace. + * + * @param string $prefix The prefix/namespace, with trailing '\\' + * @param array|string $paths The PSR-4 base directories + * @param bool $prepend Whether to prepend the directories + * + * @throws \InvalidArgumentException + */ + public function addPsr4($prefix, $paths, $prepend = false) + { + if (!$prefix) { + // Register directories for the root namespace. + if ($prepend) { + $this->fallbackDirsPsr4 = array_merge( + (array) $paths, + $this->fallbackDirsPsr4 + ); + } else { + $this->fallbackDirsPsr4 = array_merge( + $this->fallbackDirsPsr4, + (array) $paths + ); + } + } elseif (!isset($this->prefixDirsPsr4[$prefix])) { + // Register directories for a new namespace. 
+ $length = strlen($prefix); + if ('\\' !== $prefix[$length - 1]) { + throw new \InvalidArgumentException("A non-empty PSR-4 prefix must end with a namespace separator."); + } + $this->prefixLengthsPsr4[$prefix[0]][$prefix] = $length; + $this->prefixDirsPsr4[$prefix] = (array) $paths; + } elseif ($prepend) { + // Prepend directories for an already registered namespace. + $this->prefixDirsPsr4[$prefix] = array_merge( + (array) $paths, + $this->prefixDirsPsr4[$prefix] + ); + } else { + // Append directories for an already registered namespace. + $this->prefixDirsPsr4[$prefix] = array_merge( + $this->prefixDirsPsr4[$prefix], + (array) $paths + ); + } + } + + /** + * Registers a set of PSR-0 directories for a given prefix, + * replacing any others previously set for this prefix. + * + * @param string $prefix The prefix + * @param array|string $paths The PSR-0 base directories + */ + public function set($prefix, $paths) + { + if (!$prefix) { + $this->fallbackDirsPsr0 = (array) $paths; + } else { + $this->prefixesPsr0[$prefix[0]][$prefix] = (array) $paths; + } + } + + /** + * Registers a set of PSR-4 directories for a given namespace, + * replacing any others previously set for this namespace. + * + * @param string $prefix The prefix/namespace, with trailing '\\' + * @param array|string $paths The PSR-4 base directories + * + * @throws \InvalidArgumentException + */ + public function setPsr4($prefix, $paths) + { + if (!$prefix) { + $this->fallbackDirsPsr4 = (array) $paths; + } else { + $length = strlen($prefix); + if ('\\' !== $prefix[$length - 1]) { + throw new \InvalidArgumentException("A non-empty PSR-4 prefix must end with a namespace separator."); + } + $this->prefixLengthsPsr4[$prefix[0]][$prefix] = $length; + $this->prefixDirsPsr4[$prefix] = (array) $paths; + } + } + + /** + * Turns on searching the include path for class files. 
+ * + * @param bool $useIncludePath + */ + public function setUseIncludePath($useIncludePath) + { + $this->useIncludePath = $useIncludePath; + } + + /** + * Can be used to check if the autoloader uses the include path to check + * for classes. + * + * @return bool + */ + public function getUseIncludePath() + { + return $this->useIncludePath; + } + + /** + * Turns off searching the prefix and fallback directories for classes + * that have not been registered with the class map. + * + * @param bool $classMapAuthoritative + */ + public function setClassMapAuthoritative($classMapAuthoritative) + { + $this->classMapAuthoritative = $classMapAuthoritative; + } + + /** + * Should class lookup fail if not found in the current class map? + * + * @return bool + */ + public function isClassMapAuthoritative() + { + return $this->classMapAuthoritative; + } + + /** + * APCu prefix to use to cache found/not-found classes, if the extension is enabled. + * + * @param string|null $apcuPrefix + */ + public function setApcuPrefix($apcuPrefix) + { + $this->apcuPrefix = function_exists('apcu_fetch') && ini_get('apc.enabled') ? $apcuPrefix : null; + } + + /** + * The APCu prefix in use, or null if APCu caching is not enabled. + * + * @return string|null + */ + public function getApcuPrefix() + { + return $this->apcuPrefix; + } + + /** + * Registers this instance as an autoloader. + * + * @param bool $prepend Whether to prepend the autoloader or not + */ + public function register($prepend = false) + { + spl_autoload_register(array($this, 'loadClass'), true, $prepend); + } + + /** + * Unregisters this instance as an autoloader. + */ + public function unregister() + { + spl_autoload_unregister(array($this, 'loadClass')); + } + + /** + * Loads the given class or interface. 
+ * + * @param string $class The name of the class + * @return bool|null True if loaded, null otherwise + */ + public function loadClass($class) + { + if ($file = $this->findFile($class)) { + includeFile($file); + + return true; + } + } + + /** + * Finds the path to the file where the class is defined. + * + * @param string $class The name of the class + * + * @return string|false The path if found, false otherwise + */ + public function findFile($class) + { + // class map lookup + if (isset($this->classMap[$class])) { + return $this->classMap[$class]; + } + if ($this->classMapAuthoritative || isset($this->missingClasses[$class])) { + return false; + } + if (null !== $this->apcuPrefix) { + $file = apcu_fetch($this->apcuPrefix.$class, $hit); + if ($hit) { + return $file; + } + } + + $file = $this->findFileWithExtension($class, '.php'); + + // Search for Hack files if we are running on HHVM + if (false === $file && defined('HHVM_VERSION')) { + $file = $this->findFileWithExtension($class, '.hh'); + } + + if (null !== $this->apcuPrefix) { + apcu_add($this->apcuPrefix.$class, $file); + } + + if (false === $file) { + // Remember that this class does not exist. + $this->missingClasses[$class] = true; + } + + return $file; + } + + private function findFileWithExtension($class, $ext) + { + // PSR-4 lookup + $logicalPathPsr4 = strtr($class, '\\', DIRECTORY_SEPARATOR) . $ext; + + $first = $class[0]; + if (isset($this->prefixLengthsPsr4[$first])) { + $subPath = $class; + while (false !== $lastPos = strrpos($subPath, '\\')) { + $subPath = substr($subPath, 0, $lastPos); + $search = $subPath . '\\'; + if (isset($this->prefixDirsPsr4[$search])) { + $pathEnd = DIRECTORY_SEPARATOR . substr($logicalPathPsr4, $lastPos + 1); + foreach ($this->prefixDirsPsr4[$search] as $dir) { + if (file_exists($file = $dir . $pathEnd)) { + return $file; + } + } + } + } + } + + // PSR-4 fallback dirs + foreach ($this->fallbackDirsPsr4 as $dir) { + if (file_exists($file = $dir . DIRECTORY_SEPARATOR . 
$logicalPathPsr4)) { + return $file; + } + } + + // PSR-0 lookup + if (false !== $pos = strrpos($class, '\\')) { + // namespaced class name + $logicalPathPsr0 = substr($logicalPathPsr4, 0, $pos + 1) + . strtr(substr($logicalPathPsr4, $pos + 1), '_', DIRECTORY_SEPARATOR); + } else { + // PEAR-like class name + $logicalPathPsr0 = strtr($class, '_', DIRECTORY_SEPARATOR) . $ext; + } + + if (isset($this->prefixesPsr0[$first])) { + foreach ($this->prefixesPsr0[$first] as $prefix => $dirs) { + if (0 === strpos($class, $prefix)) { + foreach ($dirs as $dir) { + if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr0)) { + return $file; + } + } + } + } + } + + // PSR-0 fallback dirs + foreach ($this->fallbackDirsPsr0 as $dir) { + if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr0)) { + return $file; + } + } + + // PSR-0 include paths. + if ($this->useIncludePath && $file = stream_resolve_include_path($logicalPathPsr0)) { + return $file; + } + + return false; + } +} + +/** + * Scope isolated include. + * + * Prevents access to $this/self from included files. + */ +function includeFile($file) +{ + include $file; +} diff --git a/blockbot/vendor/composer/LICENSE b/blockbot/vendor/composer/LICENSE new file mode 100644 index 000000000..f27399a04 --- /dev/null +++ b/blockbot/vendor/composer/LICENSE @@ -0,0 +1,21 @@ + +Copyright (c) Nils Adermann, Jordi Boggiano + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is furnished +to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + diff --git a/blockbot/vendor/composer/autoload_classmap.php b/blockbot/vendor/composer/autoload_classmap.php new file mode 100644 index 000000000..e066512b7 --- /dev/null +++ b/blockbot/vendor/composer/autoload_classmap.php @@ -0,0 +1,14 @@ + $vendorDir . '/jaybizzle/crawler-detect/src/CrawlerDetect.php', + 'Jaybizzle\\CrawlerDetect\\Fixtures\\AbstractProvider' => $vendorDir . '/jaybizzle/crawler-detect/src/Fixtures/AbstractProvider.php', + 'Jaybizzle\\CrawlerDetect\\Fixtures\\Crawlers' => $vendorDir . '/jaybizzle/crawler-detect/src/Fixtures/Crawlers.php', + 'Jaybizzle\\CrawlerDetect\\Fixtures\\Exclusions' => $vendorDir . '/jaybizzle/crawler-detect/src/Fixtures/Exclusions.php', + 'Jaybizzle\\CrawlerDetect\\Fixtures\\Headers' => $vendorDir . '/jaybizzle/crawler-detect/src/Fixtures/Headers.php', +); diff --git a/blockbot/vendor/composer/autoload_namespaces.php b/blockbot/vendor/composer/autoload_namespaces.php new file mode 100644 index 000000000..b7fc0125d --- /dev/null +++ b/blockbot/vendor/composer/autoload_namespaces.php @@ -0,0 +1,9 @@ + array($vendorDir . '/jaybizzle/crawler-detect/src'), +); diff --git a/blockbot/vendor/composer/autoload_real.php b/blockbot/vendor/composer/autoload_real.php new file mode 100644 index 000000000..ccb886a23 --- /dev/null +++ b/blockbot/vendor/composer/autoload_real.php @@ -0,0 +1,52 @@ += 50600 && !defined('HHVM_VERSION') && (!function_exists('zend_loader_file_encoded') || !zend_loader_file_encoded()); + if ($useStaticLoader) { + require_once __DIR__ . 
'/autoload_static.php'; + + call_user_func(\Composer\Autoload\ComposerStaticInitBlockBotAddon::getInitializer($loader)); + } else { + $map = require __DIR__ . '/autoload_namespaces.php'; + foreach ($map as $namespace => $path) { + $loader->set($namespace, $path); + } + + $map = require __DIR__ . '/autoload_psr4.php'; + foreach ($map as $namespace => $path) { + $loader->setPsr4($namespace, $path); + } + + $classMap = require __DIR__ . '/autoload_classmap.php'; + if ($classMap) { + $loader->addClassMap($classMap); + } + } + + $loader->register(true); + + return $loader; + } +} diff --git a/blockbot/vendor/composer/autoload_static.php b/blockbot/vendor/composer/autoload_static.php new file mode 100644 index 000000000..019871810 --- /dev/null +++ b/blockbot/vendor/composer/autoload_static.php @@ -0,0 +1,40 @@ + + array ( + 'Jaybizzle\\CrawlerDetect\\' => 24, + ), + ); + + public static $prefixDirsPsr4 = array ( + 'Jaybizzle\\CrawlerDetect\\' => + array ( + 0 => __DIR__ . '/..' . '/jaybizzle/crawler-detect/src', + ), + ); + + public static $classMap = array ( + 'Jaybizzle\\CrawlerDetect\\CrawlerDetect' => __DIR__ . '/..' . '/jaybizzle/crawler-detect/src/CrawlerDetect.php', + 'Jaybizzle\\CrawlerDetect\\Fixtures\\AbstractProvider' => __DIR__ . '/..' . '/jaybizzle/crawler-detect/src/Fixtures/AbstractProvider.php', + 'Jaybizzle\\CrawlerDetect\\Fixtures\\Crawlers' => __DIR__ . '/..' . '/jaybizzle/crawler-detect/src/Fixtures/Crawlers.php', + 'Jaybizzle\\CrawlerDetect\\Fixtures\\Exclusions' => __DIR__ . '/..' . '/jaybizzle/crawler-detect/src/Fixtures/Exclusions.php', + 'Jaybizzle\\CrawlerDetect\\Fixtures\\Headers' => __DIR__ . '/..' . 
'/jaybizzle/crawler-detect/src/Fixtures/Headers.php', + ); + + public static function getInitializer(ClassLoader $loader) + { + return \Closure::bind(function () use ($loader) { + $loader->prefixLengthsPsr4 = ComposerStaticInitBlockBotAddon::$prefixLengthsPsr4; + $loader->prefixDirsPsr4 = ComposerStaticInitBlockBotAddon::$prefixDirsPsr4; + $loader->classMap = ComposerStaticInitBlockBotAddon::$classMap; + + }, null, ClassLoader::class); + } +} diff --git a/blockbot/vendor/composer/installed.json b/blockbot/vendor/composer/installed.json new file mode 100644 index 000000000..d255d5738 --- /dev/null +++ b/blockbot/vendor/composer/installed.json @@ -0,0 +1,53 @@ +[ + { + "name": "jaybizzle/crawler-detect", + "version": "v1.2.80", + "version_normalized": "1.2.80.0", + "source": { + "type": "git", + "url": "https://github.com/JayBizzle/Crawler-Detect.git", + "reference": "af6a36e6d69670df3f0a3ed8e21d4b8cc67a7847" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/JayBizzle/Crawler-Detect/zipball/af6a36e6d69670df3f0a3ed8e21d4b8cc67a7847", + "reference": "af6a36e6d69670df3f0a3ed8e21d4b8cc67a7847", + "shasum": "" + }, + "require": { + "php": ">=5.3.0" + }, + "require-dev": { + "phpunit/phpunit": "^4.8|^5.5|^6.5", + "satooshi/php-coveralls": "1.*" + }, + "time": "2019-04-05T19:52:02+00:00", + "type": "library", + "installation-source": "dist", + "autoload": { + "psr-4": { + "Jaybizzle\\CrawlerDetect\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Mark Beech", + "email": "m@rkbee.ch", + "role": "Developer" + } + ], + "description": "CrawlerDetect is a PHP class for detecting bots/crawlers/spiders via the user agent", + "homepage": "https://github.com/JayBizzle/Crawler-Detect/", + "keywords": [ + "crawler", + "crawler detect", + "crawler detector", + "crawlerdetect", + "php crawler detect" + ] + } +] diff --git a/blockbot/vendor/jaybizzle/crawler-detect/LICENSE 
b/blockbot/vendor/jaybizzle/crawler-detect/LICENSE new file mode 100644 index 000000000..2f4e15e25 --- /dev/null +++ b/blockbot/vendor/jaybizzle/crawler-detect/LICENSE @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2015-2018 Mark Beech + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/blockbot/vendor/jaybizzle/crawler-detect/README.md b/blockbot/vendor/jaybizzle/crawler-detect/README.md new file mode 100644 index 000000000..e7c25f3b3 --- /dev/null +++ b/blockbot/vendor/jaybizzle/crawler-detect/README.md @@ -0,0 +1,72 @@ +



+crawlerdetect.io +

+

+ +

+ + + + + + + +

+ +## About CrawlerDetect + +CrawlerDetect is a PHP class for detecting bots/crawlers/spiders via the user agent and http_from header. It can currently detect thousands of bots/spiders/crawlers. + +### Installation +Run `composer require jaybizzle/crawler-detect 1.*` or add `"jaybizzle/crawler-detect": "1.*"` to your `composer.json`. + +### Usage +```PHP +use Jaybizzle\CrawlerDetect\CrawlerDetect; + +$CrawlerDetect = new CrawlerDetect; + +// Check the user agent of the current 'visitor' +if($CrawlerDetect->isCrawler()) { + // true if crawler user agent detected +} + +// Pass a user agent as a string +if($CrawlerDetect->isCrawler('Mozilla/5.0 (compatible; Sosospider/2.0; +http://help.soso.com/webspider.htm)')) { + // true if crawler user agent detected +} + +// Output the name of the bot that matched (if any) +echo $CrawlerDetect->getMatches(); +``` + +### Contributing +If you find a bot/spider/crawler user agent that CrawlerDetect fails to detect, please submit a pull request with the regex pattern added to the `$data` array in `Fixtures/Crawlers.php` and add the failing user agent to `tests/crawlers.txt`. + +Failing that, just create an issue with the user agent you have found, and we'll take it from there :) + +### Laravel Package +If you would like to use this with Laravel 4/5, please see [Laravel-Crawler-Detect](https://github.com/JayBizzle/Laravel-Crawler-Detect) + +### Symfony Bundle +To use this library with Symfony 2/3/4, check out the [CrawlerDetectBundle](https://github.com/nicolasmure/CrawlerDetectBundle). + +### YII2 Extension +To use this library with the YII2 framework, check out [yii2-crawler-detect](https://github.com/AlikDex/yii2-crawler-detect). + +### ES6 Library +To use this library with NodeJS or any ES6-based application, check out [es6-crawler-detect](https://github.com/JefferyHus/es6-crawler-detect). 
+ +### .NET Library +To use this library in a .net standard (including .net core) based project, check out [NetCrawlerDetect](https://github.com/gplumb/NetCrawlerDetect). + +### Nette Extension +To use this library with the Nette framework, checkout [NetteCrawlerDetect](https://github.com/JanGalek/Crawler-Detect). + +### Ruby Gem + +To use this library with Ruby on Rails or any Ruby-based application, check out [crawler_detect](https://github.com/loadkpi/crawler_detect) gem. + +_Parts of this class are based on the brilliant [MobileDetect](https://github.com/serbanghita/Mobile-Detect)_ + +[![Analytics](https://ga-beacon.appspot.com/UA-72430465-1/Crawler-Detect/readme?pixel)](https://github.com/JayBizzle/Crawler-Detect) diff --git a/blockbot/vendor/jaybizzle/crawler-detect/composer.json b/blockbot/vendor/jaybizzle/crawler-detect/composer.json new file mode 100755 index 000000000..0c0babe6c --- /dev/null +++ b/blockbot/vendor/jaybizzle/crawler-detect/composer.json @@ -0,0 +1,30 @@ +{ + "name": "jaybizzle/crawler-detect", + "type": "library", + "description": "CrawlerDetect is a PHP class for detecting bots/crawlers/spiders via the user agent", + "keywords": ["crawler", "crawler detect", "crawler detector", "crawlerdetect", "php crawler detect"], + "homepage": "https://github.com/JayBizzle/Crawler-Detect/", + "license": "MIT", + "authors": [ + { + "name": "Mark Beech", + "email": "m@rkbee.ch", + "role": "Developer" + } + ], + "require": { + "php": ">=5.3.0" + }, + "require-dev": { + "phpunit/phpunit": "^4.8|^5.5|^6.5", + "satooshi/php-coveralls": "1.*" + }, + "autoload": { + "psr-4": { + "Jaybizzle\\CrawlerDetect\\": "src/" + } + }, + "scripts": { + "test": "vendor/bin/phpunit" + } +} diff --git a/blockbot/vendor/jaybizzle/crawler-detect/export.php b/blockbot/vendor/jaybizzle/crawler-detect/export.php new file mode 100644 index 000000000..4c4b9d5d3 --- /dev/null +++ b/blockbot/vendor/jaybizzle/crawler-detect/export.php @@ -0,0 +1,41 @@ + + * + * This source file is 
subject to the MIT license that is bundled + * with this source code in the file LICENSE. + */ + +require 'src/Fixtures/AbstractProvider.php'; +require 'src/Fixtures/Crawlers.php'; +require 'src/Fixtures/Exclusions.php'; +require 'src/Fixtures/Headers.php'; + +$src = array( + 'Crawlers', + 'Exclusions', + 'Headers', +); + +foreach ($src as $class) { + $class = "Jaybizzle\\CrawlerDetect\\Fixtures\\$class"; + $object = new $class; + + outputJson($object); + outputTxt($object); +} + +function outputJson($object) +{ + $className = (new ReflectionClass($object))->getShortName(); + file_put_contents("raw/$className.json", json_encode($object->getAll())); +} + +function outputTxt($object) +{ + $className = (new ReflectionClass($object))->getShortName(); + file_put_contents("raw/$className.txt", implode(PHP_EOL, $object->getAll())); /* separator first: implode(array, string) is deprecated in PHP 7.4 and removed in PHP 8.0 */ +} diff --git a/blockbot/vendor/jaybizzle/crawler-detect/raw/Crawlers.json b/blockbot/vendor/jaybizzle/crawler-detect/raw/Crawlers.json new file mode 100644 index 000000000..a1e690ebf --- /dev/null +++ b/blockbot/vendor/jaybizzle/crawler-detect/raw/Crawlers.json @@ -0,0 +1 @@ +[".*Java.*outbrain"," YLT","^b0t$","^bluefish ","^Calypso v\\\/","^COMODO DCV","^DangDang","^DavClnt","^FDM ","^git\\\/","^Goose\\\/","^Grabber","^HTTPClient\\\/","^Java\\\/","^Jeode\\\/","^Jetty\\\/","^Mail\\\/","^Mget","^Microsoft URL Control","^NG\\\/[0-9\\.]","^NING\\\/","^PHP\\\/[0-9]","^RMA\\\/","^Ruby|Ruby\\\/[0-9]","^VSE\\\/[0-9]","^WordPress\\.com","^XRL\\\/[0-9]","^ZmEu","008\\\/","13TABS","192\\.comAgent","2ip\\.ru","404enemy","7Siters","80legs","a\\.pr-cy\\.ru","a3logics\\.in","A6-Indexer","Abonti","Aboundex","aboutthedomain","Accoona-AI-Agent","acoon","acrylicapps\\.com\\\/pulp","Acunetix","AdAuth\\\/","adbeat","AddThis","ADmantX","AdminLabs","adressendeutschland","adscanner","Adstxtaggregator","agentslug","AHC","aihit","aiohttp\\\/","Airmail","akka-http\\\/","akula\\\/","alertra","alexa site 
audit","Alibaba\\.Security\\.Heimdall","Alligator","allloadin","AllSubmitter","alyze\\.info","amagit","Anarchie","AndroidDownloadManager","Anemone","AngleSharp","annotate_google","Ant\\.com","Anturis Agent","AnyEvent-HTTP\\\/","Apache Droid","Apache OpenOffice","Apache-HttpAsyncClient","Apache-HttpClient","ApacheBench","Apexoo","APIs-Google","AportWorm\\\/","AppBeat\\\/","AppEngine-Google","AppStoreScraperZ","Aprc\\\/[0-9]","Arachmo","arachnode","Arachnophilia","aria2","Arukereso","asafaweb","AskQuickly","Ask Jeeves","ASPSeek","Asterias","Astute","asynchttp","Attach","autocite","Autonomy","axios\\\/","B-l-i-t-z-B-O-T","Backlink-Ceck","backlink-check","BacklinkHttpStatus","BackStreet","BackWeb","Bad-Neighborhood","Badass","baidu\\.com","Bandit","basicstate","BatchFTP","Battleztar Bazinga","baypup\\\/","BazQux","BBBike","BCKLINKS","BDFetch","BegunAdvertising","Bidtellect","BigBozz","Bigfoot","biglotron","BingLocalSearch","BingPreview","binlar","biNu image cacher","Bitacle","biz_Directory","Black Hole","Blackboard Safeassign","BlackWidow","BlockNote\\.Net","Bloglines","Bloglovin","BlogPulseLive","BlogSearch","Blogtrottr","BlowFish","boitho\\.com-dc","BPImageWalker","Braintree-Webhooks","Branch Metrics API","Branch-Passthrough","Brandprotect","BrandVerity","Brandwatch","Brodie\\\/","Browsershots","BUbiNG","Buck\\\/","Buddy","BuiltWith","Bullseye","BunnySlippers","Burf Search","Butterfly\\\/","BuzzSumo","CAAM\\\/[0-9]","CakePHP","Calculon","Canary%20Mail","CaretNail","catexplorador","CC Metadata Scaper","Cegbfeieh","censys","Cerberian Drtrs","CERT\\.at-Statistics-Survey","cg-eye","changedetection","ChangesMeter","Charlotte","CheckHost","checkprivacy","CherryPicker","ChinaClaw","Chirp\\\/","chkme\\.com","Chlooe","Chromaxa","CirrusExplorer","CISPA Vulnerability Notification","Citoid","CJNetworkQuality","Clarsentia","clips\\.ua\\.ac\\.be","Cloud mapping","CloudEndure","CloudFlare-AlwaysOnline","Cloudinary","cmcm\\.com","coccoc","cognitiveseo","colly 
-","CommaFeed","Commons-HttpClient","commonscan","contactbigdatafr","contentkingapp","convera","CookieReports","copyright sheriff","CopyRightCheck","Copyscape","Cosmos4j\\.feedback","Covario-IDS","Crescent","Crowsnest","Criteo","CSHttp","curb","Curious George","curl","cuwhois\\\/","cybo\\.com","DAP\\\/NetHTTP","DareBoost","DatabaseDriverMysqli","DataCha0s","Datafeedwatch","Datanyze","DataparkSearch","dataprovider","DataXu","Daum(oa)?[ \\\/][0-9]","Demon","DeuSu","developers\\.google\\.com\\\/\\+\\\/web\\\/snippet\\\/","Devil","Digg","Digincore","DigitalPebble","Dirbuster","Discourse Forum Onebox","Disqus\\\/","Dispatch\\\/","DittoSpyder","dlvr","DMBrowser","DNSPod-reporting","docoloc","Dolphin http client","DomainAppender","Donuts Content Explorer","dotMailer content retrieval","dotSemantic","downforeveryoneorjustme","Download Wonder","downnotifier","DowntimeDetector","Drip","drupact","Drupal \\(\\+http:\\\/\\\/drupal\\.org\\\/\\)","DTS Agent","dubaiindex","EARTHCOM","Easy-Thumb","EasyDL","Ebingbong","ec2linkfinder","eCairn-Grabber","eCatch","ECCP","eContext\\\/","Ecxi","EirGrabber","ElectricMonk","elefent","EMail Exractor","EMail Wolf","EmailWolf","Embarcadero","Embed PHP Library","Embedly","endo\\\/","europarchive\\.org","evc-batch","EventMachine HttpClient","Everwall Link Expander","Evidon","Evrinid","ExactSearch","ExaleadCloudview","Excel\\\/","exif","Exploratodo","Express WebPictures","Extreme Picture Finder","EyeNetIE","ezooms","facebookexternalhit","facebookplatform","fairshare","Faraday v","fasthttp","Faveeo","Favicon downloader","faviconkit","faviconarchive","FavOrg","Feed Wrangler","Feedable\\\/","Feedbin","FeedBooster","FeedBucket","FeedBunch\\\/","FeedBurner","feeder","Feedly","FeedshowOnline","Feedspot","Feedwind\\\/","FeedZcollector","feeltiptop","Fetch 
API","Fetch\\\/[0-9]","Fever\\\/[0-9]","FHscan","Fimap","findlink","findthatfile","FlashGet","FlipboardBrowserProxy","FlipboardProxy","FlipboardRSS","Flock\\\/","fluffy","Flunky","flynxapp","forensiq","FoundSeoTool","http:\\\/\\\/www.neomo.de\\\/","free thumbnails","Freeuploader","Funnelback","G-i-g-a-b-o-t","g00g1e\\.net","ganarvisitas","geek-tools","Genieo","GentleSource","GetCode","Getintent","GetLinkInfo","getprismatic","GetRight","getroot","GetURLInfo\\\/","GetWeb","Ghost Inspector","GigablastOpenSource","GIS-LABS","github-camo","github\\.com","Go [\\d\\.]* package http","Go http package","Go-Ahead-Got-It","Go-http-client","Go!Zilla","gobyus","gofetch","GomezAgent","gooblog","Goodzer\\\/","Google AppsViewer","Google Desktop","Google favicon","Google Keyword Suggestion","Google Keyword Tool","Google Page Speed Insights","Google PP Default","Google Search Console","Google Web Preview","Google-Adwords","Google-Apps-Script","Google-Calendar-Importer","Google-HotelAdsVerifier","Google-HTTP-Java-Client","Google-Publisher-Plugin","Google-SearchByImage","Google-Site-Verification","Google-Structured-Data-Testing-Tool","Google-Youtube-Links","google-xrawler","GoogleDocs","GoogleHC\\\/","GoogleProducer","GoogleSites","Google-Transparency-Report","Gookey","GoScraper","GoSpotCheck","gosquared-thumbnailer","Gotit","GoZilla","grabify","GrabNet","Grafula","Grammarly","GrapeFX","GreatNews","Gregarius","GRequests","grokkit","grouphigh","grub-client","gSOAP\\\/","GT::WWW","GTmetrix","GuzzleHttp","gvfs\\\/","HAA(A)?RTLAND http client","Haansoft","hackney\\\/","Hadi Agent","HappyApps-WebCheck","Hatena","Havij","HeadlessChrome","HEADMasterSEO","HeartRails_Capture","help@dataminr\\.com","heritrix","historious","hkedcity","hledejLevne\\.cz","Hloader","HMView","Holmes","HonesoSearchEngine","HootSuite Image proxy","Hootsuite-WebFeed","hosterstats","HostTracker","ht:\\\/\\\/check","htdig","HTMLparser","htmlyse","HTTP Banner 
Detection","HTTP_Compression_Test","http_request2","http_requester","http-get","HTTP-Header-Abfrage","http-kit","http-request\\\/","HTTP-Tiny","HTTP::Lite","http\\.rb\\\/","http_get","HttpComponents","httphr","HTTPMon","httpRequest","httpscheck","httpssites_power","httpunit","HttpUrlConnection","httrack","huaweisymantec","HubSpot ","Humanlinks","i2kconnect\\\/","Iblog","ichiro","Id-search","IdeelaborPlagiaat","IDG Twitter Links Resolver","IDwhois\\\/","Iframely","igdeSpyder","IlTrovatore","Image Fetch","Image Sucker","ImageEngine\\\/","ImageVisu\\\/","Imagga","imagineeasy","imgsizer","InAGist","inbound\\.li parser","InDesign%20CC","Indy Library","InetURL","infegy","infohelfer","InfoTekies","InfoWizards Reciprocal Link","inpwrd\\.com","instabid","Instapaper","Integrity","integromedb","Intelliseek","InterGET","internet_archive","Internet Ninja","InternetSeer","internetVista monitor","intraVnews","IODC","IOI","iplabel","ips-agent","IPS\\\/[0-9]","IPWorks HTTP\\\/S Component","iqdb\\\/","Iria","Irokez","isitup\\.org","iskanie","isUp\\.li","iThemes Sync\\\/","iZSearch","JAHHO","janforman","Jaunt\\\/","Jbrofuzz","Jersey\\\/","JetCar","Jigsaw","Jobboerse","JobFeed discovery","Jobg8 URL Monitor","jobo","Jobrapido","Jobsearch1\\.5","JoinVision Generic","JolokiaPwn","Joomla","Jorgee","JS-Kit","JustView","Kaspersky Lab CFR link resolver","Kelny\\\/","Kerrigan\\\/","KeyCDN","Keyword Density","Keywords Research","KickFire","KimonoLabs\\\/","Kml-Google","knows\\.is","KOCMOHABT","kouio","kube-probe","kulturarw3","KumKie","L\\.webis","Larbin","Lavf\\\/","LeechFTP","LeechGet","letsencrypt","Lftp","LibVLC","LibWeb","Libwhisker","libwww","Licorne","Liferea\\\/","Lightspeedsystems","Lighthouse","Likse","Link Valet","link_thumbnailer","LinkAlarm\\\/","linkCheck","linkdex","LinkExaminer","linkfluence","linkpeek","LinkPreviewGenerator","LinkScan","LinksManager","LinkTiger","LinkWalker","Lipperhey","Litemage_walker","livedoor ScreenShot","LoadImpactRload","localsearch-web","LongURL 
API","looksystems\\.net","ltx71","lua-resty-http","lwp-request","lwp-trivial","LWP::Simple","lycos","LYT\\.SR","mabontland","Mag-Net","MagpieRSS","Mail\\.Ru","MailChimp","Majestic12","makecontact\\\/","Mandrill","MapperCmd","marketinggrader","MarkMonitor","MarkWatch","Mass Downloader","masscan\\\/","Mata Hari","Mediapartners-Google","mediawords","MegaIndex\\.ru","MeltwaterNews","Melvil Rawi","MemGator","Metaspinner","MetaURI","MFC_Tear_Sample","Microsearch","Microsoft Office ","Microsoft Outlook","Microsoft Windows Network Diagnostics","Microsoft-WebDAV-MiniRedir","Microsoft Data Access","MIDown tool","MIIxpc","Mindjet","Miniature\\.io","Miniflux","Mister PiX","mixdata dot com","mixed-content-scan","Mixmax-LinkPreview","mixnode","Mnogosearch","mogimogi","Mojeek","Mojolicious \\(Perl\\)","Monit\\\/","monitis","Monitority\\\/","montastic","MonTools","Moreover","Morfeus Fucking Scanner","Morning Paper","MovableType","mowser","Mrcgiguy","MS Web Services Client Protocol","MSFrontPage","mShots","MuckRack\\\/","muhstik-scan","MVAClient","MxToolbox\\\/","nagios","Najdi\\.si","Name Intelligence","Nameprotect","Navroad","NearSite","Needle","Nessus","Net Vampire","NetAnts","NETCRAFT","NetLyzer","NetMechanic","NetNewsWire","Netpursual","netresearch","NetShelter ContentScan","Netsparker","NetTrack","Netvibes","NetZIP","Neustar WPM","NeutrinoAPI","NewRelicPinger","NewsBlur .*Finder","NewsGator","newsme","newspaper\\\/","Nexgate Ruby Client","NG-Search","Nibbler","NICErsPRO","Nikto","nineconnections","NLNZ_IAHarvester","Nmap Scripting Engine","node-superagent","node-urllib","node\\.io","Nodemeter","NodePing","nominet\\.org\\.uk","nominet\\.uk","Norton-Safeweb","Notifixious","notifyninja","nuhk","nutch","Nuzzel","nWormFeedFinder","nyawc\\\/","Nymesis","NYU","Ocelli\\\/","Octopus","oegp","Offline Explorer","Offline Navigator","og-scraper","okhttp","omgili","OMSC","Online Domain 
Tools","OpenCalaisSemanticProxy","Openfind","OpenLinkProfiler","Openstat\\\/","OpenVAS","Optimizer","Orbiter","OrgProbe\\\/","orion-semantics","Outlook-Express","Outlook-iOS","ow\\.ly","Owler","ownCloud News","OxfordCloudService","Page Valet","page_verifier","page scorer","page2rss","PageGrabber","PagePeeker","PageScorer","Pagespeed\\\/","Panopta","panscient","Papa Foto","parsijoo","Pavuk","PayPal IPN","pcBrowser","Pcore-HTTP","Pearltrees","PECL::HTTP","peerindex","Peew","PeoplePal","Perlu -","PhantomJS Screenshoter","PhantomJS\\\/","Photon\\\/","phpservermon","Pi-Monster","Picscout","Picsearch","PictureFinder","Pimonster","ping\\.blo\\.gs","Pingability","PingAdmin\\.Ru","Pingdom","Pingoscope","PingSpot","pinterest\\.com","Pixray","Pizilla","Plagger\\\/","Ploetz \\+ Zeller","Plukkie","plumanalytics","PocketImageCache","PocketParser","Pockey","POE-Component-Client-HTTP","Polymail\\\/","Pompos","Porkbun","Port Monitor","postano","PostmanRuntime","PostPost","postrank","PowerPoint\\\/","Priceonomics Analysis Engine","PrintFriendly","PritTorrent","Prlog","probethenet","Project 25499","prospectb2b","Protopage","ProWebWalker","proximic","PRTG Network Monitor","pshtt, https scanning","PTST ","PTST\\\/[0-9]+","Pulsepoint XT3 web scraper","Pump","Python-httplib2","python-requests","Python-urllib","Qirina Hurdler","QQDownload","QrafterPro","Qseero","Qualidator","QueryN Metasearch","queuedriver","Quora Link Preview","Qwantify","Radian6","RankActive","RankFlex","RankSonicSiteAuditor","Re-re Studio","ReactorNetty","Readability","RealDownload","RealPlayer%20Downloader","RebelMouse","Recorder","RecurPost\\\/","redback\\\/","ReederForMac","ReGet","RepoMonkey","request\\.js","reqwest\\\/","ResponseCodeTest","RestSharp","Riddler","Rival IQ","Robosourcer","Robozilla","ROI 
Hunter","RPT-HTTPClient","RSSOwl","safe-agent-scanner","SalesIntelligent","Saleslift","Sendsay\\.Ru","SauceNAO","SBIder","scalaj-http","scan\\.lol","ScanAlert","Scoop","scooter","ScoutJet","ScoutURLMonitor","ScrapeBox Page Scanner","SimpleScraper","Scrapy","Screaming","ScreenShotService","Scrubby","Scrutiny\\\/","search\\.thunderstone","Search37","searchenginepromotionhelp","Searchestate","SearchExpress","SearchSight","Seeker","semanticdiscovery","semanticjuice","Semiocast HTTP client","Semrush","sentry\\\/","SEO Browser","Seo Servis","seo-nastroj\\.cz","seo4ajax","Seobility","SEOCentro","SeoCheck","SEOkicks","Seomoz","SEOprofiler","SEOsearch","seoscanners","seositecheckup","SEOstats","servernfo","sexsearcher","Seznam","Shelob","Shodan","Shoppimon","ShopWiki","ShortLinkTranslate","shrinktheweb","Sideqik","SimplePie","SimplyFast","Siphon","SISTRIX","Site-Shot\\\/","Site Sucker","Site24x7","SiteBar","Sitebeam","Sitebulb\\\/","SiteCondor","SiteExplorer","SiteGuardian","Siteimprove","SiteIndexed","Sitemap(s)? 
Generator","SitemapGenerator","SiteMonitor","Siteshooter B0t","SiteSnagger","SiteSucker","SiteTruth","Sitevigil","sitexy\\.com","SkypeUriPreview","Slack\\\/","slider\\.com","slurp","SlySearch","SmartDownload","SMRF URL Expander","SMUrlExpander","Snake","Snappy","SnapSearch","Snarfer\\\/","SniffRSS","sniptracker","Snoopy","SnowHaze Search","sogou web","SortSite","Sottopop","sovereign\\.ai","SpaceBison","SpamExperts","Spammen","Spanner","spaziodati","SPDYCheck","Specificfeeds","speedy","SPEng","Spinn3r","spray-can","Sprinklr ","spyonweb","sqlmap","Sqlworm","Sqworm","SSL Labs","ssl-tools","StackRambler","Statastico\\\/","StatusCake","Steeler","Stratagems Kumo","Stroke\\.cz","StudioFACA","StumbleUpon","suchen","Sucuri","summify","SuperHTTP","Surphace Scout","Suzuran","SwiteScraper","Symfony BrowserKit","Symfony2 BrowserKit","SynHttpClient-Built","Sysomos","sysscan","Szukacz","T0PHackTeam","tAkeOut","Tarantula\\\/","Taringa UGC","TarmotGezgin","Teleport","Telesoft","Telesphoreo","Telesphorep","Tenon\\.io","teoma","terrainformatica","Test Certificate Info","testuri","Tetrahedron","The Drop Reaper","The Expert HTML Source Viewer","The Knowledge AI","The Intraformant","theinternetrules","TheNomad","Thinklab","Thumbshots","ThumbSniper","timewe\\.net","TinEye","Tiny Tiny RSS","TLSProbe\\\/","Toata","topster","touche\\.com","Traackr\\.com","tracemyfile","Trackuity","TrapitAgent","Trendiction","Trendsmap","trendspottr","truwoGPS","TryJsoup","TulipChain","Turingos","Turnitin","tweetedtimes","Tweetminster","Tweezler\\\/","twibble","Twice","Twikle","Twingly","Twisted PageGetter","Typhoeus","ubermetrics-technologies","uclassify","UdmSearch","unchaos","unirest-java","UniversalFeedParser","Unshorten\\.It","Untiny","UnwindFetchor","updated","updown\\.io daemon","Upflow","Uptimia","Urlcheckr","URL Verifier","URLitor","urlresolver","Urlstat","URLTester","UrlTrends Ranking Updater","URLy Warning","URLy\\.Warning","Vacuum","Vagabondo","VB Project","vBSEO","VCI","via ggpht\\.com 
GoogleImageProxy","VidibleScraper","Virusdie","visionutils","vkShare","VoidEYE","Voil","voltron","voyager\\\/","VSAgent\\\/","VSB-TUO\\\/","Vulnbusters Meter","VYU2","w3af\\.org","W3C_Unicorn","W3C-checklink","W3C-mobileOK","WAC-OFU","Wallpapers\\\/[0-9]+","WallpapersHD","wangling","Wappalyzer","WatchMouse","WbSrch\\\/","WDT\\.io","web-capture\\.net","Web-sniffer","Web Auto","Web Collage","Web Enhancer","Web Fetch","Web Fuck","Web Pix","Web Sauger","Web Sucker","Webalta","Webauskunft","WebAuto","WebCapture","WebClient\\\/","webcollage","WebCookies","WebCopier","WebCorp","WebDataStats","WebDoc","WebEnhancer","WebFetch","WebFuck","WebGazer","WebGo IS","WebImageCollector","WebImages","WebIndex","webkit2png","WebLeacher","webmastercoffee","webmon ","WebPix","WebReaper","WebSauger","webscreenie","Webshag","Webshot","Website Quester","websitepulse agent","WebsiteQuester","Websnapr","WebSniffer","Webster","WebStripper","WebSucker","Webthumb\\\/","WebThumbnail","WebWhacker","WebZIP","WeLikeLinks","WEPA","WeSEE","wf84","Wfuzz\\\/","wget","WhatsApp","WhatsMyIP","WhatWeb","WhereGoes\\?","Whibse","WhoRunsCoinHive","Whynder Magnet","Windows-RSS-Platform","WinPodder","wkhtmlto","wmtips","Woko","woorankreview","Word\\\/","WordPress\\\/","WordupinfoSearch","wotbox","WP Engine Install Performance API","wpif","wprecon\\.com survey","WPScan","wscheck","Wtrace","WWW-Collector-E","WWW-Mechanize","WWW::Document","WWW::Mechanize","www\\.monitor\\.us","WWWOFFLE","x09Mozilla","x22Mozilla","XaxisSemanticsClassifier","Xenu Link Sleuth","XING-contenttabreceiver","xpymep([0-9]?)\\.exe","Y!J-(ASR|BSC)","Y\\!J-BRW","Yaanb","yacy","Yahoo Link Preview","YahooCacheSystem","YahooYSMcm","YandeG","Yandex(?!Search)","yanga","yeti","Yo-yo","Yoleo Consumer","yoogliFetchAgent","YottaaMonitor","Your-Website-Sucks","yourls\\.org","YoYs\\.net","YP\\.PL","Zabbix","Zade","Zao","Zauba","Zemanta Aggregator","Zend_Http_Client","Zend\\\\Http\\\\Client","Zermelo","Zeus 
","zgrab","ZnajdzFoto","Zombie\\.js","Zoom\\.Mac","ZyBorg","[a-z0-9\\-_]*(bot|crawl|archiver|transcoder|spider|uptime|validator|fetcher|cron|checker|reader|extractor|monitoring|analyzer)"] \ No newline at end of file diff --git a/blockbot/vendor/jaybizzle/crawler-detect/raw/Crawlers.txt b/blockbot/vendor/jaybizzle/crawler-detect/raw/Crawlers.txt new file mode 100644 index 000000000..1522796e9 --- /dev/null +++ b/blockbot/vendor/jaybizzle/crawler-detect/raw/Crawlers.txt @@ -0,0 +1,1217 @@ +.*Java.*outbrain + YLT +^b0t$ +^bluefish +^Calypso v\/ +^COMODO DCV +^DangDang +^DavClnt +^FDM +^git\/ +^Goose\/ +^Grabber +^HTTPClient\/ +^Java\/ +^Jeode\/ +^Jetty\/ +^Mail\/ +^Mget +^Microsoft URL Control +^NG\/[0-9\.] +^NING\/ +^PHP\/[0-9] +^RMA\/ +^Ruby|Ruby\/[0-9] +^VSE\/[0-9] +^WordPress\.com +^XRL\/[0-9] +^ZmEu +008\/ +13TABS +192\.comAgent +2ip\.ru +404enemy +7Siters +80legs +a\.pr-cy\.ru +a3logics\.in +A6-Indexer +Abonti +Aboundex +aboutthedomain +Accoona-AI-Agent +acoon +acrylicapps\.com\/pulp +Acunetix +AdAuth\/ +adbeat +AddThis +ADmantX +AdminLabs +adressendeutschland +adscanner +Adstxtaggregator +agentslug +AHC +aihit +aiohttp\/ +Airmail +akka-http\/ +akula\/ +alertra +alexa site audit +Alibaba\.Security\.Heimdall +Alligator +allloadin +AllSubmitter +alyze\.info +amagit +Anarchie +AndroidDownloadManager +Anemone +AngleSharp +annotate_google +Ant\.com +Anturis Agent +AnyEvent-HTTP\/ +Apache Droid +Apache OpenOffice +Apache-HttpAsyncClient +Apache-HttpClient +ApacheBench +Apexoo +APIs-Google +AportWorm\/ +AppBeat\/ +AppEngine-Google +AppStoreScraperZ +Aprc\/[0-9] +Arachmo +arachnode +Arachnophilia +aria2 +Arukereso +asafaweb +AskQuickly +Ask Jeeves +ASPSeek +Asterias +Astute +asynchttp +Attach +autocite +Autonomy +axios\/ +B-l-i-t-z-B-O-T +Backlink-Ceck +backlink-check +BacklinkHttpStatus +BackStreet +BackWeb +Bad-Neighborhood +Badass +baidu\.com +Bandit +basicstate +BatchFTP +Battleztar Bazinga +baypup\/ +BazQux +BBBike +BCKLINKS +BDFetch +BegunAdvertising +Bidtellect 
+BigBozz +Bigfoot +biglotron +BingLocalSearch +BingPreview +binlar +biNu image cacher +Bitacle +biz_Directory +Black Hole +Blackboard Safeassign +BlackWidow +BlockNote\.Net +Bloglines +Bloglovin +BlogPulseLive +BlogSearch +Blogtrottr +BlowFish +boitho\.com-dc +BPImageWalker +Braintree-Webhooks +Branch Metrics API +Branch-Passthrough +Brandprotect +BrandVerity +Brandwatch +Brodie\/ +Browsershots +BUbiNG +Buck\/ +Buddy +BuiltWith +Bullseye +BunnySlippers +Burf Search +Butterfly\/ +BuzzSumo +CAAM\/[0-9] +CakePHP +Calculon +Canary%20Mail +CaretNail +catexplorador +CC Metadata Scaper +Cegbfeieh +censys +Cerberian Drtrs +CERT\.at-Statistics-Survey +cg-eye +changedetection +ChangesMeter +Charlotte +CheckHost +checkprivacy +CherryPicker +ChinaClaw +Chirp\/ +chkme\.com +Chlooe +Chromaxa +CirrusExplorer +CISPA Vulnerability Notification +Citoid +CJNetworkQuality +Clarsentia +clips\.ua\.ac\.be +Cloud mapping +CloudEndure +CloudFlare-AlwaysOnline +Cloudinary +cmcm\.com +coccoc +cognitiveseo +colly - +CommaFeed +Commons-HttpClient +commonscan +contactbigdatafr +contentkingapp +convera +CookieReports +copyright sheriff +CopyRightCheck +Copyscape +Cosmos4j\.feedback +Covario-IDS +Crescent +Crowsnest +Criteo +CSHttp +curb +Curious George +curl +cuwhois\/ +cybo\.com +DAP\/NetHTTP +DareBoost +DatabaseDriverMysqli +DataCha0s +Datafeedwatch +Datanyze +DataparkSearch +dataprovider +DataXu +Daum(oa)?[ \/][0-9] +Demon +DeuSu +developers\.google\.com\/\+\/web\/snippet\/ +Devil +Digg +Digincore +DigitalPebble +Dirbuster +Discourse Forum Onebox +Disqus\/ +Dispatch\/ +DittoSpyder +dlvr +DMBrowser +DNSPod-reporting +docoloc +Dolphin http client +DomainAppender +Donuts Content Explorer +dotMailer content retrieval +dotSemantic +downforeveryoneorjustme +Download Wonder +downnotifier +DowntimeDetector +Drip +drupact +Drupal \(\+http:\/\/drupal\.org\/\) +DTS Agent +dubaiindex +EARTHCOM +Easy-Thumb +EasyDL +Ebingbong +ec2linkfinder +eCairn-Grabber +eCatch +ECCP +eContext\/ +Ecxi +EirGrabber 
+ElectricMonk +elefent +EMail Exractor +EMail Wolf +EmailWolf +Embarcadero +Embed PHP Library +Embedly +endo\/ +europarchive\.org +evc-batch +EventMachine HttpClient +Everwall Link Expander +Evidon +Evrinid +ExactSearch +ExaleadCloudview +Excel\/ +exif +Exploratodo +Express WebPictures +Extreme Picture Finder +EyeNetIE +ezooms +facebookexternalhit +facebookplatform +fairshare +Faraday v +fasthttp +Faveeo +Favicon downloader +faviconkit +faviconarchive +FavOrg +Feed Wrangler +Feedable\/ +Feedbin +FeedBooster +FeedBucket +FeedBunch\/ +FeedBurner +feeder +Feedly +FeedshowOnline +Feedspot +Feedwind\/ +FeedZcollector +feeltiptop +Fetch API +Fetch\/[0-9] +Fever\/[0-9] +FHscan +Fimap +findlink +findthatfile +FlashGet +FlipboardBrowserProxy +FlipboardProxy +FlipboardRSS +Flock\/ +fluffy +Flunky +flynxapp +forensiq +FoundSeoTool +http:\/\/www.neomo.de\/ +free thumbnails +Freeuploader +Funnelback +G-i-g-a-b-o-t +g00g1e\.net +ganarvisitas +geek-tools +Genieo +GentleSource +GetCode +Getintent +GetLinkInfo +getprismatic +GetRight +getroot +GetURLInfo\/ +GetWeb +Ghost Inspector +GigablastOpenSource +GIS-LABS +github-camo +github\.com +Go [\d\.]* package http +Go http package +Go-Ahead-Got-It +Go-http-client +Go!Zilla +gobyus +gofetch +GomezAgent +gooblog +Goodzer\/ +Google AppsViewer +Google Desktop +Google favicon +Google Keyword Suggestion +Google Keyword Tool +Google Page Speed Insights +Google PP Default +Google Search Console +Google Web Preview +Google-Adwords +Google-Apps-Script +Google-Calendar-Importer +Google-HotelAdsVerifier +Google-HTTP-Java-Client +Google-Publisher-Plugin +Google-SearchByImage +Google-Site-Verification +Google-Structured-Data-Testing-Tool +Google-Youtube-Links +google-xrawler +GoogleDocs +GoogleHC\/ +GoogleProducer +GoogleSites +Google-Transparency-Report +Gookey +GoScraper +GoSpotCheck +gosquared-thumbnailer +Gotit +GoZilla +grabify +GrabNet +Grafula +Grammarly +GrapeFX +GreatNews +Gregarius +GRequests +grokkit +grouphigh +grub-client +gSOAP\/ 
+GT::WWW +GTmetrix +GuzzleHttp +gvfs\/ +HAA(A)?RTLAND http client +Haansoft +hackney\/ +Hadi Agent +HappyApps-WebCheck +Hatena +Havij +HeadlessChrome +HEADMasterSEO +HeartRails_Capture +help@dataminr\.com +heritrix +historious +hkedcity +hledejLevne\.cz +Hloader +HMView +Holmes +HonesoSearchEngine +HootSuite Image proxy +Hootsuite-WebFeed +hosterstats +HostTracker +ht:\/\/check +htdig +HTMLparser +htmlyse +HTTP Banner Detection +HTTP_Compression_Test +http_request2 +http_requester +http-get +HTTP-Header-Abfrage +http-kit +http-request\/ +HTTP-Tiny +HTTP::Lite +http\.rb\/ +http_get +HttpComponents +httphr +HTTPMon +httpRequest +httpscheck +httpssites_power +httpunit +HttpUrlConnection +httrack +huaweisymantec +HubSpot +Humanlinks +i2kconnect\/ +Iblog +ichiro +Id-search +IdeelaborPlagiaat +IDG Twitter Links Resolver +IDwhois\/ +Iframely +igdeSpyder +IlTrovatore +Image Fetch +Image Sucker +ImageEngine\/ +ImageVisu\/ +Imagga +imagineeasy +imgsizer +InAGist +inbound\.li parser +InDesign%20CC +Indy Library +InetURL +infegy +infohelfer +InfoTekies +InfoWizards Reciprocal Link +inpwrd\.com +instabid +Instapaper +Integrity +integromedb +Intelliseek +InterGET +internet_archive +Internet Ninja +InternetSeer +internetVista monitor +intraVnews +IODC +IOI +iplabel +ips-agent +IPS\/[0-9] +IPWorks HTTP\/S Component +iqdb\/ +Iria +Irokez +isitup\.org +iskanie +isUp\.li +iThemes Sync\/ +iZSearch +JAHHO +janforman +Jaunt\/ +Jbrofuzz +Jersey\/ +JetCar +Jigsaw +Jobboerse +JobFeed discovery +Jobg8 URL Monitor +jobo +Jobrapido +Jobsearch1\.5 +JoinVision Generic +JolokiaPwn +Joomla +Jorgee +JS-Kit +JustView +Kaspersky Lab CFR link resolver +Kelny\/ +Kerrigan\/ +KeyCDN +Keyword Density +Keywords Research +KickFire +KimonoLabs\/ +Kml-Google +knows\.is +KOCMOHABT +kouio +kube-probe +kulturarw3 +KumKie +L\.webis +Larbin +Lavf\/ +LeechFTP +LeechGet +letsencrypt +Lftp +LibVLC +LibWeb +Libwhisker +libwww +Licorne +Liferea\/ +Lightspeedsystems +Lighthouse +Likse +Link Valet +link_thumbnailer 
+LinkAlarm\/ +linkCheck +linkdex +LinkExaminer +linkfluence +linkpeek +LinkPreviewGenerator +LinkScan +LinksManager +LinkTiger +LinkWalker +Lipperhey +Litemage_walker +livedoor ScreenShot +LoadImpactRload +localsearch-web +LongURL API +looksystems\.net +ltx71 +lua-resty-http +lwp-request +lwp-trivial +LWP::Simple +lycos +LYT\.SR +mabontland +Mag-Net +MagpieRSS +Mail\.Ru +MailChimp +Majestic12 +makecontact\/ +Mandrill +MapperCmd +marketinggrader +MarkMonitor +MarkWatch +Mass Downloader +masscan\/ +Mata Hari +Mediapartners-Google +mediawords +MegaIndex\.ru +MeltwaterNews +Melvil Rawi +MemGator +Metaspinner +MetaURI +MFC_Tear_Sample +Microsearch +Microsoft Office +Microsoft Outlook +Microsoft Windows Network Diagnostics +Microsoft-WebDAV-MiniRedir +Microsoft Data Access +MIDown tool +MIIxpc +Mindjet +Miniature\.io +Miniflux +Mister PiX +mixdata dot com +mixed-content-scan +Mixmax-LinkPreview +mixnode +Mnogosearch +mogimogi +Mojeek +Mojolicious \(Perl\) +Monit\/ +monitis +Monitority\/ +montastic +MonTools +Moreover +Morfeus Fucking Scanner +Morning Paper +MovableType +mowser +Mrcgiguy +MS Web Services Client Protocol +MSFrontPage +mShots +MuckRack\/ +muhstik-scan +MVAClient +MxToolbox\/ +nagios +Najdi\.si +Name Intelligence +Nameprotect +Navroad +NearSite +Needle +Nessus +Net Vampire +NetAnts +NETCRAFT +NetLyzer +NetMechanic +NetNewsWire +Netpursual +netresearch +NetShelter ContentScan +Netsparker +NetTrack +Netvibes +NetZIP +Neustar WPM +NeutrinoAPI +NewRelicPinger +NewsBlur .*Finder +NewsGator +newsme +newspaper\/ +Nexgate Ruby Client +NG-Search +Nibbler +NICErsPRO +Nikto +nineconnections +NLNZ_IAHarvester +Nmap Scripting Engine +node-superagent +node-urllib +node\.io +Nodemeter +NodePing +nominet\.org\.uk +nominet\.uk +Norton-Safeweb +Notifixious +notifyninja +nuhk +nutch +Nuzzel +nWormFeedFinder +nyawc\/ +Nymesis +NYU +Ocelli\/ +Octopus +oegp +Offline Explorer +Offline Navigator +og-scraper +okhttp +omgili +OMSC +Online Domain Tools +OpenCalaisSemanticProxy 
+Openfind +OpenLinkProfiler +Openstat\/ +OpenVAS +Optimizer +Orbiter +OrgProbe\/ +orion-semantics +Outlook-Express +Outlook-iOS +ow\.ly +Owler +ownCloud News +OxfordCloudService +Page Valet +page_verifier +page scorer +page2rss +PageGrabber +PagePeeker +PageScorer +Pagespeed\/ +Panopta +panscient +Papa Foto +parsijoo +Pavuk +PayPal IPN +pcBrowser +Pcore-HTTP +Pearltrees +PECL::HTTP +peerindex +Peew +PeoplePal +Perlu - +PhantomJS Screenshoter +PhantomJS\/ +Photon\/ +phpservermon +Pi-Monster +Picscout +Picsearch +PictureFinder +Pimonster +ping\.blo\.gs +Pingability +PingAdmin\.Ru +Pingdom +Pingoscope +PingSpot +pinterest\.com +Pixray +Pizilla +Plagger\/ +Ploetz \+ Zeller +Plukkie +plumanalytics +PocketImageCache +PocketParser +Pockey +POE-Component-Client-HTTP +Polymail\/ +Pompos +Porkbun +Port Monitor +postano +PostmanRuntime +PostPost +postrank +PowerPoint\/ +Priceonomics Analysis Engine +PrintFriendly +PritTorrent +Prlog +probethenet +Project 25499 +prospectb2b +Protopage +ProWebWalker +proximic +PRTG Network Monitor +pshtt, https scanning +PTST +PTST\/[0-9]+ +Pulsepoint XT3 web scraper +Pump +Python-httplib2 +python-requests +Python-urllib +Qirina Hurdler +QQDownload +QrafterPro +Qseero +Qualidator +QueryN Metasearch +queuedriver +Quora Link Preview +Qwantify +Radian6 +RankActive +RankFlex +RankSonicSiteAuditor +Re-re Studio +ReactorNetty +Readability +RealDownload +RealPlayer%20Downloader +RebelMouse +Recorder +RecurPost\/ +redback\/ +ReederForMac +ReGet +RepoMonkey +request\.js +reqwest\/ +ResponseCodeTest +RestSharp +Riddler +Rival IQ +Robosourcer +Robozilla +ROI Hunter +RPT-HTTPClient +RSSOwl +safe-agent-scanner +SalesIntelligent +Saleslift +Sendsay\.Ru +SauceNAO +SBIder +scalaj-http +scan\.lol +ScanAlert +Scoop +scooter +ScoutJet +ScoutURLMonitor +ScrapeBox Page Scanner +SimpleScraper +Scrapy +Screaming +ScreenShotService +Scrubby +Scrutiny\/ +search\.thunderstone +Search37 +searchenginepromotionhelp +Searchestate +SearchExpress +SearchSight +Seeker 
+semanticdiscovery +semanticjuice +Semiocast HTTP client +Semrush +sentry\/ +SEO Browser +Seo Servis +seo-nastroj\.cz +seo4ajax +Seobility +SEOCentro +SeoCheck +SEOkicks +Seomoz +SEOprofiler +SEOsearch +seoscanners +seositecheckup +SEOstats +servernfo +sexsearcher +Seznam +Shelob +Shodan +Shoppimon +ShopWiki +ShortLinkTranslate +shrinktheweb +Sideqik +SimplePie +SimplyFast +Siphon +SISTRIX +Site-Shot\/ +Site Sucker +Site24x7 +SiteBar +Sitebeam +Sitebulb\/ +SiteCondor +SiteExplorer +SiteGuardian +Siteimprove +SiteIndexed +Sitemap(s)? Generator +SitemapGenerator +SiteMonitor +Siteshooter B0t +SiteSnagger +SiteSucker +SiteTruth +Sitevigil +sitexy\.com +SkypeUriPreview +Slack\/ +slider\.com +slurp +SlySearch +SmartDownload +SMRF URL Expander +SMUrlExpander +Snake +Snappy +SnapSearch +Snarfer\/ +SniffRSS +sniptracker +Snoopy +SnowHaze Search +sogou web +SortSite +Sottopop +sovereign\.ai +SpaceBison +SpamExperts +Spammen +Spanner +spaziodati +SPDYCheck +Specificfeeds +speedy +SPEng +Spinn3r +spray-can +Sprinklr +spyonweb +sqlmap +Sqlworm +Sqworm +SSL Labs +ssl-tools +StackRambler +Statastico\/ +StatusCake +Steeler +Stratagems Kumo +Stroke\.cz +StudioFACA +StumbleUpon +suchen +Sucuri +summify +SuperHTTP +Surphace Scout +Suzuran +SwiteScraper +Symfony BrowserKit +Symfony2 BrowserKit +SynHttpClient-Built +Sysomos +sysscan +Szukacz +T0PHackTeam +tAkeOut +Tarantula\/ +Taringa UGC +TarmotGezgin +Teleport +Telesoft +Telesphoreo +Telesphorep +Tenon\.io +teoma +terrainformatica +Test Certificate Info +testuri +Tetrahedron +The Drop Reaper +The Expert HTML Source Viewer +The Knowledge AI +The Intraformant +theinternetrules +TheNomad +Thinklab +Thumbshots +ThumbSniper +timewe\.net +TinEye +Tiny Tiny RSS +TLSProbe\/ +Toata +topster +touche\.com +Traackr\.com +tracemyfile +Trackuity +TrapitAgent +Trendiction +Trendsmap +trendspottr +truwoGPS +TryJsoup +TulipChain +Turingos +Turnitin +tweetedtimes +Tweetminster +Tweezler\/ +twibble +Twice +Twikle +Twingly +Twisted PageGetter +Typhoeus 
+ubermetrics-technologies +uclassify +UdmSearch +unchaos +unirest-java +UniversalFeedParser +Unshorten\.It +Untiny +UnwindFetchor +updated +updown\.io daemon +Upflow +Uptimia +Urlcheckr +URL Verifier +URLitor +urlresolver +Urlstat +URLTester +UrlTrends Ranking Updater +URLy Warning +URLy\.Warning +Vacuum +Vagabondo +VB Project +vBSEO +VCI +via ggpht\.com GoogleImageProxy +VidibleScraper +Virusdie +visionutils +vkShare +VoidEYE +Voil +voltron +voyager\/ +VSAgent\/ +VSB-TUO\/ +Vulnbusters Meter +VYU2 +w3af\.org +W3C_Unicorn +W3C-checklink +W3C-mobileOK +WAC-OFU +Wallpapers\/[0-9]+ +WallpapersHD +wangling +Wappalyzer +WatchMouse +WbSrch\/ +WDT\.io +web-capture\.net +Web-sniffer +Web Auto +Web Collage +Web Enhancer +Web Fetch +Web Fuck +Web Pix +Web Sauger +Web Sucker +Webalta +Webauskunft +WebAuto +WebCapture +WebClient\/ +webcollage +WebCookies +WebCopier +WebCorp +WebDataStats +WebDoc +WebEnhancer +WebFetch +WebFuck +WebGazer +WebGo IS +WebImageCollector +WebImages +WebIndex +webkit2png +WebLeacher +webmastercoffee +webmon +WebPix +WebReaper +WebSauger +webscreenie +Webshag +Webshot +Website Quester +websitepulse agent +WebsiteQuester +Websnapr +WebSniffer +Webster +WebStripper +WebSucker +Webthumb\/ +WebThumbnail +WebWhacker +WebZIP +WeLikeLinks +WEPA +WeSEE +wf84 +Wfuzz\/ +wget +WhatsApp +WhatsMyIP +WhatWeb +WhereGoes\? 
+Whibse +WhoRunsCoinHive +Whynder Magnet +Windows-RSS-Platform +WinPodder +wkhtmlto +wmtips +Woko +woorankreview +Word\/ +WordPress\/ +WordupinfoSearch +wotbox +WP Engine Install Performance API +wpif +wprecon\.com survey +WPScan +wscheck +Wtrace +WWW-Collector-E +WWW-Mechanize +WWW::Document +WWW::Mechanize +www\.monitor\.us +WWWOFFLE +x09Mozilla +x22Mozilla +XaxisSemanticsClassifier +Xenu Link Sleuth +XING-contenttabreceiver +xpymep([0-9]?)\.exe +Y!J-(ASR|BSC) +Y\!J-BRW +Yaanb +yacy +Yahoo Link Preview +YahooCacheSystem +YahooYSMcm +YandeG +Yandex(?!Search) +yanga +yeti +Yo-yo +Yoleo Consumer +yoogliFetchAgent +YottaaMonitor +Your-Website-Sucks +yourls\.org +YoYs\.net +YP\.PL +Zabbix +Zade +Zao +Zauba +Zemanta Aggregator +Zend_Http_Client +Zend\\Http\\Client +Zermelo +Zeus +zgrab +ZnajdzFoto +Zombie\.js +Zoom\.Mac +ZyBorg +[a-z0-9\-_]*(bot|crawl|archiver|transcoder|spider|uptime|validator|fetcher|cron|checker|reader|extractor|monitoring|analyzer) \ No newline at end of file diff --git a/blockbot/vendor/jaybizzle/crawler-detect/raw/Exclusions.json b/blockbot/vendor/jaybizzle/crawler-detect/raw/Exclusions.json new file mode 100644 index 000000000..a18eb985e --- /dev/null +++ b/blockbot/vendor/jaybizzle/crawler-detect/raw/Exclusions.json @@ -0,0 +1 @@ +["Safari.[\\d\\.]*","Firefox.[\\d\\.]*"," Chrome.[\\d\\.]*","Chromium.[\\d\\.]*","MSIE.[\\d\\.]","Opera\\\/[\\d\\.]*","Mozilla.[\\d\\.]*","AppleWebKit.[\\d\\.]*","Trident.[\\d\\.]*","Windows NT.[\\d\\.]*","Android [\\d\\.]*","Macintosh.","Ubuntu","Linux","[ ]Intel","Mac OS X [\\d_]*","(like )?Gecko(.[\\d\\.]*)?","KHTML,","CriOS.[\\d\\.]*","CPU iPhone OS ([0-9_])* like Mac OS X","CPU OS ([0-9_])* like Mac OS X","iPod","compatible","x86_..","i686","x64","X11","rv:[\\d\\.]*","Version.[\\d\\.]*","WOW64","Win64","Dalvik.[\\d\\.]*"," \\.NET CLR [\\d\\.]*","Presto.[\\d\\.]*","Media Center PC","BlackBerry","Build","Opera Mini\\\/\\d{1,2}\\.\\d{1,2}\\.[\\d\\.]*\\\/\\d{1,2}\\.","Opera"," \\.NET[\\d\\.]*","cubot","; M bot","; 
CRONO","; B bot","; IDbot","; ID bot","; POWER BOT",";"] \ No newline at end of file diff --git a/blockbot/vendor/jaybizzle/crawler-detect/raw/Exclusions.txt b/blockbot/vendor/jaybizzle/crawler-detect/raw/Exclusions.txt new file mode 100644 index 000000000..da56db9bd --- /dev/null +++ b/blockbot/vendor/jaybizzle/crawler-detect/raw/Exclusions.txt @@ -0,0 +1,48 @@ +Safari.[\d\.]* +Firefox.[\d\.]* + Chrome.[\d\.]* +Chromium.[\d\.]* +MSIE.[\d\.] +Opera\/[\d\.]* +Mozilla.[\d\.]* +AppleWebKit.[\d\.]* +Trident.[\d\.]* +Windows NT.[\d\.]* +Android [\d\.]* +Macintosh. +Ubuntu +Linux +[ ]Intel +Mac OS X [\d_]* +(like )?Gecko(.[\d\.]*)? +KHTML, +CriOS.[\d\.]* +CPU iPhone OS ([0-9_])* like Mac OS X +CPU OS ([0-9_])* like Mac OS X +iPod +compatible +x86_.. +i686 +x64 +X11 +rv:[\d\.]* +Version.[\d\.]* +WOW64 +Win64 +Dalvik.[\d\.]* + \.NET CLR [\d\.]* +Presto.[\d\.]* +Media Center PC +BlackBerry +Build +Opera Mini\/\d{1,2}\.\d{1,2}\.[\d\.]*\/\d{1,2}\. +Opera + \.NET[\d\.]* +cubot +; M bot +; CRONO +; B bot +; IDbot +; ID bot +; POWER BOT +; \ No newline at end of file diff --git a/blockbot/vendor/jaybizzle/crawler-detect/raw/Headers.json b/blockbot/vendor/jaybizzle/crawler-detect/raw/Headers.json new file mode 100644 index 000000000..718f7f617 --- /dev/null +++ b/blockbot/vendor/jaybizzle/crawler-detect/raw/Headers.json @@ -0,0 +1 @@ +["HTTP_USER_AGENT","HTTP_X_OPERAMINI_PHONE_UA","HTTP_X_DEVICE_USER_AGENT","HTTP_X_ORIGINAL_USER_AGENT","HTTP_X_SKYFIRE_PHONE","HTTP_X_BOLT_PHONE_UA","HTTP_DEVICE_STOCK_UA","HTTP_X_UCBROWSER_DEVICE_UA","HTTP_FROM","HTTP_X_SCANNER"] \ No newline at end of file diff --git a/blockbot/vendor/jaybizzle/crawler-detect/raw/Headers.txt b/blockbot/vendor/jaybizzle/crawler-detect/raw/Headers.txt new file mode 100644 index 000000000..5e1ae3211 --- /dev/null +++ b/blockbot/vendor/jaybizzle/crawler-detect/raw/Headers.txt @@ -0,0 +1,10 @@ +HTTP_USER_AGENT +HTTP_X_OPERAMINI_PHONE_UA +HTTP_X_DEVICE_USER_AGENT +HTTP_X_ORIGINAL_USER_AGENT +HTTP_X_SKYFIRE_PHONE 
+HTTP_X_BOLT_PHONE_UA +HTTP_DEVICE_STOCK_UA +HTTP_X_UCBROWSER_DEVICE_UA +HTTP_FROM +HTTP_X_SCANNER \ No newline at end of file diff --git a/blockbot/vendor/jaybizzle/crawler-detect/src/CrawlerDetect.php b/blockbot/vendor/jaybizzle/crawler-detect/src/CrawlerDetect.php new file mode 100644 index 000000000..1067976be --- /dev/null +++ b/blockbot/vendor/jaybizzle/crawler-detect/src/CrawlerDetect.php @@ -0,0 +1,193 @@ + + * + * This source file is subject to the MIT license that is bundled + * with this source code in the file LICENSE. + */ + +namespace Jaybizzle\CrawlerDetect; + +use Jaybizzle\CrawlerDetect\Fixtures\Crawlers; +use Jaybizzle\CrawlerDetect\Fixtures\Exclusions; +use Jaybizzle\CrawlerDetect\Fixtures\Headers; + +class CrawlerDetect +{ + /** + * The user agent. + * + * @var string|null + */ + protected $userAgent = null; + + /** + * Headers that contain a user agent. + * + * @var array + */ + protected $httpHeaders = array(); + + /** + * Store regex matches. + * + * @var array + */ + protected $matches = array(); + + /** + * Crawlers object. + * + * @var \Jaybizzle\CrawlerDetect\Fixtures\Crawlers + */ + protected $crawlers; + + /** + * Exclusions object. + * + * @var \Jaybizzle\CrawlerDetect\Fixtures\Exclusions + */ + protected $exclusions; + + /** + * Headers object. + * + * @var \Jaybizzle\CrawlerDetect\Fixtures\Headers + */ + protected $uaHttpHeaders; + + /** + * The compiled regex string. + * + * @var string + */ + protected $compiledRegex; + + /** + * The compiled exclusions regex string. + * + * @var string + */ + protected $compiledExclusions; + + /** + * Class constructor. + * + * @param array|null $headers Headers to inspect; when this is not a non-empty array, $_SERVER is used instead. + * @param string|null $userAgent Explicit user agent; when null it is assembled from the stored headers. 
+ */ + public function __construct(array $headers = null, $userAgent = null) + { + $this->crawlers = new Crawlers(); + $this->exclusions = new Exclusions(); + $this->uaHttpHeaders = new Headers(); + + $this->compiledRegex = $this->compileRegex($this->crawlers->getAll()); + $this->compiledExclusions = $this->compileRegex($this->exclusions->getAll()); + + $this->setHttpHeaders($headers); + $this->setUserAgent($userAgent); + } + + /** + * Compile the regex patterns into one regex string. + * + * @param array + * + * @return string + */ + public function compileRegex($patterns) + { + return '('.implode('|', $patterns).')'; + } + + /** + * Set HTTP headers. + * + * @param array|null $httpHeaders + */ + public function setHttpHeaders($httpHeaders) + { + // Use global _SERVER if $httpHeaders aren't defined. + if (! is_array($httpHeaders) || ! count($httpHeaders)) { + $httpHeaders = $_SERVER; + } + + // Clear existing headers. + $this->httpHeaders = array(); + + // Only save HTTP headers. In PHP land, that means + // only _SERVER vars that start with HTTP_. + foreach ($httpHeaders as $key => $value) { + if (strpos($key, 'HTTP_') === 0) { + $this->httpHeaders[$key] = $value; + } + } + } + + /** + * Return user agent headers. + * + * @return array + */ + public function getUaHttpHeaders() + { + return $this->uaHttpHeaders->getAll(); + } + + /** + * Set the user agent. + * + * @param string $userAgent + */ + public function setUserAgent($userAgent) + { + if (is_null($userAgent)) { + foreach ($this->getUaHttpHeaders() as $altHeader) { + if (isset($this->httpHeaders[$altHeader])) { + $userAgent .= $this->httpHeaders[$altHeader].' '; + } + } + } + + return $this->userAgent = $userAgent; + } + + /** + * Check user agent string against the regex. 
+ * + * @param string|null $userAgent + * + * @return bool + */ + public function isCrawler($userAgent = null) + { + $agent = trim(preg_replace( + "/{$this->compiledExclusions}/i", + '', + $userAgent ?: $this->userAgent + )); + + if ($agent == '') { + return false; + } + + $result = preg_match("/{$this->compiledRegex}/i", $agent, $matches); + + if ($matches) { + $this->matches = $matches; + } + + return (bool) $result; + } + + /** + * Return the matches. + * + * @return string|null + */ + public function getMatches() + { + return isset($this->matches[0]) ? $this->matches[0] : null; + } +} diff --git a/blockbot/vendor/jaybizzle/crawler-detect/src/Fixtures/AbstractProvider.php b/blockbot/vendor/jaybizzle/crawler-detect/src/Fixtures/AbstractProvider.php new file mode 100644 index 000000000..26ea8e5fc --- /dev/null +++ b/blockbot/vendor/jaybizzle/crawler-detect/src/Fixtures/AbstractProvider.php @@ -0,0 +1,32 @@ + + * + * This source file is subject to the MIT license that is bundled + * with this source code in the file LICENSE. + */ + +namespace Jaybizzle\CrawlerDetect\Fixtures; + +abstract class AbstractProvider +{ + /** + * The data set. + * + * @var array + */ + protected $data; + + /** + * Return the data set. + * + * @return array + */ + public function getAll() + { + return $this->data; + } +} diff --git a/blockbot/vendor/jaybizzle/crawler-detect/src/Fixtures/Crawlers.php b/blockbot/vendor/jaybizzle/crawler-detect/src/Fixtures/Crawlers.php new file mode 100644 index 000000000..a90705658 --- /dev/null +++ b/blockbot/vendor/jaybizzle/crawler-detect/src/Fixtures/Crawlers.php @@ -0,0 +1,1240 @@ + + * + * This source file is subject to the MIT license that is bundled + * with this source code in the file LICENSE. + */ + +namespace Jaybizzle\CrawlerDetect\Fixtures; + +class Crawlers extends AbstractProvider +{ + /** + * Array of regular expressions to match against the user agent. 
+ * + * @var array + */ + protected $data = array( + '.*Java.*outbrain', + ' YLT', + '^b0t$', + '^bluefish ', + '^Calypso v\/', + '^COMODO DCV', + '^DangDang', + '^DavClnt', + '^FDM ', + '^git\/', + '^Goose\/', + '^Grabber', + '^HTTPClient\/', + '^Java\/', + '^Jeode\/', + '^Jetty\/', + '^Mail\/', + '^Mget', + '^Microsoft URL Control', + '^NG\/[0-9\.]', + '^NING\/', + '^PHP\/[0-9]', + '^RMA\/', + '^Ruby|Ruby\/[0-9]', + '^VSE\/[0-9]', + '^WordPress\.com', + '^XRL\/[0-9]', + '^ZmEu', + '008\/', + '13TABS', + '192\.comAgent', + '2ip\.ru', + '404enemy', + '7Siters', + '80legs', + 'a\.pr-cy\.ru', + 'a3logics\.in', + 'A6-Indexer', + 'Abonti', + 'Aboundex', + 'aboutthedomain', + 'Accoona-AI-Agent', + 'acoon', + 'acrylicapps\.com\/pulp', + 'Acunetix', + 'AdAuth\/', + 'adbeat', + 'AddThis', + 'ADmantX', + 'AdminLabs', + 'adressendeutschland', + 'adscanner', + 'Adstxtaggregator', + 'agentslug', + 'AHC', + 'aihit', + 'aiohttp\/', + 'Airmail', + 'akka-http\/', + 'akula\/', + 'alertra', + 'alexa site audit', + 'Alibaba\.Security\.Heimdall', + 'Alligator', + 'allloadin', + 'AllSubmitter', + 'alyze\.info', + 'amagit', + 'Anarchie', + 'AndroidDownloadManager', + 'Anemone', + 'AngleSharp', + 'annotate_google', + 'Ant\.com', + 'Anturis Agent', + 'AnyEvent-HTTP\/', + 'Apache Droid', + 'Apache OpenOffice', + 'Apache-HttpAsyncClient', + 'Apache-HttpClient', + 'ApacheBench', + 'Apexoo', + 'APIs-Google', + 'AportWorm\/', + 'AppBeat\/', + 'AppEngine-Google', + 'AppStoreScraperZ', + 'Aprc\/[0-9]', + 'Arachmo', + 'arachnode', + 'Arachnophilia', + 'aria2', + 'Arukereso', + 'asafaweb', + 'AskQuickly', + 'Ask Jeeves', + 'ASPSeek', + 'Asterias', + 'Astute', + 'asynchttp', + 'Attach', + 'autocite', + 'Autonomy', + 'axios\/', + 'B-l-i-t-z-B-O-T', + 'Backlink-Ceck', + 'backlink-check', + 'BacklinkHttpStatus', + 'BackStreet', + 'BackWeb', + 'Bad-Neighborhood', + 'Badass', + 'baidu\.com', + 'Bandit', + 'basicstate', + 'BatchFTP', + 'Battleztar Bazinga', + 'baypup\/', + 'BazQux', + 'BBBike', + 
'BCKLINKS', + 'BDFetch', + 'BegunAdvertising', + 'Bidtellect', + 'BigBozz', + 'Bigfoot', + 'biglotron', + 'BingLocalSearch', + 'BingPreview', + 'binlar', + 'biNu image cacher', + 'Bitacle', + 'biz_Directory', + 'Black Hole', + 'Blackboard Safeassign', + 'BlackWidow', + 'BlockNote\.Net', + 'Bloglines', + 'Bloglovin', + 'BlogPulseLive', + 'BlogSearch', + 'Blogtrottr', + 'BlowFish', + 'boitho\.com-dc', + 'BPImageWalker', + 'Braintree-Webhooks', + 'Branch Metrics API', + 'Branch-Passthrough', + 'Brandprotect', + 'BrandVerity', + 'Brandwatch', + 'Brodie\/', + 'Browsershots', + 'BUbiNG', + 'Buck\/', + 'Buddy', + 'BuiltWith', + 'Bullseye', + 'BunnySlippers', + 'Burf Search', + 'Butterfly\/', + 'BuzzSumo', + 'CAAM\/[0-9]', + 'CakePHP', + 'Calculon', + 'Canary%20Mail', + 'CaretNail', + 'catexplorador', + 'CC Metadata Scaper', + 'Cegbfeieh', + 'censys', + 'Cerberian Drtrs', + 'CERT\.at-Statistics-Survey', + 'cg-eye', + 'changedetection', + 'ChangesMeter', + 'Charlotte', + 'CheckHost', + 'checkprivacy', + 'CherryPicker', + 'ChinaClaw', + 'Chirp\/', + 'chkme\.com', + 'Chlooe', + 'Chromaxa', + 'CirrusExplorer', + 'CISPA Vulnerability Notification', + 'Citoid', + 'CJNetworkQuality', + 'Clarsentia', + 'clips\.ua\.ac\.be', + 'Cloud mapping', + 'CloudEndure', + 'CloudFlare-AlwaysOnline', + 'Cloudinary', + 'cmcm\.com', + 'coccoc', + 'cognitiveseo', + 'colly -', + 'CommaFeed', + 'Commons-HttpClient', + 'commonscan', + 'contactbigdatafr', + 'contentkingapp', + 'convera', + 'CookieReports', + 'copyright sheriff', + 'CopyRightCheck', + 'Copyscape', + 'Cosmos4j\.feedback', + 'Covario-IDS', + 'Crescent', + 'Crowsnest', + 'Criteo', + 'CSHttp', + 'curb', + 'Curious George', + 'curl', + 'cuwhois\/', + 'cybo\.com', + 'DAP\/NetHTTP', + 'DareBoost', + 'DatabaseDriverMysqli', + 'DataCha0s', + 'Datafeedwatch', + 'Datanyze', + 'DataparkSearch', + 'dataprovider', + 'DataXu', + 'Daum(oa)?[ \/][0-9]', + 'Demon', + 'DeuSu', + 'developers\.google\.com\/\+\/web\/snippet\/', + 'Devil', + 'Digg', + 
'Digincore', + 'DigitalPebble', + 'Dirbuster', + 'Discourse Forum Onebox', + 'Disqus\/', + 'Dispatch\/', + 'DittoSpyder', + 'dlvr', + 'DMBrowser', + 'DNSPod-reporting', + 'docoloc', + 'Dolphin http client', + 'DomainAppender', + 'Donuts Content Explorer', + 'dotMailer content retrieval', + 'dotSemantic', + 'downforeveryoneorjustme', + 'Download Wonder', + 'downnotifier', + 'DowntimeDetector', + 'Drip', + 'drupact', + 'Drupal \(\+http:\/\/drupal\.org\/\)', + 'DTS Agent', + 'dubaiindex', + 'EARTHCOM', + 'Easy-Thumb', + 'EasyDL', + 'Ebingbong', + 'ec2linkfinder', + 'eCairn-Grabber', + 'eCatch', + 'ECCP', + 'eContext\/', + 'Ecxi', + 'EirGrabber', + 'ElectricMonk', + 'elefent', + 'EMail Exractor', + 'EMail Wolf', + 'EmailWolf', + 'Embarcadero', + 'Embed PHP Library', + 'Embedly', + 'endo\/', + 'europarchive\.org', + 'evc-batch', + 'EventMachine HttpClient', + 'Everwall Link Expander', + 'Evidon', + 'Evrinid', + 'ExactSearch', + 'ExaleadCloudview', + 'Excel\/', + 'exif', + 'Exploratodo', + 'Express WebPictures', + 'Extreme Picture Finder', + 'EyeNetIE', + 'ezooms', + 'facebookexternalhit', + 'facebookplatform', + 'fairshare', + 'Faraday v', + 'fasthttp', + 'Faveeo', + 'Favicon downloader', + 'faviconkit', + 'faviconarchive', + 'FavOrg', + 'Feed Wrangler', + 'Feedable\/', + 'Feedbin', + 'FeedBooster', + 'FeedBucket', + 'FeedBunch\/', + 'FeedBurner', + 'feeder', + 'Feedly', + 'FeedshowOnline', + 'Feedspot', + 'Feedwind\/', + 'FeedZcollector', + 'feeltiptop', + 'Fetch API', + 'Fetch\/[0-9]', + 'Fever\/[0-9]', + 'FHscan', + 'Fimap', + 'findlink', + 'findthatfile', + 'FlashGet', + 'FlipboardBrowserProxy', + 'FlipboardProxy', + 'FlipboardRSS', + 'Flock\/', + 'fluffy', + 'Flunky', + 'flynxapp', + 'forensiq', + 'FoundSeoTool', + 'http:\/\/www.neomo.de\/', //'Francis [Bot]' + 'free thumbnails', + 'Freeuploader', + 'Funnelback', + 'G-i-g-a-b-o-t', + 'g00g1e\.net', + 'ganarvisitas', + 'geek-tools', + 'Genieo', + 'GentleSource', + 'GetCode', + 'Getintent', + 'GetLinkInfo', + 
'getprismatic', + 'GetRight', + 'getroot', + 'GetURLInfo\/', + 'GetWeb', + 'Ghost Inspector', + 'GigablastOpenSource', + 'GIS-LABS', + 'github-camo', + 'github\.com', + 'Go [\d\.]* package http', + 'Go http package', + 'Go-Ahead-Got-It', + 'Go-http-client', + 'Go!Zilla', + 'gobyus', + 'gofetch', + 'GomezAgent', + 'gooblog', + 'Goodzer\/', + 'Google AppsViewer', + 'Google Desktop', + 'Google favicon', + 'Google Keyword Suggestion', + 'Google Keyword Tool', + 'Google Page Speed Insights', + 'Google PP Default', + 'Google Search Console', + 'Google Web Preview', + 'Google-Adwords', + 'Google-Apps-Script', + 'Google-Calendar-Importer', + 'Google-HotelAdsVerifier', + 'Google-HTTP-Java-Client', + 'Google-Publisher-Plugin', + 'Google-SearchByImage', + 'Google-Site-Verification', + 'Google-Structured-Data-Testing-Tool', + 'Google-Youtube-Links', + 'google-xrawler', + 'GoogleDocs', + 'GoogleHC\/', + 'GoogleProducer', + 'GoogleSites', + 'Google-Transparency-Report', + 'Gookey', + 'GoScraper', + 'GoSpotCheck', + 'gosquared-thumbnailer', + 'Gotit', + 'GoZilla', + 'grabify', + 'GrabNet', + 'Grafula', + 'Grammarly', + 'GrapeFX', + 'GreatNews', + 'Gregarius', + 'GRequests', + 'grokkit', + 'grouphigh', + 'grub-client', + 'gSOAP\/', + 'GT::WWW', + 'GTmetrix', + 'GuzzleHttp', + 'gvfs\/', + 'HAA(A)?RTLAND http client', + 'Haansoft', + 'hackney\/', + 'Hadi Agent', + 'HappyApps-WebCheck', + 'Hatena', + 'Havij', + 'HeadlessChrome', + 'HEADMasterSEO', + 'HeartRails_Capture', + 'help@dataminr\.com', + 'heritrix', + 'historious', + 'hkedcity', + 'hledejLevne\.cz', + 'Hloader', + 'HMView', + 'Holmes', + 'HonesoSearchEngine', + 'HootSuite Image proxy', + 'Hootsuite-WebFeed', + 'hosterstats', + 'HostTracker', + 'ht:\/\/check', + 'htdig', + 'HTMLparser', + 'htmlyse', + 'HTTP Banner Detection', + 'HTTP_Compression_Test', + 'http_request2', + 'http_requester', + 'http-get', + 'HTTP-Header-Abfrage', + 'http-kit', + 'http-request\/', + 'HTTP-Tiny', + 'HTTP::Lite', + 'http\.rb\/', + 'http_get', + 
'HttpComponents', + 'httphr', + 'HTTPMon', + 'httpRequest', + 'httpscheck', + 'httpssites_power', + 'httpunit', + 'HttpUrlConnection', + 'httrack', + 'huaweisymantec', + 'HubSpot ', + 'Humanlinks', + 'i2kconnect\/', + 'Iblog', + 'ichiro', + 'Id-search', + 'IdeelaborPlagiaat', + 'IDG Twitter Links Resolver', + 'IDwhois\/', + 'Iframely', + 'igdeSpyder', + 'IlTrovatore', + 'Image Fetch', + 'Image Sucker', + 'ImageEngine\/', + 'ImageVisu\/', + 'Imagga', + 'imagineeasy', + 'imgsizer', + 'InAGist', + 'inbound\.li parser', + 'InDesign%20CC', + 'Indy Library', + 'InetURL', + 'infegy', + 'infohelfer', + 'InfoTekies', + 'InfoWizards Reciprocal Link', + 'inpwrd\.com', + 'instabid', + 'Instapaper', + 'Integrity', + 'integromedb', + 'Intelliseek', + 'InterGET', + 'internet_archive', + 'Internet Ninja', + 'InternetSeer', + 'internetVista monitor', + 'intraVnews', + 'IODC', + 'IOI', + 'iplabel', + 'ips-agent', + 'IPS\/[0-9]', + 'IPWorks HTTP\/S Component', + 'iqdb\/', + 'Iria', + 'Irokez', + 'isitup\.org', + 'iskanie', + 'isUp\.li', + 'iThemes Sync\/', + 'iZSearch', + 'JAHHO', + 'janforman', + 'Jaunt\/', + 'Jbrofuzz', + 'Jersey\/', + 'JetCar', + 'Jigsaw', + 'Jobboerse', + 'JobFeed discovery', + 'Jobg8 URL Monitor', + 'jobo', + 'Jobrapido', + 'Jobsearch1\.5', + 'JoinVision Generic', + 'JolokiaPwn', + 'Joomla', + 'Jorgee', + 'JS-Kit', + 'JustView', + 'Kaspersky Lab CFR link resolver', + 'Kelny\/', + 'Kerrigan\/', + 'KeyCDN', + 'Keyword Density', + 'Keywords Research', + 'KickFire', + 'KimonoLabs\/', + 'Kml-Google', + 'knows\.is', + 'KOCMOHABT', + 'kouio', + 'kube-probe', + 'kulturarw3', + 'KumKie', + 'L\.webis', + 'Larbin', + 'Lavf\/', + 'LeechFTP', + 'LeechGet', + 'letsencrypt', + 'Lftp', + 'LibVLC', + 'LibWeb', + 'Libwhisker', + 'libwww', + 'Licorne', + 'Liferea\/', + 'Lightspeedsystems', + 'Lighthouse', + 'Likse', + 'Link Valet', + 'link_thumbnailer', + 'LinkAlarm\/', + 'linkCheck', + 'linkdex', + 'LinkExaminer', + 'linkfluence', + 'linkpeek', + 'LinkPreviewGenerator', + 
'LinkScan', + 'LinksManager', + 'LinkTiger', + 'LinkWalker', + 'Lipperhey', + 'Litemage_walker', + 'livedoor ScreenShot', + 'LoadImpactRload', + 'localsearch-web', + 'LongURL API', + 'looksystems\.net', + 'ltx71', + 'lua-resty-http', + 'lwp-request', + 'lwp-trivial', + 'LWP::Simple', + 'lycos', + 'LYT\.SR', + 'mabontland', + 'Mag-Net', + 'MagpieRSS', + 'Mail\.Ru', + 'MailChimp', + 'Majestic12', + 'makecontact\/', + 'Mandrill', + 'MapperCmd', + 'marketinggrader', + 'MarkMonitor', + 'MarkWatch', + 'Mass Downloader', + 'masscan\/', + 'Mata Hari', + 'Mediapartners-Google', + 'mediawords', + 'MegaIndex\.ru', + 'MeltwaterNews', + 'Melvil Rawi', + 'MemGator', + 'Metaspinner', + 'MetaURI', + 'MFC_Tear_Sample', + 'Microsearch', + 'Microsoft Office ', + 'Microsoft Outlook', + 'Microsoft Windows Network Diagnostics', + 'Microsoft-WebDAV-MiniRedir', + 'Microsoft Data Access', + 'MIDown tool', + 'MIIxpc', + 'Mindjet', + 'Miniature\.io', + 'Miniflux', + 'Mister PiX', + 'mixdata dot com', + 'mixed-content-scan', + 'Mixmax-LinkPreview', + 'mixnode', + 'Mnogosearch', + 'mogimogi', + 'Mojeek', + 'Mojolicious \(Perl\)', + 'Monit\/', + 'monitis', + 'Monitority\/', + 'montastic', + 'MonTools', + 'Moreover', + 'Morfeus Fucking Scanner', + 'Morning Paper', + 'MovableType', + 'mowser', + 'Mrcgiguy', + 'MS Web Services Client Protocol', + 'MSFrontPage', + 'mShots', + 'MuckRack\/', + 'muhstik-scan', + 'MVAClient', + 'MxToolbox\/', + 'nagios', + 'Najdi\.si', + 'Name Intelligence', + 'Nameprotect', + 'Navroad', + 'NearSite', + 'Needle', + 'Nessus', + 'Net Vampire', + 'NetAnts', + 'NETCRAFT', + 'NetLyzer', + 'NetMechanic', + 'NetNewsWire', + 'Netpursual', + 'netresearch', + 'NetShelter ContentScan', + 'Netsparker', + 'NetTrack', + 'Netvibes', + 'NetZIP', + 'Neustar WPM', + 'NeutrinoAPI', + 'NewRelicPinger', + 'NewsBlur .*Finder', + 'NewsGator', + 'newsme', + 'newspaper\/', + 'Nexgate Ruby Client', + 'NG-Search', + 'Nibbler', + 'NICErsPRO', + 'Nikto', + 'nineconnections', + 'NLNZ_IAHarvester', 
+ 'Nmap Scripting Engine', + 'node-superagent', + 'node-urllib', + 'node\.io', + 'Nodemeter', + 'NodePing', + 'nominet\.org\.uk', + 'nominet\.uk', + 'Norton-Safeweb', + 'Notifixious', + 'notifyninja', + 'nuhk', + 'nutch', + 'Nuzzel', + 'nWormFeedFinder', + 'nyawc\/', + 'Nymesis', + 'NYU', + 'Ocelli\/', + 'Octopus', + 'oegp', + 'Offline Explorer', + 'Offline Navigator', + 'og-scraper', + 'okhttp', + 'omgili', + 'OMSC', + 'Online Domain Tools', + 'OpenCalaisSemanticProxy', + 'Openfind', + 'OpenLinkProfiler', + 'Openstat\/', + 'OpenVAS', + 'Optimizer', + 'Orbiter', + 'OrgProbe\/', + 'orion-semantics', + 'Outlook-Express', + 'Outlook-iOS', + 'ow\.ly', + 'Owler', + 'ownCloud News', + 'OxfordCloudService', + 'Page Valet', + 'page_verifier', + 'page scorer', + 'page2rss', + 'PageGrabber', + 'PagePeeker', + 'PageScorer', + 'Pagespeed\/', + 'Panopta', + 'panscient', + 'Papa Foto', + 'parsijoo', + 'Pavuk', + 'PayPal IPN', + 'pcBrowser', + 'Pcore-HTTP', + 'Pearltrees', + 'PECL::HTTP', + 'peerindex', + 'Peew', + 'PeoplePal', + 'Perlu -', + 'PhantomJS Screenshoter', + 'PhantomJS\/', + 'Photon\/', + 'phpservermon', + 'Pi-Monster', + 'Picscout', + 'Picsearch', + 'PictureFinder', + 'Pimonster', + 'ping\.blo\.gs', + 'Pingability', + 'PingAdmin\.Ru', + 'Pingdom', + 'Pingoscope', + 'PingSpot', + 'pinterest\.com', + 'Pixray', + 'Pizilla', + 'Plagger\/', + 'Ploetz \+ Zeller', + 'Plukkie', + 'plumanalytics', + 'PocketImageCache', + 'PocketParser', + 'Pockey', + 'POE-Component-Client-HTTP', + 'Polymail\/', + 'Pompos', + 'Porkbun', + 'Port Monitor', + 'postano', + 'PostmanRuntime', + 'PostPost', + 'postrank', + 'PowerPoint\/', + 'Priceonomics Analysis Engine', + 'PrintFriendly', + 'PritTorrent', + 'Prlog', + 'probethenet', + 'Project 25499', + 'prospectb2b', + 'Protopage', + 'ProWebWalker', + 'proximic', + 'PRTG Network Monitor', + 'pshtt, https scanning', + 'PTST ', + 'PTST\/[0-9]+', + 'Pulsepoint XT3 web scraper', + 'Pump', + 'Python-httplib2', + 'python-requests', + 'Python-urllib', + 
'Qirina Hurdler', + 'QQDownload', + 'QrafterPro', + 'Qseero', + 'Qualidator', + 'QueryN Metasearch', + 'queuedriver', + 'Quora Link Preview', + 'Qwantify', + 'Radian6', + 'RankActive', + 'RankFlex', + 'RankSonicSiteAuditor', + 'Re-re Studio', + 'ReactorNetty', + 'Readability', + 'RealDownload', + 'RealPlayer%20Downloader', + 'RebelMouse', + 'Recorder', + 'RecurPost\/', + 'redback\/', + 'ReederForMac', + 'ReGet', + 'RepoMonkey', + 'request\.js', + 'reqwest\/', + 'ResponseCodeTest', + 'RestSharp', + 'Riddler', + 'Rival IQ', + 'Robosourcer', + 'Robozilla', + 'ROI Hunter', + 'RPT-HTTPClient', + 'RSSOwl', + 'safe-agent-scanner', + 'SalesIntelligent', + 'Saleslift', + 'Sendsay\.Ru', + 'SauceNAO', + 'SBIder', + 'scalaj-http', + 'scan\.lol', + 'ScanAlert', + 'Scoop', + 'scooter', + 'ScoutJet', + 'ScoutURLMonitor', + 'ScrapeBox Page Scanner', + 'SimpleScraper', + 'Scrapy', + 'Screaming', + 'ScreenShotService', + 'Scrubby', + 'Scrutiny\/', + 'search\.thunderstone', + 'Search37', + 'searchenginepromotionhelp', + 'Searchestate', + 'SearchExpress', + 'SearchSight', + 'Seeker', + 'semanticdiscovery', + 'semanticjuice', + 'Semiocast HTTP client', + 'Semrush', + 'sentry\/', + 'SEO Browser', + 'Seo Servis', + 'seo-nastroj\.cz', + 'seo4ajax', + 'Seobility', + 'SEOCentro', + 'SeoCheck', + 'SEOkicks', + 'Seomoz', + 'SEOprofiler', + 'SEOsearch', + 'seoscanners', + 'seositecheckup', + 'SEOstats', + 'servernfo', + 'sexsearcher', + 'Seznam', + 'Shelob', + 'Shodan', + 'Shoppimon', + 'ShopWiki', + 'ShortLinkTranslate', + 'shrinktheweb', + 'Sideqik', + 'SimplePie', + 'SimplyFast', + 'Siphon', + 'SISTRIX', + 'Site-Shot\/', + 'Site Sucker', + 'Site24x7', + 'SiteBar', + 'Sitebeam', + 'Sitebulb\/', + 'SiteCondor', + 'SiteExplorer', + 'SiteGuardian', + 'Siteimprove', + 'SiteIndexed', + 'Sitemap(s)? 
Generator', + 'SitemapGenerator', + 'SiteMonitor', + 'Siteshooter B0t', + 'SiteSnagger', + 'SiteSucker', + 'SiteTruth', + 'Sitevigil', + 'sitexy\.com', + 'SkypeUriPreview', + 'Slack\/', + 'slider\.com', + 'slurp', + 'SlySearch', + 'SmartDownload', + 'SMRF URL Expander', + 'SMUrlExpander', + 'Snake', + 'Snappy', + 'SnapSearch', + 'Snarfer\/', + 'SniffRSS', + 'sniptracker', + 'Snoopy', + 'SnowHaze Search', + 'sogou web', + 'SortSite', + 'Sottopop', + 'sovereign\.ai', + 'SpaceBison', + 'SpamExperts', + 'Spammen', + 'Spanner', + 'spaziodati', + 'SPDYCheck', + 'Specificfeeds', + 'speedy', + 'SPEng', + 'Spinn3r', + 'spray-can', + 'Sprinklr ', + 'spyonweb', + 'sqlmap', + 'Sqlworm', + 'Sqworm', + 'SSL Labs', + 'ssl-tools', + 'StackRambler', + 'Statastico\/', + 'StatusCake', + 'Steeler', + 'Stratagems Kumo', + 'Stroke\.cz', + 'StudioFACA', + 'StumbleUpon', + 'suchen', + 'Sucuri', + 'summify', + 'SuperHTTP', + 'Surphace Scout', + 'Suzuran', + 'SwiteScraper', + 'Symfony BrowserKit', + 'Symfony2 BrowserKit', + 'SynHttpClient-Built', + 'Sysomos', + 'sysscan', + 'Szukacz', + 'T0PHackTeam', + 'tAkeOut', + 'Tarantula\/', + 'Taringa UGC', + 'TarmotGezgin', + 'Teleport', + 'Telesoft', + 'Telesphoreo', + 'Telesphorep', + 'Tenon\.io', + 'teoma', + 'terrainformatica', + 'Test Certificate Info', + 'testuri', + 'Tetrahedron', + 'The Drop Reaper', + 'The Expert HTML Source Viewer', + 'The Knowledge AI', + 'The Intraformant', + 'theinternetrules', + 'TheNomad', + 'Thinklab', + 'Thumbshots', + 'ThumbSniper', + 'timewe\.net', + 'TinEye', + 'Tiny Tiny RSS', + 'TLSProbe\/', + 'Toata', + 'topster', + 'touche\.com', + 'Traackr\.com', + 'tracemyfile', + 'Trackuity', + 'TrapitAgent', + 'Trendiction', + 'Trendsmap', + 'trendspottr', + 'truwoGPS', + 'TryJsoup', + 'TulipChain', + 'Turingos', + 'Turnitin', + 'tweetedtimes', + 'Tweetminster', + 'Tweezler\/', + 'twibble', + 'Twice', + 'Twikle', + 'Twingly', + 'Twisted PageGetter', + 'Typhoeus', + 'ubermetrics-technologies', + 'uclassify', + 'UdmSearch', 
+ 'unchaos', + 'unirest-java', + 'UniversalFeedParser', + 'Unshorten\.It', + 'Untiny', + 'UnwindFetchor', + 'updated', + 'updown\.io daemon', + 'Upflow', + 'Uptimia', + 'Urlcheckr', + 'URL Verifier', + 'URLitor', + 'urlresolver', + 'Urlstat', + 'URLTester', + 'UrlTrends Ranking Updater', + 'URLy Warning', + 'URLy\.Warning', + 'Vacuum', + 'Vagabondo', + 'VB Project', + 'vBSEO', + 'VCI', + 'via ggpht\.com GoogleImageProxy', + 'VidibleScraper', + 'Virusdie', + 'visionutils', + 'vkShare', + 'VoidEYE', + 'Voil', + 'voltron', + 'voyager\/', + 'VSAgent\/', + 'VSB-TUO\/', + 'Vulnbusters Meter', + 'VYU2', + 'w3af\.org', + 'W3C_Unicorn', + 'W3C-checklink', + 'W3C-mobileOK', + 'WAC-OFU', + 'Wallpapers\/[0-9]+', + 'WallpapersHD', + 'wangling', + 'Wappalyzer', + 'WatchMouse', + 'WbSrch\/', + 'WDT\.io', + 'web-capture\.net', + 'Web-sniffer', + 'Web Auto', + 'Web Collage', + 'Web Enhancer', + 'Web Fetch', + 'Web Fuck', + 'Web Pix', + 'Web Sauger', + 'Web Sucker', + 'Webalta', + 'Webauskunft', + 'WebAuto', + 'WebCapture', + 'WebClient\/', + 'webcollage', + 'WebCookies', + 'WebCopier', + 'WebCorp', + 'WebDataStats', + 'WebDoc', + 'WebEnhancer', + 'WebFetch', + 'WebFuck', + 'WebGazer', + 'WebGo IS', + 'WebImageCollector', + 'WebImages', + 'WebIndex', + 'webkit2png', + 'WebLeacher', + 'webmastercoffee', + 'webmon ', + 'WebPix', + 'WebReaper', + 'WebSauger', + 'webscreenie', + 'Webshag', + 'Webshot', + 'Website Quester', + 'websitepulse agent', + 'WebsiteQuester', + 'Websnapr', + 'WebSniffer', + 'Webster', + 'WebStripper', + 'WebSucker', + 'Webthumb\/', + 'WebThumbnail', + 'WebWhacker', + 'WebZIP', + 'WeLikeLinks', + 'WEPA', + 'WeSEE', + 'wf84', + 'Wfuzz\/', + 'wget', + 'WhatsApp', + 'WhatsMyIP', + 'WhatWeb', + 'WhereGoes\?', + 'Whibse', + 'WhoRunsCoinHive', + 'Whynder Magnet', + 'Windows-RSS-Platform', + 'WinPodder', + 'wkhtmlto', + 'wmtips', + 'Woko', + 'woorankreview', + 'Word\/', + 'WordPress\/', + 'WordupinfoSearch', + 'wotbox', + 'WP Engine Install Performance API', + 'wpif', + 
'wprecon\.com survey', + 'WPScan', + 'wscheck', + 'Wtrace', + 'WWW-Collector-E', + 'WWW-Mechanize', + 'WWW::Document', + 'WWW::Mechanize', + 'www\.monitor\.us', + 'WWWOFFLE', + 'x09Mozilla', + 'x22Mozilla', + 'XaxisSemanticsClassifier', + 'Xenu Link Sleuth', + 'XING-contenttabreceiver', + 'xpymep([0-9]?)\.exe', + 'Y!J-(ASR|BSC)', + 'Y\!J-BRW', + 'Yaanb', + 'yacy', + 'Yahoo Link Preview', + 'YahooCacheSystem', + 'YahooYSMcm', + 'YandeG', + 'Yandex(?!Search)', + 'yanga', + 'yeti', + 'Yo-yo', + 'Yoleo Consumer', + 'yoogliFetchAgent', + 'YottaaMonitor', + 'Your-Website-Sucks', + 'yourls\.org', + 'YoYs\.net', + 'YP\.PL', + 'Zabbix', + 'Zade', + 'Zao', + 'Zauba', + 'Zemanta Aggregator', + 'Zend_Http_Client', + 'Zend\\\\Http\\\\Client', + 'Zermelo', + 'Zeus ', + 'zgrab', + 'ZnajdzFoto', + 'Zombie\.js', + 'Zoom\.Mac', + 'ZyBorg', + '[a-z0-9\-_]*(bot|crawl|archiver|transcoder|spider|uptime|validator|fetcher|cron|checker|reader|extractor|monitoring|analyzer)', + ); +} diff --git a/blockbot/vendor/jaybizzle/crawler-detect/src/Fixtures/Exclusions.php b/blockbot/vendor/jaybizzle/crawler-detect/src/Fixtures/Exclusions.php new file mode 100644 index 000000000..e6b3ca897 --- /dev/null +++ b/blockbot/vendor/jaybizzle/crawler-detect/src/Fixtures/Exclusions.php @@ -0,0 +1,72 @@ + + * + * This source file is subject to the MIT license that is bundled + * with this source code in the file LICENSE. + */ + +namespace Jaybizzle\CrawlerDetect\Fixtures; + +class Exclusions extends AbstractProvider +{ + /** + * List of strings to remove from the user agent before running the crawler regex + * Over a large list of user agents, this gives us about a 55% speed increase! 
+ * + * @var array + */ + protected $data = array( + 'Safari.[\d\.]*', + 'Firefox.[\d\.]*', + ' Chrome.[\d\.]*', + 'Chromium.[\d\.]*', + 'MSIE.[\d\.]', + 'Opera\/[\d\.]*', + 'Mozilla.[\d\.]*', + 'AppleWebKit.[\d\.]*', + 'Trident.[\d\.]*', + 'Windows NT.[\d\.]*', + 'Android [\d\.]*', + 'Macintosh.', + 'Ubuntu', + 'Linux', + '[ ]Intel', + 'Mac OS X [\d_]*', + '(like )?Gecko(.[\d\.]*)?', + 'KHTML,', + 'CriOS.[\d\.]*', + 'CPU iPhone OS ([0-9_])* like Mac OS X', + 'CPU OS ([0-9_])* like Mac OS X', + 'iPod', + 'compatible', + 'x86_..', + 'i686', + 'x64', + 'X11', + 'rv:[\d\.]*', + 'Version.[\d\.]*', + 'WOW64', + 'Win64', + 'Dalvik.[\d\.]*', + ' \.NET CLR [\d\.]*', + 'Presto.[\d\.]*', + 'Media Center PC', + 'BlackBerry', + 'Build', + 'Opera Mini\/\d{1,2}\.\d{1,2}\.[\d\.]*\/\d{1,2}\.', + 'Opera', + ' \.NET[\d\.]*', + 'cubot', + '; M bot', + '; CRONO', + '; B bot', + '; IDbot', + '; ID bot', + '; POWER BOT', + ';', // Remove the following characters ; + ); +} diff --git a/blockbot/vendor/jaybizzle/crawler-detect/src/Fixtures/Headers.php b/blockbot/vendor/jaybizzle/crawler-detect/src/Fixtures/Headers.php new file mode 100644 index 000000000..35c60c389 --- /dev/null +++ b/blockbot/vendor/jaybizzle/crawler-detect/src/Fixtures/Headers.php @@ -0,0 +1,37 @@ + + * + * This source file is subject to the MIT license that is bundled + * with this source code in the file LICENSE. + */ + +namespace Jaybizzle\CrawlerDetect\Fixtures; + +class Headers extends AbstractProvider +{ + /** + * All possible HTTP headers that represent the user agent string. + * + * @var array + */ + protected $data = array( + // The default User-Agent string. + 'HTTP_USER_AGENT', + // Header can occur on devices using Opera Mini. 
+ 'HTTP_X_OPERAMINI_PHONE_UA', + // Vodafone specific header: http://www.seoprinciple.com/mobile-web-community-still-angry-at-vodafone/24/ + 'HTTP_X_DEVICE_USER_AGENT', + 'HTTP_X_ORIGINAL_USER_AGENT', + 'HTTP_X_SKYFIRE_PHONE', + 'HTTP_X_BOLT_PHONE_UA', + 'HTTP_DEVICE_STOCK_UA', + 'HTTP_X_UCBROWSER_DEVICE_UA', + // Sometimes, bots (especially Google) use a genuine user agent, but fill this header in with their email address + 'HTTP_FROM', + 'HTTP_X_SCANNER', // Seen in use by Netsparker + ); +}