Issue 1924: New configuration value for permitting crawler access

This commit is contained in:
Michael Vogel 2015-12-06 22:01:20 +01:00
parent 2dc0b2b731
commit bcf7e673c9
2 changed files with 17 additions and 5 deletions

View file

@ -44,6 +44,8 @@ line to your .htconfig.php:
* ostatus_poll_timeframe - Defines how old an item can be to try to complete the conversation with it.
* paranoia (Boolean) - Log out users if their IP address changed.
* permit_crawling (Boolean) - Restricts the search for not logged in users to one search per minute.
* free_crawls - Number of "free" searches when "permit_crawling" is activated (Default value is 10)
* crawl_permit_period - Period in seconds between allowed searches when the number of free searches is reached and "permit_crawling" is activated (Default value is 60)
* png_quality - Default value is 8.
* proc_windows (Boolean) - Should be enabled if Friendica is running under Windows.
* proxy_cache_time - Time after which the cache is cleared. Default value is one day.

View file

@ -104,20 +104,30 @@ function search_content(&$a) {
}
if (get_config('system','permit_crawling') AND !local_user()) {
// To-Do:
// - 10 requests are "free", after the 11th only a call per minute is allowed
// Default values:
// 10 requests are "free", after the 11th only a call per minute is allowed
$free_crawls = intval(get_config('system','free_crawls'));
if ($free_crawls == 0)
$free_crawls = 10;
$crawl_permit_period = intval(get_config('system','crawl_permit_period'));
if ($crawl_permit_period == 0)
$crawl_permit_period = 10;
$remote = $_SERVER["REMOTE_ADDR"];
$result = Cache::get("remote_search:".$remote);
if (!is_null($result)) {
if ($result > (time() - 60)) {
$resultdata = json_decode($result);
if (($resultdata->time > (time() - $crawl_permit_period)) AND ($resultdata->accesses > $free_crawls)) {
http_status_exit(429,
array("title" => t("Too Many Requests"),
"description" => t("Only one search per minute is permitted for not logged in users.")));
killme();
}
}
Cache::set("remote_search:".$remote, time(), CACHE_HOUR);
Cache::set("remote_search:".$remote, json_encode(array("time" => time(), "accesses" => $resultdata->accesses + 1)), CACHE_HOUR);
} else
Cache::set("remote_search:".$remote, json_encode(array("time" => time(), "accesses" => 1)), CACHE_HOUR);
}
nav_set_selected('search');