diff --git a/.htconfig.php b/.htconfig.php
index 6752b9cd..3e70ecda 100644
--- a/.htconfig.php
+++ b/.htconfig.php
@@ -13,6 +13,14 @@ $default_timezone = 'Europe/Amsterdam';
// What is your site name?
$a->config['sitename'] = "EXPERIMENTAL Friendica public directory";
+//Statistic display settings.
+$a->config['stats'] = array(
+
+ //For site health, the max age (in seconds) for which to display data.
+ 'maxDataAge' => 3600*24*30*4 //120 days = ~4 months
+
+);
+
//Settings related to the syncing feature.
$a->config['syncing'] = array(
@@ -46,7 +54,7 @@ $a->config['site-health'] = array(
//Wait for at least ... before probing a site again.
//The longer this value, the more "stable" site-healths will be over time.
//Note: If a bad (negative) health site submits something, a probe will be performed regardless.
- 'min_probe_delay' => 3*24*3600, // 3 days
+ 'min_probe_delay' => 24*3600, // 1 day
//Probes get a simple /friendica/json file from the server.
//Feel free to set this timeout to a very tight value.
diff --git a/dfrndir.sql b/dfrndir.sql
index 118e61e9..2276f0ad 100644
--- a/dfrndir.sql
+++ b/dfrndir.sql
@@ -154,6 +154,7 @@ CREATE TABLE IF NOT EXISTS `user` (
CREATE TABLE IF NOT EXISTS `site-health` (
`id` int(10) unsigned NOT NULL AUTO_INCREMENT,
`base_url` varchar(255) NOT NULL,
+ `effective_base_url` varchar(255) NULL DEFAULT NULL,
`health_score` int(11) NOT NULL DEFAULT 0,
`no_scrape_url` varchar(255) NULL DEFAULT NULL,
`dt_first_noticed` datetime NOT NULL,
diff --git a/include/site-health.php b/include/site-health.php
index 6eadaf10..7f9d03b5 100644
--- a/include/site-health.php
+++ b/include/site-health.php
@@ -29,7 +29,7 @@ function notice_site($url, $check_health=false)
$entry = $result[0];
//If we are allowed to do health checks...
- if(!!$check_health){
+ if($check_health){
//And the site is in bad health currently, do a check now.
//This is because you have a high certainty the site may perform better now.
@@ -57,7 +57,7 @@ function notice_site($url, $check_health=false)
);
//And in case we should probe now, do so.
- if(!!$check_health){
+ if($check_health){
$result = q(
"SELECT * FROM `site-health` WHERE `base_url`= '%s' ORDER BY `id` ASC LIMIT 1",
@@ -134,7 +134,7 @@ function run_site_probe($id, &$entry_out)
CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
//Basic request
- CURLOPT_USERAGENT => 'friendica-directory-probe-0.1',
+ CURLOPT_USERAGENT => 'friendica-directory-probe-1.0',
CURLOPT_RETURNTRANSFER => true,
CURLOPT_URL => $probe_location
@@ -159,7 +159,7 @@ function run_site_probe($id, &$entry_out)
//Probe again, without strict SSL.
$options[CURLOPT_SSL_VERIFYPEER] = false;
- //Replace the handler.
+ //Replace the handle.
curl_close($handle);
$handle = curl_init();
curl_setopt_array($handle, $options);
@@ -178,13 +178,14 @@ function run_site_probe($id, &$entry_out)
$time = round(($probe_end - $probe_start) * 1000);
$status = curl_getinfo($handle, CURLINFO_HTTP_CODE);
$type = curl_getinfo($handle, CURLINFO_CONTENT_TYPE);
- $effective_url = curl_getinfo($handle, CURLINFO_EFFECTIVE_URL);
+ $info = curl_getinfo($handle);
//Done with CURL now.
curl_close($handle);
#TODO: if the site redirects elsewhere, notice this site and record an issue.
- $wrong_base_url = parse_site_from_url($effective_url) !== $entry['base_url'];
+ $effective_base_url = parse_site_from_url($info['url']);
+ $wrong_base_url = $effective_base_url !== $entry['base_url'];
try{
$data = json_decode($probe_data);
@@ -195,6 +196,18 @@ function run_site_probe($id, &$entry_out)
$parse_failed = !$data;
$parsedDataQuery = '';
+
+ logger('Effective Base URL: ' . $effective_base_url);
+
+ if($wrong_base_url){
+ $parsedDataQuery .= sprintf(
+ "`effective_base_url` = '%s',",
+ dbesc($effective_base_url)
+ );
+ }else{
+ $parsedDataQuery .= "`effective_base_url` = NULL,";
+ }
+
if(!$parse_failed){
$given_base_url_match = $data->url == $base_url;
@@ -208,7 +221,7 @@ function run_site_probe($id, &$entry_out)
);
//Update any health calculations or otherwise processed data.
- $parsedDataQuery = sprintf(
+ $parsedDataQuery .= sprintf(
"`dt_last_seen` = NOW(),
`name` = '%s',
`version` = '%s',
@@ -307,8 +320,8 @@ function health_score_after_probe($current, $probe_success, $time=null, $version
$current = min($current, 30);
}
- //Older than 3.2.x?
- elseif(intval($versionParts[1] < 2)){
+ //Older than 3.3.x?
+ elseif(intval($versionParts[1] < 3)){
$current -= 5; //Somewhat outdated.
}
diff --git a/include/smoothing.js b/include/smoothing.js
new file mode 100644
index 00000000..9f1d918d
--- /dev/null
+++ b/include/smoothing.js
@@ -0,0 +1,81 @@
+(function(){
+
+  window.Smoothing = {
+
+    /**
+     * Applies both a moving average bracket and exponential smoothing.
+     *
+     * Each point is first replaced by the mean of its bracket (the point itself plus
+     * up to `bracket` neighbours on each side, clipped at the array bounds), and the
+     * resulting series is then exponentially smoothed.
+     *
+     * @param {array} raw The raw Y values.
+     * @param {float} factor The exponential smoothing factor to apply (between 0 and 1).
+     *                       0 applies no exponential smoothing; omitted defaults to 1.
+     * @param {int} bracket The amount of datapoints to add to the bracket on each side! (2 = 5 data points)
+     * @return {array} The smoothed Y values.
+     */
+    exponentialMovingAverage: function(raw, factor, bracket){
+
+      var output = [];
+      var smoother = new ExponentialSmoother(factor);
+
+      //Transform each data point with the smoother.
+      for (var i = 0; i < raw.length; i++){
+        var input = raw[i];
+
+        //See if we should bracket.
+        if(bracket > 0){
+          //Cap our start and end so it doesn't go out of bounds.
+          //The end is exclusive, hence the +1 to include the point at i+bracket.
+          var start = Math.max(i-bracket, 0);
+          var end = Math.min(i+bracket+1, raw.length);
+          input = raw.slice(start, end);
+        }
+
+        output.push(smoother.transform(input));
+      }
+
+      return output;
+    }
+
+  };
+
+  /**
+   * Exponential Smoother class.
+   * @param {float} factor Weight given to the previous output (0 to 1); defaults to 1 when omitted.
+   */
+  var ExponentialSmoother = function(factor){
+    this.currentValue = null;
+    //Only fall back to the default when no factor was given; an explicit 0 is valid.
+    this.smoothingFactor = (factor === undefined || factor === null) ? 1 : factor;
+  };
+
+  /**
+   * Feeds the next value into the smoother.
+   * @param {float|array} input A single value, or a bracket of values to average first.
+   * @return {float} The updated smoothed value.
+   */
+  ExponentialSmoother.prototype.transform = function(input){
+    // In case our input is a bracket, first average it.
+    if(input.length){
+      var sum = 0;
+      for (var i = input.length - 1; i >= 0; i--){
+        sum += input[i];
+      }
+      input = sum/input.length;
+    }
+
+    // Start with our initial value.
+    if(this.currentValue === null){
+      this.currentValue = input;
+    }
+
+    // Weigh our current value with the smoothing factor and add the input
+    // to it with the inverse value of the smoothing factor.
+    return this.currentValue =
+      (this.currentValue * this.smoothingFactor) +
+      ( (1-this.smoothingFactor) * input );
+  };
+
+})();
\ No newline at end of file
diff --git a/mod/health.php b/mod/health.php
index 6f285326..927a51a1 100644
--- a/mod/health.php
+++ b/mod/health.php
@@ -45,6 +45,7 @@ function health_search(&$a, $search)
'♥ '.
'' . $site['base_url'] . ' '.
'(' . $site['users'] . ')'.
+ ($site['effective_base_url'] ? ' -> '.$site['effective_base_url'].'' : '').
"
\r\n";
}
@@ -70,7 +71,7 @@ function health_summary(&$a){
$sites = array();
//Find the user count per site.
- $r = q("SELECT `homepage` FROM `profile` WHERE 1");
+ $r = q("SELECT `homepage` FROM `profile`");
if(count($r)) {
foreach($r as $rr) {
$site = parse_site_from_url($rr['homepage']);
@@ -82,11 +83,11 @@ function health_summary(&$a){
}
}
- //See if we have a health for them.
+ //See if we have a health for them AND they provide SSL.
$sites_with_health = array();
$site_healths = array();
- $r = q("SELECT * FROM `site-health` WHERE `reg_policy`='REGISTER_OPEN'");
+ $r = q("SELECT * FROM `site-health` WHERE `reg_policy`='REGISTER_OPEN' AND `ssl_state` = 1");
if(count($r)) {
foreach($r as $rr) {
$sites_with_health[$rr['base_url']] = (($sites[$rr['base_url']] / 100) + 10) * intval($rr['health_score']);
@@ -106,7 +107,7 @@ function health_summary(&$a){
//Skip small sites.
$users = $sites[$k];
- if($users < 10) continue;
+ if($users < 5) continue;
$public_sites .=
'♥ '.
@@ -129,6 +130,42 @@ function health_summary(&$a){
function health_details($a, $id)
{
+ //Max data age in MySQL date.
+ $maxDate = date('Y-m-d H:i:s', time()-($a->config['stats']['maxDataAge']));
+
+ //Include graphael line charts.
+ $a->page['htmlhead'] .= ''.PHP_EOL;
+ $a->page['htmlhead'] .= ''.PHP_EOL;
+ $a->page['htmlhead'] .= ''.PHP_EOL;
+ $a->page['htmlhead'] .= ''.PHP_EOL;
+ $a->page['htmlhead'] .= '';
+
//The overall health status.
$r = q(
"SELECT * FROM `site-health`
@@ -142,6 +179,22 @@ function health_details($a, $id)
$site = $r[0];
+ //Does it redirect to a known site?
+ $redirectStatement = '';
+ if($site['effective_base_url']){
+
+ //The effective health status.
+ $r = q(
+ "SELECT * FROM `site-health`
+ WHERE `base_url`= '%s'",
+ dbesc($site['effective_base_url'])
+ );
+ if(count($r)){
+ $redirectStatement = 'Redirects to '.$site['effective_base_url'].'';
+ }
+
+ }
+
//Figure out SSL state.
$urlMeta = parse_url($site['base_url']);
if($urlMeta['scheme'] !== 'https'){
@@ -169,8 +222,10 @@ function health_details($a, $id)
//Get avg probe speed.
$r = q(
"SELECT AVG(`request_time`) as `avg_probe_time` FROM `site-probe`
- WHERE `site_health_id` = %u",
- intval($site['id'])
+ WHERE `site_health_id` = %u
+ AND `dt_performed` > '%s'",
+ intval($site['id']),
+ $maxDate
);
if(count($r)){
$site['avg_probe_time'] = $r[0]['avg_probe_time'];
@@ -184,8 +239,10 @@ function health_details($a, $id)
AVG(`photo_time`) as `avg_photo_time`,
AVG(`total_time`) as `avg_submit_time`
FROM `site-scrape`
- WHERE `site_health_id` = %u",
- intval($site['id'])
+ WHERE `site_health_id` = %u
+ AND `dt_performed` > '%s'",
+ intval($site['id']),
+ $maxDate
);
if(count($r)){
$site['avg_profile_time'] = $r[0]['avg_profile_time'];
@@ -196,62 +253,14 @@ function health_details($a, $id)
//Get probe speed data.
$r = q(
- "SELECT `request_time`, `dt_performed` FROM `site-probe`
- WHERE `site_health_id` = %u",
- intval($site['id'])
+ "SELECT AVG(`request_time`) as `avg_time`, date(`dt_performed`) as `date` FROM `site-probe`
+ WHERE `site_health_id` = %u
+ AND `dt_performed` > '%s'
+ GROUP BY `date`",
+ intval($site['id']),
+ $maxDate
);
if(count($r)){
- //Include graphael line charts.
- $a->page['htmlhead'] .= ''.PHP_EOL;
- $a->page['htmlhead'] .= ''.PHP_EOL;
- $a->page['htmlhead'] .= '';
- $speeds = array();
- $times = array();
- $mintime = time();
- foreach($r as $row){
- $speeds[] = $row['request_time'];
- $time = strtotime($row['dt_performed']);
- $times[] = $time;
- if($mintime > $time) $mintime = $time;
- }
- for($i=0; $i < count($times); $i++){
- $times[$i] -= $mintime;
- $times[$i] = floor($times[$i] / (24*3600));
- }
- $a->page['htmlhead'] .=
- '';
- }
-
- //Get scrape speed data.
- $r = q(
- "SELECT AVG(`total_time`) as `avg_time`, date(`dt_performed`) as `date` FROM `site-scrape`
- WHERE `site_health_id` = %u GROUP BY `date`",
- intval($site['id'])
- // date('Y-m-d H:i:s', time()-(3*24*3600)) //Max 3 days old.
- );
- if($r && count($r)){
- //Include graphael line charts.
- $a->page['htmlhead'] .= ''.PHP_EOL;
- $a->page['htmlhead'] .= ''.PHP_EOL;
- $a->page['htmlhead'] .= '';
$speeds = array();
$times = array();
$mintime = time();
@@ -267,23 +276,78 @@ function health_details($a, $id)
}
$a->page['htmlhead'] .=
'';
+ }
+
+ //Get scrape speed data.
+ $r = q(
+ "SELECT AVG(`total_time`) as `avg_time`, date(`dt_performed`) as `date` FROM `site-scrape`
+ WHERE `site_health_id` = %u
+ AND `dt_performed` > '%s'
+ GROUP BY `date`",
+ intval($site['id']),
+ $maxDate
+ );
+ if($r && count($r)){
+ $speeds = array();
+ $times = array();
+ $mintime = time();
+ foreach($r as $row){
+ $speeds[] = $row['avg_time'];
+ $time = strtotime($row['date']);
+ $times[] = $time;
+ if($mintime > $time) $mintime = $time;
+ }
+ for($i=0; $i < count($times); $i++){
+ $times[$i] -= $mintime;
+ $times[$i] = floor($times[$i] / (24*3600));
+ }
+ $a->page['htmlhead'] .=
+ '';
}
@@ -298,6 +362,7 @@ function health_details($a, $id)
$tpl .= file_get_contents('view/health_details.tpl');
return replace_macros($tpl, array(
'$name' => $site['name'],
+ '$redirectStatement' => $redirectStatement,
'$policy' => $policy,
'$site_info' => $site['info'],
'$base_url' => $site['base_url'],
diff --git a/view/health_details.tpl b/view/health_details.tpl
index 7d6f694e..45b5b057 100644
--- a/view/health_details.tpl
+++ b/view/health_details.tpl
@@ -1,12 +1,13 @@