Adding site-health and noscrape support.
This commit is contained in:
parent
1bac9fb268
commit
a69a9d2278
15 changed files with 1025 additions and 61 deletions
55
dfrndir.sql
55
dfrndir.sql
|
@ -144,3 +144,58 @@ CREATE TABLE IF NOT EXISTS `user` (
|
||||||
`password` char(255) NOT NULL,
|
`password` char(255) NOT NULL,
|
||||||
PRIMARY KEY (`uid`)
|
PRIMARY KEY (`uid`)
|
||||||
) ENGINE=MyISAM DEFAULT CHARSET=utf8 ;
|
) ENGINE=MyISAM DEFAULT CHARSET=utf8 ;
|
||||||
|
|
||||||
|
-- --------------------------------------------------------
|
||||||
|
|
||||||
|
--
|
||||||
|
-- Table structure for table `site-health`
|
||||||
|
--
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS `site-health` (
|
||||||
|
`id` int(10) unsigned NOT NULL AUTO_INCREMENT,
|
||||||
|
`base_url` varchar(255) NOT NULL,
|
||||||
|
`health_score` int(11) NOT NULL DEFAULT 0,
|
||||||
|
`no_scrape_url` varchar(255) NULL DEFAULT NULL,
|
||||||
|
`dt_first_noticed` datetime NOT NULL,
|
||||||
|
`dt_last_seen` datetime NULL DEFAULT NULL,
|
||||||
|
`dt_last_probed` datetime NULL DEFAULT NULL,
|
||||||
|
`dt_last_heartbeat` datetime NULL DEFAULT NULL,
|
||||||
|
`name` varchar(255) NULL DEFAULT NULL,
|
||||||
|
`version` varchar(255) NULL DEFAULT NULL,
|
||||||
|
`plugins` text NULL DEFAULT NULL,
|
||||||
|
`reg_policy` char(32) NULL DEFAULT NULL,
|
||||||
|
`info` text NULL DEFAULT NULL,
|
||||||
|
`admin_name` varchar(255) NULL DEFAULT NULL,
|
||||||
|
`admin_profile` varchar(255) NULL DEFAULT NULL,
|
||||||
|
`ssl_state` bit(1) NULL DEFAULT NULL,
|
||||||
|
PRIMARY KEY (`id`),
|
||||||
|
KEY `base_url` (`base_url`),
|
||||||
|
KEY `health_score` (`health_score`),
|
||||||
|
KEY `dt_last_seen` (`dt_last_seen`)
|
||||||
|
) ENGINE=MyISAM DEFAULT CHARSET=utf8 ;
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS `site-probe` (
|
||||||
|
`id` int(10) unsigned NOT NULL AUTO_INCREMENT,
|
||||||
|
`site_health_id` int(10) unsigned NOT NULL,
|
||||||
|
`dt_performed` datetime NOT NULL,
|
||||||
|
`request_time` int(10) unsigned NOT NULL,
|
||||||
|
PRIMARY KEY (`id`),
|
||||||
|
KEY `site_health_id` (`site_health_id`),
|
||||||
|
KEY `dt_performed` (`dt_performed`)
|
||||||
|
) ENGINE=MyISAM DEFAULT CHARSET=utf8 ;
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS `site-scrape` (
|
||||||
|
`id` int(10) unsigned NOT NULL AUTO_INCREMENT,
|
||||||
|
`site_health_id` int(10) unsigned NOT NULL,
|
||||||
|
`dt_performed` datetime NOT NULL,
|
||||||
|
`request_time` int(10) unsigned NOT NULL,
|
||||||
|
`scrape_time` int(10) unsigned NOT NULL,
|
||||||
|
`photo_time` int(10) unsigned NOT NULL,
|
||||||
|
`total_time` int(10) unsigned NOT NULL,
|
||||||
|
PRIMARY KEY (`id`),
|
||||||
|
KEY `site_health_id` (`site_health_id`),
|
||||||
|
KEY `dt_performed` (`dt_performed`)
|
||||||
|
) ENGINE=MyISAM DEFAULT CHARSET=utf8 ;
|
||||||
|
|
||||||
|
|
|
@ -11,15 +11,41 @@ function attribute_contains($attr,$s) {
|
||||||
}}
|
}}
|
||||||
|
|
||||||
|
|
||||||
|
if(! function_exists('noscrape_dfrn')) {
/**
 * Retrieves profile parameters from a JSON ("noscrape") endpoint.
 *
 * Passes $url to fetch_url(), decodes the response as JSON into an
 * associative array, flattens the 'tags' entry into one space-separated
 * string and appends a '_timings' entry.
 *
 * @param string $url The endpoint to request.
 * @return array|false The decoded parameters, or false when the response is
 *                     empty or does not decode to a non-empty array.
 *                     '_timings' holds 'fetch' and 'scrape' durations in
 *                     milliseconds.
 */
function noscrape_dfrn($url) {

	$submit_noscrape_start = microtime(true);
	$data = fetch_url($url);
	$submit_noscrape_request_end = microtime(true);

	if(empty($data)) return false;

	$parms = json_decode($data, true);

	//Valid JSON can still be a scalar (e.g. "5"); count() on that is an error
	//on current PHP versions, so insist on an actual non-empty array.
	if(!is_array($parms) || !count($parms)) return false;

	//'tags' may be absent, a single string or a list. Always end up with a string.
	$tags = isset($parms['tags']) ? (array)$parms['tags'] : array();
	$parms['tags'] = implode(' ', $tags);

	$submit_noscrape_end = microtime(true);

	$parms['_timings'] = array(
		'fetch' => round(($submit_noscrape_request_end - $submit_noscrape_start) * 1000),
		'scrape' => round(($submit_noscrape_end - $submit_noscrape_request_end) * 1000)
	);

	return $parms;

}}
|
||||||
|
|
||||||
if(! function_exists('scrape_dfrn')) {
|
if(! function_exists('scrape_dfrn')) {
|
||||||
function scrape_dfrn($url, $max_nodes=5000) {
|
function scrape_dfrn($url, $max_nodes=3500) {
|
||||||
|
|
||||||
$minNodes = 100; //Lets do at least 100 nodes per type.
|
$minNodes = 100; //Lets do at least 100 nodes per type.
|
||||||
$timeout = 10; //Timeout will affect batch processing.
|
$timeout = 10; //Timeout will affect batch processing.
|
||||||
|
|
||||||
|
//Try and cheat our way into faster profiles.
|
||||||
|
if(strpos($url, 'tab=profile') === false){
|
||||||
|
$url .= (strpos($url, '?') > 0 ? '&' : '?').'tab=profile';
|
||||||
|
}
|
||||||
|
|
||||||
|
$scrape_start = microtime(true);
|
||||||
|
|
||||||
$ret = array();
|
$ret = array();
|
||||||
$s = fetch_url($url, $timeout);
|
$s = fetch_url($url, $timeout);
|
||||||
|
|
||||||
|
$scrape_fetch_end = microtime(true);
|
||||||
|
|
||||||
if(! $s)
|
if(! $s)
|
||||||
return $ret;
|
return $ret;
|
||||||
|
|
||||||
|
@ -28,30 +54,36 @@ function scrape_dfrn($url, $max_nodes=5000) {
|
||||||
if(! $dom)
|
if(! $dom)
|
||||||
return $ret;
|
return $ret;
|
||||||
|
|
||||||
|
|
||||||
$items = $dom->getElementsByTagName('meta');
|
$items = $dom->getElementsByTagName('meta');
|
||||||
|
|
||||||
// get DFRN link elements
|
// get DFRN link elements
|
||||||
$nodes_left = max(intval($max_nodes), $minNodes);
|
$nodes_left = max(intval($max_nodes), $minNodes);
|
||||||
|
$targets = array('hide', 'comm', 'tags');
|
||||||
|
$targets_left = count($targets);
|
||||||
foreach($items as $item) {
|
foreach($items as $item) {
|
||||||
$x = $item->getAttribute('name');
|
$x = $item->getAttribute('name');
|
||||||
if($x == 'dfrn-global-visibility') {
|
if($x == 'dfrn-global-visibility') {
|
||||||
$z = strtolower(trim($item->getAttribute('content')));
|
$z = strtolower(trim($item->getAttribute('content')));
|
||||||
if($z != 'true')
|
if($z != 'true')
|
||||||
$ret['hide'] = 1;
|
$ret['hide'] = 1;
|
||||||
|
if($z === 'false')
|
||||||
|
$ret['explicit-hide'] = 1;
|
||||||
|
$targets_left = pop_scrape_target($targets, 'hide');
|
||||||
}
|
}
|
||||||
if($x == 'friendika.community' || $x == 'friendica.community') {
|
if($x == 'friendika.community' || $x == 'friendica.community') {
|
||||||
$z = strtolower(trim($item->getAttribute('content')));
|
$z = strtolower(trim($item->getAttribute('content')));
|
||||||
if($z == 'true')
|
if($z == 'true')
|
||||||
$ret['comm'] = 1;
|
$ret['comm'] = 1;
|
||||||
|
$targets_left = pop_scrape_target($targets, 'comm');
|
||||||
}
|
}
|
||||||
if($x == 'keywords') {
|
if($x == 'keywords') {
|
||||||
$z = str_replace(',',' ',strtolower(trim($item->getAttribute('content'))));
|
$z = str_replace(',',' ',strtolower(trim($item->getAttribute('content'))));
|
||||||
if(strlen($z))
|
if(strlen($z))
|
||||||
$ret['tags'] = $z;
|
$ret['tags'] = $z;
|
||||||
|
$targets_left = pop_scrape_target($targets, 'tags');
|
||||||
}
|
}
|
||||||
$nodes_left--;
|
$nodes_left--;
|
||||||
if($nodes_left <= 0) break;
|
if($nodes_left <= 0 || $targets_left <= 0) break;
|
||||||
}
|
}
|
||||||
|
|
||||||
$items = $dom->getElementsByTagName('link');
|
$items = $dom->getElementsByTagName('link');
|
||||||
|
@ -71,37 +103,69 @@ function scrape_dfrn($url, $max_nodes=5000) {
|
||||||
|
|
||||||
$nodes_left = max(intval($max_nodes), $minNodes);
|
$nodes_left = max(intval($max_nodes), $minNodes);
|
||||||
$items = $dom->getElementsByTagName('*');
|
$items = $dom->getElementsByTagName('*');
|
||||||
|
$targets = array('fn', 'pdesc', 'photo', 'key', 'locality', 'region', 'postal-code', 'country-name', 'gender', 'marital');
|
||||||
|
$targets_left = count($targets);
|
||||||
foreach($items as $item) {
|
foreach($items as $item) {
|
||||||
if(attribute_contains($item->getAttribute('class'), 'vcard')) {
|
if(attribute_contains($item->getAttribute('class'), 'vcard')) {
|
||||||
$level2 = $item->getElementsByTagName('*');
|
$level2 = $item->getElementsByTagName('*');
|
||||||
foreach($level2 as $x) {
|
foreach($level2 as $x) {
|
||||||
if(attribute_contains($x->getAttribute('class'),'fn'))
|
if(attribute_contains($x->getAttribute('class'),'fn')){
|
||||||
$ret['fn'] = $x->textContent;
|
$ret['fn'] = $x->textContent;
|
||||||
if(attribute_contains($x->getAttribute('class'),'title'))
|
$targets_left = pop_scrape_target($targets, 'fn');
|
||||||
|
}
|
||||||
|
if(attribute_contains($x->getAttribute('class'),'title')){
|
||||||
$ret['pdesc'] = $x->textContent;
|
$ret['pdesc'] = $x->textContent;
|
||||||
if(attribute_contains($x->getAttribute('class'),'photo'))
|
$targets_left = pop_scrape_target($targets, 'pdesc');
|
||||||
|
}
|
||||||
|
if(attribute_contains($x->getAttribute('class'),'photo')){
|
||||||
$ret['photo'] = $x->getAttribute('src');
|
$ret['photo'] = $x->getAttribute('src');
|
||||||
if(attribute_contains($x->getAttribute('class'),'key'))
|
$targets_left = pop_scrape_target($targets, 'photo');
|
||||||
|
}
|
||||||
|
if(attribute_contains($x->getAttribute('class'),'key')){
|
||||||
$ret['key'] = $x->textContent;
|
$ret['key'] = $x->textContent;
|
||||||
if(attribute_contains($x->getAttribute('class'),'locality'))
|
$targets_left = pop_scrape_target($targets, 'key');
|
||||||
|
}
|
||||||
|
if(attribute_contains($x->getAttribute('class'),'locality')){
|
||||||
$ret['locality'] = $x->textContent;
|
$ret['locality'] = $x->textContent;
|
||||||
if(attribute_contains($x->getAttribute('class'),'region'))
|
$targets_left = pop_scrape_target($targets, 'locality');
|
||||||
|
}
|
||||||
|
if(attribute_contains($x->getAttribute('class'),'region')){
|
||||||
$ret['region'] = $x->textContent;
|
$ret['region'] = $x->textContent;
|
||||||
if(attribute_contains($x->getAttribute('class'),'postal-code'))
|
$targets_left = pop_scrape_target($targets, 'region');
|
||||||
|
}
|
||||||
|
if(attribute_contains($x->getAttribute('class'),'postal-code')){
|
||||||
$ret['postal-code'] = $x->textContent;
|
$ret['postal-code'] = $x->textContent;
|
||||||
if(attribute_contains($x->getAttribute('class'),'country-name'))
|
$targets_left = pop_scrape_target($targets, 'postal-code');
|
||||||
|
}
|
||||||
|
if(attribute_contains($x->getAttribute('class'),'country-name')){
|
||||||
$ret['country-name'] = $x->textContent;
|
$ret['country-name'] = $x->textContent;
|
||||||
if(attribute_contains($x->getAttribute('class'),'x-gender'))
|
$targets_left = pop_scrape_target($targets, 'country-name');
|
||||||
|
}
|
||||||
|
if(attribute_contains($x->getAttribute('class'),'x-gender')){
|
||||||
$ret['gender'] = $x->textContent;
|
$ret['gender'] = $x->textContent;
|
||||||
|
$targets_left = pop_scrape_target($targets, 'gender');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(attribute_contains($item->getAttribute('class'),'marital-text'))
|
}
|
||||||
|
if(attribute_contains($item->getAttribute('class'),'marital-text')){
|
||||||
$ret['marital'] = $item->textContent;
|
$ret['marital'] = $item->textContent;
|
||||||
$nodes_left--;
|
$targets_left = pop_scrape_target($targets, 'marital');
|
||||||
if($nodes_left <= 0) break;
|
|
||||||
}
|
}
|
||||||
|
$nodes_left--;
|
||||||
|
if($nodes_left <= 0 || $targets_left <= 0) break;
|
||||||
|
}
|
||||||
|
|
||||||
|
$scrape_end = microtime(true);
|
||||||
|
$fetch_time = round(($scrape_fetch_end - $scrape_start) * 1000);
|
||||||
|
$scrape_time = round(($scrape_end - $scrape_fetch_end) * 1000);
|
||||||
|
|
||||||
|
$ret['_timings'] = array(
|
||||||
|
'fetch' => $fetch_time,
|
||||||
|
'scrape' => $scrape_time
|
||||||
|
);
|
||||||
|
|
||||||
return $ret;
|
return $ret;
|
||||||
|
|
||||||
}}
|
}}
|
||||||
|
|
||||||
|
|
||||||
|
@ -121,3 +185,10 @@ function validate_dfrn($a) {
|
||||||
return $errors;
|
return $errors;
|
||||||
}}
|
}}
|
||||||
|
|
||||||
|
if(! function_exists('pop_scrape_target')) {
/**
 * Removes a named target from the list of values still being looked for.
 *
 * @param array  $array List of remaining target names; modified in place.
 * @param string $name  The target that has just been found.
 * @return int The number of targets left after the removal.
 */
function pop_scrape_target(&$array, $name) {

	$at = array_search($name, $array, true);

	//A target can be reported more than once (e.g. two elements with the same
	//class). array_search() then returns false, and unset($array[false]) would
	//remove index 0 - an unrelated target - so only unset on a real hit.
	if($at !== false){
		unset($array[$at]);
	}

	return count($array);

}}
|
||||||
|
|
||||||
|
|
15
include/g.line-min.js
vendored
Normal file
15
include/g.line-min.js
vendored
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
/*!
|
||||||
|
* g.Raphael 0.51 - Charting library, based on Raphaël
|
||||||
|
*
|
||||||
|
* Copyright (c) 2009-2012 Dmitry Baranovskiy (http://g.raphaeljs.com)
|
||||||
|
* Licensed under the MIT (http://www.opensource.org/licenses/mit-license.php) license.
|
||||||
|
*/
|
||||||
|
(function(){function S(h,o){for(var p=h.length/o,m=0,k=p,b=0,i=[];m<h.length;)k--,0>k?(b+=h[m]*(1+k),i.push(b/p),b=h[m++]*-k,k+=p):b+=1*h[m++];return i}function E(h,o,p,m,k,b,i,c){var F,f,u,w;function J(a){for(var s=[],e=0,G=b.length;e<G;e++)s=s.concat(b[e]);s.sort(function(a,e){return a-e});for(var c=[],g=[],e=0,G=s.length;e<G;e++)s[e]!=s[e-1]&&c.push(s[e])&&g.push(o+d+(s[e]-v)*A);for(var s=c,G=s.length,l=a||h.set(),e=0;e<G;e++){var c=g[e]-(g[e]-(g[e-1]||o))/2,f=((g[e+1]||o+m)-g[e])/2+(g[e]-(g[e-
|
||||||
|
1]||o))/2,j;a?j={}:l.push(j=h.rect(c-1,p,Math.max(f+1,1),k).attr({stroke:"none",fill:"#000",opacity:0}));j.values=[];j.symbols=h.set();j.y=[];j.x=g[e];j.axis=s[e];for(var f=0,r=i.length;f<r;f++)for(var c=b[f]||b[0],n=0,u=c.length;n<u;n++)c[n]==s[e]&&(j.values.push(i[f][n]),j.y.push(p+k-d-(i[f][n]-y)*H),j.symbols.push(q.symbols[f][n]));a&&a.call(j)}!a&&(t=l)}function N(a){for(var g=a||h.set(),e,c=0,j=i.length;c<j;c++)for(var f=0,m=i[c].length;f<m;f++){var l=o+d+((b[c]||b[0])[f]-v)*A,n=o+d+((b[c]||
|
||||||
|
b[0])[f?f-1:1]-v)*A,r=p+k-d-(i[c][f]-y)*H;a?e={}:g.push(e=h.circle(l,r,Math.abs(n-l)/2).attr({stroke:"#000",fill:"#000",opacity:1}));e.x=l;e.y=r;e.value=i[c][f];e.line=q.lines[c];e.shade=q.shades[c];e.symbol=q.symbols[c][f];e.symbols=q.symbols[c];e.axis=(b[c]||b[0])[f];a&&a.call(e)}!a&&(C=g)}c=c||{};h.raphael.is(b[0],"array")||(b=[b]);h.raphael.is(i[0],"array")||(i=[i]);for(var d=c.gutter||10,l=Math.max(b[0].length,i[0].length),O=c.symbol||"",P=c.colors||this.colors,t=null,C=null,q=h.set(),g=[],a=
|
||||||
|
0,n=i.length;a<n;a++)l=Math.max(l,i[a].length);for(var K=h.set(),a=0,n=i.length;a<n;a++)c.shade&&K.push(h.path().attr({stroke:"none",fill:P[a],opacity:c.nostroke?1:0.3})),i[a].length>m-2*d&&(i[a]=S(i[a],m-2*d),l=m-2*d),b[a]&&b[a].length>m-2*d&&(b[a]=S(b[a],m-2*d));var g=Array.prototype.concat.apply([],b),l=Array.prototype.concat.apply([],i),g=this.snapEnds(Math.min.apply(Math,g),Math.max.apply(Math,g),b[0].length-1),v=g.from,g=g.to,l=this.snapEnds(Math.min.apply(Math,l),Math.max.apply(Math,l),i[0].length-
|
||||||
|
1),y=l.from,a=l.to,A=(m-2*d)/(g-v||1),H=(k-2*d)/(a-y||1),l=h.set();c.axis&&(n=(c.axis+"").split(/[,\s]+/),+n[0]&&l.push(this.axis(o+d,p+d,m-2*d,v,g,c.axisxstep||Math.floor((m-2*d)/20),2,h)),+n[1]&&l.push(this.axis(o+m-d,p+k-d,k-2*d,y,a,c.axisystep||Math.floor((k-2*d)/20),3,h)),+n[2]&&l.push(this.axis(o+d,p+k-d,m-2*d,v,g,c.axisxstep||Math.floor((m-2*d)/20),0,h)),+n[3]&&l.push(this.axis(o+d,p+k-d,k-2*d,y,a,c.axisystep||Math.floor((k-2*d)/20),1,h)));for(var Q=h.set(),R=h.set(),E,a=0,n=i.length;a<n;a++){c.nostroke||
|
||||||
|
Q.push(E=h.path().attr({stroke:P[a],"stroke-width":c.width||2,"stroke-linejoin":"round","stroke-linecap":"round","stroke-dasharray":c.dash||""}));for(var D=Raphael.is(O,"array")?O[a]:O,I=h.set(),g=[],j=0,T=i[a].length;j<T;j++){var x=o+d+((b[a]||b[0])[j]-v)*A,z=p+k-d-(i[a][j]-y)*H;(Raphael.is(D,"array")?D[j]:D)&&I.push(h[Raphael.is(D,"array")?D[j]:D](x,z,3*(c.width||2)).attr({fill:P[a],stroke:"none"}));if(c.smooth){if(j&&j!=T-1){f=o+d+((b[a]||b[0])[j-1]-v)*A;var L=p+k-d-(i[a][j-1]-y)*H;u=x;w=z;var r=
|
||||||
|
o+d+((b[a]||b[0])[j+1]-v)*A,B=p+k-d-(i[a][j+1]-y)*H,M=(u-f)/2;F=(r-u)/2;f=Math.atan((u-f)/Math.abs(w-L));r=Math.atan((r-u)/Math.abs(w-B));f=L<w?Math.PI-f:f;r=B<w?Math.PI-r:r;B=Math.PI/2-(f+r)%(2*Math.PI)/2;L=M*Math.sin(B+f);f=M*Math.cos(B+f);M=F*Math.sin(B+r);r=F*Math.cos(B+r);F=u-L;f=w+f;u+=M;w+=r;g=g.concat([F,f,x,z,u,w])}j||(g=["M",x,z,"C",x,z])}else g=g.concat([j?"L":"M",x,z])}c.smooth&&(g=g.concat([x,z,x,z]));R.push(I);c.shade&&K[a].attr({path:g.concat(["L",x,p+k-d,"L",o+d+((b[a]||b[0])[0]-v)*
|
||||||
|
A,p+k-d,"z"]).join(",")});!c.nostroke&&E.attr({path:g.join(",")})}q.push(Q,K,R,l,t,C);q.lines=Q;q.shades=K;q.symbols=R;q.axis=l;q.hoverColumn=function(a,c){!t&&J();t.mouseover(a).mouseout(c);return this};q.clickColumn=function(a){!t&&J();t.click(a);return this};q.hrefColumn=function(a){var c=h.raphael.is(arguments[0],"array")?arguments[0]:arguments;if(!(arguments.length-1)&&typeof a=="object")for(var e in a)for(var b=0,d=t.length;b<d;b++)t[b].axis==e&&t[b].attr("href",a[e]);!t&&J();b=0;for(d=c.length;b<
|
||||||
|
d;b++)t[b]&&t[b].attr("href",c[b]);return this};q.hover=function(a,b){!C&&N();C.mouseover(a).mouseout(b);return this};q.click=function(a){!C&&N();C.click(a);return this};q.each=function(a){N(a);return this};q.eachColumn=function(a){J(a);return this};return q}var I=function(){};I.prototype=Raphael.g;E.prototype=new I;Raphael.fn.linechart=function(h,o,p,m,k,b,i){return new E(this,h,o,p,m,k,b,i)}})();
|
7
include/g.raphael.js
Normal file
7
include/g.raphael.js
Normal file
File diff suppressed because one or more lines are too long
11
include/raphael.js
Normal file
11
include/raphael.js
Normal file
File diff suppressed because one or more lines are too long
342
include/site-health.php
Normal file
342
include/site-health.php
Normal file
|
@ -0,0 +1,342 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
/*
	Takes note of the site that a submitted URL belongs to.
	A site that is not tracked yet gets a fresh row in the site-health table.
	With $check_health enabled, a probe (CURL request) may be run right away:
	for a new site, for a site scoring below -40, or for a site whose last probe
	is older than the configured minimum probe delay.
	Only enable it when there is enough execution time available.
	A submission is a good moment to probe, since the site is most likely online then.
	Returns the site-health row, or false when no row was loaded.
*/
if(! function_exists('notice_site')){
function notice_site($url, $check_health=false)
{

	global $a;

	//Reduce the URL to the site it lives on.
	$site = parse_site_from_url($url);

	//The same lookup is needed before and after inserting.
	$lookup = "SELECT * FROM `site-health` WHERE `base_url`= '%s' ORDER BY `id` ASC LIMIT 1";

	$rows = q($lookup, dbesc($site));
	$known = !empty($rows) && isset($rows[0]);

	//Unknown site: start tracking it.
	if(!$known){

		q(
			"INSERT INTO `site-health` (`base_url`, `dt_first_noticed`) VALUES ('%s', NOW())",
			dbesc($site)
		);

		//Without permission to probe there is nothing to hand back.
		if(!$check_health) return false;

		//Load the row that was just created and probe it immediately.
		$rows = q($lookup, dbesc($site));
		if(empty($rows) || !isset($rows[0])) return false;

		$entry = $rows[0];
		run_site_probe($rows[0]['id'], $entry);
		return $entry;

	}

	//Known site.
	$entry = $rows[0];

	if(!$check_health) return $entry;

	//Probe now when the site is in bad health (a submission suggests it may do
	//better at this moment), or when the last probe is older than the minimum
	//delay, so that not everything is left to the batches.
	$unhealthy = $entry['health_score'] < -40;
	if($unhealthy || strtotime($entry['dt_last_probed']) < time()-$a->config['site-health']['min_probe_delay']){
		run_site_probe($entry['id'], $entry);
	}

	return $entry;

}}
|
||||||
|
|
||||||
|
//Extracts the site (scheme, host and any non-default port) from a given URL.
//Parts that cannot be parsed are left empty, so an unusable URL yields '://'.
if(! function_exists('parse_site_from_url')){
function parse_site_from_url($url)
{

	//Currently a simple implementation, but may improve over time.
	#TODO: support subdirectories?
	$urlMeta = parse_url($url);

	//parse_url() gives false for seriously malformed URLs.
	if(!is_array($urlMeta)) $urlMeta = array();

	$scheme = isset($urlMeta['scheme']) ? $urlMeta['scheme'] : '';
	$host = isset($urlMeta['host']) ? $urlMeta['host'] : '';
	$site = $scheme.'://'.$host;

	//A site on a non-default port is a different origin; without the port
	//anything built from the result would address the wrong server.
	if(isset($urlMeta['port'])){
		$defaults = array('http' => 80, 'https' => 443);
		$lower = strtolower($scheme);
		$isDefault = isset($defaults[$lower]) && $defaults[$lower] == $urlMeta['port'];
		if(!$isDefault) $site .= ':'.$urlMeta['port'];
	}

	return $site;

}}
|
||||||
|
|
||||||
|
//Performs a probe of the site with the given site-health ID.
//You may need to notice the site first before you know its ID.
//
//The probe requests <base_url>/friendica/json, records the request time in the
//site-probe table when the response could be parsed, recalculates the health
//score and stores the reported site details.
//
// $id         The ID of the site-health row to probe.
// $entry_out  Receives the refreshed site-health row after the probe.
// Throws an \Exception when no site-health row exists for $id.
if(! function_exists('run_site_probe')){
function run_site_probe($id, &$entry_out)
{

	global $a;

	//Get the site information from the DB, based on the ID.
	$result = q(
		"SELECT * FROM `site-health` WHERE `id`= %u ORDER BY `id` ASC LIMIT 1",
		intval($id)
	);

	//Abort the probe if site is not known.
	if(!$result || !isset($result[0])){
		logger('Unknown site-health ID being probed: '.$id);
		throw new \Exception('Unknown site-health ID being probed: '.$id);
	}

	//Shortcut.
	$entry = $result[0];
	$base_url = $entry['base_url'];
	$probe_location = $base_url.'/friendica/json';

	//Prepare the CURL call.
	$handle = curl_init();
	$options = array(

		//Timeouts
		CURLOPT_TIMEOUT => max($a->config['site-health']['probe_timeout'], 1), //Minimum of 1 second timeout.
		CURLOPT_CONNECTTIMEOUT => 1,

		//Redirecting
		CURLOPT_FOLLOWLOCATION => true,
		CURLOPT_MAXREDIRS => 8,

		//SSL
		CURLOPT_SSL_VERIFYPEER => true,
		// CURLOPT_VERBOSE => true,
		// CURLOPT_CERTINFO => true,
		CURLOPT_SSL_VERIFYHOST => 2,
		CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,

		//Basic request
		CURLOPT_USERAGENT => 'friendica-directory-probe-0.1',
		CURLOPT_RETURNTRANSFER => true,
		CURLOPT_URL => $probe_location

	);
	curl_setopt_array($handle, $options);

	//Probe the site.
	$probe_start = microtime(true);
	$probe_data = curl_exec($handle);
	$probe_end = microtime(true);

	//Check for SSL problems.
	$sslcert_issues = in_array(curl_errno($handle), array(
		60, //Could not authenticate certificate with known CA's
		83 //Issuer check failed
	));

	//When it's the certificate that doesn't work.
	if($sslcert_issues){

		//Probe again, without strict SSL.
		$options[CURLOPT_SSL_VERIFYPEER] = false;

		//Replace the handler.
		curl_close($handle);
		$handle = curl_init();
		curl_setopt_array($handle, $options);

		//Probe.
		$probe_start = microtime(true);
		$probe_data = curl_exec($handle);
		$probe_end = microtime(true);

	}

	//Gather more meta.
	$time = round(($probe_end - $probe_start) * 1000);

	//Done with CURL now.
	curl_close($handle);

	#TODO: if the site redirects elsewhere, notice this site and record an issue.

	//json_decode() signals failure by returning null rather than throwing.
	//Anything that is not a JSON object cannot be a site description.
	$data = is_string($probe_data) ? json_decode($probe_data) : null;
	$parse_failed = !is_object($data);

	//Reads a scalar property as a string; missing or non-scalar values become ''.
	//The response comes from a remote server, so no field can be relied upon.
	$field = function($source, $key){
		if(is_object($source) && isset($source->$key) && is_scalar($source->$key)){
			return (string)$source->$key;
		}
		return '';
	};

	$parsedDataQuery = '';
	if(!$parse_failed){

		//Record the probe speed in a probes table.
		q(
			"INSERT INTO `site-probe` (`site_health_id`, `dt_performed`, `request_time`)".
			"VALUES (%u, NOW(), %u)",
			$entry['id'],
			$time
		);

		$admin = isset($data->admin) ? $data->admin : null;
		$plugins = (isset($data->plugins) && is_array($data->plugins)) ? $data->plugins : array();

		//Update any health calculations or otherwise processed data.
		$parsedDataQuery = sprintf(
			"`dt_last_seen` = NOW(),
			`name` = '%s',
			`version` = '%s',
			`plugins` = '%s',
			`reg_policy` = '%s',
			`info` = '%s',
			`admin_name` = '%s',
			`admin_profile` = '%s',
			",
			dbesc($field($data, 'site_name')),
			dbesc($field($data, 'version')),
			dbesc(implode("\r\n", $plugins)),
			dbesc($field($data, 'register_policy')),
			dbesc($field($data, 'info')),
			dbesc($field($admin, 'name')),
			dbesc($field($admin, 'profile'))
		);

		//Did we use HTTPS?
		$urlMeta = parse_url($probe_location);
		if(isset($urlMeta['scheme']) && $urlMeta['scheme'] == 'https'){
			$parsedDataQuery .= sprintf("`ssl_state` = b'%u',", $sslcert_issues ? '0' : '1');
		} else {
			$parsedDataQuery .= "`ssl_state` = NULL,";
		}

		//Do we have a no scrape supporting node? :D
		if(isset($data->no_scrape_url)){
			$parsedDataQuery .= sprintf("`no_scrape_url` = '%s',", dbesc($field($data, 'no_scrape_url')));
		}

		//This fragment becomes part of the statement handed to q() together with
		//printf-style placeholders (%d). NOTE(review): q() presumably formats the
		//statement printf-style, so a literal % in remote text must be doubled or
		//it would be read as a placeholder - confirm against q().
		$parsedDataQuery = str_replace('%', '%%', $parsedDataQuery);

	}

	//Get the new health.
	$version = $parse_failed ? '' : $field($data, 'version');
	$health = health_score_after_probe($entry['health_score'], !$parse_failed, $time, $version, $sslcert_issues);

	//Update the health.
	q("UPDATE `site-health` SET
		`health_score` = '%d',
		$parsedDataQuery
		`dt_last_probed` = NOW()
		WHERE `id` = %d LIMIT 1",
		$health,
		$entry['id']
	);

	//Get the site information from the DB, based on the ID.
	$result = q(
		"SELECT * FROM `site-health` WHERE `id`= %u ORDER BY `id` ASC LIMIT 1",
		$entry['id']
	);

	//Return updated entry data.
	if($result && isset($result[0])){
		$entry_out = $result[0];
	}

}}
|
||||||
|
|
||||||
|
//Determines the new health score after a probe has been executed.
//
// $current        The health score before this probe.
// $probe_success  Whether the probe produced a usable response.
// $time           Request time in milliseconds; speed is ignored when not positive.
// $version        Version string reported by the site (e.g. "3.2.1"); ignored when empty.
// $ssl_issues     True when the site's certificate could not be verified.
//
// Returns the new score, always within -100 .. +100.
if(! function_exists('health_score_after_probe')){
function health_score_after_probe($current, $probe_success, $time=null, $version=null, $ssl_issues=null)
{

	//Probe failed, costs you 30 points.
	if(!$probe_success) return max($current-30, -100);

	//A good probe gives you 20 points.
	$current += 20;

	//Speed scoring.
	if(intval($time) > 0){

		//Penalty / bonus points.
		if ($time > 800) $current -= 10; //Bad speed.
		elseif ($time > 400) $current -= 5; //Still not good.
		elseif ($time > 250) $current += 0; //This is normal.
		elseif ($time > 120) $current += 5; //Good speed.
		else $current += 10; //Excellent speed.

		//Cap for bad speeds.
		if ($time > 800) $current = min(40, $current);
		elseif ($time > 400) $current = min(60, $current);

	}

	//Version check.
	if(!empty($version)){

		$versionParts = explode('.', $version);
		$major = intval($versionParts[0]);
		//A version without a minor part counts as x.0.
		$minor = isset($versionParts[1]) ? intval($versionParts[1]) : 0;

		//Older than 3.x.x?
		//Your score can not go above 30 health.
		if($major < 3){
			$current = min($current, 30);
		}

		//Older than 3.2.x?
		//Only 3.0 and 3.1 qualify; later major versions are not outdated.
		elseif($major == 3 && $minor < 2){
			$current -= 5; //Somewhat outdated.
		}

		#TODO: See if this needs to be more dynamic.
		#TODO: See if this is a proper indicator of health.

	}

	//SSL problems? That's a big deal.
	if($ssl_issues === true){
		$current -= 10;
	}

	//Don't go beyond +100 or -100.
	return max(min(100, $current), -100);

}}
|
||||||
|
|
||||||
|
//Maps a health score onto a label. Used for classes and such.
//Each label covers the scores below its limit; anything from 80 up is 'perfect'.
if(! function_exists('health_score_to_name')){
function health_score_to_name($score)
{

	//Exclusive upper limits, in ascending order.
	$limits = array(
		'very-bad' => -50,
		'bad' => 0,
		'neutral' => 30,
		'ok' => 50,
		'good' => 80
	);

	foreach($limits as $label => $limit){
		if($score < $limit) return $label;
	}

	return 'perfect';

}}
|
|
@ -1,21 +1,27 @@
|
||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once('datetime.php');
|
require_once('datetime.php');
|
||||||
|
require_once('site-health.php');
|
||||||
|
|
||||||
function run_submit(&$a, $url) {
|
function run_submit($url) {
|
||||||
|
|
||||||
|
global $a;
|
||||||
|
|
||||||
if(! strlen($url))
|
if(! strlen($url))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
logger('Updating: ' . $url);
|
logger('Updating: ' . $url);
|
||||||
|
|
||||||
|
//First run a notice script for the site it is hosted on.
|
||||||
|
$site_health = notice_site($url, true);
|
||||||
|
|
||||||
|
$submit_start = microtime(true);
|
||||||
|
|
||||||
$nurl = str_replace(array('https:','//www.'), array('http:','//'), $url);
|
$nurl = str_replace(array('https:','//www.'), array('http:','//'), $url);
|
||||||
|
|
||||||
$profile_exists = false;
|
$profile_exists = false;
|
||||||
|
|
||||||
$r = q("SELECT * FROM `profile` WHERE ( `homepage` = '%s' OR `nurl` = '%s' ) LIMIT 1",
|
$r = q("SELECT * FROM `profile` WHERE ( `homepage` = '%s' OR `nurl` = '%s' )",
|
||||||
dbesc($url),
|
dbesc($url),
|
||||||
dbesc($nurl)
|
dbesc($nurl)
|
||||||
);
|
);
|
||||||
|
@ -25,14 +31,63 @@ function run_submit(&$a, $url) {
|
||||||
$profile_id = $r[0]['id'];
|
$profile_id = $r[0]['id'];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Remove duplicates.
|
||||||
|
if(count($r) > 1){
|
||||||
|
for($i=1; $i<count($r); $i++){
|
||||||
|
logger('Removed duplicate profile '.intval($r[$i]['id']));
|
||||||
|
q("DELETE FROM `photo` WHERE `profile-id` = %d LIMIT 1",
|
||||||
|
intval($r[$i]['id'])
|
||||||
|
);
|
||||||
|
q("DELETE FROM `profile` WHERE `id` = %d LIMIT 1",
|
||||||
|
intval($r[$i]['id'])
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
require_once('Scrape.php');
|
require_once('Scrape.php');
|
||||||
|
|
||||||
|
//Skip the scrape? :D
|
||||||
|
$noscrape = $site_health && $site_health['no_scrape_url'];
|
||||||
|
if($noscrape){
|
||||||
|
|
||||||
|
//Find out who to look up.
|
||||||
|
$which = str_replace($site_health['base_url'], '', $url);
|
||||||
|
$noscrape = preg_match('~/profile/([^/]+)~', $which, $matches) === 1;
|
||||||
|
|
||||||
|
//If that did not fail...
|
||||||
|
if($noscrape){
|
||||||
|
$parms = noscrape_dfrn($site_health['no_scrape_url'].'/'.$matches[1]);
|
||||||
|
$noscrape = !!$parms; //If the result was false, do a scrape after all.
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!$noscrape){
|
||||||
$parms = scrape_dfrn($url);
|
$parms = scrape_dfrn($url);
|
||||||
|
}
|
||||||
|
|
||||||
// logger('dir_submit: ' . print_r($parms,true));
|
//Empty result is due to an offline site.
|
||||||
|
if(!count($parms)){
|
||||||
|
|
||||||
if((! count($parms)) || (validate_dfrn($parms))) {
|
//For large sites this could lower the health too quickly, so don't track health.
|
||||||
|
//But for sites that are already in bad status. Do a cleanup now.
|
||||||
|
if($profile_exists && $site_health['health_score'] < $a->config['maintenance']['remove_profile_health_threshold']){
|
||||||
|
logger('Nuked bad health record.');
|
||||||
|
nuke_record($url);
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//We don't care about valid dfrn if the user indicates to be hidden.
|
||||||
|
elseif($parms['explicit-hide'] && $profile_exists) {
|
||||||
|
logger('User opted out of the directory.');
|
||||||
|
nuke_record($url);
|
||||||
|
}
|
||||||
|
|
||||||
|
//This is most likely a problem with the site configuration. Ignore.
|
||||||
|
elseif(validate_dfrn($parms)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -147,10 +202,14 @@ function run_submit(&$a, $url) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$submit_photo_start = microtime(true);
|
||||||
|
|
||||||
require_once("Photo.php");
|
require_once("Photo.php");
|
||||||
|
|
||||||
$photo_failure = false;
|
$photo_failure = false;
|
||||||
|
|
||||||
|
$status = false;
|
||||||
|
|
||||||
$img_str = fetch_url($photo,true);
|
$img_str = fetch_url($photo,true);
|
||||||
$img = new Photo($img_str);
|
$img = new Photo($img_str);
|
||||||
if($img) {
|
if($img) {
|
||||||
|
@ -162,12 +221,28 @@ function run_submit(&$a, $url) {
|
||||||
dbesc($a->get_baseurl() . '/photo/' . $profile_id . '.jpg'),
|
dbesc($a->get_baseurl() . '/photo/' . $profile_id . '.jpg'),
|
||||||
intval($profile_id)
|
intval($profile_id)
|
||||||
);
|
);
|
||||||
|
$status = true;
|
||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
nuke_record($url);
|
nuke_record($url);
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
return true;
|
|
||||||
|
$submit_end = microtime(true);
|
||||||
|
$photo_time = round(($submit_end - $submit_photo_start) * 1000);
|
||||||
|
$time = round(($submit_end - $submit_start) * 1000);
|
||||||
|
|
||||||
|
//Record the scrape speed in a scrapes table.
|
||||||
|
if($site_health && $status) q(
|
||||||
|
"INSERT INTO `site-scrape` (`site_health_id`, `dt_performed`, `request_time`, `scrape_time`, `photo_time`, `total_time`)".
|
||||||
|
"VALUES (%u, NOW(), %u, %u, %u, %u)",
|
||||||
|
$site_health['id'],
|
||||||
|
$parms['_timings']['fetch'],
|
||||||
|
$parms['_timings']['scrape'],
|
||||||
|
$photo_time,
|
||||||
|
$time
|
||||||
|
);
|
||||||
|
|
||||||
|
return $status;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
314
mod/health.php
Normal file
314
mod/health.php
Normal file
|
@ -0,0 +1,314 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
require_once('include/site-health.php');
|
||||||
|
|
||||||
|
/**
 * Entry point of the health module: picks the view to render.
 *
 * Dispatch order:
 *  1. a second path argument ($a->argv[1]) -> detail page for that site id,
 *  2. a non-empty string `s` query parameter -> search results,
 *  3. otherwise -> the summary of healthy public sites.
 *
 * @param object $a Application object; $a->argc and $a->argv are read here.
 * @return mixed Whatever the selected view function returns.
 */
function health_content(&$a) {

	//A second path argument selects a single site.
	if($a->argc > 1){
		return health_details($a, $a->argv[1]);
	}

	//Only search for a non-empty string; empty() avoids an undefined-index
	//notice and is_string() keeps array input (?s[]=x) away from strlen().
	if(!empty($_GET['s']) && is_string($_GET['s'])){
		return health_search($a, $_GET['s']);
	}

	return health_summary($a);

}
|
||||||
|
|
||||||
|
/**
 * Render the search page: sites whose base URL contains the search term.
 *
 * At most 100 sites are listed, ordered by health score (best first), each
 * with the number of profiles whose homepage starts with the site's base URL.
 *
 * Values are HTML-escaped here because the list itself is handed to
 * replace_macros() as ready-made HTML, so the template layer cannot be
 * relied on for escaping.
 *
 * @param object $a      Application object (not used by this view).
 * @param string $search Search term taken from the request.
 * @return mixed Result of replace_macros() on view/health_search.tpl.
 */
function health_search(&$a, $search)
{

	if(strlen($search) < 3){
		$result = 'Please use at least 3 characters in your search';
	}

	else {

		$sites = q("SELECT * FROM `site-health` WHERE `base_url` LIKE '%%%s%%' ORDER BY `health_score` DESC LIMIT 100", dbesc($search));

		//is_array() guards against a failed query before counting/iterating.
		if(is_array($sites) && count($sites)){
			$result = '';
			foreach($sites as $site){

				//Get user count. Uses its own variable so the list being
				//iterated is never overwritten.
				$site['users'] = 0;
				$counted = q(
					"SELECT COUNT(*) as `users` FROM `profile`
					WHERE `homepage` LIKE '%s%%'",
					dbesc($site['base_url'])
				);
				if(is_array($counted) && count($counted)){
					$site['users'] = $counted[0]['users'];
				}

				$result .=
					'<span class="health '.health_score_to_name($site['health_score']).'">♥</span> '.
					'<a href="/health/'.intval($site['id']).'">' . htmlspecialchars($site['base_url'], ENT_QUOTES, 'UTF-8') . '</a> '.
					'(' . intval($site['users']) . ')'.
					"<br />\r\n";
			}

		}

		else {
			$result = 'No results';
		}

	}

	//Plain assignment: there is no earlier template text to append to.
	$tpl = file_get_contents('view/health_search.tpl');
	return replace_macros($tpl, array(
		//The term is echoed back into an input's value attribute.
		'$searched' => htmlspecialchars($search, ENT_QUOTES, 'UTF-8'),
		'$result' => $result
	));

}
|
||||||
|
|
||||||
|
/**
 * Render the summary page: healthy sites with open registration.
 *
 * Sites are ranked by ((users / 100) + 10) * health_score, where users is the
 * number of profiles whose homepage maps to the site via parse_site_from_url().
 * Sites with a health score of 20 or lower, or with fewer than 10 users, are
 * left out of the list.
 *
 * @param object $a Application object (not used by this view).
 * @return mixed Result of replace_macros() on view/health_summary.tpl.
 */
function health_summary(&$a){

	$sites = array();

	//Find the user count per site.
	$r = q("SELECT `homepage` FROM `profile` WHERE 1");
	if(is_array($r)) {
		foreach($r as $rr) {
			$site = parse_site_from_url($rr['homepage']);
			if($site) {
				if(!isset($sites[$site]))
					$sites[$site] = 0;
				$sites[$site] ++;
			}
		}
	}

	//See if we have a health for them.
	$sites_with_health = array();
	$site_healths = array();

	$r = q("SELECT * FROM `site-health` WHERE `reg_policy`='REGISTER_OPEN'");
	if(is_array($r)) {
		foreach($r as $rr) {
			$base_url = $rr['base_url'];
			//A site without any known profile counts as zero users.
			$users = isset($sites[$base_url]) ? $sites[$base_url] : 0;
			$sites_with_health[$base_url] = (($users / 100) + 10) * intval($rr['health_score']);
			$site_healths[$base_url] = $rr;
		}
	}

	arsort($sites_with_health);
	$total = 0;
	$public_sites = '';
	foreach($sites_with_health as $base_url => $weight)
	{

		//Skip unhealthy sites. The list is ordered by the user-weighted score,
		//not by health, so healthier sites can still follow an unhealthy one;
		//stopping the loop here would drop them.
		$site = $site_healths[$base_url];
		if($site['health_score'] <= 20) continue;

		//Skip small sites.
		$users = isset($sites[$base_url]) ? $sites[$base_url] : 0;
		if($users < 10) continue;

		$public_sites .=
			'<span class="health '.health_score_to_name($site['health_score']).'">♥</span> '.
			'<a href="/health/'.intval($site['id']).'">' . htmlspecialchars($base_url, ENT_QUOTES, 'UTF-8') . '</a> '.
			'(' . $users . ')'.
			"<br />\r\n";
		$total ++;

	}
	$public_sites .= "<br>Total: $total<br />\r\n";

	//Plain assignment: there is no earlier template text to append to.
	$tpl = file_get_contents('view/health_summary.tpl');
	return replace_macros($tpl, array(
		//No version list is built in this view; the key is kept with an
		//empty value so a template that mentions it still gets it replaced.
		'$versions' => '',
		'$public_sites' => $public_sites
	));

}
|
||||||
|
|
||||||
|
/**
 * Render the detail page of a single site.
 *
 * Loads the `site-health` row for the given id, counts the profiles whose
 * homepage starts with the site's base URL, averages the recorded probe and
 * scrape timings, and appends chart scripts to $a->page['htmlhead'] when
 * timing rows exist.
 *
 * Text fields of the site row are HTML-escaped before being handed to the
 * template; $ssl_state is built here and intentionally contains a link.
 *
 * @param object $a  Application object; $a->error, $a->page['htmlhead'] and
 *                   $a->get_baseurl() are used.
 * @param mixed  $id Site id from the request path; cast with intval().
 * @return mixed Result of replace_macros() on view/health_details.tpl, or
 *               nothing after setting $a->error to 404 for an unknown id.
 */
function health_details($a, $id)
{

	//The overall health status.
	$r = q(
		"SELECT * FROM `site-health`
		WHERE `id`=%u",
		intval($id)
	);
	if(!is_array($r) || !count($r)){
		$a->error = 404;
		return;
	}

	$site = $r[0];

	//Figure out SSL state. parse_url() may fail or omit parts.
	$urlMeta = parse_url($site['base_url']);
	$scheme = (is_array($urlMeta) && isset($urlMeta['scheme'])) ? $urlMeta['scheme'] : '';
	$host = (is_array($urlMeta) && isset($urlMeta['host'])) ? $urlMeta['host'] : '';
	if($scheme !== 'https'){
		$ssl_state = 'No';
	}else{
		$raw = $site['ssl_state'];
		//NOTE(review): the column is bit(1); depending on the database driver
		//it may arrive as '0'/'1' or as a raw byte - confirm. Both forms are
		//accepted; anything else is reported as not yet verified so that
		//$ssl_state is always defined.
		if($raw === '1' || $raw === 1 || $raw === "\x01"){
			$ssl_state = '√ Yes, verified.';
		}
		elseif($raw === '0' || $raw === 0 || $raw === "\x00"){
			$ssl_state = 'Certificate error!';
		}
		else{
			$ssl_state = 'Yes, but not yet verified.';
		}
		$ssl_state .= ' <a href="https://www.ssllabs.com/ssltest/analyze.html?d='.htmlspecialchars(urlencode($host), ENT_QUOTES, 'UTF-8').'" target="_blank">Detailed test</a>';
	}

	//Get user count.
	$site['users'] = 0;
	$r = q(
		"SELECT COUNT(*) as `users` FROM `profile`
		WHERE `homepage` LIKE '%s%%'",
		dbesc($site['base_url'])
	);
	if(is_array($r) && count($r)){
		$site['users'] = $r[0]['users'];
	}

	//Defaults, so the template values exist even when a query fails.
	$site['avg_probe_time'] = 0;
	$site['avg_profile_time'] = 0;
	$site['avg_scrape_time'] = 0;
	$site['avg_photo_time'] = 0;
	$site['avg_submit_time'] = 0;

	//Get avg probe speed.
	$r = q(
		"SELECT AVG(`request_time`) as `avg_probe_time` FROM `site-probe`
		WHERE `site_health_id` = %u",
		intval($site['id'])
	);
	if(is_array($r) && count($r)){
		$site['avg_probe_time'] = $r[0]['avg_probe_time'];
	}

	//Get scraping / submit speeds.
	$r = q(
		"SELECT
			AVG(`request_time`) as `avg_profile_time`,
			AVG(`scrape_time`) as `avg_scrape_time`,
			AVG(`photo_time`) as `avg_photo_time`,
			AVG(`total_time`) as `avg_submit_time`
		FROM `site-scrape`
		WHERE `site_health_id` = %u",
		intval($site['id'])
	);
	if(is_array($r) && count($r)){
		$site['avg_profile_time'] = $r[0]['avg_profile_time'];
		$site['avg_scrape_time'] = $r[0]['avg_scrape_time'];
		$site['avg_photo_time'] = $r[0]['avg_photo_time'];
		$site['avg_submit_time'] = $r[0]['avg_submit_time'];
	}

	//The chart libraries are shared by both charts; emit them only once.
	$chart_libs_included = false;
	$chart_libs =
		'<script type="text/javascript" src="'.$a->get_baseurl().'/include/raphael.js"></script>'.PHP_EOL.
		'<script type="text/javascript" src="'.$a->get_baseurl().'/include/g.raphael.js"></script>'.PHP_EOL.
		'<script type="text/javascript" src="'.$a->get_baseurl().'/include/g.line-min.js"></script>';

	//Get probe speed data.
	$r = q(
		"SELECT `request_time`, `dt_performed` FROM `site-probe`
		WHERE `site_health_id` = %u",
		intval($site['id'])
	);
	$speeds = array();
	$times = array();
	$mintime = time();
	if(is_array($r)){
		foreach($r as $row){
			$time = strtotime($row['dt_performed']);
			//Rows without a usable timestamp cannot be placed on the x axis.
			if($time === false) continue;
			//Only numeric values may end up in the generated JavaScript array.
			$speeds[] = is_numeric($row['request_time']) ? $row['request_time'] : 0;
			$times[] = $time;
			if($mintime > $time) $mintime = $time;
		}
	}
	if(count($times)){
		//The x axis is whole days since the oldest measurement.
		for($i=0; $i < count($times); $i++){
			$times[$i] -= $mintime;
			$times[$i] = floor($times[$i] / (24*3600));
		}
		if(!$chart_libs_included){
			$a->page['htmlhead'] .= $chart_libs;
			$chart_libs_included = true;
		}
		$a->page['htmlhead'] .=
			'<script type="text/javascript">
			jQuery(function($){

				var r = Raphael("probe-chart")
					, x = ['.implode(',', $times).']
					, y = ['.implode(',', $speeds).']
				;

				r.linechart(30, 15, 400, 300, x, [y], {symbol:"circle", axis:"0 0 0 1", shade:true, width:1.5}).hoverColumn(function () {
					this.tags = r.set();
					for (var i = 0, ii = this.y.length; i < ii; i++) {
						this.tags.push(r.popup(this.x, this.y[i], this.values[i]+"ms", "right", 5).insertBefore(this).attr([{ fill: "#eee" }, { fill: this.symbols[i].attr("fill") }]));
					}
				}, function () {
					this.tags && this.tags.remove();
				});

			});
			</script>';
	}

	//Get scrape speed data.
	$r = q(
		"SELECT AVG(`total_time`) as `avg_time`, date(`dt_performed`) as `date` FROM `site-scrape`
		WHERE `site_health_id` = %u GROUP BY `date`",
		intval($site['id'])
	);
	$speeds = array();
	$times = array();
	$mintime = time();
	if(is_array($r)){
		foreach($r as $row){
			$time = strtotime($row['date']);
			if($time === false) continue;
			$speeds[] = is_numeric($row['avg_time']) ? $row['avg_time'] : 0;
			$times[] = $time;
			if($mintime > $time) $mintime = $time;
		}
	}
	if(count($times)){
		for($i=0; $i < count($times); $i++){
			$times[$i] -= $mintime;
			$times[$i] = floor($times[$i] / (24*3600));
		}
		if(!$chart_libs_included){
			$a->page['htmlhead'] .= $chart_libs;
			$chart_libs_included = true;
		}
		$a->page['htmlhead'] .=
			'<script type="text/javascript">
			jQuery(function($){

				var r = Raphael("scrape-chart")
					, x = ['.implode(',', $times).']
					, y = ['.implode(',', $speeds).']
				;

				r.linechart(30, 15, 400, 300, x, [y], {shade:true, axis:"0 0 0 1", width:1}).hoverColumn(function () {
					this.tags = r.set();
					for (var i = 0, ii = this.y.length; i < ii; i++) {
						this.tags.push(r.popup(this.x, this.y[i], Math.round(this.values[i])+"ms", "right", 5).insertBefore(this));
					}
				}, function () {
					this.tags && this.tags.remove();
				});

			});
			</script>';
	}

	//Plain assignment: there is no earlier template text to append to.
	$tpl = file_get_contents('view/health_details.tpl');
	return replace_macros($tpl, array(
		'$name' => htmlspecialchars((string)$site['name'], ENT_QUOTES, 'UTF-8'),
		'$base_url' => htmlspecialchars((string)$site['base_url'], ENT_QUOTES, 'UTF-8'),
		'$health_score' => intval($site['health_score']),
		'$health_name' => health_score_to_name($site['health_score']),
		'$no_scrape_support' => !empty($site['no_scrape_url']) ? '√ Supports noscrape' : '',
		'$dt_first_noticed' => htmlspecialchars((string)$site['dt_first_noticed'], ENT_QUOTES, 'UTF-8'),
		'$dt_last_seen' => htmlspecialchars((string)$site['dt_last_seen'], ENT_QUOTES, 'UTF-8'),
		'$version' => htmlspecialchars((string)$site['version'], ENT_QUOTES, 'UTF-8'),
		'$plugins' => htmlspecialchars((string)$site['plugins'], ENT_QUOTES, 'UTF-8'),
		'$reg_policy' => htmlspecialchars((string)$site['reg_policy'], ENT_QUOTES, 'UTF-8'),
		'$info' => htmlspecialchars((string)$site['info'], ENT_QUOTES, 'UTF-8'),
		'$admin_name' => htmlspecialchars((string)$site['admin_name'], ENT_QUOTES, 'UTF-8'),
		'$admin_profile' => htmlspecialchars((string)$site['admin_profile'], ENT_QUOTES, 'UTF-8'),
		'$users' => intval($site['users']),
		'$ssl_state' => $ssl_state,
		'$avg_probe_time' => round(floatval($site['avg_probe_time'])),
		'$avg_profile_time' => round(floatval($site['avg_profile_time'])),
		'$avg_scrape_time' => round(floatval($site['avg_scrape_time'])),
		'$avg_photo_time' => round(floatval($site['avg_photo_time'])),
		'$avg_submit_time' => round(floatval($site['avg_submit_time']))
	));

}
|
|
@ -5,30 +5,7 @@ require_once('include/submit.php');
|
||||||
function submit_content(&$a) {
|
function submit_content(&$a) {
|
||||||
|
|
||||||
$url = hex2bin(notags(trim($_GET['url'])));
|
$url = hex2bin(notags(trim($_GET['url'])));
|
||||||
run_submit($a, $url);
|
run_submit($url);
|
||||||
exit;
|
exit;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
function nuke_record($url) {
|
|
||||||
|
|
||||||
$nurl = str_replace(array('https:','//www.'), array('http:','//'), $url);
|
|
||||||
|
|
||||||
$r = q("SELECT `id` FROM `profile` WHERE ( `homepage` = '%s' OR `nurl` = '%s' ) ",
|
|
||||||
dbesc($url),
|
|
||||||
dbesc($nurl)
|
|
||||||
);
|
|
||||||
|
|
||||||
if(count($r)) {
|
|
||||||
foreach($r as $rr) {
|
|
||||||
q("DELETE FROM `photo` WHERE `profile-id` = %d LIMIT 1",
|
|
||||||
intval($rr['id'])
|
|
||||||
);
|
|
||||||
q("DELETE FROM `profile` WHERE `id` = %d LIMIT 1",
|
|
||||||
intval($rr['id'])
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
24
mod/versions.php
Normal file
24
mod/versions.php
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
/**
 * Render the list of versions reported by tracked sites.
 *
 * One line per distinct non-NULL `version` in `site-health`, ordered by
 * version descending, with the number of sites in parentheses. Versions used
 * by 10 or more sites are wrapped in <b>.
 *
 * @param object $a Application object (not used by this view).
 * @return mixed Result of replace_macros() on view/versions.tpl.
 */
function versions_content(&$a){

	//Grab a version list.
	$versions = '';
	$r = q("SELECT count(*) as `count`, `version` FROM `site-health` WHERE `version` IS NOT NULL GROUP BY `version` ORDER BY `version` DESC");

	//is_array() guards against a failed query before counting/iterating.
	if(is_array($r) && count($r)){
		foreach($r as $version){
			$popular = $version['count'] >= 10;
			$versions .=
				($popular ? '<b>' : '').
				//The version string is stored data about other sites; escape it for HTML.
				htmlspecialchars($version['version'], ENT_QUOTES, 'UTF-8') . ' ('.intval($version['count']).')<br>'."\r\n".
				($popular ? '</b>' : '');
		}
	}

	//Plain assignment: there is no earlier template text to append to.
	$tpl = file_get_contents('view/versions.tpl');
	return replace_macros($tpl, array(
		'$versions' => $versions
	));

}
|
33
view/health_details.tpl
Normal file
33
view/health_details.tpl
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
<h1>
|
||||||
|
<span class="health $health_name">♥</span> $name<br>
|
||||||
|
<sup><a href="$base_url">$base_url</a></sup>
|
||||||
|
</h1>
|
||||||
|
|
||||||
|
<p><a href="/health">« Back to index</a></p>
|
||||||
|
|
||||||
|
<div class="meta">
|
||||||
|
<h3>General information</h3>
|
||||||
|
<div class="users">$users users</div>
|
||||||
|
<div class="version">Friendica $version</div>
|
||||||
|
<div class="first_noticed">First noticed: $dt_first_noticed</div>
|
||||||
|
<div class="last_seen">Last update: $dt_last_seen</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="security">
|
||||||
|
<h3>Security</h3>
|
||||||
|
<div class="ssl_state">HTTPS: $ssl_state</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="performance">
|
||||||
|
<h3>Performance information</h3>
|
||||||
|
<div style="float:left;margin-right:30px;padding-top:20px;">
|
||||||
|
<div class="probe_speed">Probe speed: $avg_probe_timems</div>
|
||||||
|
<div class="photo_speed">Photo speed: $avg_photo_timems</div>
|
||||||
|
<div class="profile_speed">Profile speed: $avg_profile_timems</div>
|
||||||
|
<div class="scrape_speed">Scrape speed: $avg_scrape_timems</div>
|
||||||
|
<div class="submit_speed">Submit speed: $avg_submit_timems</div>
|
||||||
|
<span class="health perfect">$no_scrape_support</span>
|
||||||
|
</div>
|
||||||
|
<div id="probe-chart" class="speed-chart">Probe speed</div>
|
||||||
|
<div id="scrape-chart" class="speed-chart">Submit speed</div>
|
||||||
|
</div>
|
10
view/health_search.tpl
Normal file
10
view/health_search.tpl
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
<h1>Search your site</h1>
|
||||||
|
<form method="GET">
|
||||||
|
<label>Your site URL:</label>
|
||||||
|
<input type="text" name="s" placeholder="example.com" value="$searched" />
|
||||||
|
<input type="submit" value="Search" />
|
||||||
|
</form>
|
||||||
|
|
||||||
|
<p><a href="/health">« Back to index</a></p>
|
||||||
|
<h1>Search results</h1>
|
||||||
|
<div class="result-sites">$result</div>
|
14
view/health_summary.tpl
Normal file
14
view/health_summary.tpl
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
<h1>Search your site</h1>
|
||||||
|
<form method="GET">
|
||||||
|
<label>Your site URL:</label>
|
||||||
|
<input type="text" name="s" placeholder="example.com" />
|
||||||
|
<input type="submit" value="Search" />
|
||||||
|
</form>
|
||||||
|
|
||||||
|
<h1>Healthy public sites</h1>
|
||||||
|
<p>
|
||||||
|
These are sites with their registration set to an open policy and a decent health score.<br>
|
||||||
|
Not on the list: try searching.<br>
|
||||||
|
More info: ask <a href="https://fc.oscp.info/profile/beanow">Beanow</a>.
|
||||||
|
</p>
|
||||||
|
<div class="public-sites">$public_sites</div>
|
|
@ -1587,3 +1587,17 @@ input#dfrn-url {
|
||||||
margin-left: 20px;
|
margin-left: 20px;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.health{font-size:120%; vertical-align:bottom;}
|
||||||
|
.health.very-bad{ color:#f99; }
|
||||||
|
.health.bad{ color:#f1ba7a; }
|
||||||
|
.health.neutral{ color:#e6e782; }
|
||||||
|
.health.ok{ color:#bef273; }
|
||||||
|
.health.good{ color:#7cf273; }
|
||||||
|
.health.perfect{ color:#33ff80; }
|
||||||
|
|
||||||
|
.speed-chart{
|
||||||
|
float:left;
|
||||||
|
width:480px;
|
||||||
|
height:320px;
|
||||||
|
text-align:center;
|
||||||
|
}
|
2
view/versions.tpl
Normal file
2
view/versions.tpl
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
<h1>Used versions</h1>
|
||||||
|
<div class="version-list">$versions</div>
|
Loading…
Reference in a new issue