Merge remote-tracking branch 'upstream/develop' into more-temp-stuff

This commit is contained in:
Michael 2017-02-19 18:46:20 +00:00
commit 564bc22900
4 changed files with 90 additions and 65 deletions

View File

@ -64,7 +64,7 @@ you wish to communicate with the Diaspora network.
password, database name). password, database name).
- Friendica needs the permission to create and delete fields and tables in its own database. - Friendica needs the permission to create and delete fields and tables in its own database.
- Please check the additional notes if running on MySQL 5.7.17 or newer
4. If you know in advance that it will be impossible for the web server to 4. If you know in advance that it will be impossible for the web server to
write or create files in your web directory, create an empty file called write or create files in your web directory, create an empty file called
@ -291,3 +291,21 @@ This is obvious as soon as you notice that the friendica-cron uses proc_open to
execute php-scripts that also use proc_open, but it took me quite some time to execute php-scripts that also use proc_open, but it took me quite some time to
find that out. I hope this saves some time for other people using suhosin with find that out. I hope this saves some time for other people using suhosin with
function blacklists. function blacklists.
########################################################################
Unable to create all MySQL tables on MySQL 5.7.17 or newer
#######################################################################
If the setup fails to create all the database tables and/or manual
creation from the command line fails with this error:
ERROR 1067 (42000) at line XX: Invalid default value for 'created'
You need to adjust your my.cnf and add the following setting under
the [mysqld] section:
sql_mode = '';
After that, restart MySQL and try again.

View File

@ -21,13 +21,13 @@ class ParseUrl {
/** /**
* @brief Search for cached embeddable data of an url otherwise fetch it * @brief Search for cached embeddable data of an url otherwise fetch it
* *
* @param type $url The url of the page which should be scraped * @param type $url The url of the page which should be scraped
* @param type $no_guessing If true the parse doesn't search for * @param type $no_guessing If true the parse doesn't search for
* preview pictures * preview pictures
* @param type $do_oembed The false option is used by the function fetch_oembed() * @param type $do_oembed The false option is used by the function fetch_oembed()
* to avoid endless loops * to avoid endless loops
* *
* @return array which contains needed data for embedding * @return array which contains needed data for embedding
* string 'url' => The url of the parsed page * string 'url' => The url of the parsed page
* string 'type' => Content type * string 'type' => Content type
@ -37,9 +37,9 @@ class ParseUrl {
* if $no_guessing = false * if $no_guessing = false
* array'images' = Array of preview pictures * array'images' = Array of preview pictures
* string 'keywords' => The tags which belong to the content * string 'keywords' => The tags which belong to the content
* *
* @see ParseUrl::getSiteinfo() for more information about scraping * @see ParseUrl::getSiteinfo() for more information about scraping
* embeddable content * embeddable content
*/ */
public static function getSiteinfoCached($url, $no_guessing = false, $do_oembed = true) { public static function getSiteinfoCached($url, $no_guessing = false, $do_oembed = true) {
@ -71,21 +71,21 @@ class ParseUrl {
} }
/** /**
* @brief Parse a page for embeddable content information * @brief Parse a page for embeddable content information
* *
* This method parses the url for meta data which can be used to embed * This method parses the url for meta data which can be used to embed
* the content. If available it prioritizes Open Graph meta tags. * the content. If available it prioritizes Open Graph meta tags.
* If this is not available it uses the twitter cards meta tags. * If this is not available it uses the twitter cards meta tags.
* As a fallback it uses standard html elements with meta information * As a fallback it uses standard html elements with meta information
* like \<title\>Awesome Title\</title\> or * like \<title\>Awesome Title\</title\> or
* \<meta name="description" content="An awesome description"\> * \<meta name="description" content="An awesome description"\>
* *
* @param type $url The url of the page which should be scraped * @param type $url The url of the page which should be scraped
* @param type $no_guessing If true the parse doesn't search for * @param type $no_guessing If true the parse doesn't search for
* preview pictures * preview pictures
* @param type $do_oembed The false option is used by the function fetch_oembed() * @param type $do_oembed The false option is used by the function fetch_oembed()
* to avoid endless loops * to avoid endless loops
* @param type $count Internal counter to avoid endless loops * @param type $count Internal counter to avoid endless loops
* *
* @return array which contains needed data for embedding * @return array which contains needed data for embedding
* string 'url' => The url of the parsed page * string 'url' => The url of the parsed page
* string 'type' => Content type * string 'type' => Content type
@ -95,13 +95,13 @@ class ParseUrl {
* if $no_guessing = false * if $no_guessing = false
* array'images' = Array of preview pictures * array'images' = Array of preview pictures
* string 'keywords' => The tags which belong to the content * string 'keywords' => The tags which belong to the content
* *
* @todo https://developers.google.com/+/plugins/snippet/ * @todo https://developers.google.com/+/plugins/snippet/
* @verbatim * @verbatim
* <meta itemprop="name" content="Awesome title"> * <meta itemprop="name" content="Awesome title">
* <meta itemprop="description" content="An awesome description"> * <meta itemprop="description" content="An awesome description">
* <meta itemprop="image" content="http://maple.libertreeproject.org/images/tree-icon.png"> * <meta itemprop="image" content="http://maple.libertreeproject.org/images/tree-icon.png">
* *
* <body itemscope itemtype="http://schema.org/Product"> * <body itemscope itemtype="http://schema.org/Product">
* <h1 itemprop="name">Shiny Trinket</h1> * <h1 itemprop="name">Shiny Trinket</h1>
* <img itemprop="image" src="{image-url}" /> * <img itemprop="image" src="{image-url}" />
@ -130,7 +130,7 @@ class ParseUrl {
$url = trim($url, "'"); $url = trim($url, "'");
$url = trim($url, '"'); $url = trim($url, '"');
$url = original_url($url); $url = strip_tracking_query_params($url);
$siteinfo["url"] = $url; $siteinfo["url"] = $url;
$siteinfo["type"] = "link"; $siteinfo["type"] = "link";
@ -142,8 +142,7 @@ class ParseUrl {
$ch = curl_init(); $ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url); curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, 1); curl_setopt($ch, CURLOPT_HEADER, 1);
curl_setopt($ch, CURLOPT_NOBODY, 1); curl_setopt($ch, CURLOPT_TIMEOUT, 10);
curl_setopt($ch, CURLOPT_TIMEOUT, 3);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent()); curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent());
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (($check_cert) ? true : false)); curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (($check_cert) ? true : false));
@ -151,7 +150,6 @@ class ParseUrl {
$header = curl_exec($ch); $header = curl_exec($ch);
$curl_info = @curl_getinfo($ch); $curl_info = @curl_getinfo($ch);
$http_code = $curl_info["http_code"];
curl_close($ch); curl_close($ch);
$a->save_timestamp($stamp1, "network"); $a->save_timestamp($stamp1, "network");
@ -197,26 +195,6 @@ class ParseUrl {
} }
} }
$stamp1 = microtime(true);
// Now fetch the body as well
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, 1);
curl_setopt($ch, CURLOPT_NOBODY, 0);
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent());
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (($check_cert) ? true : false));
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, (($check_cert) ? 2 : false));
$header = curl_exec($ch);
$curl_info = @curl_getinfo($ch);
$http_code = $curl_info["http_code"];
curl_close($ch);
$a->save_timestamp($stamp1, "network");
// Fetch the first mentioned charset. Can be in body or header // Fetch the first mentioned charset. Can be in body or header
$charset = ""; $charset = "";
if (preg_match('/charset=(.*?)['."'".'"\s\n]/', $header, $matches)) { if (preg_match('/charset=(.*?)['."'".'"\s\n]/', $header, $matches)) {
@ -476,7 +454,7 @@ class ParseUrl {
/** /**
* @brief Convert tags from CSV to an array * @brief Convert tags from CSV to an array
* *
* @param string $string Tags * @param string $string Tags
* @return array with formatted Hashtags * @return array with formatted Hashtags
*/ */
@ -492,9 +470,9 @@ class ParseUrl {
/** /**
* @brief Add a hash sign to a string * @brief Add a hash sign to a string
* *
* This method is used as callback function * This method is used as callback function
* *
* @param string $tag The pure tag name * @param string $tag The pure tag name
* @param int $k Counter for internal use * @param int $k Counter for internal use
*/ */
@ -504,16 +482,16 @@ class ParseUrl {
/** /**
* @brief Add a scheme to an url * @brief Add a scheme to an url
* *
* The src attribute of some html elements (e.g. images) * The src attribute of some html elements (e.g. images)
* can miss the scheme so we need to add the correct * can miss the scheme so we need to add the correct
* scheme * scheme
* *
* @param string $url The url which possibly does have * @param string $url The url which possibly does have
* a missing scheme (a link to an image) * a missing scheme (a link to an image)
* @param string $scheme The url with a correct scheme * @param string $scheme The url with a correct scheme
* (e.g. the url from the webpage which does contain the image) * (e.g. the url from the webpage which does contain the image)
* *
* @return string The url with a scheme * @return string The url with a scheme
*/ */
private static function completeUrl($url, $scheme) { private static function completeUrl($url, $scheme) {

View File

@ -11,11 +11,11 @@ require_once('include/Probe.php');
/** /**
* @brief Curl wrapper * @brief Curl wrapper
* *
* If binary flag is true, return binary results. * If binary flag is true, return binary results.
* Set the cookiejar argument to a string (e.g. "/tmp/friendica-cookies.txt") * Set the cookiejar argument to a string (e.g. "/tmp/friendica-cookies.txt")
* to preserve cookies from one request to the next. * to preserve cookies from one request to the next.
* *
* @param string $url URL to fetch * @param string $url URL to fetch
* @param boolean $binary default false * @param boolean $binary default false
* TRUE if asked to return binary results (file download) * TRUE if asked to return binary results (file download)
@ -23,7 +23,7 @@ require_once('include/Probe.php');
* @param integer $timeout Timeout in seconds, default system config value or 60 seconds * @param integer $timeout Timeout in seconds, default system config value or 60 seconds
* @param string $accept_content supply Accept: header with 'accept_content' as the value * @param string $accept_content supply Accept: header with 'accept_content' as the value
* @param string $cookiejar Path to cookie jar file * @param string $cookiejar Path to cookie jar file
* *
* @return string The fetched content * @return string The fetched content
*/ */
function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_content=Null, $cookiejar = 0) { function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_content=Null, $cookiejar = 0) {
@ -218,13 +218,13 @@ function z_fetch_url($url,$binary = false, &$redirects = 0, $opts=array()) {
/** /**
* @brief Post request to $url * @brief Post request to $url
* *
* @param string $url URL to post * @param string $url URL to post
* @param mixed $params * @param mixed $params
* @param string $headers HTTP headers * @param string $headers HTTP headers
* @param integer $redirects Recursion counter for internal use - default = 0 * @param integer $redirects Recursion counter for internal use - default = 0
* @param integer $timeout The timeout in seconds, default system config value or 60 seconds * @param integer $timeout The timeout in seconds, default system config value or 60 seconds
* *
* @return string The content * @return string The content
*/ */
function post_url($url,$params, $headers = null, &$redirects = 0, $timeout = 0) { function post_url($url,$params, $headers = null, &$redirects = 0, $timeout = 0) {
@ -385,10 +385,10 @@ function http_status_exit($val, $description = array()) {
/** /**
* @brief Check URL to see if it's real * @brief Check URL to see if it's real
* *
* Take a URL from the wild, prepend http:// if necessary * Take a URL from the wild, prepend http:// if necessary
* and check DNS to see if it's real (or check if is a valid IP address) * and check DNS to see if it's real (or check if is a valid IP address)
* *
* @param string $url The URL to be validated * @param string $url The URL to be validated
* @return boolean True if it's a valid URL, false if something is wrong with it * @return boolean True if it's a valid URL, false if something is wrong with it
*/ */
@ -415,7 +415,7 @@ function validate_url(&$url) {
/** /**
* @brief Checks that email is an actual resolvable internet address * @brief Checks that email is an actual resolvable internet address
* *
* @param string $addr The email address * @param string $addr The email address
* @return boolean True if it's a valid email address, false if it's not * @return boolean True if it's a valid email address, false if it's not
*/ */
@ -436,10 +436,10 @@ function validate_email($addr) {
/** /**
* @brief Check if URL is allowed * @brief Check if URL is allowed
* *
* Check $url against our list of allowed sites, * Check $url against our list of allowed sites,
* wildcards allowed. If allowed_sites is unset return true; * wildcards allowed. If allowed_sites is unset return true;
* *
* @param string $url URL which get tested * @param string $url URL which get tested
* @return boolean True if url is allowed otherwise return false * @return boolean True if url is allowed otherwise return false
*/ */
@ -481,9 +481,9 @@ function allowed_url($url) {
/** /**
* @brief Check if email address is allowed to register here. * @brief Check if email address is allowed to register here.
* *
* Compare against our list (wildcards allowed). * Compare against our list (wildcards allowed).
* *
* @param type $email * @param type $email
* @return boolean False if not allowed, true if allowed * @return boolean False if not allowed, true if allowed
* or if allowed list is not configured * or if allowed list is not configured
@ -670,42 +670,71 @@ function fix_contact_ssl_policy(&$contact,$new_policy) {
} }
} }
function original_url($url, $depth=1, $fetchbody = false) { /**
* @brief Remove Google Analytics and other tracking platforms params from URL
$a = get_app(); *
* @param string $url Any user-submitted URL that may contain tracking params
// Remove Analytics Data from Google and other tracking platforms * @return string The same URL stripped of tracking parameters
*/
function strip_tracking_query_params($url)
{
$urldata = parse_url($url); $urldata = parse_url($url);
if (is_string($urldata["query"])) { if (is_string($urldata["query"])) {
$query = $urldata["query"]; $query = $urldata["query"];
parse_str($query, $querydata); parse_str($query, $querydata);
if (is_array($querydata)) if (is_array($querydata)) {
foreach ($querydata AS $param=>$value) foreach ($querydata AS $param => $value) {
if (in_array($param, array("utm_source", "utm_medium", "utm_term", "utm_content", "utm_campaign", if (in_array($param, array("utm_source", "utm_medium", "utm_term", "utm_content", "utm_campaign",
"wt_mc", "pk_campaign", "pk_kwd", "mc_cid", "mc_eid", "wt_mc", "pk_campaign", "pk_kwd", "mc_cid", "mc_eid",
"fb_action_ids", "fb_action_types", "fb_ref", "fb_action_ids", "fb_action_types", "fb_ref",
"awesm", "wtrid", "awesm", "wtrid",
"woo_campaign", "woo_source", "woo_medium", "woo_content", "woo_term"))) { "woo_campaign", "woo_source", "woo_medium", "woo_content", "woo_term"))) {
$pair = $param."=".urlencode($value); $pair = $param . "=" . urlencode($value);
$url = str_replace($pair, "", $url); $url = str_replace($pair, "", $url);
// Second try: if the url isn't encoded completely // Second try: if the url isn't encoded completely
$pair = $param."=".str_replace(" ", "+", $value); $pair = $param . "=" . str_replace(" ", "+", $value);
$url = str_replace($pair, "", $url); $url = str_replace($pair, "", $url);
// Third try: Maybe the url isn't encoded at all // Third try: Maybe the url isn't encoded at all
$pair = $param."=".$value; $pair = $param . "=" . $value;
$url = str_replace($pair, "", $url); $url = str_replace($pair, "", $url);
$url = str_replace(array("?&", "&&"), array("?", ""), $url); $url = str_replace(array("?&", "&&"), array("?", ""), $url);
} }
}
}
if (substr($url, -1, 1) == "?") if (substr($url, -1, 1) == "?") {
$url = substr($url, 0, -1); $url = substr($url, 0, -1);
}
} }
return $url;
}
/**
* @brief Returns the original URL of the provided URL
*
* This function strips tracking query params and follows redirections, either
* through HTTP code or meta refresh tags. Stops after 10 redirections.
*
* @todo Remove the $fetchbody parameter that generates an extraneous HEAD request
*
* @see ParseUrl::getSiteinfo
*
* @param string $url A user-submitted URL
* @param int $depth The current redirection recursion level (internal)
* @param bool $fetchbody Whether to fetch the body or not after the HEAD requests
* @return string A canonical URL
*/
function original_url($url, $depth = 1, $fetchbody = false) {
$a = get_app();
$url = strip_tracking_query_params($url);
if ($depth > 10) if ($depth > 10)
return($url); return($url);
@ -821,7 +850,7 @@ function short_link($url) {
/** /**
* @brief Encodes content to json * @brief Encodes content to json
* *
* This function encodes an array to json format * This function encodes an array to json format
* and adds an application/json HTTP header to the output. * and adds an application/json HTTP header to the output.
* After finishing the process is getting killed. * After finishing the process is getting killed.

View File

@ -51,7 +51,7 @@ if [ $( lsb_release -c | cut -f 2 ) == "trusty" ]; then
sudo service apache2 restart sudo service apache2 restart
elif [ $( lsb_release -c | cut -f 2 ) == "xenial" ]; then elif [ $( lsb_release -c | cut -f 2 ) == "xenial" ]; then
echo ">>> Installing PHP7" echo ">>> Installing PHP7"
sudo apt-get install -y php libapache2-mod-php php-cli php-mysql php-curl php-gd sudo apt-get install -y php libapache2-mod-php php-cli php-mysql php-curl php-gd php-mbstring
sudo apt-get install -y imagemagick sudo apt-get install -y imagemagick
sudo apt-get install -y php-imagick sudo apt-get install -y php-imagick
sudo systemctl restart apache2 sudo systemctl restart apache2