From 99cfae63d7303365d0c4b2256c7194edb590fb7f Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Fri, 17 Feb 2017 22:32:33 -0500 Subject: [PATCH 1/7] Clean trailing whitespaces --- include/ParseUrl.php | 30 +++++++++++++++--------------- include/network.php | 26 +++++++++++++------------- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/include/ParseUrl.php b/include/ParseUrl.php index 549d705da..b85175a25 100644 --- a/include/ParseUrl.php +++ b/include/ParseUrl.php @@ -21,13 +21,13 @@ class ParseUrl { /** * @brief Search for chached embeddable data of an url otherwise fetch it - * + * * @param type $url The url of the page which should be scraped * @param type $no_guessing If true the parse doens't search for * preview pictures * @param type $do_oembed The false option is used by the function fetch_oembed() * to avoid endless loops - * + * * @return array which contains needed data for embedding * string 'url' => The url of the parsed page * string 'type' => Content type @@ -37,9 +37,9 @@ class ParseUrl { * if $no_geuessing = false * array'images' = Array of preview pictures * string 'keywords' => The tags which belong to the content - * + * * @see ParseUrl::getSiteinfo() for more information about scraping - * embeddable content + * embeddable content */ public static function getSiteinfoCached($url, $no_guessing = false, $do_oembed = true) { @@ -71,21 +71,21 @@ class ParseUrl { } /** * @brief Parse a page for embeddable content information - * + * * This method parses to url for meta data which can be used to embed * the content. If available it prioritizes Open Graph meta tags. * If this is not available it uses the twitter cards meta tags. 
* As fallback it uses standard html elements with meta informations * like \Awesome Title\ or * \ - * + * * @param type $url The url of the page which should be scraped * @param type $no_guessing If true the parse doens't search for * preview pictures * @param type $do_oembed The false option is used by the function fetch_oembed() * to avoid endless loops * @param type $count Internal counter to avoid endless loops - * + * * @return array which contains needed data for embedding * string 'url' => The url of the parsed page * string 'type' => Content type @@ -95,13 +95,13 @@ class ParseUrl { * if $no_geuessing = false * array'images' = Array of preview pictures * string 'keywords' => The tags which belong to the content - * + * * @todo https://developers.google.com/+/plugins/snippet/ * @verbatim * * * - * + * * *

Shiny Trinket

* @@ -476,7 +476,7 @@ class ParseUrl { /** * @brief Convert tags from CSV to an array - * + * * @param string $string Tags * @return array with formatted Hashtags */ @@ -492,9 +492,9 @@ class ParseUrl { /** * @brief Add a hasht sign to a string - * + * * This method is used as callback function - * + * * @param string $tag The pure tag name * @param int $k Counter for internal use */ @@ -504,16 +504,16 @@ class ParseUrl { /** * @brief Add a scheme to an url - * + * * The src attribute of some html elements (e.g. images) * can miss the scheme so we need to add the correct * scheme - * + * * @param string $url The url which possibly does have * a missing scheme (a link to an image) * @param string $scheme The url with a correct scheme * (e.g. the url from the webpage which does contain the image) - * + * * @return string The url with a scheme */ private static function completeUrl($url, $scheme) { diff --git a/include/network.php b/include/network.php index b7839de21..7385c94a0 100644 --- a/include/network.php +++ b/include/network.php @@ -11,11 +11,11 @@ require_once('include/Probe.php'); /** * @brief Curl wrapper - * + * * If binary flag is true, return binary results. * Set the cookiejar argument to a string (e.g. "/tmp/friendica-cookies.txt") * to preserve cookies from one request to the next. 
- * + * * @param string $url URL to fetch * @param boolean $binary default false * TRUE if asked to return binary results (file download) @@ -23,7 +23,7 @@ require_once('include/Probe.php'); * @param integer $timeout Timeout in seconds, default system config value or 60 seconds * @param string $accept_content supply Accept: header with 'accept_content' as the value * @param string $cookiejar Path to cookie jar file - * + * * @return string The fetched content */ function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_content=Null, $cookiejar = 0) { @@ -218,13 +218,13 @@ function z_fetch_url($url,$binary = false, &$redirects = 0, $opts=array()) { /** * @brief Post request to $url - * + * * @param string $url URL to post * @param mixed $params * @param string $headers HTTP headers * @param integer $redirects Recursion counter for internal use - default = 0 * @param integer $timeout The timeout in seconds, default system config value or 60 seconds - * + * * @return string The content */ function post_url($url,$params, $headers = null, &$redirects = 0, $timeout = 0) { @@ -385,10 +385,10 @@ function http_status_exit($val, $description = array()) { /** * @brief Check URL to se if ts's real - * + * * Take a URL from the wild, prepend http:// if necessary * and check DNS to see if it's real (or check if is a valid IP address) - * + * * @param string $url The URL to be validated * @return boolean True if it's a valid URL, fals if something wrong with it */ @@ -415,7 +415,7 @@ function validate_url(&$url) { /** * @brief Checks that email is an actual resolvable internet address - * + * * @param string $addr The email address * @return boolean True if it's a valid email address, false if it's not */ @@ -436,10 +436,10 @@ function validate_email($addr) { /** * @brief Check if URL is allowed - * + * * Check $url against our list of allowed sites, * wildcards allowed. 
If allowed_sites is unset return true; - * + * * @param string $url URL which get tested * @return boolean True if url is allowed otherwise return false */ @@ -481,9 +481,9 @@ function allowed_url($url) { /** * @brief Check if email address is allowed to register here. - * + * * Compare against our list (wildcards allowed). - * + * * @param type $email * @return boolean False if not allowed, true if allowed * or if allowed list is not configured @@ -821,7 +821,7 @@ function short_link($url) { /** * @brief Encodes content to json - * + * * This function encodes an array to json format * and adds an application/json HTTP header to the output. * After finishing the process is getting killed. From 432587464ce16dff513ed2de340fa3437dbe45aa Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Fri, 17 Feb 2017 22:35:46 -0500 Subject: [PATCH 2/7] Fix Diaspora link attachment probe - Move analytics param stripping out of original_url - Remove HEAD curl request in ParseUrl::getSiteInfo - Replace original_url with strip_tracking_query_params in ParseUrl::getSiteInfo to prevent massive curl fest in border cases --- include/ParseUrl.php | 26 ++--------------------- include/network.php | 49 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 40 insertions(+), 35 deletions(-) diff --git a/include/ParseUrl.php b/include/ParseUrl.php index b85175a25..3a2fe9d53 100644 --- a/include/ParseUrl.php +++ b/include/ParseUrl.php @@ -130,7 +130,7 @@ class ParseUrl { $url = trim($url, "'"); $url = trim($url, '"'); - $url = original_url($url); + $url = strip_tracking_query_params($url); $siteinfo["url"] = $url; $siteinfo["type"] = "link"; @@ -142,8 +142,7 @@ class ParseUrl { $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $url); curl_setopt($ch, CURLOPT_HEADER, 1); - curl_setopt($ch, CURLOPT_NOBODY, 1); - curl_setopt($ch, CURLOPT_TIMEOUT, 3); + curl_setopt($ch, CURLOPT_TIMEOUT, 10); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); curl_setopt($ch, CURLOPT_USERAGENT, 
$a->get_useragent()); curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (($check_cert) ? true : false)); @@ -151,7 +150,6 @@ class ParseUrl { $header = curl_exec($ch); $curl_info = @curl_getinfo($ch); - $http_code = $curl_info["http_code"]; curl_close($ch); $a->save_timestamp($stamp1, "network"); @@ -197,26 +195,6 @@ class ParseUrl { } } - $stamp1 = microtime(true); - - // Now fetch the body as well - $ch = curl_init(); - curl_setopt($ch, CURLOPT_URL, $url); - curl_setopt($ch, CURLOPT_HEADER, 1); - curl_setopt($ch, CURLOPT_NOBODY, 0); - curl_setopt($ch, CURLOPT_TIMEOUT, 10); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent()); - curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (($check_cert) ? true : false)); - curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, (($check_cert) ? 2 : false)); - - $header = curl_exec($ch); - $curl_info = @curl_getinfo($ch); - $http_code = $curl_info["http_code"]; - curl_close($ch); - - $a->save_timestamp($stamp1, "network"); - // Fetch the first mentioned charset. 
Can be in body or header $charset = ""; if (preg_match('/charset=(.*?)['."'".'"\s\n]/', $header, $matches)) { diff --git a/include/network.php b/include/network.php index 7385c94a0..ecbe0e5c6 100644 --- a/include/network.php +++ b/include/network.php @@ -670,42 +670,69 @@ function fix_contact_ssl_policy(&$contact,$new_policy) { } } -function original_url($url, $depth=1, $fetchbody = false) { - - $a = get_app(); - - // Remove Analytics Data from Google and other tracking platforms +/** + * @brief Remove Google Analytics and other tracking platforms params from URL + * + * @param string $url + * @return string + */ +function strip_tracking_query_params($url) +{ $urldata = parse_url($url); if (is_string($urldata["query"])) { $query = $urldata["query"]; parse_str($query, $querydata); - if (is_array($querydata)) - foreach ($querydata AS $param=>$value) + if (is_array($querydata)) { + foreach ($querydata AS $param => $value) { if (in_array($param, array("utm_source", "utm_medium", "utm_term", "utm_content", "utm_campaign", "wt_mc", "pk_campaign", "pk_kwd", "mc_cid", "mc_eid", "fb_action_ids", "fb_action_types", "fb_ref", "awesm", "wtrid", "woo_campaign", "woo_source", "woo_medium", "woo_content", "woo_term"))) { - $pair = $param."=".urlencode($value); + $pair = $param . "=" . urlencode($value); $url = str_replace($pair, "", $url); // Second try: if the url isn't encoded completely - $pair = $param."=".str_replace(" ", "+", $value); + $pair = $param . "=" . str_replace(" ", "+", $value); $url = str_replace($pair, "", $url); // Third try: Maybey the url isn't encoded at all - $pair = $param."=".$value; + $pair = $param . "=" . 
$value; $url = str_replace($pair, "", $url); $url = str_replace(array("?&", "&&"), array("?", ""), $url); } + } + } - if (substr($url, -1, 1) == "?") + if (substr($url, -1, 1) == "?") { $url = substr($url, 0, -1); + } } + return $url; +} + +/** + * @brief Returns the original URL of the provided URL + * + * This function strips tracking query params and follows redirections, either + * through HTTP code or meta refresh tags. Stops after 10 redirections. + * + * @see ParseUrl::getSiteinfo + * + * @param string $url + * @param int $depth + * @param bool $fetchbody + * @return string + */ +function original_url($url, $depth = 1, $fetchbody = false) { + $a = get_app(); + + $url = strip_tracking_query_params($url); + if ($depth > 10) return($url); From 2c959b925d20898579d4562d8d42669682de0957 Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Sat, 18 Feb 2017 20:35:31 -0500 Subject: [PATCH 3/7] Add param documentation --- include/network.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/network.php b/include/network.php index ecbe0e5c6..727d5e57e 100644 --- a/include/network.php +++ b/include/network.php @@ -673,8 +673,8 @@ function fix_contact_ssl_policy(&$contact,$new_policy) { /** * @brief Remove Google Analytics and other tracking platforms params from URL * - * @param string $url - * @return string + * @param string $url Any user-submitted URL that may contain tracking params + * @return string The same URL stripped of tracking parameters */ function strip_tracking_query_params($url) { From 58a444b4305bd8a1c2ab7ee172c3972e091cc964 Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Sat, 18 Feb 2017 20:39:16 -0500 Subject: [PATCH 4/7] Add original_url() param documentation --- include/network.php | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/include/network.php b/include/network.php index 727d5e57e..03f65a519 100644 --- a/include/network.php +++ b/include/network.php @@ -721,12 +721,14 @@ function 
strip_tracking_query_params($url) * This function strips tracking query params and follows redirections, either * through HTTP code or meta refresh tags. Stops after 10 redirections. * + * @todo Remove the $fetchbody parameter that generates an extraneous HEAD request + * * @see ParseUrl::getSiteinfo * - * @param string $url - * @param int $depth - * @param bool $fetchbody - * @return string + * @param string $url A user-submitted URL + * @param int $depth The current redirection recursion level (internal) + * @param bool $fetchbody Whether to fetch the body or not after the HEAD requests + * @return string A canonical URL */ function original_url($url, $depth = 1, $fetchbody = false) { $a = get_app(); From a32431e69350d94c73cbc473634d9aa329fb7f56 Mon Sep 17 00:00:00 2001 From: Eelco Maljaars Date: Sun, 19 Feb 2017 15:44:48 +0100 Subject: [PATCH 5/7] Added missing mbstring php module to xenial setup in vagrant --- util/vagrant_provision.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/vagrant_provision.sh b/util/vagrant_provision.sh index 100764cab..6b7f0b862 100644 --- a/util/vagrant_provision.sh +++ b/util/vagrant_provision.sh @@ -51,7 +51,7 @@ if [ $( lsb_release -c | cut -f 2 ) == "trusty" ]; then sudo service apache2 restart elif [ $( lsb_release -c | cut -f 2 ) == "xenial" ]; then echo ">>> Installing PHP7" - sudo apt-get install -y php libapache2-mod-php php-cli php-mysql php-curl php-gd + sudo apt-get install -y php libapache2-mod-php php-cli php-mysql php-curl php-gd php-mbstring sudo apt-get install -y imagemagick sudo apt-get install -y php-imagick sudo systemctl restart apache2 From 36265d984e9d2eaafe34f1bdada5ef860c483f56 Mon Sep 17 00:00:00 2001 From: Eelco Maljaars Date: Sun, 19 Feb 2017 15:53:29 +0100 Subject: [PATCH 6/7] Ran into an installation problem today that is linked to newer MySQL builds This commit expands the INSTALL.txt with notes when this problem arises and how it should be handled. 
In the end the database code should probably be fixed but this should help in the short term. --- INSTALL.txt | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/INSTALL.txt b/INSTALL.txt index a9d42495b..a96c9fd5a 100644 --- a/INSTALL.txt +++ b/INSTALL.txt @@ -64,7 +64,7 @@ you wish to communicate with the Diaspora network. password, database name). - Friendica needs the permission to create and delete fields and tables in its own database. - + - Please check the additional notes if running on MySQL 5.7.17 or newer 4. If you know in advance that it will be impossible for the web server to write or create files in your web directory, create an empty file called @@ -291,3 +291,21 @@ This is obvious as soon as you notice that the friendica-cron uses proc_open to execute php-scripts that also use proc_open, but it took me quite some time to find that out. I hope this saves some time for other people using suhosin with function blacklists. + +######################################################################## +Unable to create alle mysql tables on MySQL 5.7.17 or newer +####################################################################### + +If the setup fails to create all the database tables and/or manual +creation from the command line failes, with this error : + +ERROR 1067 (42000) at line XX: Invalid default value for 'created' + +You need to adjust your my.cnf and add the following setting under +the [mysqld] setion : + +sql_mode = ''; + +After that, restart mysql and try again. + + From 38aea0aeacbc85ad4b1e561a123af240fe6e4b29 Mon Sep 17 00:00:00 2001 From: Eelco Maljaars Date: Sun, 19 Feb 2017 16:36:40 +0100 Subject: [PATCH 7/7] Typos fixed --- INSTALL.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/INSTALL.txt b/INSTALL.txt index a96c9fd5a..4c57064f6 100644 --- a/INSTALL.txt +++ b/INSTALL.txt @@ -293,16 +293,16 @@ find that out. 
I hope this saves some time for other people using suhosin with function blacklists. ######################################################################## -Unable to create alle mysql tables on MySQL 5.7.17 or newer +Unable to create all mysql tables on MySQL 5.7.17 or newer ####################################################################### If the setup fails to create all the database tables and/or manual -creation from the command line failes, with this error : +creation from the command line fails, with this error: ERROR 1067 (42000) at line XX: Invalid default value for 'created' You need to adjust your my.cnf and add the following setting under -the [mysqld] setion : +the [mysqld] section : sql_mode = '';