Merge remote-tracking branch 'upstream/develop' into more-temp-stuff

2017-02-19 18:46:20 +00:00 · 2017-02-19 18:46:20 +00:00 · 564bc22900
commit 564bc22900
parent 9251d2af6f 82ebc673a0
4 changed files with 90 additions and 65 deletions
--- a/INSTALL.txt
+++ b/INSTALL.txt
@ -64,7 +64,7 @@ you wish to communicate with the Diaspora network.
 password, database name).

    - Friendica needs the permission to create and delete fields and tables in its own database.
-
+    - Please check the additional notes if running on MySQL 5.7.17 or newer

 4. If you know in advance that it will be impossible for the web server to 
 write or create files in your web directory, create an empty file called 
@ -291,3 +291,21 @@ This is obvious as soon as you notice that the friendica-cron uses proc_open to
 execute php-scripts that also use proc_open, but it took me quite some time to
 find that out. I hope this saves some time for other people using suhosin with
 function blacklists.
+
+########################################################################
+Unable to create all mysql tables on MySQL 5.7.17 or newer
+#######################################################################
+
+If the setup fails to create all the database tables and/or manual
+creation from the command line fails, with this error: 
+
+ERROR 1067 (42000) at line XX: Invalid default value for 'created' 
+
+You need to adjust your my.cnf and add the following setting under 
+the [mysqld] section : 
+
+sql_mode = '';
+
+After that, restart mysql and try again. 
+
+
--- a/include/ParseUrl.php
+++ b/include/ParseUrl.php
@ -21,13 +21,13 @@ class ParseUrl {

 	/**
 	 * @brief Search for chached embeddable data of an url otherwise fetch it
-	 * 
+	 *
 	 * @param type $url The url of the page which should be scraped
 	 * @param type $no_guessing If true the parse doens't search for
 	 *    preview pictures
 	 * @param type $do_oembed The false option is used by the function fetch_oembed()
 	 *    to avoid endless loops
-	 * 
+	 *
 	 * @return array which contains needed data for embedding
 	 *    string 'url' => The url of the parsed page
 	 *    string 'type' => Content type
@ -37,9 +37,9 @@ class ParseUrl {
 	 *                if $no_geuessing = false
 	 *    array'images' = Array of preview pictures
 	 *    string 'keywords' => The tags which belong to the content
-	 * 
+	 *
 	 * @see ParseUrl::getSiteinfo() for more information about scraping
-	 * embeddable content 
+	 * embeddable content
 	 */
 	public static function getSiteinfoCached($url, $no_guessing = false, $do_oembed = true) {

@ -71,21 +71,21 @@ class ParseUrl {
 	}
 	/**
 	 * @brief Parse a page for embeddable content information
-	 * 
+	 *
 	 * This method parses to url for meta data which can be used to embed
 	 * the content. If available it prioritizes Open Graph meta tags.
 	 * If this is not available it uses the twitter cards meta tags.
 	 * As fallback it uses standard html elements with meta informations
 	 * like \<title\>Awesome Title\</title\> or
 	 * \<meta name="description" content="An awesome description"\>
-	 * 
+	 *
 	 * @param type $url The url of the page which should be scraped
 	 * @param type $no_guessing If true the parse doens't search for
 	 *    preview pictures
 	 * @param type $do_oembed The false option is used by the function fetch_oembed()
 	 *    to avoid endless loops
 	 * @param type $count Internal counter to avoid endless loops
-	 * 
+	 *
 	 * @return array which contains needed data for embedding
 	 *    string 'url' => The url of the parsed page
 	 *    string 'type' => Content type
@ -95,13 +95,13 @@ class ParseUrl {
 	 *                if $no_geuessing = false
 	 *    array'images' = Array of preview pictures
 	 *    string 'keywords' => The tags which belong to the content
-	 * 
+	 *
 	 * @todo https://developers.google.com/+/plugins/snippet/
 	 * @verbatim
 	 * <meta itemprop="name" content="Awesome title">
 	 * <meta itemprop="description" content="An awesome description">
 	 * <meta itemprop="image" content="http://maple.libertreeproject.org/images/tree-icon.png">
-	 * 
+	 *
 	 * <body itemscope itemtype="http://schema.org/Product">
 	 *   <h1 itemprop="name">Shiny Trinket</h1>
 	 *   <img itemprop="image" src="{image-url}" />
@ -130,7 +130,7 @@ class ParseUrl {
 		$url = trim($url, "'");
 		$url = trim($url, '"');

-		$url = original_url($url);
+		$url = strip_tracking_query_params($url);

 		$siteinfo["url"] = $url;
 		$siteinfo["type"] = "link";
@ -142,8 +142,7 @@ class ParseUrl {
 		$ch = curl_init();
 		curl_setopt($ch, CURLOPT_URL, $url);
 		curl_setopt($ch, CURLOPT_HEADER, 1);
-		curl_setopt($ch, CURLOPT_NOBODY, 1);
-		curl_setopt($ch, CURLOPT_TIMEOUT, 3);
+		curl_setopt($ch, CURLOPT_TIMEOUT, 10);
 		curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
 		curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent());
 		curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (($check_cert) ? true : false));
@ -151,7 +150,6 @@ class ParseUrl {

 		$header = curl_exec($ch);
 		$curl_info = @curl_getinfo($ch);
-		$http_code = $curl_info["http_code"];
 		curl_close($ch);

 		$a->save_timestamp($stamp1, "network");
@ -197,26 +195,6 @@ class ParseUrl {
 			}
 		}

-		$stamp1 = microtime(true);
-
-		// Now fetch the body as well
-		$ch = curl_init();
-		curl_setopt($ch, CURLOPT_URL, $url);
-		curl_setopt($ch, CURLOPT_HEADER, 1);
-		curl_setopt($ch, CURLOPT_NOBODY, 0);
-		curl_setopt($ch, CURLOPT_TIMEOUT, 10);
-		curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
-		curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent());
-		curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (($check_cert) ? true : false));
-		curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, (($check_cert) ? 2 : false));
-
-		$header = curl_exec($ch);
-		$curl_info = @curl_getinfo($ch);
-		$http_code = $curl_info["http_code"];
-		curl_close($ch);
-
-		$a->save_timestamp($stamp1, "network");
-
 		// Fetch the first mentioned charset. Can be in body or header
 		$charset = "";
 		if (preg_match('/charset=(.*?)['."'".'"\s\n]/', $header, $matches)) {
@ -476,7 +454,7 @@ class ParseUrl {

 	/**
 	 * @brief Convert tags from CSV to an array
-	 * 
+	 *
 	 * @param string $string Tags
 	 * @return array with formatted Hashtags
 	 */
@ -492,9 +470,9 @@ class ParseUrl {

 	/**
 	 * @brief Add a hasht sign to a string
-	 * 
+	 *
 	 *  This method is used as callback function
-	 * 
+	 *
 	 * @param string $tag The pure tag name
 	 * @param int $k Counter for internal use
 	 */
@ -504,16 +482,16 @@ class ParseUrl {

 	/**
 	 * @brief Add a scheme to an url
-	 * 
+	 *
 	 * The src attribute of some html elements (e.g. images)
 	 * can miss the scheme so we need to add the correct
 	 * scheme
-	 * 
+	 *
 	 * @param string $url The url which possibly does have
 	 *    a missing scheme (a link to an image)
 	 * @param string $scheme The url with a correct scheme
 	 *    (e.g. the url from the webpage which does contain the image)
-	 * 
+	 *
 	 * @return string The url with a scheme
 	 */
 	private static function completeUrl($url, $scheme) {
--- a/include/network.php
+++ b/include/network.php
@ -11,11 +11,11 @@ require_once('include/Probe.php');

 /**
 * @brief Curl wrapper
- * 
+ *
 * If binary flag is true, return binary results.
 * Set the cookiejar argument to a string (e.g. "/tmp/friendica-cookies.txt")
 * to preserve cookies from one request to the next.
- * 
+ *
 * @param string $url URL to fetch
 * @param boolean $binary default false
 *    TRUE if asked to return binary results (file download)
@ -23,7 +23,7 @@ require_once('include/Probe.php');
 * @param integer $timeout Timeout in seconds, default system config value or 60 seconds
 * @param string $accept_content supply Accept: header with 'accept_content' as the value
 * @param string $cookiejar Path to cookie jar file
- * 
+ *
 * @return string The fetched content
 */
 function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_content=Null, $cookiejar = 0) {
@ -218,13 +218,13 @@ function z_fetch_url($url,$binary = false, &$redirects = 0, $opts=array()) {

 /**
 * @brief Post request to $url
- * 
+ *
 * @param string $url URL to post
 * @param mixed $params
 * @param string $headers HTTP headers
 * @param integer $redirects Recursion counter for internal use - default = 0
 * @param integer $timeout The timeout in seconds, default system config value or 60 seconds
- * 
+ *
 * @return string The content
 */
 function post_url($url,$params, $headers = null, &$redirects = 0, $timeout = 0) {
@ -385,10 +385,10 @@ function http_status_exit($val, $description = array()) {

 /**
 * @brief Check URL to se if ts's real
- * 
+ *
 * Take a URL from the wild, prepend http:// if necessary
 * and check DNS to see if it's real (or check if is a valid IP address)
- * 
+ *
 * @param string $url The URL to be validated
 * @return boolean True if it's a valid URL, fals if something wrong with it
 */
@ -415,7 +415,7 @@ function validate_url(&$url) {

 /**
 * @brief Checks that email is an actual resolvable internet address
- * 
+ *
 * @param string $addr The email address
 * @return boolean True if it's a valid email address, false if it's not
 */
@ -436,10 +436,10 @@ function validate_email($addr) {

 /**
 * @brief Check if URL is allowed
- * 
+ *
 * Check $url against our list of allowed sites,
 * wildcards allowed. If allowed_sites is unset return true;
- * 
+ *
 * @param string $url URL which get tested
 * @return boolean True if url is allowed otherwise return false
 */
@ -481,9 +481,9 @@ function allowed_url($url) {

 /**
 * @brief Check if email address is allowed to register here.
- * 
+ *
 * Compare against our list (wildcards allowed).
- * 
+ *
 * @param type $email
 * @return boolean False if not allowed, true if allowed
 *    or if allowed list is not configured
@ -670,42 +670,71 @@ function fix_contact_ssl_policy(&$contact,$new_policy) {
 	}
 }

-function original_url($url, $depth=1, $fetchbody = false) {
-
-	$a = get_app();
-
-	// Remove Analytics Data from Google and other tracking platforms
+/**
+ * @brief Remove Google Analytics and other tracking platforms params from URL
+ *
+ * @param string $url Any user-submitted URL that may contain tracking params
+ * @return string The same URL stripped of tracking parameters
+ */
+function strip_tracking_query_params($url)
+{
 	$urldata = parse_url($url);
 	if (is_string($urldata["query"])) {
 		$query = $urldata["query"];
 		parse_str($query, $querydata);

-		if (is_array($querydata))
-			foreach ($querydata AS $param=>$value)
+		if (is_array($querydata)) {
+			foreach ($querydata AS $param => $value) {
 				if (in_array($param, array("utm_source", "utm_medium", "utm_term", "utm_content", "utm_campaign",
 							"wt_mc", "pk_campaign", "pk_kwd", "mc_cid", "mc_eid",
 							"fb_action_ids", "fb_action_types", "fb_ref",
 							"awesm", "wtrid",
 							"woo_campaign", "woo_source", "woo_medium", "woo_content", "woo_term"))) {

-					$pair = $param."=".urlencode($value);
+					$pair = $param . "=" . urlencode($value);
 					$url = str_replace($pair, "", $url);

 					// Second try: if the url isn't encoded completely
-					$pair = $param."=".str_replace(" ", "+", $value);
+					$pair = $param . "=" . str_replace(" ", "+", $value);
 					$url = str_replace($pair, "", $url);

 					// Third try: Maybey the url isn't encoded at all
-					$pair = $param."=".$value;
+					$pair = $param . "=" . $value;
 					$url = str_replace($pair, "", $url);

 					$url = str_replace(array("?&", "&&"), array("?", ""), $url);
 				}
+			}
+		}

-		if (substr($url, -1, 1) == "?")
+		if (substr($url, -1, 1) == "?") {
 			$url = substr($url, 0, -1);
+		}
 	}

+	return $url;
+}
+
+/**
+ * @brief Returns the original URL of the provided URL
+ *
+ * This function strips tracking query params and follows redirections, either
+ * through HTTP code or meta refresh tags. Stops after 10 redirections.
+ *
+ * @todo Remove the $fetchbody parameter that generates an extraneous HEAD request
+ *
+ * @see ParseUrl::getSiteinfo
+ *
+ * @param string $url A user-submitted URL
+ * @param int $depth The current redirection recursion level (internal)
+ * @param bool $fetchbody Whether to fetch the body or not after the HEAD requests
+ * @return string A canonical URL
+ */
+function original_url($url, $depth = 1, $fetchbody = false) {
+	$a = get_app();
+
+	$url = strip_tracking_query_params($url);
+
 	if ($depth > 10)
 		return($url);

@ -821,7 +850,7 @@ function short_link($url) {

 /**
 * @brief Encodes content to json
- * 
+ *
 * This function encodes an array to json format
 * and adds an application/json HTTP header to the output.
 * After finishing the process is getting killed.
--- a/util/vagrant_provision.sh
+++ b/util/vagrant_provision.sh
@ -51,7 +51,7 @@ if [ $( lsb_release -c | cut -f 2 ) == "trusty" ]; then
    sudo service apache2 restart
 elif [ $( lsb_release -c | cut -f 2 ) == "xenial" ]; then
    echo ">>> Installing PHP7"
-    sudo apt-get install -y php libapache2-mod-php php-cli php-mysql php-curl php-gd
+    sudo apt-get install -y php libapache2-mod-php php-cli php-mysql php-curl php-gd php-mbstring
    sudo apt-get install -y imagemagick
    sudo apt-get install -y php-imagick
    sudo systemctl restart apache2