Merge pull request #8784 from MrPetovan/bug/8781-link-preview-unlabeled-links-scheme

Add constraints on Page Info URLs
2020-06-18 06:46:35 +02:00 · 2020-06-18 06:46:35 +02:00 · db9f798bcd
parent df03b19a9d f544c8383b
commit db9f798bcd
9 changed files with 733 additions and 427 deletions
--- a/include/items.php
+++ b/include/items.php
@ -19,430 +19,56 @@
 *
 */

-use Friendica\Core\Hook;
-use Friendica\Core\Logger;
-use Friendica\Core\Protocol;
-use Friendica\Core\Renderer;
-use Friendica\Core\Session;
-use Friendica\Database\DBA;
-use Friendica\DI;
-use Friendica\Model\Item;
-use Friendica\Protocol\DFRN;
-use Friendica\Protocol\Feed;
-use Friendica\Protocol\OStatus;
-use Friendica\Util\Network;
-use Friendica\Util\ParseUrl;
-use Friendica\Util\Strings;
-
-require_once __DIR__ . '/../mod/share.php';
-
+/**
+ * @deprecated since 2020.06
+ * @see \Friendica\Content\PageInfo::getFooterFromData
+ */
 function add_page_info_data(array $data, $no_photos = false)
 {
-	Hook::callAll('page_info_data', $data);
-
-	if (empty($data['type'])) {
-		return '';
-	}
-
-	// It maybe is a rich content, but if it does have everything that a link has,
-	// then treat it that way
-	if (($data["type"] == "rich") && is_string($data["title"]) &&
-		is_string($data["text"]) && !empty($data["images"])) {
-		$data["type"] = "link";
-	}
-
-	$data["title"] = $data["title"] ?? '';
-
-	if ((($data["type"] != "link") && ($data["type"] != "video") && ($data["type"] != "photo")) || ($data["title"] == $data["url"])) {
-		return "";
-	}
-
-	if ($no_photos && ($data["type"] == "photo")) {
-		return "";
-	}
-
-	// Escape some bad characters
-	$data["url"] = str_replace(["[", "]"], ["&#91;", "&#93;"], htmlentities($data["url"], ENT_QUOTES, 'UTF-8', false));
-	$data["title"] = str_replace(["[", "]"], ["&#91;", "&#93;"], htmlentities($data["title"], ENT_QUOTES, 'UTF-8', false));
-
-	$text = "[attachment type='".$data["type"]."'";
-
-	if (empty($data["text"])) {
-		$data["text"] = $data["title"];
-	}
-
-	if (empty($data["text"])) {
-		$data["text"] = $data["url"];
-	}
-
-	if (!empty($data["url"])) {
-		$text .= " url='".$data["url"]."'";
-	}
-
-	if (!empty($data["title"])) {
-		$text .= " title='".$data["title"]."'";
-	}
-
-	// Only embedd a picture link when it seems to be a valid picture ("width" is set)
-	if (!empty($data["images"]) && !empty($data["images"][0]["width"])) {
-		$preview = str_replace(["[", "]"], ["&#91;", "&#93;"], htmlentities($data["images"][0]["src"], ENT_QUOTES, 'UTF-8', false));
-		// if the preview picture is larger than 500 pixels then show it in a larger mode
-		// But only, if the picture isn't higher than large (To prevent huge posts)
-		if (!DI::config()->get('system', 'always_show_preview') && ($data["images"][0]["width"] >= 500)
-			&& ($data["images"][0]["width"] >= $data["images"][0]["height"])) {
-			$text .= " image='".$preview."'";
-		} else {
-			$text .= " preview='".$preview."'";
-		}
-	}
-
-	$text .= "]".$data["text"]."[/attachment]";
-
-	$hashtags = "";
-	if (isset($data["keywords"]) && count($data["keywords"])) {
-		$hashtags = "\n";
-		foreach ($data["keywords"] as $keyword) {
-			/// @TODO make a positive list of allowed characters
-			$hashtag = str_replace([' ', '+', '/', '.', '#', '@', "'", '"', '’', '`', '(', ')', '„', '“'], '', $keyword);
-			$hashtags .= "#[url=" . DI::baseUrl() . "/search?tag=" . $hashtag . "]" . $hashtag . "[/url] ";
-		}
-	}
-
-	return "\n".$text.$hashtags;
-}
-
-function query_page_info($url, $photo = "", $keywords = false, $keyword_denylist = "")
-{
-	$data = ParseUrl::getSiteinfoCached($url, true);
-
-	if ($photo != "") {
-		$data["images"][0]["src"] = $photo;
-	}
-
-	Logger::log('fetch page info for ' . $url . ' ' . print_r($data, true), Logger::DEBUG);
-
-	if (!$keywords && isset($data["keywords"])) {
-		unset($data["keywords"]);
-	}
-
-	if (($keyword_denylist != "") && isset($data["keywords"])) {
-		$list = explode(", ", $keyword_denylist);
-
-		foreach ($list as $keyword) {
-			$keyword = trim($keyword);
-
-			$index = array_search($keyword, $data["keywords"]);
-			if ($index !== false) {
-				unset($data["keywords"][$index]);
-			}
-		}
-	}
-
-	return $data;
-}
-
-function get_page_keywords($url, $photo = "", $keywords = false, $keyword_denylist = "")
-{
-	$data = query_page_info($url, $photo, $keywords, $keyword_denylist);
-	if (empty($data["keywords"]) || !is_array($data["keywords"])) {
-		return [];
-	}
-
-	$taglist = [];
-	foreach ($data['keywords'] as $keyword) {
-		$hashtag = str_replace([" ", "+", "/", ".", "#", "'"],
-			["", "", "", "", "", ""], $keyword);
-
-		$taglist[] = $hashtag;
-	}
-
-	return $taglist;
-}
-
-function add_page_info($url, $no_photos = false, $photo = "", $keywords = false, $keyword_denylist = "")
-{
-	$data = query_page_info($url, $photo, $keywords, $keyword_denylist);
-
-	$text = '';
-
-	if (is_array($data)) {
-		$text = add_page_info_data($data, $no_photos);
-	}
-
-	return $text;
-}
-
-function add_page_info_to_body($body, $texturl = false, $no_photos = false)
-{
-	Logger::log('add_page_info_to_body: fetch page info for body ' . $body, Logger::DEBUG);
-
-	$URLSearchString = "^\[\]";
-
-	// Fix for Mastodon where the mentions are in a different format
-	$body = preg_replace("/\[url\=([$URLSearchString]*)\]([#!@])(.*?)\[\/url\]/ism",
-		'$2[url=$1]$3[/url]', $body);
-
-	// Adding these spaces is a quick hack due to my problems with regular expressions :)
-	preg_match("/[^!#@]\[url\]([$URLSearchString]*)\[\/url\]/ism", " " . $body, $matches);
-
-	if (!$matches) {
-		preg_match("/[^!#@]\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", " " . $body, $matches);
-	}
-
-	// Convert urls without bbcode elements
-	if (!$matches && $texturl) {
-		preg_match("/([^\]\='".'"'."]|^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism", " ".$body, $matches);
-
-		// Yeah, a hack. I really hate regular expressions :)
-		if ($matches) {
-			$matches[1] = $matches[2];
-		}
-	}
-
-	if ($matches) {
-		$footer = add_page_info($matches[1], $no_photos);
-	}
-
-	// Remove the link from the body if the link is attached at the end of the post
-	if (isset($footer) && (trim($footer) != "") && (strpos($footer, $matches[1]))) {
-		$removedlink = trim(str_replace($matches[1], "", $body));
-		if (($removedlink == "") || strstr($body, $removedlink)) {
-			$body = $removedlink;
-		}
-
-		$removedlink = preg_replace("/\[url\=" . preg_quote($matches[1], '/') . "\](.*?)\[\/url\]/ism", '', $body);
-		if (($removedlink == "") || strstr($body, $removedlink)) {
-			$body = $removedlink;
-		}
-	}
-
-	// Add the page information to the bottom
-	if (isset($footer) && (trim($footer) != "")) {
-		$body .= $footer;
-	}
-
-	return $body;
+	return "\n" . \Friendica\Content\PageInfo::getFooterFromData($data, $no_photos);
 }

 /**
- *
- * consume_feed - process atom feed and update anything/everything we might need to update
- *
- * $xml = the (atom) feed to consume - RSS isn't as fully supported but may work for simple feeds.
- *
- * $importer = the contact_record (joined to user_record) of the local user who owns this relationship.
- *             It is this person's stuff that is going to be updated.
- * $contact =  the person who is sending us stuff. If not set, we MAY be processing a "follow" activity
- *             from an external network and MAY create an appropriate contact record. Otherwise, we MUST
- *             have a contact record.
- * $hub = should we find a hub declation in the feed, pass it back to our calling process, who might (or
- *        might not) try and subscribe to it.
- * $datedir sorts in reverse order
- * $pass - by default ($pass = 0) we cannot guarantee that a parent item has been
- *      imported prior to its children being seen in the stream unless we are certain
- *      of how the feed is arranged/ordered.
- * With $pass = 1, we only pull parent items out of the stream.
- * With $pass = 2, we only pull children (comments/likes).
- *
- * So running this twice, first with pass 1 and then with pass 2 will do the right
- * thing regardless of feed ordering. This won't be adequate in a fully-threaded
- * model where comments can have sub-threads. That would require some massive sorting
- * to get all the feed items into a mostly linear ordering, and might still require
- * recursion.
- *
- * @param       $xml
- * @param array $importer
- * @param array $contact
- * @param       $hub
- * @throws ImagickException
- * @throws \Friendica\Network\HTTPException\InternalServerErrorException
+ * @deprecated since 2020.06
+ * @see \Friendica\Content\PageInfo::queryUrl
+ */
+function query_page_info($url, $photo = "", $keywords = false, $keyword_denylist = "")
+{
+	// Legacy procedural entry point: forward every argument unchanged to the class-based implementation.
+	$siteInfo = \Friendica\Content\PageInfo::queryUrl($url, $photo, $keywords, $keyword_denylist);
+
+	return $siteInfo;
+}
+
+/**
+ * @deprecated since 2020.06
+ * @see \Friendica\Content\PageInfo::getTagsFromUrl()
+ */
+function get_page_keywords($url, $photo = "", $keywords = false, $keyword_denylist = "")
+{
+	// With keyword extraction switched off there is nothing to look up.
+	if (!$keywords) {
+		return [];
+	}
+
+	return \Friendica\Content\PageInfo::getTagsFromUrl($url, $photo, $keyword_denylist);
+}
+
+/**
+ * @deprecated since 2020.06
+ * @see \Friendica\Content\PageInfo::getFooterFromUrl
+ */
+function add_page_info($url, $no_photos = false, $photo = "", $keywords = false, $keyword_denylist = "")
+{
+	$footer = \Friendica\Content\PageInfo::getFooterFromUrl($url, $no_photos, $photo, $keywords, $keyword_denylist);
+
+	// Only prefix the newline when there is a footer: callers of the legacy function
+	// expect an empty string (not a lone "\n") when no page info could be built.
+	return ((string)$footer !== '') ? "\n" . $footer : '';
+}
+
+/**
+ * @deprecated since 2020.06
+ * @see \Friendica\Content\PageInfo::appendToBody
+ */
+function add_page_info_to_body($body, $texturl = false, $no_photos = false)
+{
+	// Legacy procedural entry point: $texturl maps to the "search naked URLs" switch of the new method.
+	$bodyWithFooter = \Friendica\Content\PageInfo::appendToBody($body, $texturl, $no_photos);
+
+	return $bodyWithFooter;
+}
+
+/**
+ * @deprecated since 2020.06
+ * @see \Friendica\Protocol\Feed::consume
 */
 function consume_feed($xml, array $importer, array $contact, &$hub)
 {
-	if ($contact['network'] === Protocol::OSTATUS) {
-		Logger::log("Consume OStatus messages ", Logger::DEBUG);
-		OStatus::import($xml, $importer, $contact, $hub);
-
-		return;
-	}
-
-	if ($contact['network'] === Protocol::FEED) {
-		Logger::log("Consume feeds", Logger::DEBUG);
-		Feed::import($xml, $importer, $contact);
-
-		return;
-	}
-
-	if ($contact['network'] === Protocol::DFRN) {
-		Logger::log("Consume DFRN messages", Logger::DEBUG);
-		$dfrn_importer = DFRN::getImporter($contact["id"], $importer["uid"]);
-		if (!empty($dfrn_importer)) {
-			Logger::log("Now import the DFRN feed");
-			DFRN::import($xml, $dfrn_importer, true);
-			return;
-		}
-	}
-}
-
-function subscribe_to_hub($url, array $importer, array $contact, $hubmode = 'subscribe')
-{
-	/*
-	 * Diaspora has different message-ids in feeds than they do
-	 * through the direct Diaspora protocol. If we try and use
-	 * the feed, we'll get duplicates. So don't.
-	 */
-	if ($contact['network'] === Protocol::DIASPORA) {
-		return;
-	}
-
-	// Without an importer we don't have a user id - so we quit
-	if (empty($importer)) {
-		return;
-	}
-
-	$user = DBA::selectFirst('user', ['nickname'], ['uid' => $importer['uid']]);
-
-	// No user, no nickname, we quit
-	if (!DBA::isResult($user)) {
-		return;
-	}
-
-	$push_url = DI::baseUrl() . '/pubsub/' . $user['nickname'] . '/' . $contact['id'];
-
-	// Use a single verify token, even if multiple hubs
-	$verify_token = ((strlen($contact['hub-verify'])) ? $contact['hub-verify'] : Strings::getRandomHex());
-
-	$params= 'hub.mode=' . $hubmode . '&hub.callback=' . urlencode($push_url) . '&hub.topic=' . urlencode($contact['poll']) . '&hub.verify=async&hub.verify_token=' . $verify_token;
-
-	Logger::log('subscribe_to_hub: ' . $hubmode . ' ' . $contact['name'] . ' to hub ' . $url . ' endpoint: '  . $push_url . ' with verifier ' . $verify_token);
-
-	if (!strlen($contact['hub-verify']) || ($contact['hub-verify'] != $verify_token)) {
-		DBA::update('contact', ['hub-verify' => $verify_token], ['id' => $contact['id']]);
-	}
-
-	$postResult = Network::post($url, $params);
-
-	Logger::log('subscribe_to_hub: returns: ' . $postResult->getReturnCode(), Logger::DEBUG);
-
-	return;
-
-}
-
-function drop_items(array $items)
-{
-	$uid = 0;
-
-	if (!Session::isAuthenticated()) {
-		return;
-	}
-
-	if (!empty($items)) {
-		foreach ($items as $item) {
-			$owner = Item::deleteForUser(['id' => $item], local_user());
-
-			if ($owner && !$uid) {
-				$uid = $owner;
-			}
-		}
-	}
-}
-
-function drop_item($id, $return = '')
-{
-	$a = DI::app();
-
-	// locate item to be deleted
-
-	$fields = ['id', 'uid', 'guid', 'contact-id', 'deleted', 'gravity', 'parent'];
-	$item = Item::selectFirstForUser(local_user(), $fields, ['id' => $id]);
-
-	if (!DBA::isResult($item)) {
-		notice(DI::l10n()->t('Item not found.') . EOL);
-		DI::baseUrl()->redirect('network');
-	}
-
-	if ($item['deleted']) {
-		return 0;
-	}
-
-	$contact_id = 0;
-
-	// check if logged in user is either the author or owner of this item
-	if (Session::getRemoteContactID($item['uid']) == $item['contact-id']) {
-		$contact_id = $item['contact-id'];
-	}
-
-	if ((local_user() == $item['uid']) || $contact_id) {
-		// Check if we should do HTML-based delete confirmation
-		if (!empty($_REQUEST['confirm'])) {
-			// <form> can't take arguments in its "action" parameter
-			// so add any arguments as hidden inputs
-			$query = explode_querystring(DI::args()->getQueryString());
-			$inputs = [];
-
-			foreach ($query['args'] as $arg) {
-				if (strpos($arg, 'confirm=') === false) {
-					$arg_parts = explode('=', $arg);
-					$inputs[] = ['name' => $arg_parts[0], 'value' => $arg_parts[1]];
-				}
-			}
-
-			return Renderer::replaceMacros(Renderer::getMarkupTemplate('confirm.tpl'), [
-				'$method' => 'get',
-				'$message' => DI::l10n()->t('Do you really want to delete this item?'),
-				'$extra_inputs' => $inputs,
-				'$confirm' => DI::l10n()->t('Yes'),
-				'$confirm_url' => $query['base'],
-				'$confirm_name' => 'confirmed',
-				'$cancel' => DI::l10n()->t('Cancel'),
-			]);
-		}
-		// Now check how the user responded to the confirmation query
-		if (!empty($_REQUEST['canceled'])) {
-			DI::baseUrl()->redirect('display/' . $item['guid']);
-		}
-
-		$is_comment = ($item['gravity'] == GRAVITY_COMMENT) ? true : false;
-		$parentitem = null;
-		if (!empty($item['parent'])){
-			$fields = ['guid'];
-			$parentitem = Item::selectFirstForUser(local_user(), $fields, ['id' => $item['parent']]);
-		}
-
-		// delete the item
-		Item::deleteForUser(['id' => $item['id']], local_user());
-
-		$return_url = hex2bin($return);
-
-		// removes update_* from return_url to ignore Ajax refresh
-		$return_url = str_replace("update_", "", $return_url);
-
-		// Check if delete a comment
-		if ($is_comment) {
-			// Return to parent guid
-			if (!empty($parentitem)) {
-				DI::baseUrl()->redirect('display/' . $parentitem['guid']);
-				//NOTREACHED
-			}
-			// In case something goes wrong
-			else {
-				DI::baseUrl()->redirect('network');
-				//NOTREACHED
-			}
-		}
-		else {
-			// if unknown location or deleting top level post called from display
-			if (empty($return_url) || strpos($return_url, 'display') !== false) {
-				DI::baseUrl()->redirect('network');
-				//NOTREACHED
-			} else {
-				DI::baseUrl()->redirect($return_url);
-				//NOTREACHED
-			}
-		}
-	} else {
-		notice(DI::l10n()->t('Permission denied.') . EOL);
-		DI::baseUrl()->redirect('display/' . $item['guid']);
-		//NOTREACHED
-	}
+	\Friendica\Protocol\Feed::consume($xml, $importer, $contact, $hub);
 }
--- a/mod/item.php
+++ b/mod/item.php
@ -34,6 +34,7 @@ use Friendica\Content\Text\BBCode;
 use Friendica\Core\Hook;
 use Friendica\Core\Logger;
 use Friendica\Core\Protocol;
+use Friendica\Core\Renderer;
 use Friendica\Core\Session;
 use Friendica\Core\System;
 use Friendica\Core\Worker;
@ -67,7 +68,10 @@ function item_post(App $a) {

 	if (!empty($_REQUEST['dropitems'])) {
 		$arr_drop = explode(',', $_REQUEST['dropitems']);
-		drop_items($arr_drop);
+		foreach ($arr_drop as $item) {
+			Item::deleteForUser(['id' => $item], $uid);
+		}
+
 		$json = ['success' => 1];
 		System::jsonExit($json);
 	}
@ -850,7 +854,9 @@ function item_content(App $a)

 	if (($a->argc >= 3) && ($a->argv[1] === 'drop') && intval($a->argv[2])) {
 		if (DI::mode()->isAjax()) {
-			$o = Item::deleteForUser(['id' => $a->argv[2]], local_user());
+			Item::deleteForUser(['id' => $a->argv[2]], local_user());
+			// ajax return: [<item id>, 0 (no perm) | <owner id>]
+			System::jsonExit([intval($a->argv[2]), local_user()]);
 		} else {
 			if (!empty($a->argv[3])) {
 				$o = drop_item($a->argv[2], $a->argv[3]);
@ -859,12 +865,110 @@ function item_content(App $a)
 				$o = drop_item($a->argv[2]);
 			}
 		}
-
-		if (DI::mode()->isAjax()) {
-			// ajax return: [<item id>, 0 (no perm) | <owner id>]
-			System::jsonExit([intval($a->argv[2]), intval($o)]);
-		}
 	}

 	return $o;
 }
+
+/**
+ * Deletes a single item on behalf of the logged-in user, optionally after an HTML confirmation step.
+ *
+ * The deletion is allowed when the local user owns the item or when the remote contact id stored
+ * in the session for the item's owner matches the item's contact id. Apart from the two returns
+ * below (item already deleted, confirmation form requested) every path issues a redirect.
+ *
+ * @param int    $id     Id of the item to delete
+ * @param string $return Hex-encoded return path, decoded with hex2bin(); empty for the default target
+ * @return string The rendered confirmation form, or an empty string
+ * @throws HTTPException\InternalServerErrorException
+ */
+function drop_item(int $id, string $return = '')
+{
+	// locate item to be deleted
+	$fields = ['id', 'uid', 'guid', 'contact-id', 'deleted', 'gravity', 'parent'];
+	$item = Item::selectFirstForUser(local_user(), $fields, ['id' => $id]);
+
+	if (!DBA::isResult($item)) {
+		notice(DI::l10n()->t('Item not found.') . EOL);
+		DI::baseUrl()->redirect('network');
+	}
+
+	if ($item['deleted']) {
+		return '';
+	}
+
+	$contact_id = 0;
+
+	// check if logged in user is either the author or owner of this item
+	if (Session::getRemoteContactID($item['uid']) == $item['contact-id']) {
+		$contact_id = $item['contact-id'];
+	}
+
+	if ((local_user() == $item['uid']) || $contact_id) {
+		// Check if we should do HTML-based delete confirmation
+		if (!empty($_REQUEST['confirm'])) {
+			// <form> can't take arguments in its "action" parameter
+			// so add any arguments as hidden inputs
+			$query = explode_querystring(DI::args()->getQueryString());
+			$inputs = [];
+
+			foreach ($query['args'] as $arg) {
+				if (strpos($arg, 'confirm=') === false) {
+					// Split on the first "=" only: values containing "=" stay intact
+					// and an argument without "=" gets an empty value instead of an undefined offset
+					$arg_parts = explode('=', $arg, 2);
+					$inputs[] = ['name' => $arg_parts[0], 'value' => $arg_parts[1] ?? ''];
+				}
+			}
+
+			return Renderer::replaceMacros(Renderer::getMarkupTemplate('confirm.tpl'), [
+				'$method' => 'get',
+				'$message' => DI::l10n()->t('Do you really want to delete this item?'),
+				'$extra_inputs' => $inputs,
+				'$confirm' => DI::l10n()->t('Yes'),
+				'$confirm_url' => $query['base'],
+				'$confirm_name' => 'confirmed',
+				'$cancel' => DI::l10n()->t('Cancel'),
+			]);
+		}
+		// Now check how the user responded to the confirmation query
+		if (!empty($_REQUEST['canceled'])) {
+			DI::baseUrl()->redirect('display/' . $item['guid']);
+		}
+
+		$is_comment = $item['gravity'] == GRAVITY_COMMENT;
+		$parentitem = null;
+		if (!empty($item['parent'])) {
+			$fields = ['guid'];
+			$parentitem = Item::selectFirstForUser(local_user(), $fields, ['id' => $item['parent']]);
+		}
+
+		// delete the item
+		Item::deleteForUser(['id' => $item['id']], local_user());
+
+		// hex2bin() raises a warning and returns false on odd-length or non-hex input.
+		// $return is taken from the request path, so malformed values are treated as "no return URL".
+		$return_url = (ctype_xdigit($return) && (strlen($return) % 2 === 0)) ? hex2bin($return) : '';
+
+		// removes update_* from return_url to ignore Ajax refresh
+		$return_url = str_replace("update_", "", $return_url);
+
+		// Check if delete a comment
+		if ($is_comment) {
+			// Return to parent guid
+			if (!empty($parentitem)) {
+				DI::baseUrl()->redirect('display/' . $parentitem['guid']);
+				//NOTREACHED
+			} // In case something goes wrong
+			else {
+				DI::baseUrl()->redirect('network');
+				//NOTREACHED
+			}
+		} else {
+			// if unknown location or deleting top level post called from display
+			if (empty($return_url) || strpos($return_url, 'display') !== false) {
+				DI::baseUrl()->redirect('network');
+				//NOTREACHED
+			} else {
+				DI::baseUrl()->redirect($return_url);
+				//NOTREACHED
+			}
+		}
+	} else {
+		notice(DI::l10n()->t('Permission denied.'));
+		DI::baseUrl()->redirect('display/' . $item['guid']);
+		//NOTREACHED
+	}
+
+	return '';
+}
--- a/src/Content/PageInfo.php
+++ b/src/Content/PageInfo.php
@ -0,0 +1,269 @@
+<?php
+/**
+ * @copyright Copyright (C) 2020, Friendica
+ *
+ * @license GNU AGPL version 3 or any later version
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+namespace Friendica\Content;
+
+use Friendica\Core\Hook;
+use Friendica\Core\Logger;
+use Friendica\DI;
+use Friendica\Network\HTTPException;
+use Friendica\Util\ParseUrl;
+use Friendica\Util\Strings;
+
+/**
+ * Extracts trailing URLs from post bodies to transform them in enriched attachment tags through Site Info query
+ */
+class PageInfo
+{
+	/**
+	 * Appends an attachment footer for the trailing URL of the provided body.
+	 *
+	 * The body is returned untouched when no relevant URL is found or when no footer
+	 * can be built for it. Otherwise the trailing URL is stripped from the body and the
+	 * footer is appended on a new line.
+	 *
+	 * @param string $body
+	 * @param bool   $searchNakedUrls Whether URLs outside of BBCode tags are considered as well
+	 * @param bool   $no_photos       Forwarded to the footer generation
+	 * @return string
+	 * @throws HTTPException\InternalServerErrorException
+	 */
+	public static function appendToBody(string $body, bool $searchNakedUrls = false, bool $no_photos = false)
+	{
+		Logger::info('add_page_info_to_body: fetch page info for body', ['body' => $body]);
+
+		$url = self::getRelevantUrlFromBody($body, $searchNakedUrls);
+
+		// No URL means no footer; both cases share the same early exit
+		$footer = $url ? self::getFooterFromUrl($url, $no_photos) : '';
+		if (!$footer) {
+			return $body;
+		}
+
+		return self::stripTrailingUrlFromBody($body, $url) . "\n" . $footer;
+	}
+
+	/**
+	 * Queries the site info of the provided URL and turns it into an attachment footer.
+	 *
+	 * @param string $url
+	 * @param bool   $no_photos        Forwarded to getFooterFromData()
+	 * @param string $photo            Forwarded to queryUrl()
+	 * @param bool   $keywords         Forwarded to queryUrl()
+	 * @param string $keyword_denylist Forwarded to queryUrl()
+	 * @return string The footer, or an empty string when no site info array is available
+	 * @throws HTTPException\InternalServerErrorException
+	 */
+	public static function getFooterFromUrl(string $url, bool $no_photos = false, string $photo = '', bool $keywords = false, string $keyword_denylist = '')
+	{
+		$data = self::queryUrl($url, $photo, $keywords, $keyword_denylist);
+
+		// queryUrl() is documented as array|bool while getFooterFromData() only accepts arrays
+		if (!is_array($data)) {
+			return '';
+		}
+
+		return self::getFooterFromData($data, $no_photos);
+	}
+
+	/**
+	 * Builds the [attachment] BBCode footer, optionally followed by hashtag links, from site info data.
+	 *
+	 * An empty string is returned when the data has no type, when the type is neither link, video
+	 * nor photo, when the title equals the URL, or when photos are excluded and the type is photo.
+	 *
+	 * @param array $data      Site info; the keys type, url, title, text, images and keywords are read
+	 * @param bool  $no_photos Whether data of type "photo" is rejected
+	 * @return string
+	 * @throws HTTPException\InternalServerErrorException
+	 */
+	public static function getFooterFromData(array $data, bool $no_photos = false)
+	{
+		Hook::callAll('page_info_data', $data);
+
+		if (empty($data['type'])) {
+			return '';
+		}
+
+		// It maybe is a rich content, but if it does have everything that a link has,
+		// then treat it that way
+		if (($data['type'] == 'rich') && is_string($data['title'] ?? null) &&
+			is_string($data['text'] ?? null) && !empty($data['images'])) {
+			$data['type'] = 'link';
+		}
+
+		// Optional fields get a default so that sparse data doesn't raise undefined index notices below
+		$data['title'] = $data['title'] ?? '';
+		$data['url'] = $data['url'] ?? '';
+
+		if ((($data['type'] != 'link') && ($data['type'] != 'video') && ($data['type'] != 'photo')) || ($data['title'] == $data['url'])) {
+			return '';
+		}
+
+		if ($no_photos && ($data['type'] == 'photo')) {
+			return '';
+		}
+
+		// Escape some bad characters
+		$data['url'] = str_replace(['[', ']'], ['&#91;', '&#93;'], htmlentities($data['url'], ENT_QUOTES, 'UTF-8', false));
+		$data['title'] = str_replace(['[', ']'], ['&#91;', '&#93;'], htmlentities($data['title'], ENT_QUOTES, 'UTF-8', false));
+
+		$text = "[attachment type='" . $data['type'] . "'";
+
+		if (empty($data['text'])) {
+			$data['text'] = $data['title'];
+		}
+
+		if (empty($data['text'])) {
+			$data['text'] = $data['url'];
+		}
+
+		if (!empty($data['url'])) {
+			$text .= " url='" . $data['url'] . "'";
+		}
+
+		if (!empty($data['title'])) {
+			$text .= " title='" . $data['title'] . "'";
+		}
+
+		// Only embed a picture link when it seems to be a valid picture ("width" is set)
+		if (!empty($data['images']) && !empty($data['images'][0]['width'])) {
+			$image = $data['images'][0];
+			$preview = str_replace(['[', ']'], ['&#91;', '&#93;'], htmlentities($image['src'] ?? '', ENT_QUOTES, 'UTF-8', false));
+			// if the preview picture is larger than 500 pixels then show it in a larger mode
+			// But only, if the picture isn't higher than large (To prevent huge posts)
+			if (!DI::config()->get('system', 'always_show_preview') && ($image['width'] >= 500)
+				&& ($image['width'] >= ($image['height'] ?? 0))) {
+				$text .= " image='" . $preview . "'";
+			} else {
+				$text .= " preview='" . $preview . "'";
+			}
+		}
+
+		$text .= ']' . $data['text'] . '[/attachment]';
+
+		$hashtags = '';
+		if (!empty($data['keywords']) && is_array($data['keywords'])) {
+			$hashtags = "\n";
+			foreach ($data['keywords'] as $keyword) {
+				/// @TODO make a positive list of allowed characters
+				$hashtag = str_replace([' ', '+', '/', '.', '#', '@', "'", '"', '’', '`', '(', ')', '„', '“'], '', $keyword);
+				$hashtags .= '#[url=' . DI::baseUrl() . '/search?tag=' . $hashtag . ']' . $hashtag . '[/url] ';
+			}
+		}
+
+		return $text . $hashtags;
+	}
+
+	/**
+	 * Fetches the cached site info of a URL and applies the photo and keyword options to it.
+	 *
+	 * @param string $url
+	 * @param string $photo            If not empty, replaces the source of the first image
+	 * @param bool   $keywords         Whether the keywords are kept in the returned data
+	 * @param string $keyword_denylist List of keywords to remove, separated by ", "
+	 * @return array|bool
+	 * @throws HTTPException\InternalServerErrorException
+	 */
+	public static function queryUrl(string $url, string $photo = '', bool $keywords = false, string $keyword_denylist = '')
+	{
+		$data = ParseUrl::getSiteinfoCached($url, true);
+
+		if ($photo != '') {
+			$data['images'][0]['src'] = $photo;
+		}
+
+		if (!$keywords) {
+			unset($data['keywords']);
+		} elseif ($keyword_denylist && !empty($data['keywords']) && is_array($data['keywords'])) {
+			// The site info doesn't necessarily contain keywords, only filter when there are some
+			$list = explode(', ', $keyword_denylist);
+
+			foreach ($list as $keyword) {
+				$keyword = trim($keyword);
+
+				$index = array_search($keyword, $data['keywords']);
+				if ($index !== false) {
+					unset($data['keywords'][$index]);
+				}
+			}
+		}
+
+		Logger::info('fetch page info for URL', ['url' => $url, 'data' => $data]);
+
+		return $data;
+	}
+
+	/**
+	 * Returns the keywords of the page behind the provided URL as a list of hashtag-safe strings.
+	 *
+	 * @param string $url
+	 * @param string $photo            Forwarded to queryUrl()
+	 * @param string $keyword_denylist Forwarded to queryUrl()
+	 * @return array Empty when the site info has no keywords
+	 * @throws HTTPException\InternalServerErrorException
+	 */
+	public static function getTagsFromUrl(string $url, string $photo = '', string $keyword_denylist = '')
+	{
+		$data = self::queryUrl($url, $photo, true, $keyword_denylist);
+
+		// The site info doesn't necessarily contain keywords
+		if (empty($data['keywords']) || !is_array($data['keywords'])) {
+			return [];
+		}
+
+		$taglist = [];
+		foreach ($data['keywords'] as $keyword) {
+			$hashtag = str_replace([' ', '+', '/', '.', '#', "'"],
+				['', '', '', '', '', ''], $keyword);
+
+			$taglist[] = $hashtag;
+		}
+
+		return $taglist;
+	}
+
+	/**
+	 * Picks a non-hashtag, non-mention, schemeful URL at the end of the provided body string to be converted into Page Info.
+	 *
+	 * The candidates are tried in this order: a trailing [url]...[/url] tag, a trailing [url=...]...[/url] tag and,
+	 * if requested, a trailing naked URL.
+	 *
+	 * @param string $body
+	 * @param bool   $searchNakedUrls Whether we should pick a naked URL (outside of BBCode tags) as a last resort
+	 * @return string|null
+	 */
+	protected static function getRelevantUrlFromBody(string $body, bool $searchNakedUrls = false)
+	{
+		$URLSearchString = 'https?://[^\[\]]*';
+
+		// Fix for Mastodon where the mentions are in a different format
+		// preg_replace() returns null on error, the original body is kept in this case
+		$body = preg_replace("~\[url=($URLSearchString)]([#!@])(.*?)\[/url]~is", '$2[url=$1]$3[/url]', $body) ?? $body;
+
+		preg_match("~(?<![!#@])\[url]($URLSearchString)\[/url]$~is", $body, $matches);
+
+		if (!$matches) {
+			preg_match("~(?<![!#@])\[url=($URLSearchString)].*\[/url]$~is", $body, $matches);
+		}
+
+		if (!$matches && $searchNakedUrls) {
+			// A naked URL can't contain whitespace or brackets. Matching any character here would capture
+			// everything between the first URL of the body and its end, not just the trailing URL.
+			preg_match('~(?<=\W|^)(?<![=\]])(https?://[^\s\[\]]+)$~i', $body, $matches);
+			// "$" also matches before a final newline, in which case the URL isn't strictly trailing
+			if ($matches && !Strings::endsWith($body, $matches[1])) {
+				$matches = [];
+			}
+		}
+
+		return $matches[1] ?? null;
+	}
+
+	/**
+	 * Strips the provided URL from the end of the body, whether it is naked or wrapped in a [url] tag.
+	 * A link label that differs from the URL is kept in place of the removed tag.
+	 *
+	 * @param string $body
+	 * @param string $url
+	 * @return string|string[]|null
+	 */
+	protected static function stripTrailingUrlFromBody(string $body, string $url)
+	{
+		$quotedUrl = preg_quote($url, '#');
+
+		// Extended mode pattern, only the third alternative captures (the label), the others are replaced by nothing
+		$pattern = "#(?:
+			\[url]$quotedUrl\[/url]|
+			\[url=$quotedUrl]$quotedUrl\[/url]|
+			\[url=$quotedUrl]([^[]*?)\[/url]|
+			$quotedUrl
+		)$#isx";
+
+		return preg_replace($pattern, '$1', $body);
+	}
+}
--- a/src/Module/Debug/Babel.php
+++ b/src/Module/Debug/Babel.php
@ -22,6 +22,7 @@
 namespace Friendica\Module\Debug;

 use Friendica\BaseModule;
+use Friendica\Content\PageInfo;
 use Friendica\Content\Text;
 use Friendica\Core\Renderer;
 use Friendica\DI;
@ -113,6 +114,21 @@ class Babel extends BaseModule
 						'title'   => DI::l10n()->t('Item Tags'),
 						'content' => visible_whitespace(var_export($tags, true)),
 					];
+
+					$body2 = PageInfo::appendToBody($bbcode, true);
+					$results[] = [
+						'title'   => DI::l10n()->t('PageInfo::appendToBody'),
+						'content' => visible_whitespace($body2)
+					];
+					$html3 = Text\BBCode::convert($body2);
+					$results[] = [
+						'title'   => DI::l10n()->t('PageInfo::appendToBody => BBCode::convert (raw HTML)'),
+						'content' => visible_whitespace($html3)
+					];
+					$results[] = [
+						'title'   => DI::l10n()->t('PageInfo::appendToBody => BBCode::convert'),
+						'content' => $html3
+					];
 					break;
 				case 'diaspora':
 					$diaspora = trim($_REQUEST['text']);
--- a/src/Protocol/Feed.php
+++ b/src/Protocol/Feed.php
@ -37,7 +37,67 @@ use Friendica\Util\XML;
 /**
 * This class contain functions to import feeds (RSS/RDF/Atom)
 */
-class Feed {
+class Feed
+{
+	/**
+	 * Dispatches a fetched feed document to the importer matching the network of the contact.
+	 *
+	 * - OStatus contacts: the feed is handed to OStatus::import() together with $hub
+	 * - Feed contacts: the feed is handed to self::import()
+	 * - DFRN contacts: the feed is handed to DFRN::import(), provided DFRN::getImporter() returns a non-empty importer
+	 *
+	 * Nothing happens for contacts of any other network.
+	 *
+	 * @param       $xml      The feed document to consume
+	 * @param array $importer Record of the local user the feed is imported for, its "uid" is read for DFRN
+	 * @param array $contact  Record of the contact the feed comes from, its "network" and "id" are read
+	 * @param       $hub      Passed by reference to OStatus::import()
+	 * @throws ImagickException
+	 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
+	 */
+	public static function consume($xml, array $importer, array $contact, &$hub)
+	{
+		if ($contact['network'] === Protocol::OSTATUS) {
+			Logger::info('Consume OStatus messages');
+			OStatus::import($xml, $importer, $contact, $hub);
+		} elseif ($contact['network'] === Protocol::FEED) {
+			Logger::info('Consume feeds');
+			self::import($xml, $importer, $contact);
+		} elseif ($contact['network'] === Protocol::DFRN) {
+			Logger::info('Consume DFRN messages');
+			$dfrnImporter = DFRN::getImporter($contact['id'], $importer['uid']);
+			if (empty($dfrnImporter)) {
+				return;
+			}
+
+			Logger::info('Now import the DFRN feed');
+			DFRN::import($xml, $dfrnImporter, true);
+		}
+	}
+
+
 	/**
 	 * Read a RSS/RDF/Atom feed and create an item entry for it
 	 *
--- a/src/Util/Strings.php
+++ b/src/Util/Strings.php
@ -379,6 +379,7 @@ class Strings
 	/**
 	 * Check if the first string starts with the second
 	 *
+	 * @see http://maettig.com/code/php/php-performance-benchmarks.php#startswith
 	 * @param string $string
 	 * @param string $start
 	 * @return bool
@ -390,6 +391,21 @@ class Strings
 		return $return;
 	}

+	/**
+	 * Checks if the first string ends with the second
+	 *
+	 * The comparison is byte-wise and case-sensitive. An empty $end is treated
+	 * as a suffix of every string, including the empty string.
+	 *
+	 * @see http://maettig.com/code/php/php-performance-benchmarks.php#endswith
+	 * @param string $string The string to inspect
+	 * @param string $end    The expected suffix
+	 * @return bool True if $string ends with $end
+	 */
+	public static function endsWith(string $string, string $end)
+	{
+		$length = strlen($end);
+
+		// An empty suffix always matches. Without this guard the offset would be -0,
+		// which makes substr_compare() compare the whole string against ''.
+		if ($length === 0) {
+			return true;
+		}
+
+		// A suffix longer than the string itself can never match
+		if ($length > strlen($string)) {
+			return false;
+		}
+
+		return substr_compare($string, $end, -$length) === 0;
+	}
+
 	/**
 	 * Returns the regular expression string to match URLs in a given text
 	 *
--- a/src/Worker/OnePoll.php
+++ b/src/Worker/OnePoll.php
@ -207,7 +207,7 @@ class OnePoll
 							continue;
 						}

-						subscribe_to_hub($h, $importer, $contact, $hubmode);
+						self::subscribeToHub($h, $importer, $contact, $hubmode);
 					}
 				}
 			}
@ -704,4 +704,56 @@ class OnePoll
 		Logger::log("Mail: closing connection for ".$mailconf['user']);
 		imap_close($mbox);
 	}
+
+
+	/**
+	 * Posts a hub request (hub.mode, by default "subscribe") for a contact's poll URL to a hub
+	 *
+	 * Nothing is sent for Diaspora contacts, when no importer is given, or when
+	 * no user record is found for the importer's uid. A verify token that had to
+	 * be newly generated is stored in the contact record before the request is sent.
+	 *
+	 * @param string $url      Hub URL the request is posted to
+	 * @param array  $importer Importer data; its 'uid' is used to look up the user's nickname
+	 * @param array  $contact  Contact record; 'id', 'name', 'network', 'poll' and 'hub-verify' are read
+	 * @param string $hubmode  Value sent as hub.mode
+	 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
+	 */
+	private static function subscribeToHub(string $url, array $importer, array $contact, $hubmode = 'subscribe')
+	{
+		/*
+		 * Diaspora has different message-ids in feeds than they do
+		 * through the direct Diaspora protocol. If we try and use
+		 * the feed, we'll get duplicates. So don't.
+		 */
+		if ($contact['network'] === Protocol::DIASPORA) {
+			return;
+		}
+
+		// Without an importer we don't have a user id - so we quit
+		if (empty($importer)) {
+			return;
+		}
+
+		$user = DBA::selectFirst('user', ['nickname'], ['uid' => $importer['uid']]);
+
+		// No user, no nickname, we quit
+		if (!DBA::isResult($user)) {
+			return;
+		}
+
+		$push_url = DI::baseUrl() . '/pubsub/' . $user['nickname'] . '/' . $contact['id'];
+
+		// Use a single verify token, even if multiple hubs: reuse the stored one when present
+		$verify_token = (string)($contact['hub-verify'] ?? '');
+		$is_new_token = ($verify_token === '');
+		if ($is_new_token) {
+			$verify_token = Strings::getRandomHex();
+		}
+
+		// Every value is URL-encoded so that no value can break the form body apart
+		$params = 'hub.mode=' . urlencode($hubmode)
+			. '&hub.callback=' . urlencode($push_url)
+			. '&hub.topic=' . urlencode($contact['poll'])
+			. '&hub.verify=async'
+			. '&hub.verify_token=' . urlencode($verify_token);
+
+		Logger::log('subscribe_to_hub: ' . $hubmode . ' ' . $contact['name'] . ' to hub ' . $url . ' endpoint: ' . $push_url . ' with verifier ' . $verify_token);
+
+		// Only a freshly generated token needs to be persisted
+		if ($is_new_token) {
+			DBA::update('contact', ['hub-verify' => $verify_token], ['id' => $contact['id']]);
+		}
+
+		$postResult = Network::post($url, $params);
+
+		Logger::log('subscribe_to_hub: returns: ' . $postResult->getReturnCode(), Logger::DEBUG);
+	}
 }
--- a/tests/src/Content/PageInfoMock.php
+++ b/tests/src/Content/PageInfoMock.php
@ -0,0 +1,38 @@
+<?php
+/**
+ * @copyright Copyright (C) 2020, Friendica
+ *
+ * @license GNU AGPL version 3 or any later version
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+namespace Friendica\Test\src\Content;
+
+/**
+ * Class PageInfoMock
+ *
+ * Exposes protected methods for test in the inherited class
+ *
+ * @method static string|null getRelevantUrlFromBody(string $body, $searchNakedUrls = false)
+ * @method static string stripTrailingUrlFromBody(string $body, string $url)
+ */
+class PageInfoMock extends \Friendica\Content\PageInfo
+{
+	/**
+	 * Forwards a static call to the method of the same name, resolved from inside this class
+	 *
+	 * @param string $name      Name of the called method
+	 * @param array  $arguments Arguments of the call, passed on in order
+	 * @return mixed Whatever the forwarded method returns
+	 * @throws \BadMethodCallException If no method with the given name exists
+	 */
+	public static function __callStatic($name, $arguments)
+	{
+		// Calling self::$name() for an undefined method would re-enter __callStatic()
+		// and recurse without end, so fail with a clear error instead.
+		if (!method_exists(self::class, $name)) {
+			throw new \BadMethodCallException('Call to undefined method ' . self::class . '::' . $name . '()');
+		}
+
+		return self::$name(...$arguments);
+	}
+}
--- a/tests/src/Content/PageInfoTest.php
+++ b/tests/src/Content/PageInfoTest.php
@ -0,0 +1,125 @@
+<?php
+/**
+ * @copyright Copyright (C) 2020, Friendica
+ *
+ * @license GNU AGPL version 3 or any later version
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+namespace Friendica\Test\src\Content;
+
+use Friendica\Test\MockedTest;
+
+class PageInfoTest extends MockedTest
+{
+	/**
+	 * Data sets for testGetRelevantUrlFromBody()
+	 *
+	 * Each set holds the expected URL (null when none is expected), the body
+	 * and, optionally, the searchNakedUrls flag.
+	 *
+	 * @return array
+	 */
+	public function dataGetRelevantUrlFromBody()
+	{
+		// [url] tags with an absolute URL are expected to be picked up
+		$urlTags = [
+			'end-of-content' => [
+				'expected' => 'http://example.com/end-of-content',
+				'body' => 'Content[url]http://example.com/end-of-content[/url]',
+			],
+			'tag-no-attr' => [
+				'expected' => 'http://example.com/tag-no-attr',
+				'body' => '[url]http://example.com/tag-no-attr[/url]',
+			],
+			'tag-attr' => [
+				'expected' => 'http://example.com/tag-attr',
+				'body' => '[url=http://example.com/tag-attr]Example.com[/url]',
+			],
+		];
+
+		// [url] tags prefixed with @ or # are expected to yield no URL
+		$prefixedTags = [
+			'mention' => [
+				'expected' => null,
+				'body' => '@[url=http://example.com/mention]Mention[/url]',
+			],
+			'mention-exclusive' => [
+				'expected' => null,
+				'body' => '@[url=http://example.com/mention-exclusive]Mention Exclusive[/url]',
+			],
+			'hashtag' => [
+				'expected' => null,
+				'body' => '#[url=http://example.com/hashtag]hashtag[/url]',
+			],
+		];
+
+		// URLs without a surrounding [url] tag
+		$nakedUrls = [
+			'naked-url-unexpected' => [
+				'expected' => null,
+				'body' => 'http://example.com/naked-url-unexpected',
+			],
+			'naked-url-expected' => [
+				'expected' => 'http://example.com/naked-url-expected',
+				'body' => 'http://example.com/naked-url-expected',
+				'searchNakedUrls' => true,
+			],
+			'naked-url-end-of-content-unexpected' => [
+				'expected' => null,
+				'body' => 'Contenthttp://example.com/naked-url-end-of-content-unexpected',
+				'searchNakedUrls' => true,
+			],
+			'naked-url-end-of-content-expected' => [
+				'expected' => 'http://example.com/naked-url-end-of-content-expected',
+				'body' => 'Content http://example.com/naked-url-end-of-content-expected',
+				'searchNakedUrls' => true,
+			],
+		];
+
+		// Regression data for reported bugs
+		$regressions = [
+			'bug-8781-schemeless-link' => [
+				'expected' => null,
+				'body' => '[url]/posts/2576978090fd0138ee4c005056264835[/url]',
+			],
+		];
+
+		// All keys are distinct strings, so the merge keeps every set in the listed order
+		return array_merge($urlTags, $prefixedTags, $nakedUrls, $regressions);
+	}
+
+	/**
+	 * Checks the URL returned by getRelevantUrlFromBody() for each data set
+	 *
+	 * @dataProvider dataGetRelevantUrlFromBody
+	 *
+	 * @param string|null $expected
+	 * @param string      $body
+	 * @param bool        $searchNakedUrls
+	 */
+	public function testGetRelevantUrlFromBody($expected, string $body, bool $searchNakedUrls = false)
+	{
+		$actual = PageInfoMock::getRelevantUrlFromBody($body, $searchNakedUrls);
+
+		$this->assertSame($expected, $actual);
+	}
+
+	/**
+	 * Data sets for testStripTrailingUrlFromBody()
+	 *
+	 * Each set holds the expected resulting body, the original body and the URL to strip.
+	 *
+	 * @return array
+	 */
+	public function dataStripTrailingUrlFromBody()
+	{
+		$url = 'https://example.com';
+
+		$nakedUrls = [
+			'naked-url-append' => [
+				'expected' => 'content',
+				'body' => 'contenthttps://example.com',
+				'url' => $url,
+			],
+			'naked-url-not-at-the-end' => [
+				'expected' => 'https://example.comcontent',
+				'body' => 'https://example.comcontent',
+				'url' => $url,
+			],
+		];
+
+		// Regression data for reported bugs
+		$regressions = [
+			'bug-8781-labeled-link' => [
+				'expected' => 'link label',
+				'body' => '[url=https://example.com]link label[/url]',
+				'url' => $url,
+			],
+		];
+
+		return array_merge($nakedUrls, $regressions);
+	}
+
+	/**
+	 * Checks the body returned by stripTrailingUrlFromBody() for each data set
+	 *
+	 * @dataProvider dataStripTrailingUrlFromBody
+	 *
+	 * @param string $expected
+	 * @param string $body
+	 * @param string $url
+	 */
+	public function testStripTrailingUrlFromBody(string $expected, string $body, string $url)
+	{
+		$actual = PageInfoMock::stripTrailingUrlFromBody($body, $url);
+
+		$this->assertSame($expected, $actual);
+	}
+}