From 51925f2994c604d41e64ffce95ca7623591cbca0 Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Wed, 17 Jun 2020 02:02:13 -0400 Subject: [PATCH 1/7] Add new endsWith method to Util\Strings --- src/Util/Strings.php | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/Util/Strings.php b/src/Util/Strings.php index 35e7ebe15..1d440c19b 100644 --- a/src/Util/Strings.php +++ b/src/Util/Strings.php @@ -379,6 +379,7 @@ class Strings /** * Check if the first string starts with the second * + * @see http://maettig.com/code/php/php-performance-benchmarks.php#startswith * @param string $string * @param string $start * @return bool @@ -390,6 +391,21 @@ class Strings return $return; } + /** + * Checks if the first string ends with the second + * + * @see http://maettig.com/code/php/php-performance-benchmarks.php#endswith + * @param string $string + * @param string $end + * @return bool + */ + public static function endsWith(string $string, string $end) + { + $return = substr_compare($string, $end, -strlen($end)) === 0; + + return $return; + } + /** * Returns the regular expression string to match URLs in a given text * From 3d55ef15467074fbf98fdf2217dc242c8a091ffc Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Wed, 17 Jun 2020 04:50:28 -0400 Subject: [PATCH 2/7] Move drop_items and drop_item out of include/items - They were only used in mod/item --- include/items.php | 119 --------------------------------------------- mod/item.php | 120 +++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 113 insertions(+), 126 deletions(-) diff --git a/include/items.php b/include/items.php index a81a3bdb2..b1d164624 100644 --- a/include/items.php +++ b/include/items.php @@ -327,122 +327,3 @@ function subscribe_to_hub($url, array $importer, array $contact, $hubmode = 'sub return; } - -function drop_items(array $items) -{ - $uid = 0; - - if (!Session::isAuthenticated()) { - return; - } - - if (!empty($items)) { - foreach ($items as $item) { - $owner = 
Item::deleteForUser(['id' => $item], local_user()); - - if ($owner && !$uid) { - $uid = $owner; - } - } - } -} - -function drop_item($id, $return = '') -{ - $a = DI::app(); - - // locate item to be deleted - - $fields = ['id', 'uid', 'guid', 'contact-id', 'deleted', 'gravity', 'parent']; - $item = Item::selectFirstForUser(local_user(), $fields, ['id' => $id]); - - if (!DBA::isResult($item)) { - notice(DI::l10n()->t('Item not found.') . EOL); - DI::baseUrl()->redirect('network'); - } - - if ($item['deleted']) { - return 0; - } - - $contact_id = 0; - - // check if logged in user is either the author or owner of this item - if (Session::getRemoteContactID($item['uid']) == $item['contact-id']) { - $contact_id = $item['contact-id']; - } - - if ((local_user() == $item['uid']) || $contact_id) { - // Check if we should do HTML-based delete confirmation - if (!empty($_REQUEST['confirm'])) { - //
<form> can't take arguments in its "action" parameter - // so add any arguments as hidden inputs - $query = explode_querystring(DI::args()->getQueryString()); - $inputs = []; - - foreach ($query['args'] as $arg) { - if (strpos($arg, 'confirm=') === false) { - $arg_parts = explode('=', $arg); - $inputs[] = ['name' => $arg_parts[0], 'value' => $arg_parts[1]]; - } - } - - return Renderer::replaceMacros(Renderer::getMarkupTemplate('confirm.tpl'), [ - '$method' => 'get', - '$message' => DI::l10n()->t('Do you really want to delete this item?'), - '$extra_inputs' => $inputs, - '$confirm' => DI::l10n()->t('Yes'), - '$confirm_url' => $query['base'], - '$confirm_name' => 'confirmed', - '$cancel' => DI::l10n()->t('Cancel'), - ]); - } - // Now check how the user responded to the confirmation query - if (!empty($_REQUEST['canceled'])) { - DI::baseUrl()->redirect('display/' . $item['guid']); - } - - $is_comment = ($item['gravity'] == GRAVITY_COMMENT) ? true : false; - $parentitem = null; - if (!empty($item['parent'])){ - $fields = ['guid']; - $parentitem = Item::selectFirstForUser(local_user(), $fields, ['id' => $item['parent']]); - } - - // delete the item - Item::deleteForUser(['id' => $item['id']], local_user()); - - $return_url = hex2bin($return); - - // removes update_* from return_url to ignore Ajax refresh - $return_url = str_replace("update_", "", $return_url); - - // Check if delete a comment - if ($is_comment) { - // Return to parent guid - if (!empty($parentitem)) { - DI::baseUrl()->redirect('display/' . 
$parentitem['guid']); - //NOTREACHED - } - // In case something goes wrong - else { - DI::baseUrl()->redirect('network'); - //NOTREACHED - } - } - else { - // if unknown location or deleting top level post called from display - if (empty($return_url) || strpos($return_url, 'display') !== false) { - DI::baseUrl()->redirect('network'); - //NOTREACHED - } else { - DI::baseUrl()->redirect($return_url); - //NOTREACHED - } - } - } else { - notice(DI::l10n()->t('Permission denied.') . EOL); - DI::baseUrl()->redirect('display/' . $item['guid']); - //NOTREACHED - } -} diff --git a/mod/item.php b/mod/item.php index 6b9905132..38ff3b2a3 100644 --- a/mod/item.php +++ b/mod/item.php @@ -34,6 +34,7 @@ use Friendica\Content\Text\BBCode; use Friendica\Core\Hook; use Friendica\Core\Logger; use Friendica\Core\Protocol; +use Friendica\Core\Renderer; use Friendica\Core\Session; use Friendica\Core\System; use Friendica\Core\Worker; @@ -67,7 +68,10 @@ function item_post(App $a) { if (!empty($_REQUEST['dropitems'])) { $arr_drop = explode(',', $_REQUEST['dropitems']); - drop_items($arr_drop); + foreach ($arr_drop as $item) { + Item::deleteForUser(['id' => $item], $uid); + } + $json = ['success' => 1]; System::jsonExit($json); } @@ -850,7 +854,9 @@ function item_content(App $a) if (($a->argc >= 3) && ($a->argv[1] === 'drop') && intval($a->argv[2])) { if (DI::mode()->isAjax()) { - $o = Item::deleteForUser(['id' => $a->argv[2]], local_user()); + Item::deleteForUser(['id' => $a->argv[2]], local_user()); + // ajax return: [<item id>, 0 (no perm) | <owner id>] + System::jsonExit([intval($a->argv[2]), local_user()]); } else { if (!empty($a->argv[3])) { $o = drop_item($a->argv[2], $a->argv[3]); @@ -859,12 +865,112 @@ function item_content(App $a) $o = drop_item($a->argv[2]); } } - - if (DI::mode()->isAjax()) { - // ajax return: [<item id>, 0 (no perm) | <owner id>] - System::jsonExit([intval($a->argv[2]), intval($o)]); - } } return $o; } + +/** + * @param int $id + * @param string $return + * @return string + * @throws 
HTTPException\InternalServerErrorException + */ +function drop_item(int $id, string $return = '') +{ + // locate item to be deleted + $fields = ['id', 'uid', 'guid', 'contact-id', 'deleted', 'gravity', 'parent']; + $item = Item::selectFirstForUser(local_user(), $fields, ['id' => $id]); + + if (!DBA::isResult($item)) { + notice(DI::l10n()->t('Item not found.') . EOL); + DI::baseUrl()->redirect('network'); + } + + if ($item['deleted']) { + return ''; + } + + $contact_id = 0; + + // check if logged in user is either the author or owner of this item + if (Session::getRemoteContactID($item['uid']) == $item['contact-id']) { + $contact_id = $item['contact-id']; + } + + if ((local_user() == $item['uid']) || $contact_id) { + // Check if we should do HTML-based delete confirmation + if (!empty($_REQUEST['confirm'])) { + // <form> can't take arguments in its "action" parameter + // so add any arguments as hidden inputs + $query = explode_querystring(DI::args()->getQueryString()); + $inputs = []; + + foreach ($query['args'] as $arg) { + if (strpos($arg, 'confirm=') === false) { + $arg_parts = explode('=', $arg); + $inputs[] = ['name' => $arg_parts[0], 'value' => $arg_parts[1]]; + } + } + + return Renderer::replaceMacros(Renderer::getMarkupTemplate('confirm.tpl'), [ + '$method' => 'get', + '$message' => DI::l10n()->t('Do you really want to delete this item?'), + '$extra_inputs' => $inputs, + '$confirm' => DI::l10n()->t('Yes'), + '$confirm_url' => $query['base'], + '$confirm_name' => 'confirmed', + '$cancel' => DI::l10n()->t('Cancel'), + ]); + } + // Now check how the user responded to the confirmation query + if (!empty($_REQUEST['canceled'])) { + DI::baseUrl()->redirect('display/' . $item['guid']); + } + + $is_comment = ($item['gravity'] == GRAVITY_COMMENT) ? 
true : false; + $parentitem = null; + if (!empty($item['parent'])){ + $fields = ['guid']; + $parentitem = Item::selectFirstForUser(local_user(), $fields, ['id' => $item['parent']]); + } + + // delete the item + Item::deleteForUser(['id' => $item['id']], local_user()); + + $return_url = hex2bin($return); + + // removes update_* from return_url to ignore Ajax refresh + $return_url = str_replace("update_", "", $return_url); + + // Check if delete a comment + if ($is_comment) { + // Return to parent guid + if (!empty($parentitem)) { + DI::baseUrl()->redirect('display/' . $parentitem['guid']); + //NOTREACHED + } + // In case something goes wrong + else { + DI::baseUrl()->redirect('network'); + //NOTREACHED + } + } + else { + // if unknown location or deleting top level post called from display + if (empty($return_url) || strpos($return_url, 'display') !== false) { + DI::baseUrl()->redirect('network'); + //NOTREACHED + } else { + DI::baseUrl()->redirect($return_url); + //NOTREACHED + } + } + } else { + notice(DI::l10n()->t('Permission denied.')); + DI::baseUrl()->redirect('display/' . 
$item['guid']); + //NOTREACHED + } + + return ''; +} From e3d20b436689191632267118eab11d32a78b1507 Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Wed, 17 Jun 2020 04:52:23 -0400 Subject: [PATCH 3/7] Move subscribe_to_hub function to Worker/OnePoll from include/items - It was its only usage --- include/items.php | 44 ---------------------------------- src/Worker/OnePoll.php | 54 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 53 insertions(+), 45 deletions(-) diff --git a/include/items.php b/include/items.php index b1d164624..c299d2cd3 100644 --- a/include/items.php +++ b/include/items.php @@ -283,47 +283,3 @@ function consume_feed($xml, array $importer, array $contact, &$hub) } } } - -function subscribe_to_hub($url, array $importer, array $contact, $hubmode = 'subscribe') -{ - /* - * Diaspora has different message-ids in feeds than they do - * through the direct Diaspora protocol. If we try and use - * the feed, we'll get duplicates. So don't. - */ - if ($contact['network'] === Protocol::DIASPORA) { - return; - } - - // Without an importer we don't have a user id - so we quit - if (empty($importer)) { - return; - } - - $user = DBA::selectFirst('user', ['nickname'], ['uid' => $importer['uid']]); - - // No user, no nickname, we quit - if (!DBA::isResult($user)) { - return; - } - - $push_url = DI::baseUrl() . '/pubsub/' . $user['nickname'] . '/' . $contact['id']; - - // Use a single verify token, even if multiple hubs - $verify_token = ((strlen($contact['hub-verify'])) ? $contact['hub-verify'] : Strings::getRandomHex()); - - $params= 'hub.mode=' . $hubmode . '&hub.callback=' . urlencode($push_url) . '&hub.topic=' . urlencode($contact['poll']) . '&hub.verify=async&hub.verify_token=' . $verify_token; - - Logger::log('subscribe_to_hub: ' . $hubmode . ' ' . $contact['name'] . ' to hub ' . $url . ' endpoint: ' . $push_url . ' with verifier ' . 
$verify_token); - - if (!strlen($contact['hub-verify']) || ($contact['hub-verify'] != $verify_token)) { - DBA::update('contact', ['hub-verify' => $verify_token], ['id' => $contact['id']]); - } - - $postResult = Network::post($url, $params); - - Logger::log('subscribe_to_hub: returns: ' . $postResult->getReturnCode(), Logger::DEBUG); - - return; - -} diff --git a/src/Worker/OnePoll.php b/src/Worker/OnePoll.php index 959d28237..fbe92215d 100644 --- a/src/Worker/OnePoll.php +++ b/src/Worker/OnePoll.php @@ -207,7 +207,7 @@ class OnePoll continue; } - subscribe_to_hub($h, $importer, $contact, $hubmode); + self::subscribeToHub($h, $importer, $contact, $hubmode); } } } @@ -704,4 +704,56 @@ class OnePoll Logger::log("Mail: closing connection for ".$mailconf['user']); imap_close($mbox); } + + + /** + * @param string $url + * @param array $importer + * @param array $contact + * @param string $hubmode + * @throws \Friendica\Network\HTTPException\InternalServerErrorException + */ + private static function subscribeToHub(string $url, array $importer, array $contact, $hubmode = 'subscribe') + { + /* + * Diaspora has different message-ids in feeds than they do + * through the direct Diaspora protocol. If we try and use + * the feed, we'll get duplicates. So don't. + */ + if ($contact['network'] === Protocol::DIASPORA) { + return; + } + + // Without an importer we don't have a user id - so we quit + if (empty($importer)) { + return; + } + + $user = DBA::selectFirst('user', ['nickname'], ['uid' => $importer['uid']]); + + // No user, no nickname, we quit + if (!DBA::isResult($user)) { + return; + } + + $push_url = DI::baseUrl() . '/pubsub/' . $user['nickname'] . '/' . $contact['id']; + + // Use a single verify token, even if multiple hubs + $verify_token = ((strlen($contact['hub-verify'])) ? $contact['hub-verify'] : Strings::getRandomHex()); + + $params = 'hub.mode=' . $hubmode . '&hub.callback=' . urlencode($push_url) . '&hub.topic=' . urlencode($contact['poll']) . 
'&hub.verify=async&hub.verify_token=' . $verify_token; + + Logger::log('subscribe_to_hub: ' . $hubmode . ' ' . $contact['name'] . ' to hub ' . $url . ' endpoint: ' . $push_url . ' with verifier ' . $verify_token); + + if (!strlen($contact['hub-verify']) || ($contact['hub-verify'] != $verify_token)) { + DBA::update('contact', ['hub-verify' => $verify_token], ['id' => $contact['id']]); + } + + $postResult = Network::post($url, $params); + + Logger::log('subscribe_to_hub: returns: ' . $postResult->getReturnCode(), Logger::DEBUG); + + return; + + } } From eba964ec127551290ad9f936e3919e5394b80ab7 Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Wed, 17 Jun 2020 04:54:44 -0400 Subject: [PATCH 4/7] Deprecate consume_feed() in favor of Protocol\Feed::consume --- include/items.php | 57 +++------------------------------------ src/Protocol/Feed.php | 62 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 64 insertions(+), 55 deletions(-) diff --git a/include/items.php b/include/items.php index c299d2cd3..2f218ea2a 100644 --- a/include/items.php +++ b/include/items.php @@ -225,61 +225,10 @@ function add_page_info_to_body($body, $texturl = false, $no_photos = false) } /** - * - * consume_feed - process atom feed and update anything/everything we might need to update - * - * $xml = the (atom) feed to consume - RSS isn't as fully supported but may work for simple feeds. - * - * $importer = the contact_record (joined to user_record) of the local user who owns this relationship. - * It is this person's stuff that is going to be updated. - * $contact = the person who is sending us stuff. If not set, we MAY be processing a "follow" activity - * from an external network and MAY create an appropriate contact record. Otherwise, we MUST - * have a contact record. - * $hub = should we find a hub declation in the feed, pass it back to our calling process, who might (or - * might not) try and subscribe to it. 
- * $datedir sorts in reverse order - * $pass - by default ($pass = 0) we cannot guarantee that a parent item has been - * imported prior to its children being seen in the stream unless we are certain - * of how the feed is arranged/ordered. - * With $pass = 1, we only pull parent items out of the stream. - * With $pass = 2, we only pull children (comments/likes). - * - * So running this twice, first with pass 1 and then with pass 2 will do the right - * thing regardless of feed ordering. This won't be adequate in a fully-threaded - * model where comments can have sub-threads. That would require some massive sorting - * to get all the feed items into a mostly linear ordering, and might still require - * recursion. - * - * @param $xml - * @param array $importer - * @param array $contact - * @param $hub - * @throws ImagickException - * @throws \Friendica\Network\HTTPException\InternalServerErrorException + * @deprecated since 2020.06 + * @see \Friendica\Protocol\Feed::consume */ function consume_feed($xml, array $importer, array $contact, &$hub) { - if ($contact['network'] === Protocol::OSTATUS) { - Logger::log("Consume OStatus messages ", Logger::DEBUG); - OStatus::import($xml, $importer, $contact, $hub); - - return; - } - - if ($contact['network'] === Protocol::FEED) { - Logger::log("Consume feeds", Logger::DEBUG); - Feed::import($xml, $importer, $contact); - - return; - } - - if ($contact['network'] === Protocol::DFRN) { - Logger::log("Consume DFRN messages", Logger::DEBUG); - $dfrn_importer = DFRN::getImporter($contact["id"], $importer["uid"]); - if (!empty($dfrn_importer)) { - Logger::log("Now import the DFRN feed"); - DFRN::import($xml, $dfrn_importer, true); - return; - } - } + \Friendica\Protocol\Feed::consume($xml, $importer, $contact, $hub); } diff --git a/src/Protocol/Feed.php b/src/Protocol/Feed.php index ed8cb2b98..e0f067826 100644 --- a/src/Protocol/Feed.php +++ b/src/Protocol/Feed.php @@ -37,7 +37,67 @@ use Friendica\Util\XML; /** * This class contain 
functions to import feeds (RSS/RDF/Atom) */ -class Feed { +class Feed +{ + /** + * consume - process atom feed and update anything/everything we might need to update + * + * $xml = the (atom) feed to consume - RSS isn't as fully supported but may work for simple feeds. + * + * $importer = the contact_record (joined to user_record) of the local user who owns this relationship. + * It is this person's stuff that is going to be updated. + * $contact = the person who is sending us stuff. If not set, we MAY be processing a "follow" activity + * from an external network and MAY create an appropriate contact record. Otherwise, we MUST + * have a contact record. + * $hub = should we find a hub declaration in the feed, pass it back to our calling process, who might (or + * might not) try and subscribe to it. + * $datedir sorts in reverse order + * $pass - by default ($pass = 0) we cannot guarantee that a parent item has been + * imported prior to its children being seen in the stream unless we are certain + * of how the feed is arranged/ordered. + * With $pass = 1, we only pull parent items out of the stream. + * With $pass = 2, we only pull children (comments/likes). + * + * So running this twice, first with pass 1 and then with pass 2 will do the right + * thing regardless of feed ordering. This won't be adequate in a fully-threaded + * model where comments can have sub-threads. That would require some massive sorting + * to get all the feed items into a mostly linear ordering, and might still require + * recursion. 
+ * + * @param $xml + * @param array $importer + * @param array $contact + * @param $hub + * @throws ImagickException + * @throws \Friendica\Network\HTTPException\InternalServerErrorException + */ + public static function consume($xml, array $importer, array $contact, &$hub) + { + if ($contact['network'] === Protocol::OSTATUS) { + Logger::log('Consume OStatus messages ', Logger::DEBUG); + OStatus::import($xml, $importer, $contact, $hub); + + return; + } + + if ($contact['network'] === Protocol::FEED) { + Logger::log('Consume feeds', Logger::DEBUG); + self::import($xml, $importer, $contact); + + return; + } + + if ($contact['network'] === Protocol::DFRN) { + Logger::log('Consume DFRN messages', Logger::DEBUG); + $dfrn_importer = DFRN::getImporter($contact['id'], $importer['uid']); + if (!empty($dfrn_importer)) { + Logger::log('Now import the DFRN feed'); + DFRN::import($xml, $dfrn_importer, true); + return; + } + } + } + /** * Read a RSS/RDF/Atom feed and create an item entry for it * From f3323aff5ec0ae918718432688ac82c66c8f99a2 Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Wed, 17 Jun 2020 04:57:21 -0400 Subject: [PATCH 5/7] Deprecate page_info functions to new PageInfo class - Add tests for parts not using remote requests - Add scheme requirement for page info URLs - Add policy to keep label from stripped Page Info links --- include/items.php | 210 +++------------------- src/Content/PageInfo.php | 269 +++++++++++++++++++++++++++++ tests/src/Content/PageInfoMock.php | 38 ++++ tests/src/Content/PageInfoTest.php | 125 ++++++++++++++ 4 files changed, 457 insertions(+), 185 deletions(-) create mode 100644 src/Content/PageInfo.php create mode 100644 tests/src/Content/PageInfoMock.php create mode 100644 tests/src/Content/PageInfoTest.php diff --git a/include/items.php b/include/items.php index 2f218ea2a..16fe897be 100644 --- a/include/items.php +++ b/include/items.php @@ -19,209 +19,49 @@ * */ -use Friendica\Core\Hook; -use Friendica\Core\Logger; -use 
Friendica\Core\Protocol; -use Friendica\Core\Renderer; -use Friendica\Core\Session; -use Friendica\Database\DBA; -use Friendica\DI; -use Friendica\Model\Item; -use Friendica\Protocol\DFRN; -use Friendica\Protocol\Feed; -use Friendica\Protocol\OStatus; -use Friendica\Util\Network; -use Friendica\Util\ParseUrl; -use Friendica\Util\Strings; - -require_once __DIR__ . '/../mod/share.php'; - +/** + * @deprecated since 2020.06 + * @see \Friendica\Content\PageInfo::getFooterFromData + */ function add_page_info_data(array $data, $no_photos = false) { - Hook::callAll('page_info_data', $data); - - if (empty($data['type'])) { - return ''; - } - - // It maybe is a rich content, but if it does have everything that a link has, - // then treat it that way - if (($data["type"] == "rich") && is_string($data["title"]) && - is_string($data["text"]) && !empty($data["images"])) { - $data["type"] = "link"; - } - - $data["title"] = $data["title"] ?? ''; - - if ((($data["type"] != "link") && ($data["type"] != "video") && ($data["type"] != "photo")) || ($data["title"] == $data["url"])) { - return ""; - } - - if ($no_photos && ($data["type"] == "photo")) { - return ""; - } - - // Escape some bad characters - $data["url"] = str_replace(["[", "]"], ["[", "]"], htmlentities($data["url"], ENT_QUOTES, 'UTF-8', false)); - $data["title"] = str_replace(["[", "]"], ["[", "]"], htmlentities($data["title"], ENT_QUOTES, 'UTF-8', false)); - - $text = "[attachment type='".$data["type"]."'"; - - if (empty($data["text"])) { - $data["text"] = $data["title"]; - } - - if (empty($data["text"])) { - $data["text"] = $data["url"]; - } - - if (!empty($data["url"])) { - $text .= " url='".$data["url"]."'"; - } - - if (!empty($data["title"])) { - $text .= " title='".$data["title"]."'"; - } - - // Only embedd a picture link when it seems to be a valid picture ("width" is set) - if (!empty($data["images"]) && !empty($data["images"][0]["width"])) { - $preview = str_replace(["[", "]"], ["[", "]"], 
htmlentities($data["images"][0]["src"], ENT_QUOTES, 'UTF-8', false)); - // if the preview picture is larger than 500 pixels then show it in a larger mode - // But only, if the picture isn't higher than large (To prevent huge posts) - if (!DI::config()->get('system', 'always_show_preview') && ($data["images"][0]["width"] >= 500) - && ($data["images"][0]["width"] >= $data["images"][0]["height"])) { - $text .= " image='".$preview."'"; - } else { - $text .= " preview='".$preview."'"; - } - } - - $text .= "]".$data["text"]."[/attachment]"; - - $hashtags = ""; - if (isset($data["keywords"]) && count($data["keywords"])) { - $hashtags = "\n"; - foreach ($data["keywords"] as $keyword) { - /// @TODO make a positive list of allowed characters - $hashtag = str_replace([' ', '+', '/', '.', '#', '@', "'", '"', '’', '`', '(', ')', '„', '“'], '', $keyword); - $hashtags .= "#[url=" . DI::baseUrl() . "/search?tag=" . $hashtag . "]" . $hashtag . "[/url] "; - } - } - - return "\n".$text.$hashtags; + return "\n" . \Friendica\Content\PageInfo::getFooterFromData($data, $no_photos); } +/** + * @deprecated since 2020.06 + * @see \Friendica\Content\PageInfo::queryUrl + */ function query_page_info($url, $photo = "", $keywords = false, $keyword_denylist = "") { - $data = ParseUrl::getSiteinfoCached($url, true); - - if ($photo != "") { - $data["images"][0]["src"] = $photo; - } - - Logger::log('fetch page info for ' . $url . ' ' . 
print_r($data, true), Logger::DEBUG); - - if (!$keywords && isset($data["keywords"])) { - unset($data["keywords"]); - } - - if (($keyword_denylist != "") && isset($data["keywords"])) { - $list = explode(", ", $keyword_denylist); - - foreach ($list as $keyword) { - $keyword = trim($keyword); - - $index = array_search($keyword, $data["keywords"]); - if ($index !== false) { - unset($data["keywords"][$index]); - } - } - } - - return $data; + return \Friendica\Content\PageInfo::queryUrl($url, $photo, $keywords, $keyword_denylist); } +/** + * @deprecated since 2020.06 + * @see \Friendica\Content\PageInfo::getTagsFromUrl() + */ function get_page_keywords($url, $photo = "", $keywords = false, $keyword_denylist = "") { - $data = query_page_info($url, $photo, $keywords, $keyword_denylist); - if (empty($data["keywords"]) || !is_array($data["keywords"])) { - return []; - } - - $taglist = []; - foreach ($data['keywords'] as $keyword) { - $hashtag = str_replace([" ", "+", "/", ".", "#", "'"], - ["", "", "", "", "", ""], $keyword); - - $taglist[] = $hashtag; - } - - return $taglist; + return $keywords ? \Friendica\Content\PageInfo::getTagsFromUrl($url, $photo, $keyword_denylist) : []; } +/** + * @deprecated since 2020.06 + * @see \Friendica\Content\PageInfo::getFooterFromUrl + */ function add_page_info($url, $no_photos = false, $photo = "", $keywords = false, $keyword_denylist = "") { - $data = query_page_info($url, $photo, $keywords, $keyword_denylist); - - $text = ''; - - if (is_array($data)) { - $text = add_page_info_data($data, $no_photos); - } - - return $text; + return "\n" . \Friendica\Content\PageInfo::getFooterFromUrl($url, $no_photos, $photo, $keywords, $keyword_denylist); } +/** + * @deprecated since 2020.06 + * @see \Friendica\Content\PageInfo::appendToBody + */ function add_page_info_to_body($body, $texturl = false, $no_photos = false) { - Logger::log('add_page_info_to_body: fetch page info for body ' . 
$body, Logger::DEBUG); - - $URLSearchString = "^\[\]"; - - // Fix for Mastodon where the mentions are in a different format - $body = preg_replace("/\[url\=([$URLSearchString]*)\]([#!@])(.*?)\[\/url\]/ism", - '$2[url=$1]$3[/url]', $body); - - // Adding these spaces is a quick hack due to my problems with regular expressions :) - preg_match("/[^!#@]\[url\]([$URLSearchString]*)\[\/url\]/ism", " " . $body, $matches); - - if (!$matches) { - preg_match("/[^!#@]\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", " " . $body, $matches); - } - - // Convert urls without bbcode elements - if (!$matches && $texturl) { - preg_match("/([^\]\='".'"'."]|^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism", " ".$body, $matches); - - // Yeah, a hack. I really hate regular expressions :) - if ($matches) { - $matches[1] = $matches[2]; - } - } - - if ($matches) { - $footer = add_page_info($matches[1], $no_photos); - } - - // Remove the link from the body if the link is attached at the end of the post - if (isset($footer) && (trim($footer) != "") && (strpos($footer, $matches[1]))) { - $removedlink = trim(str_replace($matches[1], "", $body)); - if (($removedlink == "") || strstr($body, $removedlink)) { - $body = $removedlink; - } - - $removedlink = preg_replace("/\[url\=" . preg_quote($matches[1], '/') . "\](.*?)\[\/url\]/ism", '', $body); - if (($removedlink == "") || strstr($body, $removedlink)) { - $body = $removedlink; - } - } - - // Add the page information to the bottom - if (isset($footer) && (trim($footer) != "")) { - $body .= $footer; - } - - return $body; + return \Friendica\Content\PageInfo::appendToBody($body, $texturl, $no_photos); } /** diff --git a/src/Content/PageInfo.php b/src/Content/PageInfo.php new file mode 100644 index 000000000..7d6f2eb9f --- /dev/null +++ b/src/Content/PageInfo.php @@ -0,0 +1,269 @@ +. 
+ * + */ + +namespace Friendica\Content; + +use Friendica\Core\Hook; +use Friendica\Core\Logger; +use Friendica\DI; +use Friendica\Network\HTTPException; +use Friendica\Util\ParseUrl; +use Friendica\Util\Strings; + +/** + * Extracts trailing URLs from post bodies to transform them in enriched attachment tags through Site Info query + */ +class PageInfo +{ + /** + * @param string $body + * @param bool $searchNakedUrls + * @param bool $no_photos + * @return string + * @throws HTTPException\InternalServerErrorException + */ + public static function appendToBody(string $body, bool $searchNakedUrls = false, bool $no_photos = false) + { + Logger::info('add_page_info_to_body: fetch page info for body', ['body' => $body]); + + $url = self::getRelevantUrlFromBody($body, $searchNakedUrls); + if (!$url) { + return $body; + } + + $footer = self::getFooterFromUrl($url, $no_photos); + if (!$footer) { + return $body; + } + + $body = self::stripTrailingUrlFromBody($body, $url); + + $body .= "\n" . $footer; + + return $body; + } + + /** + * @param string $url + * @param bool $no_photos + * @param string $photo + * @param bool $keywords + * @param string $keyword_denylist + * @return string + * @throws HTTPException\InternalServerErrorException + */ + public static function getFooterFromUrl(string $url, bool $no_photos = false, string $photo = '', bool $keywords = false, string $keyword_denylist = '') + { + $data = self::queryUrl($url, $photo, $keywords, $keyword_denylist); + + return self::getFooterFromData($data, $no_photos); + } + + /** + * @param array $data + * @param bool $no_photos + * @return string + * @throws HTTPException\InternalServerErrorException + */ + public static function getFooterFromData(array $data, bool $no_photos = false) + { + Hook::callAll('page_info_data', $data); + + if (empty($data['type'])) { + return ''; + } + + // It maybe is a rich content, but if it does have everything that a link has, + // then treat it that way + if (($data['type'] == 'rich') && 
is_string($data['title']) && + is_string($data['text']) && !empty($data['images'])) { + $data['type'] = 'link'; + } + + $data['title'] = $data['title'] ?? ''; + + if ((($data['type'] != 'link') && ($data['type'] != 'video') && ($data['type'] != 'photo')) || ($data['title'] == $data['url'])) { + return ''; + } + + if ($no_photos && ($data['type'] == 'photo')) { + return ''; + } + + // Escape some bad characters + $data['url'] = str_replace(['[', ']'], ['[', ']'], htmlentities($data['url'], ENT_QUOTES, 'UTF-8', false)); + $data['title'] = str_replace(['[', ']'], ['[', ']'], htmlentities($data['title'], ENT_QUOTES, 'UTF-8', false)); + + $text = "[attachment type='" . $data['type'] . "'"; + + if (empty($data['text'])) { + $data['text'] = $data['title']; + } + + if (empty($data['text'])) { + $data['text'] = $data['url']; + } + + if (!empty($data['url'])) { + $text .= " url='" . $data['url'] . "'"; + } + + if (!empty($data['title'])) { + $text .= " title='" . $data['title'] . "'"; + } + + // Only embedd a picture link when it seems to be a valid picture ("width" is set) + if (!empty($data['images']) && !empty($data['images'][0]['width'])) { + $preview = str_replace(['[', ']'], ['[', ']'], htmlentities($data['images'][0]['src'], ENT_QUOTES, 'UTF-8', false)); + // if the preview picture is larger than 500 pixels then show it in a larger mode + // But only, if the picture isn't higher than large (To prevent huge posts) + if (!DI::config()->get('system', 'always_show_preview') && ($data['images'][0]['width'] >= 500) + && ($data['images'][0]['width'] >= $data['images'][0]['height'])) { + $text .= " image='" . $preview . "'"; + } else { + $text .= " preview='" . $preview . "'"; + } + } + + $text .= ']' . $data['text'] . 
'[/attachment]'; + + $hashtags = ''; + if (!empty($data['keywords'])) { + $hashtags = "\n"; + foreach ($data['keywords'] as $keyword) { + /// @TODO make a positive list of allowed characters + $hashtag = str_replace([' ', '+', '/', '.', '#', '@', "'", '"', '’', '`', '(', ')', '„', '“'], '', $keyword); + $hashtags .= '#[url=' . DI::baseUrl() . '/search?tag=' . $hashtag . ']' . $hashtag . '[/url] '; + } + } + + return $text . $hashtags; + } + + /** + * @param string $url + * @param string $photo + * @param bool $keywords + * @param string $keyword_denylist + * @return array|bool + * @throws HTTPException\InternalServerErrorException + */ + public static function queryUrl(string $url, string $photo = '', bool $keywords = false, string $keyword_denylist = '') + { + $data = ParseUrl::getSiteinfoCached($url, true); + + if ($photo != '') { + $data['images'][0]['src'] = $photo; + } + + if (!$keywords) { + unset($data['keywords']); + } elseif ($keyword_denylist) { + $list = explode(', ', $keyword_denylist); + + foreach ($list as $keyword) { + $keyword = trim($keyword); + + $index = array_search($keyword, $data['keywords']); + if ($index !== false) { + unset($data['keywords'][$index]); + } + } + } + + Logger::info('fetch page info for URL', ['url' => $url, 'data' => $data]); + + return $data; + } + + /** + * @param string $url + * @param string $photo + * @param string $keyword_denylist + * @return array + * @throws HTTPException\InternalServerErrorException + */ + public static function getTagsFromUrl(string $url, string $photo = '', string $keyword_denylist = '') + { + $data = self::queryUrl($url, $photo, true, $keyword_denylist); + + $taglist = []; + foreach ($data['keywords'] as $keyword) { + $hashtag = str_replace([' ', '+', '/', '.', '#', "'"], + ['', '', '', '', '', ''], $keyword); + + $taglist[] = $hashtag; + } + + return $taglist; + } + + /** + * Picks a non-hashtag, non-mention, schemeful URL at the end of the provided body string to be converted into Page Info. 
+ * + * @param string $body + * @param bool $searchNakedUrls Whether we should pick a naked URL (outside of BBCode tags) as a last resort + * @return string|null + */ + protected static function getRelevantUrlFromBody(string $body, bool $searchNakedUrls = false) + { + $URLSearchString = 'https?://[^\[\]]*'; + + // Fix for Mastodon where the mentions are in a different format + $body = preg_replace("~\[url=($URLSearchString)]([#!@])(.*?)\[/url]~is", '$2[url=$1]$3[/url]', $body); + + preg_match("~(?. + * + */ + +namespace Friendica\Test\src\Content; + +/** + * Class PageInfoMock + * + * Exposes protected methods for test in the inherited class + * + * @method static string|null getRelevantUrlFromBody(string $body, $searchNakedUrls = false) + * @method static string stripTrailingUrlFromBody(string $body, string $url) + */ +class PageInfoMock extends \Friendica\Content\PageInfo +{ + public static function __callStatic($name, $arguments) + { + return self::$name(...$arguments); + } +} diff --git a/tests/src/Content/PageInfoTest.php b/tests/src/Content/PageInfoTest.php new file mode 100644 index 000000000..6f9641564 --- /dev/null +++ b/tests/src/Content/PageInfoTest.php @@ -0,0 +1,125 @@ +. 
+ * + */ + +namespace Friendica\Test\src\Content; + +use Friendica\Test\MockedTest; + +class PageInfoTest extends MockedTest +{ + public function dataGetRelevantUrlFromBody() + { + return [ + 'end-of-content' => [ + 'expected' => 'http://example.com/end-of-content', + 'body' => 'Content[url]http://example.com/end-of-content[/url]', + ], + 'tag-no-attr' => [ + 'expected' => 'http://example.com/tag-no-attr', + 'body' => '[url]http://example.com/tag-no-attr[/url]', + ], + 'tag-attr' => [ + 'expected' => 'http://example.com/tag-attr', + 'body' => '[url=http://example.com/tag-attr]Example.com[/url]', + ], + 'mention' => [ + 'expected' => null, + 'body' => '@[url=http://example.com/mention]Mention[/url]', + ], + 'mention-exclusive' => [ + 'expected' => null, + 'body' => '@[url=http://example.com/mention-exclusive]Mention Exclusive[/url]', + ], + 'hashtag' => [ + 'expected' => null, + 'body' => '#[url=http://example.com/hashtag]hashtag[/url]', + ], + 'naked-url-unexpected' => [ + 'expected' => null, + 'body' => 'http://example.com/naked-url-unexpected', + ], + 'naked-url-expected' => [ + 'expected' => 'http://example.com/naked-url-expected', + 'body' => 'http://example.com/naked-url-expected', + 'searchNakedUrls' => true, + ], + 'naked-url-end-of-content-unexpected' => [ + 'expected' => null, + 'body' => 'Contenthttp://example.com/naked-url-end-of-content-unexpected', + 'searchNakedUrls' => true, + ], + 'naked-url-end-of-content-expected' => [ + 'expected' => 'http://example.com/naked-url-end-of-content-expected', + 'body' => 'Content http://example.com/naked-url-end-of-content-expected', + 'searchNakedUrls' => true, + ], + 'bug-8781-schemeless-link' => [ + 'expected' => null, + 'body' => '[url]/posts/2576978090fd0138ee4c005056264835[/url]', + ], + ]; + } + + /** + * @dataProvider dataGetRelevantUrlFromBody + * + * @param string|null $expected + * @param string $body + * @param bool $searchNakedUrls + */ + public function testGetRelevantUrlFromBody($expected, string 
$body, bool $searchNakedUrls = false) + { + $this->assertSame($expected, PageInfoMock::getRelevantUrlFromBody($body, $searchNakedUrls)); + } + + public function dataStripTrailingUrlFromBody() + { + return [ + 'naked-url-append' => [ + 'expected' => 'content', + 'body' => 'contenthttps://example.com', + 'url' => 'https://example.com', + ], + 'naked-url-not-at-the-end' => [ + 'expected' => 'https://example.comcontent', + 'body' => 'https://example.comcontent', + 'url' => 'https://example.com', + ], + 'bug-8781-labeled-link' => [ + 'expected' => 'link label', + 'body' => '[url=https://example.com]link label[/url]', + 'url' => 'https://example.com', + ], + ]; + } + + /** + * @dataProvider dataStripTrailingUrlFromBody + * + * @param string $expected + * @param string $body + * @param string $url + */ + public function testStripTrailingUrlFromBody(string $expected, string $body, string $url) + { + $this->assertSame($expected, PageInfoMock::stripTrailingUrlFromBody($body, $url)); + } +} From 4e7c45f835b62512f3c651b875a1327cc369ba19 Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Wed, 17 Jun 2020 04:58:13 -0400 Subject: [PATCH 6/7] Add PageInfo result panels to Debug\Babel --- src/Module/Debug/Babel.php | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/Module/Debug/Babel.php b/src/Module/Debug/Babel.php index 5d688d657..2954bc010 100644 --- a/src/Module/Debug/Babel.php +++ b/src/Module/Debug/Babel.php @@ -22,6 +22,7 @@ namespace Friendica\Module\Debug; use Friendica\BaseModule; +use Friendica\Content\PageInfo; use Friendica\Content\Text; use Friendica\Core\Renderer; use Friendica\DI; @@ -113,6 +114,21 @@ class Babel extends BaseModule 'title' => DI::l10n()->t('Item Tags'), 'content' => visible_whitespace(var_export($tags, true)), ]; + + $body2 = PageInfo::appendToBody($bbcode, true); + $results[] = [ + 'title' => DI::l10n()->t('PageInfo::appendToBody'), + 'content' => visible_whitespace($body2) + ]; + $html3 = Text\BBCode::convert($body2); + 
$results[] = [ + 'title' => DI::l10n()->t('PageInfo::appendToBody => BBCode::convert (raw HTML)'), + 'content' => visible_whitespace($html3) + ]; + $results[] = [ + 'title' => DI::l10n()->t('PageInfo::appendToBody => BBCode::convert'), + 'content' => $html3 + ]; break; case 'diaspora': $diaspora = trim($_REQUEST['text']); From f544c8383ba79ff952453e682c6550602aeb0e08 Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Wed, 17 Jun 2020 21:44:28 -0400 Subject: [PATCH 7/7] Implement pull request feedback - Improve formatting of drop_item in mod/item - Replace deprecated Logger::log() calls in Protocol\Feed::consume --- mod/item.php | 10 ++++------ src/Protocol/Feed.php | 8 ++++---- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/mod/item.php b/mod/item.php index 38ff3b2a3..e56e4c68d 100644 --- a/mod/item.php +++ b/mod/item.php @@ -928,9 +928,9 @@ function drop_item(int $id, string $return = '') DI::baseUrl()->redirect('display/' . $item['guid']); } - $is_comment = ($item['gravity'] == GRAVITY_COMMENT) ? true : false; + $is_comment = $item['gravity'] == GRAVITY_COMMENT; $parentitem = null; - if (!empty($item['parent'])){ + if (!empty($item['parent'])) { $fields = ['guid']; $parentitem = Item::selectFirstForUser(local_user(), $fields, ['id' => $item['parent']]); } @@ -949,14 +949,12 @@ function drop_item(int $id, string $return = '') if (!empty($parentitem)) { DI::baseUrl()->redirect('display/' . 
$parentitem['guid']); //NOTREACHED - } - // In case something goes wrong + } // In case something goes wrong else { DI::baseUrl()->redirect('network'); //NOTREACHED } - } - else { + } else { // if unknown location or deleting top level post called from display if (empty($return_url) || strpos($return_url, 'display') !== false) { DI::baseUrl()->redirect('network'); diff --git a/src/Protocol/Feed.php b/src/Protocol/Feed.php index e0f067826..2ca615581 100644 --- a/src/Protocol/Feed.php +++ b/src/Protocol/Feed.php @@ -74,24 +74,24 @@ class Feed public static function consume($xml, array $importer, array $contact, &$hub) { if ($contact['network'] === Protocol::OSTATUS) { - Logger::log('Consume OStatus messages ', Logger::DEBUG); + Logger::info('Consume OStatus messages'); OStatus::import($xml, $importer, $contact, $hub); return; } if ($contact['network'] === Protocol::FEED) { - Logger::log('Consume feeds', Logger::DEBUG); + Logger::info('Consume feeds'); self::import($xml, $importer, $contact); return; } if ($contact['network'] === Protocol::DFRN) { - Logger::log('Consume DFRN messages', Logger::DEBUG); + Logger::info('Consume DFRN messages'); $dfrn_importer = DFRN::getImporter($contact['id'], $importer['uid']); if (!empty($dfrn_importer)) { - Logger::log('Now import the DFRN feed'); + Logger::info('Now import the DFRN feed'); DFRN::import($xml, $dfrn_importer, true); return; }