Merge pull request #9935 from MrPetovan/bug/9929-harden-parse-url
Harden /parseurl
This commit is contained in:
commit
e8a4d9af14
18 changed files with 299 additions and 249 deletions
12
database.sql
12
database.sql
|
@ -1,6 +1,6 @@
|
||||||
-- ------------------------------------------
|
-- ------------------------------------------
|
||||||
-- Friendica 2021.03-dev (Red Hot Poker)
|
-- Friendica 2021.03-dev (Red Hot Poker)
|
||||||
-- DB_UPDATE_VERSION 1402
|
-- DB_UPDATE_VERSION 1403
|
||||||
-- ------------------------------------------
|
-- ------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@ -959,15 +959,17 @@ CREATE TABLE IF NOT EXISTS `openwebauth-token` (
|
||||||
-- TABLE parsed_url
|
-- TABLE parsed_url
|
||||||
--
|
--
|
||||||
CREATE TABLE IF NOT EXISTS `parsed_url` (
|
CREATE TABLE IF NOT EXISTS `parsed_url` (
|
||||||
`url` varbinary(255) NOT NULL COMMENT 'page url',
|
`url_hash` binary(64) NOT NULL COMMENT 'page url hash',
|
||||||
`guessing` boolean NOT NULL DEFAULT '0' COMMENT 'is the \'guessing\' mode active?',
|
`guessing` boolean NOT NULL DEFAULT '0' COMMENT 'is the \'guessing\' mode active?',
|
||||||
`oembed` boolean NOT NULL DEFAULT '0' COMMENT 'is the data the result of oembed?',
|
`oembed` boolean NOT NULL DEFAULT '0' COMMENT 'is the data the result of oembed?',
|
||||||
|
`url` text NOT NULL COMMENT 'page url',
|
||||||
`content` mediumtext COMMENT 'page data',
|
`content` mediumtext COMMENT 'page data',
|
||||||
`created` datetime NOT NULL DEFAULT '0001-01-01 00:00:00' COMMENT 'datetime of creation',
|
`created` datetime NOT NULL DEFAULT '0001-01-01 00:00:00' COMMENT 'datetime of creation',
|
||||||
PRIMARY KEY(`url`,`guessing`,`oembed`),
|
`expires` datetime NOT NULL DEFAULT '0001-01-01 00:00:00' COMMENT 'datetime of expiration',
|
||||||
INDEX `created` (`created`)
|
PRIMARY KEY(`url_hash`,`guessing`,`oembed`),
|
||||||
|
INDEX `created` (`created`),
|
||||||
|
INDEX `expires` (`expires`)
|
||||||
) DEFAULT COLLATE utf8mb4_general_ci COMMENT='cache for \'parse_url\' queries';
|
) DEFAULT COLLATE utf8mb4_general_ci COMMENT='cache for \'parse_url\' queries';
|
||||||
|
|
||||||
--
|
--
|
||||||
-- TABLE pconfig
|
-- TABLE pconfig
|
||||||
--
|
--
|
||||||
|
|
|
@ -47,16 +47,12 @@ function oexchange_content(App $a) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
$url = ((!empty($_REQUEST['url']))
|
$url = !empty($_REQUEST['url']) ? trim($_REQUEST['url']) : '';
|
||||||
? urlencode(Strings::escapeTags(trim($_REQUEST['url']))) : '');
|
$title = !empty($_REQUEST['title']) ? trim($_REQUEST['title']) : '';
|
||||||
$title = ((!empty($_REQUEST['title']))
|
$description = !empty($_REQUEST['description']) ? trim($_REQUEST['description']) : '';
|
||||||
? '&title=' . urlencode(Strings::escapeTags(trim($_REQUEST['title']))) : '');
|
$tags = !empty($_REQUEST['tags']) ? trim($_REQUEST['tags']) : '';
|
||||||
$description = ((!empty($_REQUEST['description']))
|
|
||||||
? '&description=' . urlencode(Strings::escapeTags(trim($_REQUEST['description']))) : '');
|
|
||||||
$tags = ((!empty($_REQUEST['tags']))
|
|
||||||
? '&tags=' . urlencode(Strings::escapeTags(trim($_REQUEST['tags']))) : '');
|
|
||||||
|
|
||||||
$s = DI::httpRequest()->fetch(DI::baseUrl() . '/parse_url?url=' . $url . $title . $description . $tags);
|
$s = \Friendica\Content\Text\BBCode::embedURL($url, true, $title, $description, $tags);
|
||||||
|
|
||||||
if (!strlen($s)) {
|
if (!strlen($s)) {
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -1,207 +0,0 @@
|
||||||
<?php
|
|
||||||
/**
|
|
||||||
* @copyright Copyright (C) 2020, Friendica
|
|
||||||
*
|
|
||||||
* @license GNU AGPL version 3 or any later version
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU Affero General Public License as
|
|
||||||
* published by the Free Software Foundation, either version 3 of the
|
|
||||||
* License, or (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU Affero General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU Affero General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
* This module does parse an url for embeddable content (audio, video, image files or link)
|
|
||||||
* information and does format this information to BBCode
|
|
||||||
*
|
|
||||||
* @see ParseUrl::getSiteinfo() for more information about scraping embeddable content
|
|
||||||
*/
|
|
||||||
|
|
||||||
use Friendica\App;
|
|
||||||
use Friendica\Content\PageInfo;
|
|
||||||
use Friendica\Core\Hook;
|
|
||||||
use Friendica\Core\Logger;
|
|
||||||
use Friendica\Core\System;
|
|
||||||
use Friendica\DI;
|
|
||||||
use Friendica\Util\ParseUrl;
|
|
||||||
use Friendica\Util\Strings;
|
|
||||||
|
|
||||||
/**
 * Legacy handler of the `parse_url` module.
 *
 * Reads the target URL from `$_GET['binurl']` (hex encoded) or `$_GET['url']`,
 * then prints (or, with `format=json`, emits as JSON) the BBCode needed to
 * embed that URL: a media tag for image/video/audio resources, the output of
 * a `parse_link` hook if one answers, a [bookmark] when title and description
 * were supplied by the caller, a plain [url] when `noAttachment` is set, or
 * an attachment footer built from the scraped site information.
 *
 * Every path terminates the request with exit() or System::jsonExit().
 *
 * @param App $a Unused, kept for the module calling convention.
 */
function parse_url_content(App $a)
{
	$title    = '';
	$text     = null;
	$str_tags = '';
	$br       = "\n";
	$ret      = ['success' => false, 'contentType' => ''];

	// The URL arrives either hex-encoded (binurl) or in plain form (url).
	$url = '';
	if (!empty($_GET['binurl']) && Strings::isHex($_GET['binurl'])) {
		// hex2bin() yields false when the input cannot be decoded
		$decoded = hex2bin($_GET['binurl']);
		$url = ($decoded === false) ? '' : trim($decoded);
	} elseif (!empty($_GET['url'])) {
		$url = trim($_GET['url']);
	}

	// Nothing usable was submitted (missing, whitespace-only or undecodable):
	// stop here instead of fetching the bare "http://" the scheme fix-up
	// below would otherwise produce.
	if ($url === '') {
		Logger::info('No url given');
		exit();
	}

	if (!empty($_GET['title'])) {
		$title = strip_tags(trim($_GET['title']));
	}

	if (!empty($_GET['description'])) {
		$text = strip_tags(trim($_GET['description']));
	}

	if (!empty($_GET['tags'])) {
		$arr_tags = ParseUrl::convertTagsToArray($_GET['tags']);
		if (count($arr_tags)) {
			$str_tags = $br . implode(' ', $arr_tags) . $br;
		}
	}

	$format = (isset($_GET['format']) && $_GET['format'] === 'json') ? 'json' : '';

	// Add url scheme if it is missing
	$arrurl = parse_url($url);
	if (empty($arrurl['scheme'])) {
		// A host without scheme means a protocol-relative "//host/..." URL
		$url = (!empty($arrurl['host']) ? 'http:' : 'http://') . $url;
	}

	Logger::log($url);

	// Check if the URL is an image, video or audio file. If so format
	// the URL with the corresponding BBCode media tag.
	// Only the header of the URL is fetched for this.
	$curlResponse = DI::httpRequest()->head($url);

	if ($curlResponse->isSuccess()) {
		$hdrs = $curlResponse->getHeaderArray();
		$type = $hdrs['Content-Type'] ?? null;

		$content_type = '';
		$bbcode       = '';

		if ($type) {
			// Checked in this order; a later match overrides an earlier one
			foreach (['image' => 'img', 'video' => 'video', 'audio' => 'audio'] as $media => $tag) {
				if (stripos($type, $media . '/') !== false) {
					$content_type = $media;
					$bbcode = $br . '[' . $tag . ']' . $url . '[/' . $tag . ']' . $br;
				}
			}
		}

		if (!empty($content_type)) {
			if ($format == 'json') {
				$ret['contentType'] = $content_type;
				$ret['data'] = ['url' => $url];
				$ret['success'] = true;
				System::jsonExit($ret);
			}

			echo $bbcode;
			exit();
		}
	}

	// Give addons the chance to provide the embed text themselves
	$arr = ['url' => $url, 'format' => $format, 'text' => null];

	Hook::callAll('parse_link', $arr);

	if ($arr['text']) {
		if ($format == 'json') {
			System::jsonExit($arr['text']);
		} else {
			echo $arr['text'];
			exit();
		}
	}

	// If there is already some content information submitted we don't
	// need to parse the url for content.
	if (!empty($title) && !empty($text)) {
		$title = str_replace(["\r", "\n"], ['', ''], $title);

		$text = '[quote]' . trim($text) . '[/quote]' . $br;

		$result = sprintf('[bookmark=%s]%s[/bookmark]%s', $url, ($title) ? $title : $url, $text) . $str_tags;

		Logger::log('(unparsed): returns: ' . $result);

		echo $result;
		exit();
	}

	// Fetch the information directly from the webpage
	$siteinfo = ParseUrl::getSiteinfo($url);

	unset($siteinfo['keywords']);

	// Bypass attachment if parse url for a comment
	if (!empty($_GET['noAttachment'])) {
		// The title is optional in the site information; label the link
		// with the URL itself when the page did not provide one.
		$label = !empty($siteinfo['title']) ? $siteinfo['title'] : $url;
		echo $br . '[url=' . $url . ']' . $label . '[/url]';
		exit();
	}

	if ($format == 'json') {
		$ret['data'] = $siteinfo;
		$ret['contentType'] = 'attachment';
		$ret['success'] = true;

		System::jsonExit($ret);
	}

	// Format it as BBCode attachment
	echo "\n" . PageInfo::getFooterFromData($siteinfo);

	exit();
}
|
|
||||||
|
|
||||||
/**
 * Legacy alias of ParseUrl::getSiteinfoCached().
 *
 * The implementation lives in ParseUrl.php; this function only forwards its
 * arguments and exists for legacy support. It will be removed in the future.
 *
 * @param string $url         The url of the page which should be scraped
 * @param bool   $no_guessing If true the parser doesn't search for
 *                            preview pictures
 * @param bool   $do_oembed   The false option is used by the function fetch_oembed()
 *                            to avoid endless loops
 *
 * @return array which contains needed data for embedding
 *
 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
 * @see ParseUrl::getSiteinfoCached()
 *
 * @deprecated since version 3.6 use ParseUrl::getSiteinfoCached instead
 */
function parseurl_getsiteinfo_cached($url, $no_guessing = false, $do_oembed = true)
{
	return ParseUrl::getSiteinfoCached($url, $no_guessing, $do_oembed);
}
|
|
|
@ -27,6 +27,7 @@ use Exception;
|
||||||
use Friendica\Content\ContactSelector;
|
use Friendica\Content\ContactSelector;
|
||||||
use Friendica\Content\Item;
|
use Friendica\Content\Item;
|
||||||
use Friendica\Content\OEmbed;
|
use Friendica\Content\OEmbed;
|
||||||
|
use Friendica\Content\PageInfo;
|
||||||
use Friendica\Content\Smilies;
|
use Friendica\Content\Smilies;
|
||||||
use Friendica\Core\Hook;
|
use Friendica\Core\Hook;
|
||||||
use Friendica\Core\Logger;
|
use Friendica\Core\Logger;
|
||||||
|
@ -2210,4 +2211,75 @@ class BBCode
|
||||||
|
|
||||||
return $header;
|
return $header;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the BBCode relevant to embed the provided URL in a post body.
 * For media type, it will return [img], [video] and [audio] tags.
 * For regular web pages, it will either output a [bookmark] tag if title and description were provided,
 * an [attachment] tag or a simple [url] tag depending on $tryAttachment.
 *
 * An empty $url yields an empty string so that callers can detect that
 * nothing could be embedded.
 *
 * @param string      $url           Address of the resource to embed
 * @param bool        $tryAttachment If false, a plain [url] tag is returned instead of an attachment
 * @param string|null $title         Optional caller-supplied title
 * @param string|null $description   Optional caller-supplied description
 * @param string|null $tags          Optional caller-supplied tag list, parsed by ParseUrl::convertTagsToArray()
 * @return string
 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
 * @see ParseUrl::getSiteinfoCached
 */
public static function embedURL(string $url, bool $tryAttachment = true, string $title = null, string $description = null, string $tags = null): string
{
	DI::logger()->info($url);

	// Without a URL there is nothing to embed; avoid emitting "[bookmark=]"
	// or "[url=]" tags that point nowhere.
	if ($url === '') {
		return '';
	}

	// If there is already some content information submitted we don't
	// need to parse the url for content.
	if (!empty($title) && !empty($description)) {
		$title = str_replace(["\r", "\n"], '', $title);

		$description = '[quote]' . trim($description) . '[/quote]' . "\n";

		$str_tags = '';
		if (!empty($tags)) {
			$arr_tags = ParseUrl::convertTagsToArray($tags);
			if (count($arr_tags)) {
				$str_tags = "\n" . implode(' ', $arr_tags) . "\n";
			}
		}

		// The title may have consisted of line breaks only
		$result = sprintf('[bookmark=%s]%s[/bookmark]%s', $url, $title ?: $url, $description) . $str_tags;

		DI::logger()->info('(unparsed): returns: ' . $result);

		return $result;
	}

	$siteinfo = ParseUrl::getSiteinfoCached($url);

	// Native media: wrap the URL in the matching media tag
	$type = $siteinfo['type'] ?? '';
	if (in_array($type, ['image', 'video', 'audio'], true)) {
		$tag = ($type === 'image') ? 'img' : $type;

		return "\n" . '[' . $tag . ']' . $url . '[/' . $tag . ']' . "\n";
	}

	unset($siteinfo['keywords']);

	// Bypass attachment if parse url for a comment
	if (!$tryAttachment) {
		// The title is an optional key of the site information; fall back
		// to the URL so the link always has a visible label.
		$label = !empty($siteinfo['title']) ? $siteinfo['title'] : $url;

		return "\n" . '[url=' . $url . ']' . $label . '[/url]';
	}

	// Format it as BBCode attachment
	return "\n" . PageInfo::getFooterFromData($siteinfo);
}
|
||||||
}
|
}
|
||||||
|
|
129
src/Module/ParseUrl.php
Normal file
129
src/Module/ParseUrl.php
Normal file
|
@ -0,0 +1,129 @@
|
||||||
|
<?php
|
||||||
|
/**
|
||||||
|
* @copyright Copyright (C) 2020, Friendica
|
||||||
|
*
|
||||||
|
* @license GNU AGPL version 3 or any later version
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License as
|
||||||
|
* published by the Free Software Foundation, either version 3 of the
|
||||||
|
* License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Affero General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
namespace Friendica\Module;
|
||||||
|
|
||||||
|
use Friendica\BaseModule;
|
||||||
|
use Friendica\Content\Text\BBCode;
|
||||||
|
use Friendica\Core\Hook;
|
||||||
|
use Friendica\Core\Session;
|
||||||
|
use Friendica\Core\System;
|
||||||
|
use Friendica\Network\HTTPException\BadRequestException;
|
||||||
|
use Friendica\Util;
|
||||||
|
|
||||||
|
/**
 * Module behind the `/parseurl` route.
 *
 * Turns a submitted URL into embeddable content, either as BBCode text or,
 * with `format=json`, as a JSON structure. Only available to authenticated
 * sessions.
 */
class ParseUrl extends BaseModule
{
	/**
	 * Answers the request directly and terminates it.
	 *
	 * Recognised query parameters: `binurl` (hex encoded URL) or `url`,
	 * `title`, `description`, `tags`, `format` (`json` or anything else for
	 * BBCode) and `noAttachment`.
	 *
	 * @param array $parameters Route parameters, unused here
	 * @throws \Friendica\Network\HTTPException\ForbiddenException if the session is not authenticated
	 * @throws BadRequestException if no usable URL was submitted
	 */
	public static function rawContent(array $parameters = [])
	{
		if (!Session::isAuthenticated()) {
			throw new \Friendica\Network\HTTPException\ForbiddenException();
		}

		$ret = ['success' => false, 'contentType' => ''];

		$url = '';
		if (!empty($_GET['binurl']) && Util\Strings::isHex($_GET['binurl'])) {
			// hex2bin() yields false when the input cannot be decoded
			$decoded = hex2bin($_GET['binurl']);
			$url = ($decoded === false) ? '' : trim($decoded);
		} elseif (!empty($_GET['url'])) {
			$url = trim($_GET['url']);
		}

		// Covers a missing parameter as well as a whitespace-only or
		// undecodable value, which would otherwise become a bare "http://".
		if ($url === '') {
			throw new BadRequestException('No url given');
		}

		$title       = !empty($_GET['title']) ? strip_tags(trim($_GET['title'])) : '';
		$description = !empty($_GET['description']) ? strip_tags(trim($_GET['description'])) : '';
		$format      = (isset($_GET['format']) && $_GET['format'] === 'json') ? 'json' : '';

		// Add url scheme if it is missing
		$arrurl = parse_url($url);
		if (empty($arrurl['scheme'])) {
			// A host without scheme means a protocol-relative "//host/..." URL
			$url = (!empty($arrurl['host']) ? 'http:' : 'http://') . $url;
		}

		// Give addons the chance to provide the embed text themselves
		$arr = ['url' => $url, 'format' => $format, 'text' => null];

		Hook::callAll('parse_link', $arr);

		if ($arr['text']) {
			if ($format === 'json') {
				System::jsonExit($arr['text']);
			} else {
				echo $arr['text'];
				exit();
			}
		}

		if ($format === 'json') {
			$siteinfo = Util\ParseUrl::getSiteinfoCached($url);

			if (in_array($siteinfo['type'], ['image', 'video', 'audio'], true)) {
				// Native media: only the URL is needed by the client
				$ret['contentType'] = $siteinfo['type'];
				$ret['data'] = ['url' => $url];
			} else {
				unset($siteinfo['keywords']);

				$ret['data'] = $siteinfo;
				$ret['contentType'] = 'attachment';
			}

			$ret['success'] = true;

			System::jsonExit($ret);
		} else {
			// The raw tags are handed over; BBCode::embedURL() parses them itself
			echo BBCode::embedURL($url, empty($_GET['noAttachment']), $title, $description, $_GET['tags'] ?? '');
			exit();
		}
	}
}
|
|
@ -29,6 +29,7 @@ use Friendica\Core\Logger;
|
||||||
use Friendica\Database\Database;
|
use Friendica\Database\Database;
|
||||||
use Friendica\Database\DBA;
|
use Friendica\Database\DBA;
|
||||||
use Friendica\DI;
|
use Friendica\DI;
|
||||||
|
use Friendica\Network\HTTPException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get information about a given URL
|
* Get information about a given URL
|
||||||
|
@ -37,6 +38,9 @@ use Friendica\DI;
|
||||||
*/
|
*/
|
||||||
class ParseUrl
|
class ParseUrl
|
||||||
{
|
{
|
||||||
|
const DEFAULT_EXPIRATION_FAILURE = 'now + 1 day';
|
||||||
|
const DEFAULT_EXPIRATION_SUCCESS = 'now + 3 months';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Maximum number of characters for the description
|
* Maximum number of characters for the description
|
||||||
*/
|
*/
|
||||||
|
@ -65,18 +69,23 @@ class ParseUrl
|
||||||
* array 'images' => (optional) Array of preview pictures
|
* array 'images' => (optional) Array of preview pictures
|
||||||
* string 'keywords' => (optional) The tags which belong to the content
|
* string 'keywords' => (optional) The tags which belong to the content
|
||||||
*
|
*
|
||||||
* @throws \Friendica\Network\HTTPException\InternalServerErrorException
|
* @throws HTTPException\InternalServerErrorException
|
||||||
* @see ParseUrl::getSiteinfo() for more information about scraping
|
* @see ParseUrl::getSiteinfo() for more information about scraping
|
||||||
* embeddable content
|
* embeddable content
|
||||||
*/
|
*/
|
||||||
public static function getSiteinfoCached($url, $no_guessing = false, $do_oembed = true)
|
public static function getSiteinfoCached($url, $no_guessing = false, $do_oembed = true): array
|
||||||
{
|
{
|
||||||
if ($url == "") {
|
if (empty($url)) {
|
||||||
return false;
|
return [
|
||||||
|
'url' => '',
|
||||||
|
'type' => 'error',
|
||||||
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$urlHash = hash('sha256', $url);
|
||||||
|
|
||||||
$parsed_url = DBA::selectFirst('parsed_url', ['content'],
|
$parsed_url = DBA::selectFirst('parsed_url', ['content'],
|
||||||
['url' => Strings::normaliseLink($url), 'guessing' => !$no_guessing, 'oembed' => $do_oembed]
|
['url_hash' => $urlHash, 'guessing' => !$no_guessing, 'oembed' => $do_oembed]
|
||||||
);
|
);
|
||||||
if (!empty($parsed_url['content'])) {
|
if (!empty($parsed_url['content'])) {
|
||||||
$data = unserialize($parsed_url['content']);
|
$data = unserialize($parsed_url['content']);
|
||||||
|
@ -85,12 +94,20 @@ class ParseUrl
|
||||||
|
|
||||||
$data = self::getSiteinfo($url, $no_guessing, $do_oembed);
|
$data = self::getSiteinfo($url, $no_guessing, $do_oembed);
|
||||||
|
|
||||||
DBA::insert(
|
$expires = $data['expires'];
|
||||||
|
|
||||||
|
unset($data['expires']);
|
||||||
|
|
||||||
|
DI::dba()->insert(
|
||||||
'parsed_url',
|
'parsed_url',
|
||||||
[
|
[
|
||||||
'url' => substr(Strings::normaliseLink($url), 0, 255), 'guessing' => !$no_guessing,
|
'url_hash' => $urlHash,
|
||||||
'oembed' => $do_oembed, 'content' => serialize($data),
|
'guessing' => !$no_guessing,
|
||||||
'created' => DateTimeFormat::utcNow()
|
'oembed' => $do_oembed,
|
||||||
|
'url' => $url,
|
||||||
|
'content' => serialize($data),
|
||||||
|
'created' => DateTimeFormat::utcNow(),
|
||||||
|
'expires' => $expires,
|
||||||
],
|
],
|
||||||
Database::INSERT_UPDATE
|
Database::INSERT_UPDATE
|
||||||
);
|
);
|
||||||
|
@ -117,7 +134,7 @@ class ParseUrl
|
||||||
*
|
*
|
||||||
* @return array which contains needed data for embedding
|
* @return array which contains needed data for embedding
|
||||||
* string 'url' => The url of the parsed page
|
* string 'url' => The url of the parsed page
|
||||||
* string 'type' => Content type
|
* string 'type' => Content type (error, link, photo, image, audio, video)
|
||||||
* string 'title' => (optional) The title of the content
|
* string 'title' => (optional) The title of the content
|
||||||
* string 'text' => (optional) The description for the content
|
* string 'text' => (optional) The description for the content
|
||||||
* string 'image' => (optional) A preview image of the content (only available if $no_guessing = false)
|
* string 'image' => (optional) A preview image of the content (only available if $no_guessing = false)
|
||||||
|
@ -140,6 +157,13 @@ class ParseUrl
|
||||||
*/
|
*/
|
||||||
public static function getSiteinfo($url, $no_guessing = false, $do_oembed = true, $count = 1)
|
public static function getSiteinfo($url, $no_guessing = false, $do_oembed = true, $count = 1)
|
||||||
{
|
{
|
||||||
|
if (empty($url)) {
|
||||||
|
return [
|
||||||
|
'url' => '',
|
||||||
|
'type' => 'error',
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
// Check if the URL does contain a scheme
|
// Check if the URL does contain a scheme
|
||||||
$scheme = parse_url($url, PHP_URL_SCHEME);
|
$scheme = parse_url($url, PHP_URL_SCHEME);
|
||||||
|
|
||||||
|
@ -154,6 +178,7 @@ class ParseUrl
|
||||||
$siteinfo = [
|
$siteinfo = [
|
||||||
'url' => $url,
|
'url' => $url,
|
||||||
'type' => 'link',
|
'type' => 'link',
|
||||||
|
'expires' => DateTimeFormat::utc(self::DEFAULT_EXPIRATION_FAILURE),
|
||||||
];
|
];
|
||||||
|
|
||||||
if ($count > 10) {
|
if ($count > 10) {
|
||||||
|
@ -166,16 +191,35 @@ class ParseUrl
|
||||||
return $siteinfo;
|
return $siteinfo;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$siteinfo['expires'] = DateTimeFormat::utc(self::DEFAULT_EXPIRATION_SUCCESS);
|
||||||
|
|
||||||
// If the file is too large then exit
|
// If the file is too large then exit
|
||||||
if (($curlResult->getInfo()['download_content_length'] ?? 0) > 1000000) {
|
if (($curlResult->getInfo()['download_content_length'] ?? 0) > 1000000) {
|
||||||
return $siteinfo;
|
return $siteinfo;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Native media type, no need for HTML parsing
|
||||||
|
$type = $curlResult->getHeader('Content-Type');
|
||||||
|
if ($type) {
|
||||||
|
preg_match('#(image|video|audio)/#i', $type, $matches);
|
||||||
|
if ($matches) {
|
||||||
|
$siteinfo['type'] = array_pop($matches);
|
||||||
|
return $siteinfo;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// If it isn't a HTML file then exit
|
// If it isn't a HTML file then exit
|
||||||
if (($curlResult->getContentType() != '') && !strstr(strtolower($curlResult->getContentType()), 'html')) {
|
if (($curlResult->getContentType() != '') && !strstr(strtolower($curlResult->getContentType()), 'html')) {
|
||||||
return $siteinfo;
|
return $siteinfo;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ($cacheControlHeader = $curlResult->getHeader('Cache-Control')) {
|
||||||
|
if (preg_match('/max-age=([0-9]+)/i', $cacheControlHeader, $matches)) {
|
||||||
|
$maxAge = max(86400, (int)array_pop($matches));
|
||||||
|
$siteinfo['expires'] = DateTimeFormat::utc("now + $maxAge seconds");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
$header = $curlResult->getHeader();
|
$header = $curlResult->getHeader();
|
||||||
$body = $curlResult->getBody();
|
$body = $curlResult->getBody();
|
||||||
|
|
||||||
|
|
|
@ -64,7 +64,7 @@ class ClearCache
|
||||||
// Delete the cached OEmbed entries that are older than three months
|
// Delete the cached OEmbed entries that are older than three months
|
||||||
DBA::delete('oembed', ["`created` < NOW() - INTERVAL 3 MONTH"]);
|
DBA::delete('oembed', ["`created` < NOW() - INTERVAL 3 MONTH"]);
|
||||||
|
|
||||||
// Delete the cached "parse_url" entries that are older than three month
|
// Delete the cached "parsed_url" entries that are expired
|
||||||
DBA::delete('parsed_url', ["`created` < NOW() - INTERVAL 3 MONTH"]);
|
DBA::delete('parsed_url', ["`expires` < NOW()"]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -55,7 +55,7 @@
|
||||||
use Friendica\Database\DBA;
|
use Friendica\Database\DBA;
|
||||||
|
|
||||||
if (!defined('DB_UPDATE_VERSION')) {
|
if (!defined('DB_UPDATE_VERSION')) {
|
||||||
define('DB_UPDATE_VERSION', 1402);
|
define('DB_UPDATE_VERSION', 1403);
|
||||||
}
|
}
|
||||||
|
|
||||||
return [
|
return [
|
||||||
|
@ -1019,15 +1019,18 @@ return [
|
||||||
"parsed_url" => [
|
"parsed_url" => [
|
||||||
"comment" => "cache for 'parse_url' queries",
|
"comment" => "cache for 'parse_url' queries",
|
||||||
"fields" => [
|
"fields" => [
|
||||||
"url" => ["type" => "varbinary(255)", "not null" => "1", "primary" => "1", "comment" => "page url"],
|
"url_hash" => ["type" => "binary(64)", "not null" => "1", "primary" => "1", "comment" => "page url hash"],
|
||||||
"guessing" => ["type" => "boolean", "not null" => "1", "default" => "0", "primary" => "1", "comment" => "is the 'guessing' mode active?"],
|
"guessing" => ["type" => "boolean", "not null" => "1", "default" => "0", "primary" => "1", "comment" => "is the 'guessing' mode active?"],
|
||||||
"oembed" => ["type" => "boolean", "not null" => "1", "default" => "0", "primary" => "1", "comment" => "is the data the result of oembed?"],
|
"oembed" => ["type" => "boolean", "not null" => "1", "default" => "0", "primary" => "1", "comment" => "is the data the result of oembed?"],
|
||||||
|
"url" => ["type" => "text", "not null" => "1", "comment" => "page url"],
|
||||||
"content" => ["type" => "mediumtext", "comment" => "page data"],
|
"content" => ["type" => "mediumtext", "comment" => "page data"],
|
||||||
"created" => ["type" => "datetime", "not null" => "1", "default" => DBA::NULL_DATETIME, "comment" => "datetime of creation"],
|
"created" => ["type" => "datetime", "not null" => "1", "default" => DBA::NULL_DATETIME, "comment" => "datetime of creation"],
|
||||||
|
"expires" => ["type" => "datetime", "not null" => "1", "default" => DBA::NULL_DATETIME, "comment" => "datetime of expiration"],
|
||||||
],
|
],
|
||||||
"indexes" => [
|
"indexes" => [
|
||||||
"PRIMARY" => ["url", "guessing", "oembed"],
|
"PRIMARY" => ["url_hash", "guessing", "oembed"],
|
||||||
"created" => ["created"],
|
"created" => ["created"],
|
||||||
|
"expires" => ["expires"],
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
"pconfig" => [
|
"pconfig" => [
|
||||||
|
|
|
@ -346,6 +346,7 @@ return [
|
||||||
'/openid' => [Module\Security\OpenID::class, [R::GET]],
|
'/openid' => [Module\Security\OpenID::class, [R::GET]],
|
||||||
'/opensearch' => [Module\OpenSearch::class, [R::GET]],
|
'/opensearch' => [Module\OpenSearch::class, [R::GET]],
|
||||||
|
|
||||||
|
'/parseurl' => [Module\ParseUrl::class, [R::GET]],
|
||||||
'/permission/tooltip/{type}/{id:\d+}' => [Module\PermissionTooltip::class, [R::GET]],
|
'/permission/tooltip/{type}/{id:\d+}' => [Module\PermissionTooltip::class, [R::GET]],
|
||||||
|
|
||||||
'/photo' => [
|
'/photo' => [
|
||||||
|
|
10
update.php
10
update.php
|
@ -817,3 +817,13 @@ function update_1400()
|
||||||
|
|
||||||
return Update::SUCCESS;
|
return Update::SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Pre-update step for DB version 1403: drops the `parsed_url` cache table.
 *
 * Necessary before a primary key change.
 *
 * @return int Update::SUCCESS, or Update::FAILED when the statement could not be executed
 */
function pre_update_1403()
{
	// IF EXISTS keeps the step repeatable: a table that is already gone
	// (e.g. after an interrupted earlier run) must not fail the update.
	if (!DBA::e("DROP TABLE IF EXISTS `parsed_url`")) {
		return Update::FAILED;
	}

	return Update::SUCCESS;
}
|
||||||
|
|
|
@ -178,7 +178,7 @@
|
||||||
* @returns {void}
|
* @returns {void}
|
||||||
*/
|
*/
|
||||||
var getContentData = function(binurl, callback) {
|
var getContentData = function(binurl, callback) {
|
||||||
$.get('parse_url?binurl='+ binurl + '&format=json', function (answer) {
|
$.get('parseurl?binurl='+ binurl + '&format=json', function (answer) {
|
||||||
obj = sanitizeInputData(answer);
|
obj = sanitizeInputData(answer);
|
||||||
|
|
||||||
// Put the data into a cache
|
// Put the data into a cache
|
||||||
|
|
|
@ -103,7 +103,7 @@ function enableOnUser(){
|
||||||
if(reply && reply.length) {
|
if(reply && reply.length) {
|
||||||
reply = bin2hex(reply);
|
reply = bin2hex(reply);
|
||||||
$('#profile-rotator').show();
|
$('#profile-rotator').show();
|
||||||
$.get('parse_url?binurl=' + reply, function(data) {
|
$.get('parseurl?binurl=' + reply, function(data) {
|
||||||
addeditortext(data);
|
addeditortext(data);
|
||||||
$('#profile-rotator').hide();
|
$('#profile-rotator').hide();
|
||||||
});
|
});
|
||||||
|
@ -160,7 +160,7 @@ function enableOnUser(){
|
||||||
if(reply && reply.length) {
|
if(reply && reply.length) {
|
||||||
reply = bin2hex(reply);
|
reply = bin2hex(reply);
|
||||||
$('#profile-rotator').show();
|
$('#profile-rotator').show();
|
||||||
$.get('parse_url?binurl=' + reply, function(data) {
|
$.get('parseurl?binurl=' + reply, function(data) {
|
||||||
if (!editor) $("#profile-jot-text").val("");
|
if (!editor) $("#profile-jot-text").val("");
|
||||||
initEditor(function(){
|
initEditor(function(){
|
||||||
addeditortext(data);
|
addeditortext(data);
|
||||||
|
|
|
@ -23,7 +23,7 @@
|
||||||
reply = prompt("{{$linkurl}}");
|
reply = prompt("{{$linkurl}}");
|
||||||
if(reply && reply.length) {
|
if(reply && reply.length) {
|
||||||
$('#profile-rotator').show();
|
$('#profile-rotator').show();
|
||||||
$.get('parse_url?url=' + reply, function(data) {
|
$.get('parseurl?url=' + reply, function(data) {
|
||||||
addeditortext(data);
|
addeditortext(data);
|
||||||
$('#profile-rotator').hide();
|
$('#profile-rotator').hide();
|
||||||
});
|
});
|
||||||
|
@ -42,7 +42,7 @@
|
||||||
event.preventDefault();
|
event.preventDefault();
|
||||||
if(reply && reply.length) {
|
if(reply && reply.length) {
|
||||||
$('#profile-rotator').show();
|
$('#profile-rotator').show();
|
||||||
$.get('parse_url?url=' + reply, function(data) {
|
$.get('parseurl?url=' + reply, function(data) {
|
||||||
addeditortext(data);
|
addeditortext(data);
|
||||||
$('#profile-rotator').hide();
|
$('#profile-rotator').hide();
|
||||||
});
|
});
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
reply = prompt("{{$linkurl}}");
|
reply = prompt("{{$linkurl}}");
|
||||||
if(reply && reply.length) {
|
if(reply && reply.length) {
|
||||||
$('#profile-rotator').show();
|
$('#profile-rotator').show();
|
||||||
$.get('parse_url?url=' + reply, function(data) {
|
$.get('parseurl?url=' + reply, function(data) {
|
||||||
addeditortext(data);
|
addeditortext(data);
|
||||||
$('#profile-rotator').hide();
|
$('#profile-rotator').hide();
|
||||||
});
|
});
|
||||||
|
@ -26,7 +26,7 @@
|
||||||
event.preventDefault();
|
event.preventDefault();
|
||||||
if(reply && reply.length) {
|
if(reply && reply.length) {
|
||||||
$('#profile-rotator').show();
|
$('#profile-rotator').show();
|
||||||
$.get('parse_url?url=' + reply, function(data) {
|
$.get('parseurl?url=' + reply, function(data) {
|
||||||
addeditortext(data);
|
addeditortext(data);
|
||||||
$('#profile-rotator').hide();
|
$('#profile-rotator').hide();
|
||||||
});
|
});
|
||||||
|
|
|
@ -30,7 +30,7 @@ function jotGetLink() {
|
||||||
// Fallback: insert the attachment bbcode directly into the textarea
|
// Fallback: insert the attachment bbcode directly into the textarea
|
||||||
// if the attachment live preview isn't available
|
// if the attachment live preview isn't available
|
||||||
} else {
|
} else {
|
||||||
$.get("parse_url?binurl=" + bin2hex(reply) + noAttachment, function (data) {
|
$.get("parseurl?binurl=" + bin2hex(reply) + noAttachment, function (data) {
|
||||||
addeditortext(data);
|
addeditortext(data);
|
||||||
$("#profile-rotator").hide();
|
$("#profile-rotator").hide();
|
||||||
});
|
});
|
||||||
|
|
|
@ -40,7 +40,7 @@ function commentGetLink(id, prompttext) {
|
||||||
reply = prompt(prompttext);
|
reply = prompt(prompttext);
|
||||||
if (reply && reply.length) {
|
if (reply && reply.length) {
|
||||||
reply = bin2hex(reply);
|
reply = bin2hex(reply);
|
||||||
$.get("parse_url?noAttachment=1&binurl=" + reply, function (data) {
|
$.get("parseurl?noAttachment=1&binurl=" + reply, function (data) {
|
||||||
addCommentText(data, id);
|
addCommentText(data, id);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -64,7 +64,7 @@ function commentLinkDrop(event, id) {
|
||||||
event.preventDefault();
|
event.preventDefault();
|
||||||
if (reply && reply.length) {
|
if (reply && reply.length) {
|
||||||
reply = bin2hex(reply);
|
reply = bin2hex(reply);
|
||||||
$.get("parse_url?noAttachment=1&binurl=" + reply, function (data) {
|
$.get("parseurl?noAttachment=1&binurl=" + reply, function (data) {
|
||||||
addCommentText(data, id);
|
addCommentText(data, id);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
|
@ -223,7 +223,7 @@
|
||||||
if (currentText.includes("[attachment") && currentText.includes("[/attachment]")) {
|
if (currentText.includes("[attachment") && currentText.includes("[/attachment]")) {
|
||||||
noAttachment = '&noAttachment=1';
|
noAttachment = '&noAttachment=1';
|
||||||
}
|
}
|
||||||
$.get('parse_url?binurl=' + reply + noAttachment, function(data) {
|
$.get('parseurl?binurl=' + reply + noAttachment, function(data) {
|
||||||
if (!editor) $("#profile-jot-text").val("");
|
if (!editor) $("#profile-jot-text").val("");
|
||||||
initEditor(function(){
|
initEditor(function(){
|
||||||
addeditortext(data);
|
addeditortext(data);
|
||||||
|
|
|
@ -129,7 +129,7 @@ function enableOnUser(){
|
||||||
if(reply && reply.length) {
|
if(reply && reply.length) {
|
||||||
reply = bin2hex(reply);
|
reply = bin2hex(reply);
|
||||||
$('#profile-rotator').show();
|
$('#profile-rotator').show();
|
||||||
$.get('parse_url?binurl=' + reply, function(data) {
|
$.get('parseurl?binurl=' + reply, function(data) {
|
||||||
addeditortext(data);
|
addeditortext(data);
|
||||||
$('#profile-rotator').hide();
|
$('#profile-rotator').hide();
|
||||||
});
|
});
|
||||||
|
@ -190,7 +190,7 @@ function enableOnUser(){
|
||||||
if(reply && reply.length) {
|
if(reply && reply.length) {
|
||||||
reply = bin2hex(reply);
|
reply = bin2hex(reply);
|
||||||
$('#profile-rotator').show();
|
$('#profile-rotator').show();
|
||||||
$.get('parse_url?binurl=' + reply, function(data) {
|
$.get('parseurl?binurl=' + reply, function(data) {
|
||||||
if (!editor) $("#profile-jot-text").val("");
|
if (!editor) $("#profile-jot-text").val("");
|
||||||
initEditor(function(){
|
initEditor(function(){
|
||||||
addeditortext(data);
|
addeditortext(data);
|
||||||
|
|
Loading…
Reference in a new issue