Merge remote-tracking branch 'upstream/develop' into no-item

This commit is contained in:
Michael 2021-02-19 06:59:45 +00:00
commit abcbbd2ec1
19 changed files with 301 additions and 256 deletions

View file

@ -27,6 +27,7 @@ use Exception;
use Friendica\Content\ContactSelector;
use Friendica\Content\Item;
use Friendica\Content\OEmbed;
use Friendica\Content\PageInfo;
use Friendica\Content\Smilies;
use Friendica\Core\Hook;
use Friendica\Core\Logger;
@ -2210,4 +2211,75 @@ class BBCode
return $header;
}
/**
 * Returns the BBCode relevant to embed the provided URL in a post body.
 *
 * For media types, it returns an [img], [video] or [audio] tag.
 * For regular web pages, it outputs a [bookmark] tag if both title and description were provided;
 * otherwise an [attachment] tag or a simple [url] tag, depending on $tryAttachment.
 *
 * @param string      $url           URL to embed
 * @param bool        $tryAttachment For regular web pages: true builds the attachment footer, false a plain [url] tag
 * @param string|null $title         Caller-supplied title; only used when $description is supplied as well
 * @param string|null $description   Caller-supplied description, emitted inside a [quote] block
 * @param string|null $tags          Caller-supplied tags, converted with ParseUrl::convertTagsToArray()
 * @return string
 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
 * @see ParseUrl::getSiteinfoCached
 */
public static function embedURL(string $url, bool $tryAttachment = true, string $title = null, string $description = null, string $tags = null): string
{
    DI::logger()->info($url);

    // If there is already some content information submitted we don't
    // need to parse the url for content.
    if (!empty($title) && !empty($description)) {
        $title = str_replace(["\r", "\n"], ['', ''], $title);

        $description = '[quote]' . trim($description) . '[/quote]' . "\n";

        $str_tags = '';
        if (!empty($tags)) {
            $arr_tags = ParseUrl::convertTagsToArray($tags);
            if (count($arr_tags)) {
                $str_tags = "\n" . implode(' ', $arr_tags) . "\n";
            }
        }

        // Stripping the line breaks can leave the title empty, hence the fallback to the URL
        $result = sprintf('[bookmark=%s]%s[/bookmark]%s', $url, ($title) ? $title : $url, $description) . $str_tags;

        DI::logger()->info('(unparsed): returns: ' . $result);

        return $result;
    }

    $siteinfo = ParseUrl::getSiteinfoCached($url);

    // Native media types are embedded directly, no page information is needed
    switch ($siteinfo['type']) {
        case 'video':
            return "\n" . '[video]' . $url . '[/video]' . "\n";
        case 'audio':
            return "\n" . '[audio]' . $url . '[/audio]' . "\n";
        case 'image':
            return "\n" . '[img]' . $url . '[/img]' . "\n";
    }

    unset($siteinfo['keywords']);

    // Bypass attachment if parse url for a comment
    if (!$tryAttachment) {
        // The site info may lack a title (e.g. when the page could not be fetched),
        // so use the URL itself as link text instead of emitting an empty link.
        $linkText = $siteinfo['title'] ?? '';
        if ($linkText === '') {
            $linkText = $url;
        }

        return "\n" . '[url=' . $url . ']' . $linkText . '[/url]';
    }

    // Format it as BBCode attachment
    return "\n" . PageInfo::getFooterFromData($siteinfo);
}
}

129
src/Module/ParseUrl.php Normal file
View file

@ -0,0 +1,129 @@
<?php
/**
* @copyright Copyright (C) 2020, Friendica
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
namespace Friendica\Module;
use Friendica\BaseModule;
use Friendica\Content\Text\BBCode;
use Friendica\Core\Hook;
use Friendica\Core\Session;
use Friendica\Core\System;
use Friendica\Network\HTTPException\BadRequestException;
use Friendica\Util;
/**
 * Module endpoint that turns a URL submitted via GET into embeddable content.
 *
 * Depending on the "format" GET parameter it answers either with a JSON
 * description of the linked resource or with the BBCode produced by
 * BBCode::embedURL().
 */
class ParseUrl extends BaseModule
{
    /**
     * Reads the URL (and optional title, description, tags) from the GET
     * parameters, gives "parse_link" hooks the chance to answer first and
     * otherwise outputs the site information (JSON) or the embed BBCode.
     *
     * Recognised GET parameters: binurl (hex encoded URL), url, title,
     * description, tags, format ("json") and noAttachment.
     *
     * @param array $parameters Not used by this module
     * @throws \Friendica\Network\HTTPException\ForbiddenException If the session is not authenticated
     * @throws BadRequestException If no usable URL was submitted
     */
    public static function rawContent(array $parameters = [])
    {
        if (!Session::isAuthenticated()) {
            throw new \Friendica\Network\HTTPException\ForbiddenException();
        }

        if (!empty($_GET['binurl']) && Util\Strings::isHex($_GET['binurl'])) {
            $url = trim(hex2bin($_GET['binurl']));
        } elseif (!empty($_GET['url'])) {
            $url = trim($_GET['url']);
        } else {
            throw new BadRequestException('No url given');
        }

        // A whitespace-only value (or a binurl that could not be decoded) would
        // otherwise be turned into the meaningless URL "http://" further down.
        if ($url === '') {
            throw new BadRequestException('No url given');
        }

        $title       = !empty($_GET['title']) ? strip_tags(trim($_GET['title'])) : '';
        $description = !empty($_GET['description']) ? strip_tags(trim($_GET['description'])) : '';
        $format      = (($_GET['format'] ?? '') === 'json') ? 'json' : '';

        // Add url scheme if it is missing
        $arrurl = parse_url($url);
        if (empty($arrurl['scheme'])) {
            // A recognised host without scheme means a scheme-relative URL ("//host/path")
            $url = (!empty($arrurl['host']) ? 'http:' : 'http://') . $url;
        }

        // Hooks may provide the complete response themselves via $arr['text']
        $arr = ['url' => $url, 'format' => $format, 'text' => null];

        Hook::callAll('parse_link', $arr);

        if ($arr['text']) {
            if ($format == 'json') {
                System::jsonExit($arr['text']);
            } else {
                echo $arr['text'];
                exit();
            }
        }

        if ($format == 'json') {
            $siteinfo = Util\ParseUrl::getSiteinfoCached($url);

            if (in_array($siteinfo['type'], ['image', 'video', 'audio'], true)) {
                // Native media: the type doubles as content type, only the URL is needed
                $ret = [
                    'success'     => true,
                    'contentType' => $siteinfo['type'],
                    'data'        => ['url' => $url],
                ];
            } else {
                unset($siteinfo['keywords']);

                $ret = [
                    'success'     => true,
                    'contentType' => 'attachment',
                    'data'        => $siteinfo,
                ];
            }

            System::jsonExit($ret);
        } else {
            // The raw tag string is handed over as is, BBCode::embedURL() converts it itself
            echo BBCode::embedURL($url, empty($_GET['noAttachment']), $title, $description, $_GET['tags'] ?? '');
            exit();
        }
    }
}

View file

@ -29,6 +29,7 @@ use Friendica\Core\Logger;
use Friendica\Database\Database;
use Friendica\Database\DBA;
use Friendica\DI;
use Friendica\Network\HTTPException;
/**
* Get information about a given URL
@ -37,6 +38,9 @@ use Friendica\DI;
*/
class ParseUrl
{
const DEFAULT_EXPIRATION_FAILURE = 'now + 1 day';
const DEFAULT_EXPIRATION_SUCCESS = 'now + 3 months';
/**
* Maximum number of characters for the description
*/
@ -65,18 +69,23 @@ class ParseUrl
* array 'images' => (optional) Array of preview pictures
* string 'keywords' => (optional) The tags which belong to the content
*
* @throws \Friendica\Network\HTTPException\InternalServerErrorException
* @throws HTTPException\InternalServerErrorException
* @see ParseUrl::getSiteinfo() for more information about scraping
* embeddable content
*/
public static function getSiteinfoCached($url, $no_guessing = false, $do_oembed = true)
public static function getSiteinfoCached($url, $no_guessing = false, $do_oembed = true): array
{
if ($url == "") {
return false;
if (empty($url)) {
return [
'url' => '',
'type' => 'error',
];
}
$urlHash = hash('sha256', $url);
$parsed_url = DBA::selectFirst('parsed_url', ['content'],
['url' => Strings::normaliseLink($url), 'guessing' => !$no_guessing, 'oembed' => $do_oembed]
['url_hash' => $urlHash, 'guessing' => !$no_guessing, 'oembed' => $do_oembed]
);
if (!empty($parsed_url['content'])) {
$data = unserialize($parsed_url['content']);
@ -85,12 +94,20 @@ class ParseUrl
$data = self::getSiteinfo($url, $no_guessing, $do_oembed);
DBA::insert(
$expires = $data['expires'];
unset($data['expires']);
DI::dba()->insert(
'parsed_url',
[
'url' => substr(Strings::normaliseLink($url), 0, 255), 'guessing' => !$no_guessing,
'oembed' => $do_oembed, 'content' => serialize($data),
'created' => DateTimeFormat::utcNow()
'url_hash' => $urlHash,
'guessing' => !$no_guessing,
'oembed' => $do_oembed,
'url' => $url,
'content' => serialize($data),
'created' => DateTimeFormat::utcNow(),
'expires' => $expires,
],
Database::INSERT_UPDATE
);
@ -117,7 +134,7 @@ class ParseUrl
*
* @return array which contains needed data for embedding
* string 'url' => The url of the parsed page
* string 'type' => Content type
* string 'type' => Content type (error, link, photo, image, audio, video)
* string 'title' => (optional) The title of the content
* string 'text' => (optional) The description for the content
* string 'image' => (optional) A preview image of the content (only available if $no_guessing = false)
@ -140,6 +157,13 @@ class ParseUrl
*/
public static function getSiteinfo($url, $no_guessing = false, $do_oembed = true, $count = 1)
{
if (empty($url)) {
return [
'url' => '',
'type' => 'error',
];
}
// Check if the URL does contain a scheme
$scheme = parse_url($url, PHP_URL_SCHEME);
@ -154,6 +178,7 @@ class ParseUrl
$siteinfo = [
'url' => $url,
'type' => 'link',
'expires' => DateTimeFormat::utc(self::DEFAULT_EXPIRATION_FAILURE),
];
if ($count > 10) {
@ -166,16 +191,35 @@ class ParseUrl
return $siteinfo;
}
$siteinfo['expires'] = DateTimeFormat::utc(self::DEFAULT_EXPIRATION_SUCCESS);
// If the file is too large then exit
if (($curlResult->getInfo()['download_content_length'] ?? 0) > 1000000) {
return $siteinfo;
}
// Native media type, no need for HTML parsing
$type = $curlResult->getHeader('Content-Type');
if ($type) {
preg_match('#(image|video|audio)/#i', $type, $matches);
if ($matches) {
$siteinfo['type'] = array_pop($matches);
return $siteinfo;
}
}
// If it isn't a HTML file then exit
if (($curlResult->getContentType() != '') && !strstr(strtolower($curlResult->getContentType()), 'html')) {
return $siteinfo;
}
if ($cacheControlHeader = $curlResult->getHeader('Cache-Control')) {
if (preg_match('/max-age=([0-9]+)/i', $cacheControlHeader, $matches)) {
$maxAge = max(86400, (int)array_pop($matches));
$siteinfo['expires'] = DateTimeFormat::utc("now + $maxAge seconds");
}
}
$header = $curlResult->getHeader();
$body = $curlResult->getBody();

View file

@ -64,7 +64,7 @@ class ClearCache
// Delete the cached OEmbed entries that are older than three months
DBA::delete('oembed', ["`created` < NOW() - INTERVAL 3 MONTH"]);
// Delete the cached "parse_url" entries that are older than three month
DBA::delete('parsed_url', ["`created` < NOW() - INTERVAL 3 MONTH"]);
// Delete the cached "parsed_url" entries that are expired
DBA::delete('parsed_url', ["`expires` < NOW()"]);
}
}