friendica_2021.01_tupambae_.../mod/parse_url.php

<?php
/**
 * @copyright Copyright (C) 2020, Friendica
 *
 * @license GNU AGPL version 3 or any later version
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 * This module does parse an url for embeddable content (audio, video, image files or link)
 * information and does format this information to BBCode
 *
 * @see ParseUrl::getSiteinfo() for more information about scraping embeddable content
 */

use Friendica\App;
use Friendica\Content\PageInfo;
use Friendica\Core\Hook;
use Friendica\Core\Logger;
use Friendica\Core\System;
use Friendica\DI;
use Friendica\Util\ParseUrl;
use Friendica\Util\Strings;

/**
 * Module entry point: converts a submitted URL into BBCode (or JSON) and ends the request.
 *
 * The result is written directly to the output. Every path through this function
 * ends in exit() (System::jsonExit() is expected to terminate the request as well).
 *
 * Request parameters read from $_GET:
 * - binurl:       hex encoded URL; used when it passes Strings::isHex()
 * - url:          plain URL; used when no usable "binurl" is given
 * - title:        optional title for the bookmark (tags are stripped)
 * - description:  optional description for the bookmark (tags are stripped)
 * - tags:         optional tags, converted via ParseUrl::convertTagsToArray()
 * - format:       "json" switches the media and attachment results to JSON
 * - noAttachment: when non-empty, a plain [url] link is emitted instead of an attachment
 *
 * Processing order:
 * 1. A HEAD request decides whether the URL is an image, video or audio file.
 * 2. The "parse_link" hook may provide the complete output.
 * 3. If title and description were both submitted, a bookmark is built from them.
 * 4. Otherwise the page is scraped via ParseUrl::getSiteinfo().
 *
 * @param App $a Not used by this function
 *
 * @return void
 */
function parse_url_content(App $a)
{
	// NOTE(review): this function fetches a caller-supplied URL from the server side
	// and no access check is visible here - confirm that access is restricted upstream.
	$br = "\n";
	$ret = ['success' => false, 'contentType' => ''];

	$url = '';
	if (!empty($_GET['binurl']) && Strings::isHex($_GET['binurl'])) {
		// hex2bin() returns false on invalid input, the cast turns that into ''
		$url = trim((string) hex2bin($_GET['binurl']));
	} elseif (!empty($_GET['url'])) {
		$url = trim($_GET['url']);
	}

	// Also catches values that only became empty after decoding/trimming,
	// which otherwise would be requested as the bare string "http://".
	if ($url === '') {
		Logger::info('No url given');
		exit();
	}

	$title = '';
	if (!empty($_GET['title'])) {
		$title = strip_tags(trim($_GET['title']));
	}

	$text = '';
	if (!empty($_GET['description'])) {
		$text = strip_tags(trim($_GET['description']));
	}

	$str_tags = '';
	if (!empty($_GET['tags'])) {
		$arr_tags = ParseUrl::convertTagsToArray($_GET['tags']);
		if (count($arr_tags)) {
			$str_tags = $br . implode(' ', $arr_tags) . $br;
		}
	}

	$format = '';
	if (isset($_GET['format']) && $_GET['format'] === 'json') {
		$format = 'json';
	}

	// Add url scheme if it is missing
	$arrurl = parse_url($url);
	if (empty($arrurl['scheme'])) {
		if (!empty($arrurl['host'])) {
			// A host without a scheme means the url already starts with "//"
			$url = 'http:' . $url;
		} else {
			$url = 'http://' . $url;
		}
	}

	Logger::log($url);

	// Check if the URL is an image, video or audio file. If so format
	// the URL with the corresponding BBCode media tag.
	// Only the header of the URL is fetched for this.
	$curlResponse = DI::httpRequest()->head($url);

	if ($curlResponse->isSuccess()) {
		$hdrs = $curlResponse->getHeaderArray();

		$type = array_key_exists('Content-Type', $hdrs) ? $hdrs['Content-Type'] : null;

		$content_type = '';
		$bbcode = '';
		if ($type) {
			// Media type prefix => BBCode tag. All entries are checked in this
			// order, so a later match overrides an earlier one.
			$media_tags = ['image' => 'img', 'video' => 'video', 'audio' => 'audio'];
			foreach ($media_tags as $media => $tag) {
				if (stripos($type, $media . '/') !== false) {
					$content_type = $media;
					$bbcode = $br . '[' . $tag . ']' . $url . '[/' . $tag . ']' . $br;
				}
			}
		}

		if (!empty($content_type)) {
			if ($format == 'json') {
				$ret['contentType'] = $content_type;
				$ret['data'] = ['url' => $url];
				$ret['success'] = true;
				System::jsonExit($ret);
			}

			echo $bbcode;
			exit();
		}
	}

	// Hook consumers may fill 'text' with the complete output for this url
	$arr = ['url' => $url, 'text' => ''];

	Hook::callAll('parse_link', $arr);

	if (strlen($arr['text'])) {
		echo $arr['text'];
		exit();
	}

	// If there is already some content information submitted we don't
	// need to parse the url for content.
	// ($url can't be empty at this point, so only title and description are checked)
	if (!empty($title) && !empty($text)) {
		// Removing the line breaks can leave an empty title, hence the fallback to the url
		$title = str_replace(["\r", "\n"], ['', ''], $title);

		$text = '[quote]' . trim($text) . '[/quote]' . $br;

		$result = sprintf('[bookmark=%s]%s[/bookmark]%s', $url, ($title) ? $title : $url, $text) . $str_tags;

		Logger::log('(unparsed): returns: ' . $result);

		echo $result;
		exit();
	}

	// Fetch the information directly from the webpage
	$siteinfo = ParseUrl::getSiteinfo($url);

	unset($siteinfo['keywords']);

	// Bypass attachment if parse url for a comment
	if (!empty($_GET['noAttachment'])) {
		// Fall back to the url as link text when the page provided no title,
		// the same way the bookmark branch above does.
		$link_text = !empty($siteinfo['title']) ? $siteinfo['title'] : $url;

		echo $br . '[url=' . $url . ']' . $link_text . '[/url]';
		exit();
	}

	if ($format == 'json') {
		$ret['data'] = $siteinfo;
		$ret['contentType'] = 'attachment';
		$ret['success'] = true;

		System::jsonExit($ret);
	}

	// Format it as BBCode attachment
	echo "\n" . PageInfo::getFooterFromData($siteinfo);

	exit();
}

/**
 * Deprecated thin wrapper around ParseUrl::getSiteinfoCached()
 *
 * Note: The implementation has been moved to ParseUrl.php. This function is
 * only kept for legacy support and will be removed in the future.
 *
 * @param string $url         The url of the page which should be scraped
 * @param bool   $no_guessing If true the parser doesn't search for
 *                            preview pictures
 * @param bool   $do_oembed   The false option is used by the function fetch_oembed()
 *                            to avoid endless loops
 *
 * @return array which contains the data needed for embedding
 *
 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
 * @see   ParseUrl::getSiteinfoCached()
 *
 * @deprecated since version 3.6 use ParseUrl::getSiteinfoCached instead
 */
function parseurl_getsiteinfo_cached($url, $no_guessing = false, $do_oembed = true)
{
	// Pure pass-through, all arguments are forwarded unchanged
	return ParseUrl::getSiteinfoCached($url, $no_guessing, $do_oembed);
}
ajaxify the wall poster 2010-07-23 07:41:45 +02:00			`<?php`
Normalize App parameter declaration (mod folder, 2 out of 3) 2017-01-09 13:14:25 +01:00			`/**`
Add license info at Friendica PHP files 2020-02-09 16:18:46 +01:00			`* @copyright Copyright (C) 2020, Friendica`
			`*`
			`* @license GNU AGPL version 3 or any later version`
			`*`
			`* This program is free software: you can redistribute it and/or modify`
			`* it under the terms of the GNU Affero General Public License as`
			`* published by the Free Software Foundation, either version 3 of the`
			`* License, or (at your option) any later version.`
			`*`
			`* This program is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`* GNU Affero General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU Affero General Public License`
			`* along with this program. If not, see <https://www.gnu.org/licenses/>.`
Normalize App parameter declaration (mod folder, 2 out of 3) 2017-01-09 13:14:25 +01:00			`*`
- Remove $theme_richtext_editor boot var - Remove "richtext" feature - Remove fix_mce_lf() function - Remove nomce parameter 2017-01-27 04:57:53 +01:00			`* This module does parse an url for embeddable content (audio, video, image files or link)`
			`* information and does format this information to BBCode`
Normalize App parameter declaration (mod folder, 2 out of 3) 2017-01-09 13:14:25 +01:00			`*`
			`* @see ParseUrl::getSiteinfo() for more information about scraping embeddable content`
Fix formatting in mod/parse_url 2018-09-02 23:24:56 +02:00			`*/`
Adding hexadecimal check for parse_rul see https://github.com/friendica/friendica/issues/6917#issuecomment-475461338 2019-05-29 20:32:16 +02:00
Move App to src - Add `use Friendica\App;` wherever needed 2017-04-30 06:07:00 +02:00			`use Friendica\App;`
Replace *_page_info function calls with Content\PageInfo equivalent 2020-07-14 16:15:04 +02:00			`use Friendica\Content\PageInfo;`
Replace deprecated Addon::callHooks with Hook::callAll - Update documentation 2018-12-26 07:06:24 +01:00			`use Friendica\Core\Hook;`
Log function implement log() function. 2018-10-29 22:20:46 +01:00			`use Friendica\Core\Logger;`
attachment preview: frontend work (works with frio) 2019-02-01 19:18:08 +01:00			`use Friendica\Core\System;`
Rename "HTTPRequest::curl()" to HTTPRequest::get() 2020-03-04 22:35:09 +01:00			`use Friendica\DI;`
Move ParseUrl to Util namespace 2018-01-04 18:03:15 +01:00			`use Friendica\Util\ParseUrl;`
Adding hexadecimal check for parse_rul see https://github.com/friendica/friendica/issues/6917#issuecomment-475461338 2019-05-29 20:32:16 +02:00			`use Friendica\Util\Strings;`
update writable for dfrn contacts when using local delivery 2012-09-06 01:26:11 +02:00
Fix formatting in mod/parse_url 2018-09-02 23:24:56 +02:00			`function parse_url_content(App $a)`
			`{`
parse_url: recognize image/video/audio files + move functions into own class 2016-11-24 01:11:22 +01:00			`$text = null;`
Fix formatting in mod/parse_url 2018-09-02 23:24:56 +02:00			`$str_tags = '';`
attachment preview: frontend work (works with frio) 2019-02-01 19:18:08 +01:00			`$format = '';`
			`$ret= ['success' => false, 'contentType' => ''];`
OEmbed and parse_url are now cached in dedicated tables 2016-01-14 23:59:51 +01:00
- Remove $theme_richtext_editor boot var - Remove "richtext" feature - Remove fix_mce_lf() function - Remove nomce parameter 2017-01-27 04:57:53 +01:00			`$br = "\n";`
Support for twitter cards (in both directions) 2014-02-22 15:46:19 +01:00
Adding hexadecimal check for parse_rul see https://github.com/friendica/friendica/issues/6917#issuecomment-475461338 2019-05-29 20:32:16 +02:00			`if (!empty($_GET['binurl']) && Strings::isHex($_GET['binurl'])) {`
Fix formatting in mod/parse_url 2018-09-02 23:24:56 +02:00			`$url = trim(hex2bin($_GET['binurl']));`
Adding hexadecimal check for parse_rul see https://github.com/friendica/friendica/issues/6917#issuecomment-475461338 2019-05-29 20:32:16 +02:00			`} elseif (!empty($_GET['url'])) {`
Fix formatting in mod/parse_url 2018-09-02 23:24:56 +02:00			`$url = trim($_GET['url']);`
Adding hexadecimal check for parse_rul see https://github.com/friendica/friendica/issues/6917#issuecomment-475461338 2019-05-29 20:32:16 +02:00			`// fallback in case no url is valid`
			`} else {`
Change fallback logic 2019-05-30 13:45:39 +02:00			`Logger::info('No url given');`
			`exit();`
Improved handling with wrong formatted urls and fetchinfo 2016-11-12 21:23:00 +01:00			`}`

Fix formatting in mod/parse_url 2018-09-02 23:24:56 +02:00			`if (!empty($_GET['title'])) {`
			`$title = strip_tags(trim($_GET['title']));`
Preventing an endless loop while fetching site infos 2014-07-14 08:37:40 +02:00			`}`

Fix formatting in mod/parse_url 2018-09-02 23:24:56 +02:00			`if (!empty($_GET['description'])) {`
			`$text = strip_tags(trim($_GET['description']));`
Suppoorting Open Graph and Dublin Core when displaying single item Following link shorteners in parse_url 2013-03-02 14:46:06 +01:00			`}`

Fix formatting in mod/parse_url 2018-09-02 23:24:56 +02:00			`if (!empty($_GET['tags'])) {`
			`$arr_tags = ParseUrl::convertTagsToArray($_GET['tags']);`
parse_url: recognize image/video/audio files + move functions into own class 2016-11-24 01:11:22 +01:00			`if (count($arr_tags)) {`
Fix formatting in mod/parse_url 2018-09-02 23:24:56 +02:00			`$str_tags = $br . implode(' ', $arr_tags) . $br;`
The order for the page parsing is changed. 2015-04-05 20:40:31 +02:00			`}`
			`}`

attachment preview: some small corrections 2019-02-03 16:26:13 +01:00			`if (isset($_GET['format']) && $_GET['format'] == 'json') {`
attachment preview: frontend work (works with frio) 2019-02-01 19:18:08 +01:00			`$format = 'json';`
			`}`

parse_url: recognize image/video/audio files + move functions into own class 2016-11-24 01:11:22 +01:00			`// Add url scheme if it is missing`
			`$arrurl = parse_url($url);`
Replace x() by isset(), !empty() or defaults() - Remove extraneous parentheses around empty() calls - Remove duplicate calls to intval(), count() or strlen() after empty() - Replace ternary operators outputting binary value with empty() return value - Rewrite defaults() without x() 2018-11-30 15:06:22 +01:00			`if (empty($arrurl['scheme'])) {`
			`if (!empty($arrurl['host'])) {`
Fix formatting in mod/parse_url 2018-09-02 23:24:56 +02:00			`$url = 'http:' . $url;`
parse_url: recognize image/video/audio files + move functions into own class 2016-11-24 01:11:22 +01:00			`} else {`
Fix formatting in mod/parse_url 2018-09-02 23:24:56 +02:00			`$url = 'http://' . $url;`
Added some more timestamp measuring 2015-03-07 23:14:26 +01:00			`}`
community: Only show top postings, no comments parse_url: Follow location in "meta"-header 2013-02-24 12:54:53 +01:00			`}`

Log function implement log() function. 2018-10-29 22:20:46 +01:00			`Logger::log($url);`
parse_url: recognize image/video/audio files + move functions into own class 2016-11-24 01:11:22 +01:00
parse_url: add some docu 2016-11-25 16:59:31 +01:00			`// Check if the URL is an image, video or audio file. If so format`
			`// the URL with the corresponding BBCode media tag`
parse_url: recognize image/video/audio files + move functions into own class 2016-11-24 01:11:22 +01:00			`// Fetch the header of the URL`
Remove option 'novalidate' flag from HTTPRequest options 2020-10-18 22:23:07 +02:00			`$curlResponse = DI::httpRequest()->head($url);`
Fix formatting in mod/parse_url 2018-09-02 23:24:56 +02:00
Curl Response Refactoring - extended Curl to parse Curl Response - refactored Network::curl() - replaced every Network::curl() execution with the new Curl container 2018-10-10 21:08:43 +02:00			`if ($curlResponse->isSuccess()) {`
Use CurlResult->getHeaderArray instead of custom conversion 2020-10-23 08:03:28 +02:00			`$hdrs = $curlResponse->getHeaderArray();`

Fix notices in Parse URL feature 2018-10-17 00:27:13 +02:00			`$type = null;`
attachment preview: frontend work (works with frio) 2019-02-01 19:18:08 +01:00			`$content_type = '';`
			`$bbcode = '';`
Fix formatting in mod/parse_url 2018-09-02 23:24:56 +02:00			`if (array_key_exists('Content-Type', $hdrs)) {`
			`$type = $hdrs['Content-Type'];`
parse_url: recognize image/video/audio files + move functions into own class 2016-11-24 01:11:22 +01:00			`}`
			`if ($type) {`
Fix formatting in mod/parse_url 2018-09-02 23:24:56 +02:00			`if (stripos($type, 'image/') !== false) {`
attachment preview: frontend work (works with frio) 2019-02-01 19:18:08 +01:00			`$content_type = 'image';`
			`$bbcode = $br . '[img]' . $url . '[/img]' . $br;`
parse url: Emtpy values in meta headers are now ignored. Oembed data has priority. Videos are embedded with some further information. 2014-04-04 10:58:31 +02:00			`}`
Fix formatting in mod/parse_url 2018-09-02 23:24:56 +02:00			`if (stripos($type, 'video/') !== false) {`
attachment preview: frontend work (works with frio) 2019-02-01 19:18:08 +01:00			`$content_type = 'video';`
			`$bbcode = $br . '[video]' . $url . '[/video]' . $br;`
parse url: Emtpy values in meta headers are now ignored. Oembed data has priority. Videos are embedded with some further information. 2014-04-04 10:58:31 +02:00			`}`
Fix formatting in mod/parse_url 2018-09-02 23:24:56 +02:00			`if (stripos($type, 'audio/') !== false) {`
attachment preview: frontend work (works with frio) 2019-02-01 19:18:08 +01:00			`$content_type = 'audio';`
			`$bbcode = $br . '[audio]' . $url . '[/audio]' . $br;`
			`}`
			`}`
			`if (!empty($content_type)) {`
			`if ($format == 'json') {`
			`$ret['contentType'] = $content_type;`
			`$ret['data'] = ['url' => $url];`
			`$ret['success'] = true;`
			`System::jsonExit($ret);`
parse_url: Fetch multiple pictures so that the user can decide what to take. 2012-07-12 08:20:27 +02:00			`}`
attachment preview: frontend work (works with frio) 2019-02-01 19:18:08 +01:00
			`echo $bbcode;`
			`exit();`
Added some more timestamp measuring 2015-03-07 23:14:26 +01:00			`}`
parse_url: Fetch multiple pictures so that the user can decide what to take. 2012-07-12 08:20:27 +02:00			`}`
parse_url: complete new code for fetching website information 2012-07-12 01:17:33 +02:00
Add Link promt in comment + only add URL instead of attachement via parse_url 2018-10-17 21:05:45 +02:00
Fix formatting in mod/parse_url 2018-09-02 23:24:56 +02:00			`$template = '[bookmark=%s]%s[/bookmark]%s';`
ajaxify the wall poster 2010-07-23 07:41:45 +02:00
Fix formatting in mod/parse_url 2018-09-02 23:24:56 +02:00			`$arr = ['url' => $url, 'text' => ''];`
more plugin hooks 2010-12-26 00:01:02 +01:00
Replace deprecated Addon::callHooks with Hook::callAll - Update documentation 2018-12-26 07:06:24 +01:00			`Hook::callAll('parse_link', $arr);`
more plugin hooks 2010-12-26 00:01:02 +01:00
Fix formatting in mod/parse_url 2018-09-02 23:24:56 +02:00			`if (strlen($arr['text'])) {`
			`echo $arr['text'];`
			`exit();`
more plugin hooks 2010-12-26 00:01:02 +01:00			`}`

And some more notices removed ... (#5533) * Fix for vanishing notes * The field needs to be part of the selected fields ... * And some more notes ... 2018-07-31 07:54:25 +02:00			`// If there is already some content information submitted we don't`
parse_url: add some docu 2016-11-25 16:59:31 +01:00			`// need to parse the url for content.`
And some more notices removed ... (#5533) * Fix for vanishing notes * The field needs to be part of the selected fields ... * And some more notes ... 2018-07-31 07:54:25 +02:00			`if (!empty($url) && !empty($title) && !empty($text)) {`
Fix formatting in mod/parse_url 2018-09-02 23:24:56 +02:00			`$title = str_replace(["\r", "\n"], ['', ''], $title);`
allow setting all the bookmarked URL fields via script/API 2011-09-20 07:21:55 +02:00
Fix formatting in mod/parse_url 2018-09-02 23:24:56 +02:00			`$text = '[quote]' . trim($text) . '[/quote]' . $br;`
allow setting all the bookmarked URL fields via script/API 2011-09-20 07:21:55 +02:00
parse_url: recognize image/video/audio files + move functions into own class 2016-11-24 01:11:22 +01:00			`$result = sprintf($template, $url, ($title) ? $title : $url, $text) . $str_tags;`
allow setting all the bookmarked URL fields via script/API 2011-09-20 07:21:55 +02:00
Log function implement log() function. 2018-10-29 22:20:46 +01:00			`Logger::log('(unparsed): returns: ' . $result);`
allow setting all the bookmarked URL fields via script/API 2011-09-20 07:21:55 +02:00
			`echo $result;`
Fix formatting in mod/parse_url 2018-09-02 23:24:56 +02:00			`exit();`
allow setting all the bookmarked URL fields via script/API 2011-09-20 07:21:55 +02:00			`}`

parse_url: add some docu 2016-11-25 16:59:31 +01:00			`// Fetch the information directly from the webpage`
parse_url: recognize image/video/audio files + move functions into own class 2016-11-24 01:11:22 +01:00			`$siteinfo = ParseUrl::getSiteinfo($url);`
allow setting all the bookmarked URL fields via script/API 2011-09-20 07:21:55 +02:00
Fix formatting in mod/parse_url 2018-09-02 23:24:56 +02:00			`unset($siteinfo['keywords']);`
Merge remote-tracking branch 'upstream/master' Conflicts: mod/crepair.php 2014-01-05 16:22:42 +01:00
Add multimedia link for jot + enables hover text in jot + autosize in when paste link + renaming / Code Standards 2018-10-24 16:20:10 +02:00			`// Bypass attachment if parse url for a comment`
			`if (!empty($_GET['noAttachment'])) {`
			`echo $br . '[url=' . $url . ']' . $siteinfo['title'] . '[/url]';`
			`exit();`
			`}`

attachment preview: frontend work (works with frio) 2019-02-01 19:18:08 +01:00			`if ($format == 'json') {`
			`$ret['data'] = $siteinfo;`
			`$ret['contentType'] = 'attachment';`
			`$ret['success'] = true;`

			`System::jsonExit($ret);`
			`}`

parse_url: recognize image/video/audio files + move functions into own class 2016-11-24 01:11:22 +01:00			`// Format it as BBCode attachment`
Replace *_page_info function calls with Content\PageInfo equivalent 2020-07-14 16:15:04 +02:00			`$info = "\n" . PageInfo::getFooterFromData($siteinfo);`
Bugfix: ' in titles were a problem. 2016-04-18 20:57:01 +02:00
			`echo $info;`

Fix formatting in mod/parse_url 2018-09-02 23:24:56 +02:00			`exit();`
Add oembed user option for use oembed instead of standard yotube embedding. Remove global option and parse_url use of oembed. 2011-05-23 10:37:09 +02:00			`}`
legacy support for function parseurl_getsiteinfo_cached() 2016-11-28 15:26:51 +01:00
			`/**`
@brief is removed completely 2020-01-19 07:05:23 +01:00			`* Legacy function to call ParseUrl::getSiteinfoCached`
Normalize App parameter declaration (mod folder, 2 out of 3) 2017-01-09 13:14:25 +01:00			`*`
legacy support for function parseurl_getsiteinfo_cached() 2016-11-28 15:26:51 +01:00			`* Note: We have moved the function to ParseUrl.php. This function is only for`
			`* legacy support and will be remove in the future`
Normalize App parameter declaration (mod folder, 2 out of 3) 2017-01-09 13:14:25 +01:00			`*`
Update PHPDoc in mod/ 2019-01-07 07:07:42 +01:00			`* @param string $url The url of the page which should be scraped`
			`* @param bool $no_guessing If true the parse doens't search for`
			`* preview pictures`
			`* @param bool $do_oembed The false option is used by the function fetch_oembed()`
			`* to avoid endless loops`
Normalize App parameter declaration (mod folder, 2 out of 3) 2017-01-09 13:14:25 +01:00			`*`
legacy support for function parseurl_getsiteinfo_cached() 2016-11-28 15:26:51 +01:00			`* @return array which contains needed data for embedding`
Normalize App parameter declaration (mod folder, 2 out of 3) 2017-01-09 13:14:25 +01:00			`*`
Update PHPDoc in mod/ 2019-01-07 07:07:42 +01:00			`* @throws \Friendica\Network\HTTPException\InternalServerErrorException`
			`* @see ParseUrl::getSiteinfoCached()`
Normalize App parameter declaration (mod folder, 2 out of 3) 2017-01-09 13:14:25 +01:00			`*`
Update PHPDoc in mod/ 2019-01-07 07:07:42 +01:00			`* @deprecated since version 3.6 use ParseUrl::getSiteinfoCached instead`
legacy support for function parseurl_getsiteinfo_cached() 2016-11-28 15:26:51 +01:00			`*/`
Fix formatting in mod/parse_url 2018-09-02 23:24:56 +02:00			`function parseurl_getsiteinfo_cached($url, $no_guessing = false, $do_oembed = true)`
			`{`
legacy support for function parseurl_getsiteinfo_cached() 2016-11-28 15:26:51 +01:00			`$siteinfo = ParseUrl::getSiteinfoCached($url, $no_guessing, $do_oembed);`
			`return $siteinfo;`
			`}`