Friendica Communications Platform (please note that this is a clone of the repository at github, issues are handled there) https://friendi.ca
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

414 lines
12 KiB

  1. <?php
  2. /**
  3. * @file src/Content/OEmbed.php
  4. */
  5. namespace Friendica\Content;
  6. use DOMDocument;
  7. use DOMNode;
  8. use DOMText;
  9. use DOMXPath;
  10. use Exception;
  11. use Friendica\Core\Cache\Cache;
  12. use Friendica\Core\Config;
  13. use Friendica\Core\Hook;
  14. use Friendica\Core\L10n;
  15. use Friendica\Core\Renderer;
  16. use Friendica\Database\DBA;
  17. use Friendica\DI;
  18. use Friendica\Util\DateTimeFormat;
  19. use Friendica\Util\Network;
  20. use Friendica\Util\ParseUrl;
  21. use Friendica\Util\Proxy as ProxyUtils;
  22. use Friendica\Util\Strings;
  23. /**
  24. * Handles all OEmbed content fetching and replacement
  25. *
  26. * OEmbed is a standard used to allow an embedded representation of a URL on
  27. * third party sites
  28. *
  29. * @see https://oembed.com
  30. *
  31. * @author Hypolite Petovan <hypolite@mrpetovan.com>
  32. */
  33. class OEmbed
  34. {
  /**
   * Callback handed to preg_replace_callback() by BBCode2HTML().
   *
   * Fetches the OEmbed data of the captured URL and renders it as HTML. Rich
   * content is suppressed whenever isAllowedURL() rejects the URL.
   *
   * @param array $matches Regular expression matches, index 1 holds the URL to embed
   * @return string HTML produced by formatObject()
   */
  public static function replaceCallback($matches)
  {
      $url = $matches[1];
      $no_rich_type = !self::isAllowedURL($url);

      return self::formatObject(self::fetchURL($url, $no_rich_type));
  }
  /**
   * @brief Get data from an URL to embed its content.
   *
   * The JSON document is looked up in the "oembed" database table first, then in
   * the cache, and only then fetched from the remote site via OEmbed
   * autodiscovery. A usable remote result is stored in the database and cached
   * with Cache::DAY, anything else is cached as an error with Cache::FIVE_MINUTES.
   *
   * @param string $embedurl     The URL from which the data should be fetched.
   * @param bool   $no_rich_type If set to true rich type content won't be fetched.
   *
   * @return \Friendica\Object\OEmbed
   * @throws \Friendica\Network\HTTPException\InternalServerErrorException
   */
  public static function fetchURL($embedurl, $no_rich_type = false)
  {
      $embedurl = trim($embedurl, '\'"');

      $a = DI::app();

      $cache_key = 'oembed:' . $a->videowidth . ':' . $embedurl;

      $condition = ['url' => Strings::normaliseLink($embedurl), 'maxwidth' => $a->videowidth];
      $oembed_record = DBA::selectFirst('oembed', ['content'], $condition);
      if (DBA::isResult($oembed_record)) {
          $json_string = $oembed_record['content'];
      } else {
          $json_string = DI::cache()->get($cache_key);
      }

      // These media files should now be caught in bbcode.php
      // left here as a fallback in case this is called from another source
      $noexts = ['mp3', 'mp4', 'ogg', 'ogv', 'oga', 'ogm', 'webm'];
      $ext = pathinfo(strtolower($embedurl), PATHINFO_EXTENSION);

      $oembed = new \Friendica\Object\OEmbed($embedurl);

      if ($json_string) {
          $oembed->parseJSON($json_string);
      } else {
          $json_string = '';

          if (!in_array($ext, $noexts, true)) {
              // try oembed autodiscovery
              $json_string = self::discoverJSON($embedurl, $a->videowidth);
          }

          $json_string = trim($json_string);

          // Anything that doesn't look like a JSON object is recorded as an error
          if (!$json_string || $json_string[0] != '{') {
              $json_string = '{"type":"error"}';
          }

          $oembed->parseJSON($json_string);

          if (!empty($oembed->type) && $oembed->type != 'error') {
              DBA::insert('oembed', [
                  'url' => Strings::normaliseLink($embedurl),
                  'maxwidth' => $a->videowidth,
                  'content' => $json_string,
                  'created' => DateTimeFormat::utcNow()
              ], true);
              $cache_ttl = Cache::DAY;
          } else {
              // Keep failures only briefly so the URL is retried soon
              $cache_ttl = Cache::FIVE_MINUTES;
          }

          DI::cache()->set($cache_key, $json_string, $cache_ttl);
      }

      if ($oembed->type == 'error') {
          return $oembed;
      }

      // Always embed the SSL version
      $oembed->html = str_replace(
          ['http://www.youtube.com/', 'http://player.vimeo.com/'],
          ['https://www.youtube.com/', 'https://player.vimeo.com/'],
          $oembed->html
      );

      // If fetching information doesn't work, then improve via internal functions
      if ($no_rich_type && ($oembed->type == 'rich')) {
          $data = ParseUrl::getSiteinfoCached($embedurl, true, false);
          $oembed->type = $data['type'];

          if ($oembed->type == 'photo') {
              $oembed->url = $data['url'];
          }

          if (isset($data['title'])) {
              $oembed->title = $data['title'];
          }

          if (isset($data['text'])) {
              $oembed->description = $data['text'];
          }

          if (!empty($data['images'])) {
              $oembed->thumbnail_url = $data['images'][0]['src'];
              $oembed->thumbnail_width = $data['images'][0]['width'];
              $oembed->thumbnail_height = $data['images'][0]['height'];
          }
      }

      Hook::callAll('oembed_fetch_url', $embedurl, $oembed);

      return $oembed;
  }

  /**
   * Runs OEmbed autodiscovery on a page and fetches the advertised JSON document.
   *
   * The first <link> of type "application/json+oembed" is tried first, the first
   * <link> of type "text/json+oembed" only if that yielded no response.
   *
   * @param string     $embedurl Page that may advertise an OEmbed endpoint
   * @param int|string $maxwidth Value sent to the endpoint as "maxwidth" parameter
   * @return string Raw endpoint response, empty string if nothing could be fetched
   */
  private static function discoverJSON($embedurl, $maxwidth)
  {
      $html_text = Network::fetchUrl($embedurl, false, 15, 'text/*');
      if (!$html_text) {
          return '';
      }

      // loadHTML() is an instance method, calling it statically is an Error on PHP 8
      $dom = new DOMDocument();
      if (!@$dom->loadHTML($html_text)) {
          return '';
      }

      $xpath = new DOMXPath($dom);

      foreach (['application/json+oembed', 'text/json+oembed'] as $type) {
          foreach ($xpath->query("//link[@type='" . $type . "']") as $link) {
              // getAttribute() yields '' for a missing attribute instead of failing
              $href = $link->getAttribute('href');
              if ($href !== '') {
                  // Endpoints without a query string need '?' rather than '&'
                  $separator = (strpos($href, '?') === false) ? '?' : '&';
                  $json_string = (string)Network::fetchUrl($href . $separator . 'maxwidth=' . $maxwidth);
                  if ($json_string !== '') {
                      return $json_string;
                  }
              }

              // Only the first link of each type is considered
              break;
          }
      }

      return '';
  }
  /**
   * Renders an OEmbed object as a single-line HTML snippet.
   *
   * The output is always wrapped in <div class="oembed TYPE">, which is the
   * element HTML2BBCode() searches for. Unless a "rich" snippet already contains
   * the source URL, a <h4> heading with a rel="oembed" link to the source is added.
   *
   * NOTE(review): object fields are emitted verbatim; presumably they are
   * sanitised when \Friendica\Object\OEmbed is populated - confirm.
   *
   * @param \Friendica\Object\OEmbed $oembed
   * @return string HTML with all line feeds removed
   * @throws \Friendica\Network\HTTPException\InternalServerErrorException
   */
  private static function formatObject(\Friendica\Object\OEmbed $oembed)
  {
      $ret = '<div class="oembed ' . $oembed->type . '">';

      switch ($oembed->type) {
          case "video":
              if ($oembed->thumbnail_url) {
                  $tw = (isset($oembed->thumbnail_width) && intval($oembed->thumbnail_width)) ? $oembed->thumbnail_width : 200;
                  $th = (isset($oembed->thumbnail_height) && intval($oembed->thumbnail_height)) ? $oembed->thumbnail_height : 180;

                  // make sure we don't attempt divide by zero, fallback is a 1:1 ratio
                  $tr = (($th) ? $tw / $th : 1);

                  // The thumbnail is scaled to a fixed height, keeping its aspect ratio
                  $th = 120;
                  $tw = $th * $tr;

                  $tpl = Renderer::getMarkupTemplate('oembed_video.tpl');
                  $ret .= Renderer::replaceMacros($tpl, [
                      '$embedurl' => $oembed->embed_url,
                      '$escapedhtml' => base64_encode($oembed->html),
                      '$tw' => $tw,
                      '$th' => $th,
                      '$turl' => $oembed->thumbnail_url,
                  ]);
              } else {
                  // Append, don't overwrite: the wrapping div opened above must be kept
                  // so that the closing tag added below stays balanced
                  $ret .= $oembed->html;
              }
              break;

          case "photo":
              $ret .= '<img width="' . $oembed->width . '" src="' . ProxyUtils::proxifyUrl($oembed->url) . '">';
              break;

          case "link":
              break;

          case "rich":
              $ret .= ProxyUtils::proxifyHtml($oembed->html);
              break;
      }

      // strpos() returns 0 for a match at the very start, so compare against false
      // explicitly; an empty URL never counts as "present"
      $html = (string)$oembed->html;
      $embed_url = (string)$oembed->embed_url;
      $has_source_link = ($embed_url !== '') && (strpos($html, $embed_url) !== false);

      // add link to source if not present in "rich" type
      if ($oembed->type != 'rich' || !$has_source_link) {
          $ret .= '<h4>';

          if (!empty($oembed->title)) {
              if (!empty($oembed->provider_name)) {
                  $ret .= $oembed->provider_name . ": ";
              }

              $ret .= '<a href="' . $oembed->embed_url . '" rel="oembed">' . $oembed->title . '</a>';

              if (!empty($oembed->author_name)) {
                  $ret .= ' (' . $oembed->author_name . ')';
              }
          } elseif (!empty($oembed->provider_name) || !empty($oembed->author_name)) {
              // No title: label the link with "provider: author", whichever parts exist
              $embedlink = "";

              if (!empty($oembed->provider_name)) {
                  $embedlink .= $oembed->provider_name;
              }

              if (!empty($oembed->author_name)) {
                  if ($embedlink != "") {
                      $embedlink .= ": ";
                  }

                  $embedlink .= $oembed->author_name;
              }

              if (trim($embedlink) == "") {
                  $embedlink = $oembed->embed_url;
              }

              $ret .= '<a href="' . $oembed->embed_url . '" rel="oembed">' . $embedlink . '</a>';
          } else {
              $ret .= '<a href="' . $oembed->embed_url . '" rel="oembed">' . $oembed->embed_url . '</a>';
          }

          $ret .= "</h4>";
      }

      $ret .= '</div>';

      return str_replace("\n", "", $ret);
  }
  /**
   * Replaces every [embed]url[/embed] BBCode tag of a text with HTML.
   *
   * When the "system.no_oembed" configuration value is set, nothing is fetched
   * and each tag becomes an "Embedding disabled" notice enclosed in
   * <!-- oembed url --> markers. Otherwise each tag is rendered through
   * replaceCallback().
   *
   * @param string $text BBCode text
   * @return string
   * @throws \Friendica\Network\HTTPException\InternalServerErrorException
   */
  public static function BBCode2HTML($text)
  {
      $pattern = "/\[embed\](.+?)\[\/embed\]/is";

      $stopoembed = Config::get("system", "no_oembed");
      if ($stopoembed == true) {
          return preg_replace($pattern, "<!-- oembed $1 --><i>" . L10n::t('Embedding disabled') . " : $1</i><!-- /oembed $1 -->", $text);
      }

      // self::class instead of the string 'self', which is deprecated in callables since PHP 8.2
      return preg_replace_callback($pattern, [self::class, 'replaceCallback'], $text);
  }
  /**
   * Find <div class='oembed'>..<a href='url' rel='oembed'>..</a></div>
   * and replace it with [embed]url[/embed]
   *
   * Texts that don't contain the word "oembed", or that can't be parsed as
   * HTML, are returned unchanged. Matching divs without a rel="oembed" anchor
   * as direct child are left in place.
   *
   * @param string $text HTML
   * @return string
   */
  public static function HTML2BBCode($text)
  {
      // start parser only if 'oembed' is in text
      // (explicit comparison: strpos() returns 0 for a match at the very start)
      if (strpos($text, "oembed") === false) {
          return $text;
      }

      // convert non ascii chars to html entities
      $html_text = mb_convert_encoding($text, 'HTML-ENTITIES', mb_detect_encoding($text));

      // If it doesn't parse at all, just return the text.
      // loadHTML() is an instance method, calling it statically is an Error on PHP 8
      $dom = new DOMDocument();
      if (!@$dom->loadHTML($html_text)) {
          return $text;
      }

      $xpath = new DOMXPath($dom);

      $xattr = self::buildXPath("class", "oembed");
      $entries = $xpath->query("//div[$xattr]");

      $xattr = "@rel='oembed'";
      foreach ($entries as $e) {
          // item(0) is null when the div has no matching anchor
          $href_node = $xpath->evaluate("a[$xattr]/@href", $e)->item(0);
          if ($href_node !== null) {
              $e->parentNode->replaceChild(new DOMText("[embed]" . $href_node->nodeValue . "[/embed]"), $e);
          }
      }

      $body = $dom->getElementsByTagName("body")->item(0);
      if ($body === null) {
          return $text;
      }

      return self::getInnerHTML($body);
  }
  /**
   * Determines if rich content OEmbed is allowed for the provided URL
   *
   * Everything is allowed unless "system.no_oembed_rich_content" is set. In that
   * case the host of the URL has to pass Network::isDomainAllowed() against the
   * comma-separated "system.allowed_oembed" list; URLs without a host and an
   * empty list are rejected.
   *
   * @brief Determines if rich content OEmbed is allowed for the provided URL
   * @param string $url
   * @return boolean
   * @throws \Friendica\Network\HTTPException\InternalServerErrorException
   */
  public static function isAllowedURL($url)
  {
      $restricted = (bool)Config::get('system', 'no_oembed_rich_content');
      if ($restricted === false) {
          return true;
      }

      $host = parse_url($url, PHP_URL_HOST);

      // The allow list is only read when there is a host to check
      $allow_list = empty($host) ? '' : Config::get('system', 'allowed_oembed', '');
      if (empty($allow_list)) {
          return false;
      }

      return Network::isDomainAllowed($host, explode(',', $allow_list));
  }
  /**
   * Returns the HTML representation of an URL's OEmbed data.
   *
   * @param string      $url   URL to embed
   * @param string|null $title Replaces the fetched title unless empty
   * @return string HTML produced by formatObject()
   * @throws Exception if no OEmbed data could be fetched for the URL
   * @throws \Friendica\Network\HTTPException\InternalServerErrorException
   */
  public static function getHTML($url, $title = null)
  {
      // Always embed the SSL version
      $insecure = ["http://www.youtube.com/", "http://player.vimeo.com/"];
      $secure = ["https://www.youtube.com/", "https://player.vimeo.com/"];
      $url = str_replace($insecure, $secure, $url);

      $oembed = self::fetchURL($url, !self::isAllowedURL($url));

      $failed = !is_object($oembed) || (property_exists($oembed, 'type') && $oembed->type == 'error');
      if ($failed) {
          throw new Exception('OEmbed failed for URL: ' . $url);
      }

      if (!empty($title)) {
          $oembed->title = $title;
      }

      return self::formatObject($oembed);
  }
  /**
   * @brief Generates the iframe HTML for an oembed attachment.
   *
   * The remote site provides width and height, but both are regularly too
   * small for the generated iframe.
   *
   * The provided width is therefore ignored in favour of "100%", and the height
   * (200 if missing or given as a percentage) only serves as a starting point:
   * the iframe calls resizeIframe() on load, which also is why scrolling is
   * turned off.
   *
   * @todo This function is currently unused until someone™ adds support for a separate OEmbed domain
   *
   * @param string $src    Original remote URL to embed
   * @param string $width  Ignored, the iframe always spans the full width
   * @param string $height
   * @return string formatted HTML
   *
   * @throws \Friendica\Network\HTTPException\InternalServerErrorException
   */
  private static function iframe($src, $width, $height)
  {
      $usable_height = $height && !strstr($height, '%');
      if (!$usable_height) {
          $height = '200';
      }

      $width = '100%';

      // The remote URL is served through the local /oembed/ route
      $src = DI::baseUrl() . '/oembed/' . Strings::base64UrlEncode($src);

      $open_tag = '<iframe onload="resizeIframe(this);" class="embed_rich" height="' . $height . '" width="' . $width . '" src="' . $src . '" allowfullscreen scrolling="no" frameborder="no">';

      return $open_tag . L10n::t('Embedded content') . '</iframe>';
  }
  /**
   * Generates an XPath predicate to select elements whose provided attribute contains
   * the provided value in a space-separated list.
   *
   * The attribute is whitespace-normalised and padded with one space on each
   * side before the lookup, so the value matches as a whole word regardless of
   * its position and of leading, trailing or repeated whitespace.
   *
   * @brief Generates attribute search XPath string
   *
   * @param string $attr  Name of the attribute to search
   * @param string $value Value to search in a space-separated list; must not contain a single quote
   * @return string Predicate to be used between square brackets
   */
  private static function buildXPath($attr, $value)
  {
      return "contains(concat(' ', normalize-space(@$attr), ' '), ' $value ')";
  }
  /**
   * Returns the inner XML string of a provided DOMNode
   *
   * Every child node is serialised with saveXML() of its owner document and
   * the results are joined in document order; the node itself is not included.
   *
   * @brief Returns the inner XML string of a provided DOMNode
   *
   * @param DOMNode $node
   * @return string Empty string if the node has no children
   */
  private static function getInnerHTML(DOMNode $node)
  {
      $fragments = [];

      foreach ($node->childNodes as $child_node) {
          $fragments[] = $child_node->ownerDocument->saveXML($child_node);
      }

      return implode('', $fragments);
  }
  352. }