Friendica Communications Platform (please note that this is a clone of the repository on GitHub; issues are handled there) https://friendi.ca
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

413 lines
12KB

  1. <?php
  2. /**
  3. * @file src/Content/OEmbed.php
  4. */
  5. namespace Friendica\Content;
  6. use DOMDocument;
  7. use DOMNode;
  8. use DOMText;
  9. use DOMXPath;
  10. use Exception;
  11. use Friendica\Core\Addon;
  12. use Friendica\Core\Cache;
  13. use Friendica\Core\Config;
  14. use Friendica\Core\L10n;
  15. use Friendica\Core\Renderer;
  16. use Friendica\Core\System;
  17. use Friendica\Database\DBA;
  18. use Friendica\Util\DateTimeFormat;
  19. use Friendica\Util\Network;
  20. use Friendica\Util\ParseUrl;
  21. use Friendica\Util\Proxy as ProxyUtils;
  22. use Friendica\Util\Strings;
  23. /**
  24. * Handles all OEmbed content fetching and replacement
  25. *
  26. * OEmbed is a standard used to allow an embedded representation of a URL on
  27. * third party sites
  28. *
  29. * @see https://oembed.com
  30. *
  31. * @author Hypolite Petovan <hypolite@mrpetovan.com>
  32. */
  33. class OEmbed
  34. {
  /**
   * Callback for preg_replace_callback(): renders one matched [embed] tag.
   *
   * @param array $matches Regex match, index 1 holds the URL captured between the tags
   * @return string HTML returned by formatObject() for the fetched OEmbed data
   */
  public static function replaceCallback($matches)
  {
      $url = $matches[1];

      // Rich content is not fetched for URLs that isAllowedURL() rejects
      $skipRichContent = !self::isAllowedURL($url);

      return self::formatObject(self::fetchURL($url, $skipRichContent));
  }
  /**
   * @brief Get data from an URL to embed its content.
   *
   * The JSON is looked up in the `oembed` database table first, then in the
   * cache, and only then through OEmbed autodiscovery on the remote page.
   * Discovered data is stored in the table and cached with Cache::DAY,
   * failures are cached with Cache::FIVE_MINUTES.
   *
   * @param string $embedurl     The URL from which the data should be fetched.
   * @param bool   $no_rich_type If set to true rich type content won't be fetched.
   *
   * @return \Friendica\Object\OEmbed
   */
  public static function fetchURL($embedurl, $no_rich_type = false)
  {
      $embedurl = trim($embedurl, '\'"');

      $a = get_app();
      $maxwidth = $a->videowidth;

      $cache_key = 'oembed:' . $maxwidth . ':' . $embedurl;
      $normalised_url = Strings::normaliseLink($embedurl);

      $oembed_record = DBA::selectFirst('oembed', ['content'], ['url' => $normalised_url, 'maxwidth' => $maxwidth]);
      if (DBA::isResult($oembed_record)) {
          $json_string = $oembed_record['content'];
      } else {
          $json_string = Cache::get($cache_key);
      }

      $oembed = new \Friendica\Object\OEmbed($embedurl);

      if ($json_string) {
          $oembed->parseJSON($json_string);
      } else {
          $json_string = '';

          // These media files should now be caught in bbcode.php
          // left here as a fallback in case this is called from another source
          $noexts = ['mp3', 'mp4', 'ogg', 'ogv', 'oga', 'ogm', 'webm'];
          $ext = pathinfo(strtolower($embedurl), PATHINFO_EXTENSION);

          if (!in_array($ext, $noexts, true)) {
              // try oembed autodiscovery
              $redirects = 0;
              $html_text = Network::fetchUrl($embedurl, false, $redirects, 15, 'text/*');
              if ($html_text) {
                  $json_string = self::discoverJSON($html_text, $maxwidth);
              }
          }

          // Anything that does not look like a JSON object is recorded as an error
          $json_string = trim((string) $json_string);
          if (!$json_string || $json_string[0] !== '{') {
              $json_string = '{"type":"error"}';
          }

          $oembed->parseJSON($json_string);

          if (!empty($oembed->type) && $oembed->type != 'error') {
              DBA::insert('oembed', [
                  'url' => $normalised_url,
                  'maxwidth' => $maxwidth,
                  'content' => $json_string,
                  'created' => DateTimeFormat::utcNow()
              ], true);
              $cache_ttl = Cache::DAY;
          } else {
              $cache_ttl = Cache::FIVE_MINUTES;
          }

          Cache::set($cache_key, $json_string, $cache_ttl);
      }

      if ($oembed->type == 'error') {
          return $oembed;
      }

      // Always embed the SSL version
      $oembed->html = str_replace(['http://www.youtube.com/', 'http://player.vimeo.com/'], ['https://www.youtube.com/', 'https://player.vimeo.com/'], $oembed->html);

      // If fetching information doesn't work, then improve via internal functions
      if ($no_rich_type && ($oembed->type == 'rich')) {
          $data = ParseUrl::getSiteinfoCached($embedurl, true, false);

          // The type is always overwritten (with null when the site info has none)
          // so that a disallowed "rich" type is never kept
          $oembed->type = isset($data['type']) ? $data['type'] : null;

          if ($oembed->type == 'photo' && isset($data['url'])) {
              $oembed->url = $data['url'];
          }

          if (isset($data['title'])) {
              $oembed->title = $data['title'];
          }

          if (isset($data['text'])) {
              $oembed->description = $data['text'];
          }

          if (!empty($data['images'])) {
              $oembed->thumbnail_url = $data['images'][0]['src'];
              $oembed->thumbnail_width = $data['images'][0]['width'];
              $oembed->thumbnail_height = $data['images'][0]['height'];
          }
      }

      Addon::callHooks('oembed_fetch_url', $embedurl, $oembed);

      return $oembed;
  }

  /**
   * Looks for an OEmbed JSON endpoint advertised by a page and fetches it.
   *
   * Only the first <link> of each type is considered. A "text/json+oembed"
   * link has always taken precedence over an "application/json+oembed" one
   * here, so it is tried first; the other type is only requested when the
   * first one yields nothing.
   *
   * @param string $html_text HTML of the page to inspect
   * @param mixed  $maxwidth  Value sent to the endpoint as "maxwidth" parameter
   * @return string Raw endpoint response, empty string when nothing was fetched
   */
  private static function discoverJSON($html_text, $maxwidth)
  {
      // loadHTML() has to be called on an instance, the static call throws an Error since PHP 8
      $dom = new DOMDocument();
      if (!@$dom->loadHTML($html_text)) {
          return '';
      }

      $xpath = new DOMXPath($dom);

      foreach (['text/json+oembed', 'application/json+oembed'] as $type) {
          $entries = $xpath->query("//link[@type='" . $type . "']");
          if (!$entries || !$entries->length) {
              continue;
          }

          $href = $entries->item(0)->getAttribute('href');
          if ($href === '') {
              continue;
          }

          // Endpoints without a query string need "?" instead of "&"
          $separator = (strpos($href, '?') === false) ? '?' : '&';

          $response = Network::fetchUrl($href . $separator . 'maxwidth=' . $maxwidth);
          if ($response) {
              return $response;
          }
      }

      return '';
  }
  /**
   * Renders an OEmbed object as an HTML snippet wrapped in <div class="oembed TYPE">.
   *
   * The markup depends on the OEmbed type:
   * - video: the oembed_video.tpl template when a thumbnail exists, the raw embed HTML otherwise
   * - photo: a proxified <img>
   * - rich: the proxified embed HTML
   * - link and anything else: no media part
   *
   * A <h4> link to the source (with rel="oembed", which HTML2BBCode() looks for)
   * is appended unless a rich embed already contains the source URL.
   * Values coming from the OEmbed object are escaped before being written
   * into the generated markup; the embed HTML itself is passed through as is.
   *
   * @param \Friendica\Object\OEmbed $oembed
   * @return string HTML without line feeds
   */
  private static function formatObject(\Friendica\Object\OEmbed $oembed)
  {
      $embed_url = self::escapeHTML($oembed->embed_url);

      $ret = '<div class="oembed ' . self::escapeHTML($oembed->type) . '">';

      switch ($oembed->type) {
          case "video":
              if ($oembed->thumbnail_url) {
                  $tw = (isset($oembed->thumbnail_width) && intval($oembed->thumbnail_width)) ? $oembed->thumbnail_width : 200;
                  $th = (isset($oembed->thumbnail_height) && intval($oembed->thumbnail_height)) ? $oembed->thumbnail_height : 180;

                  // make sure we don't attempt divide by zero, fallback is a 1:1 ratio
                  $tr = (($th) ? $tw / $th : 1);

                  // The thumbnail is displayed 120 high, the width follows the ratio
                  $th = 120;
                  $tw = $th * $tr;

                  $tpl = Renderer::getMarkupTemplate('oembed_video.tpl');
                  $ret .= Renderer::replaceMacros($tpl, [
                      '$baseurl' => System::baseUrl(),
                      '$embedurl' => $oembed->embed_url,
                      '$escapedhtml' => base64_encode($oembed->html),
                      '$tw' => $tw,
                      '$th' => $th,
                      '$turl' => $oembed->thumbnail_url,
                  ]);
              } else {
                  // Appended, not assigned: an assignment drops the opening <div>
                  // while the closing one is still added below
                  $ret .= $oembed->html;
              }
              break;

          case "photo":
              $ret .= '<img width="' . self::escapeHTML($oembed->width) . '" src="' . self::escapeHTML(ProxyUtils::proxifyUrl($oembed->url)) . '">';
              break;

          case "link":
              break;

          case "rich":
              $ret .= ProxyUtils::proxifyHtml($oembed->html);
              break;
      }

      // strpos() returns 0 for a match at the very beginning, so compare against false
      $has_source_link = ((string) $oembed->embed_url !== '')
          && (strpos((string) $oembed->html, (string) $oembed->embed_url) !== false);

      // add link to source if not present in "rich" type
      if ($oembed->type != 'rich' || !$has_source_link) {
          $provider = !empty($oembed->provider_name) ? self::escapeHTML($oembed->provider_name) : '';
          $author = !empty($oembed->author_name) ? self::escapeHTML($oembed->author_name) : '';

          $ret .= '<h4>';

          if (!empty($oembed->title)) {
              if ($provider !== '') {
                  $ret .= $provider . ": ";
              }

              $ret .= '<a href="' . $embed_url . '" rel="oembed">' . self::escapeHTML($oembed->title) . '</a>';

              if ($author !== '') {
                  $ret .= ' (' . $author . ')';
              }
          } else {
              // Without a title the link text is "provider: author" (whichever
              // parts exist) and falls back to the URL itself
              $embedlink = implode(': ', array_filter([$provider, $author], 'strlen'));
              if (trim($embedlink) == '') {
                  $embedlink = $embed_url;
              }

              $ret .= '<a href="' . $embed_url . '" rel="oembed">' . $embedlink . '</a>';
          }

          $ret .= "</h4>";
      }

      $ret .= '</div>';

      return str_replace("\n", "", $ret);
  }

  /**
   * Escapes a value for use in HTML text or in a quoted attribute.
   *
   * Existing entities are left untouched (double_encode is off) so that
   * already encoded values are not encoded twice.
   *
   * @param mixed $value
   * @return string
   */
  private static function escapeHTML($value)
  {
      return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
  }
  /**
   * Replaces every [embed]url[/embed] BBCode tag in a text with HTML.
   *
   * When the "system.no_oembed" config value is set, the tags are replaced by
   * an "Embedding disabled" notice surrounded by oembed HTML comments instead
   * of being fetched.
   *
   * @param string $text
   * @return string
   */
  public static function BBCode2HTML($text)
  {
      $pattern = "/\[embed\](.+?)\[\/embed\]/is";

      $stopoembed = Config::get("system", "no_oembed");
      if ($stopoembed == true) {
          return preg_replace($pattern, "<!-- oembed $1 --><i>" . L10n::t('Embedding disabled') . " : $1</i><!-- /oembed $1 -->", $text);
      }

      // The class name is used instead of the string 'self', which is a
      // deprecated callable form since PHP 8.2
      return preg_replace_callback($pattern, [__CLASS__, 'replaceCallback'], $text);
  }
  /**
   * Find <div class='oembed'>..<a href='url' rel='oembed'>..</a></div>
   * and replace it with [embed]url[/embed]
   *
   * Texts that don't contain the word "oembed" are returned untouched, any
   * other text is returned as the serialised content of the parsed <body>.
   *
   * @param string $text HTML to convert
   * @return string
   */
  public static function HTML2BBCode($text)
  {
      // start parser only if 'oembed' is in text
      // (compared against false, a match at offset 0 counts as well)
      if (strpos($text, "oembed") === false) {
          return $text;
      }

      // convert non ascii chars to html entities
      // mb_detect_encoding() returns false when no encoding matches
      $encoding = mb_detect_encoding($text) ?: 'UTF-8';
      $html_text = mb_convert_encoding($text, 'HTML-ENTITIES', $encoding);
      if (!$html_text) {
          return $text;
      }

      // If it doesn't parse at all, just return the text.
      // loadHTML() has to be called on an instance, the static call throws an Error since PHP 8
      $dom = new DOMDocument();
      if (!@$dom->loadHTML($html_text)) {
          return $text;
      }

      $xpath = new DOMXPath($dom);
      $entries = $xpath->query("//div[" . self::buildXPath("class", "oembed") . "]");

      foreach ($entries as $e) {
          // Only a direct child <a rel="oembed"> provides the URL, containers without one are kept
          $hrefs = $xpath->evaluate("a[@rel='oembed']/@href", $e);
          $href = $hrefs ? $hrefs->item(0) : null;
          if ($href !== null) {
              $e->parentNode->replaceChild(new DOMText("[embed]" . $href->nodeValue . "[/embed]"), $e);
          }
      }

      $body = $dom->getElementsByTagName("body")->item(0);
      if ($body === null) {
          return $text;
      }

      return self::getInnerHTML($body);
  }
  /**
   * Tells whether rich OEmbed content may be used for the given URL.
   *
   * Everything is allowed unless the "system.no_oembed_rich_content" config
   * value is set. In that case the host of the URL is checked against the
   * comma-separated "system.allowed_oembed" list; a URL without host or an
   * empty list means "not allowed".
   *
   * @brief Determines if rich content OEmbed is allowed for the provided URL
   * @param string $url
   * @return boolean
   */
  public static function isAllowedURL($url)
  {
      if (Config::get('system', 'no_oembed_rich_content')) {
          $host = parse_url($url, PHP_URL_HOST);

          if (!empty($host)) {
              $allowed_list = Config::get('system', 'allowed_oembed', '');

              if (!empty($allowed_list)) {
                  return Network::isDomainAllowed($host, explode(',', $allowed_list));
              }
          }

          return false;
      }

      return true;
  }
  /**
   * Fetches the OEmbed data of a URL and renders it as HTML.
   *
   * @param string      $url   URL to embed
   * @param string|null $title Optional title replacing the fetched one
   * @return string HTML returned by formatObject()
   * @throws Exception when the fetch result is not an object or has the type "error"
   */
  public static function getHTML($url, $title = null)
  {
      // Always embed the SSL version
      $plain_hosts = ["http://www.youtube.com/", "http://player.vimeo.com/"];
      $ssl_hosts = ["https://www.youtube.com/", "https://player.vimeo.com/"];
      $url = str_replace($plain_hosts, $ssl_hosts, $url);

      $oembed = self::fetchURL($url, !self::isAllowedURL($url));

      $failed = !is_object($oembed) || (property_exists($oembed, 'type') && $oembed->type == 'error');
      if ($failed) {
          throw new Exception('OEmbed failed for URL: ' . $url);
      }

      if (!empty($title)) {
          $oembed->title = $title;
      }

      return self::formatObject($oembed);
  }
  /**
   * @brief Generates the iframe HTML for an oembed attachment.
   *
   * Width and height are given by the remote, and are regularly too small for
   * the generated iframe.
   *
   * The width is entirely discarded for the actual width of the post, while fixed
   * height is used as a starting point before the inevitable resizing.
   *
   * Since the iframe is automatically resized on load, there is no need for ugly
   * and impractical scrollbars.
   *
   * @todo This function is currently unused until someone™ adds support for a separate OEmbed domain
   *
   * @param string $src    Original remote URL to embed
   * @param string $width  Ignored, the iframe always gets a width of 100%
   * @param string $height Initial height, "200" is used when it is empty or a percentage
   * @return string formatted HTML
   *
   * @see OEmbed::formatObject()
   */
  private static function iframe($src, $width, $height)
  {
      if (!$height || strpos((string) $height, '%') !== false) {
          $height = '200';
      }

      $width = '100%';

      // The iframe points to the local /oembed/ route with the remote URL encoded into the path
      $src = System::baseUrl() . '/oembed/' . Strings::base64UrlEncode($src);

      // The height is given by the remote, so it is escaped before it ends up in the attribute
      $height = htmlspecialchars((string) $height, ENT_QUOTES, 'UTF-8');

      return '<iframe onload="resizeIframe(this);" class="embed_rich" height="' . $height . '" width="' . $width . '" src="' . $src . '" allowfullscreen scrolling="no" frameborder="no">' . L10n::t('Embedded content') . '</iframe>';
  }
  /**
   * Generates an XPath predicate matching elements whose provided attribute
   * contains the provided value as one item of a space-separated list.
   *
   * The attribute value is normalised and padded with a space on both sides,
   * so a single containment test covers the value being the first, the last,
   * an inner or the only item, whatever whitespace surrounds the items.
   *
   * @brief Generates attribute search XPath string
   *
   * @param string $attr  Name of the attribute to search
   * @param string $value Value to search in a space-separated list; it is put
   *                      into a single-quoted XPath literal and must not
   *                      contain a single quote
   * @return string XPath expression to be used inside a predicate ([...])
   */
  private static function buildXPath($attr, $value)
  {
      return "contains(concat(' ', normalize-space(@$attr), ' '), ' $value ')";
  }
  /**
   * Serialises the children of a node, leaving out the node's own tag.
   *
   * Each child is written with saveXML() of its owner document, so the
   * result is XML markup.
   *
   * @brief Returns the inner XML string of a provided DOMNode
   *
   * @param DOMNode $node
   * @return string Concatenated markup of all child nodes, empty without children
   */
  private static function getInnerHTML(DOMNode $node)
  {
      $fragments = [];

      foreach ($node->childNodes as $childNode) {
          $fragments[] = $childNode->ownerDocument->saveXML($childNode);
      }

      return implode('', $fragments);
  }
  349. }