From 738d7a221e863d1fe769ee2fcd98cab944880127 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Sun, 2 Feb 2014 09:54:33 +0100 Subject: [PATCH] API: Support for entitites. (embedded pictures in twidere) --- include/api.php | 156 +++++++++++++++++++++++++++++++++++++++-- include/bbcode.php | 21 +++--- include/html2plain.php | 4 +- 3 files changed, 164 insertions(+), 17 deletions(-) diff --git a/include/api.php b/include/api.php index ef3abb0553..f1221ef0b6 100644 --- a/include/api.php +++ b/include/api.php @@ -807,6 +807,10 @@ 'statusnet_conversation_id' => $lastwall['parent'], ); + $entities = api_get_entitities($status_info['text'], $lastwall['body']); + if (count($entities) > 0) + $status_info['entities'] = $entities; + if (($lastwall['item_network'] != "") AND ($status["source"] == 'web')) $status_info["source"] = network_to_name($lastwall['item_network']); elseif (($lastwall['item_network'] != "") AND (network_to_name($lastwall['item_network']) != $status_info["source"])) @@ -896,6 +900,10 @@ 'statusnet_conversation_id' => $lastwall['parent'], ); + $entities = api_get_entitities($user_info['text'], $lastwall['body']); + if (count($entities) > 0) + $user_info['entities'] = $entities; + if (($lastwall['item_network'] != "") AND ($user_info["status"]["source"] == 'web')) $user_info["status"]["source"] = network_to_name($lastwall['item_network']); if (($lastwall['item_network'] != "") AND (network_to_name($lastwall['item_network']) != $user_info["status"]["source"])) @@ -965,7 +973,7 @@ AND `contact`.`blocked` = 0 AND `contact`.`pending` = 0 $sql_extra AND `item`.`id`>%d - ORDER BY `item`.`received` DESC LIMIT %d ,%d ", + ORDER BY `item`.`id` DESC LIMIT %d ,%d ", //intval($user_info['uid']), intval(api_user()), intval($since_id), @@ -1046,7 +1054,7 @@ AND `contact`.`blocked` = 0 AND `contact`.`pending` = 0 $sql_extra AND `item`.`id`>%d - ORDER BY `received` DESC LIMIT %d, %d ", + ORDER BY `item`.`id` DESC LIMIT %d, %d ", intval($since_id), intval($start), intval($count)); @@ -1178,7 +1186,7 @@ AND `item`.`uid` = %d AND `contact`.`id` = `item`.`contact-id` AND `contact`.`blocked` = 0 AND `contact`.`pending` = 0 AND `item`.`id`>%d $sql_extra - ORDER BY `item`.`received` DESC LIMIT %d ,%d", + ORDER BY `item`.`id` DESC LIMIT %d ,%d", intval($id), intval(api_user()), intval($since_id), intval($start), intval($count) @@ -1348,7 +1356,7 @@ AND `contact`.`blocked` = 0 AND `contact`.`pending` = 0 $sql_extra AND `item`.`id`>%d - ORDER BY `item`.`received` DESC LIMIT %d ,%d ", + ORDER BY `item`.`id` DESC LIMIT %d ,%d ", //intval($user_info['uid']), intval(api_user()), intval($since_id), @@ -1421,7 +1429,7 @@ AND `contact`.`blocked` = 0 AND `contact`.`pending` = 0 $sql_extra AND `item`.`id`>%d - ORDER BY `item`.`received` DESC LIMIT %d ,%d ", + ORDER BY `item`.`id` DESC LIMIT %d ,%d ", intval(api_user()), intval($user_info['cid']), intval($since_id), @@ -1485,7 +1493,7 @@ AND `contact`.`blocked` = 0 AND `contact`.`pending` = 0 $sql_extra AND `item`.`id`>%d - ORDER BY `item`.`received` DESC LIMIT %d ,%d ", + ORDER BY `item`.`id` DESC LIMIT %d ,%d ", //intval($user_info['uid']), intval(api_user()), intval($since_id), @@ -1615,6 +1623,120 @@ return $ret; } + function api_get_entitities($text, $bbcode) { + /* + To-Do: + * remove links to pictures if they are links of a picture + * Some video stuff isn't recognized + * Links at the first character of the post + * different sizes of pictures + * caching picture data (using the id for that?) (See privacy_image_cache) + */ + + $include_entities = (x($_REQUEST,'include_entities')?$_REQUEST['include_entities']:true); + +// To-Do +// if (!$include_entities OR ($include_entities == "false")) +// return false; + + $entities = array(); + $entities["hashtags"] = array(); + $entities["symbols"] = array(); + $entities["urls"] = array(); + $entities["user_mentions"] = array(); + + $bbcode = preg_replace("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism",'[url=$1]$2[/url]',$bbcode); + //$bbcode = preg_replace("/\[url\](.*?)\[\/url\]/ism",'[url=$1]$1[/url]',$bbcode); + $bbcode = preg_replace("/\[video\](.*?)\[\/video\]/ism",'[url=$1]$1[/url]',$bbcode); + $bbcode = preg_replace("/\[youtube\](.*?)\[\/youtube\]/ism",'[url=$1]$1[/url]',$bbcode); + $bbcode = preg_replace("/\[vimeo\](.*?)\[\/vimeo\]/ism",'[url=$1]$1[/url]',$bbcode); + $bbcode = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $bbcode); + + $URLSearchString = "^\[\]"; + //preg_match_all("/\[url\]([$URLSearchString]*)\[\/url\]/ism", $bbcode, $urls1); + preg_match_all("/\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", $bbcode, $urls); + + $ordered_urls = array(); + foreach ($urls[1] AS $id=>$url) { + //$start = strpos($text, $url, $offset); + $start = iconv_strpos($text, $url, 0, "UTF-8"); + if (!($start === false)) + $ordered_urls[$start] = array("url" => $url, "title" => $urls[2][$id]); + } + + ksort($ordered_urls); + + $offset = 0; + //foreach ($urls[1] AS $id=>$url) { + foreach ($ordered_urls AS $url) { + if ((substr($url["title"], 0, 7) != "http://") AND (substr($url["title"], 0, 8) != "https://") AND + !strpos($url["title"], "http://") AND !strpos($url["title"], "https://")) + $display_url = $url["title"]; + else { + $display_url = str_replace(array("http://www.", "https://www."), array("", ""), $url["url"]); + $display_url = str_replace(array("http://", "https://"), array("", ""), $display_url); + + if (strlen($display_url) > 26) + $display_url = substr($display_url, 0, 25)."…"; + } + + //$start = strpos($text, $url, $offset); + $start = iconv_strpos($text, $url["url"], $offset, "UTF-8"); + if (!($start === false)) { + $entities["urls"][] = array("url" => $url["url"], + "expanded_url" => $url["url"], + "display_url" => $display_url, + "indices" => array($start, $start+strlen($url["url"]))); + $offset = $start + 1; + } + } + + preg_match_all("/\[img](.*?)\[\/img\]/ism", $bbcode, $images); + $ordered_images = array(); + foreach ($images[1] AS $image) { + //$start = strpos($text, $url, $offset); + $start = iconv_strpos($text, $image, 0, "UTF-8"); + if (!($start === false)) + $ordered_images[$start] = $image; + } + //$entities["media"] = array(); + $offset = 0; + + foreach ($ordered_images AS $url) { + $display_url = str_replace(array("http://www.", "https://www."), array("", ""), $url); + $display_url = str_replace(array("http://", "https://"), array("", ""), $display_url); + + if (strlen($display_url) > 26) + $display_url = substr($display_url, 0, 25)."…"; + + $start = iconv_strpos($text, $url, $offset, "UTF-8"); + if (!($start === false)) { + $redirects = 0; + $img_str = fetch_url($url,true, $redirects, 10); + $image = @imagecreatefromstring($img_str); + if ($image) { + $entities["media"][] = array( + "id" => $start, + "id_str" => (string)$start, + "indices" => array($start, $start+strlen($url)), + "media_url" => $url, + "media_url_https" => $url, + "url" => $url, + "display_url" => $display_url, + "expanded_url" => $url, + "type" => "photo", + "sizes" => array("medium" => array( + "w" => imagesx($image), + "h" => imagesy($image), + "resize" => "fit"))); + } + $offset = $start + 1; + } + } + + return($entities); + } + function api_format_items($r,$user_info, $filter_user = false) { $a = get_app(); @@ -1669,6 +1791,7 @@ } // Workaround for ostatus messages where the title is identically to the body + //$statusbody = trim(html2plain(bbcode(api_clean_plain_items($item['body']), false, false, 5, true), 0)); $statusbody = trim(html2plain(bbcode(api_clean_plain_items($item['body']), false, false, 2, true), 0)); $statustitle = trim($item['title']); @@ -1697,10 +1820,15 @@ 'favorited' => $item['starred'] ? true : false, //'attachments' => array(), 'user' => $status_user , + //'entities' => NULL, 'statusnet_html' => trim(bbcode($item['body'], false, false)), 'statusnet_conversation_id' => $item['parent'], ); + $entities = api_get_entitities($status['text'], $item['body']); + if (count($entities) > 0) + $status['entities'] = $entities; + if (($item['item_network'] != "") AND ($status["source"] == 'web')) $status["source"] = network_to_name($item['item_network']); else if (($item['item_network'] != "") AND (network_to_name($item['item_network']) != $status["source"])) @@ -1779,6 +1907,20 @@ } api_register_func('api/help/test','api_help_test',false); + function api_lists(&$a,$type) { + + $ret = array(); + return array($ret); + } + api_register_func('api/lists','api_lists',true); + + function api_lists_list(&$a,$type) { + + $ret = array(); + return array($ret); + } + api_register_func('api/lists/list','api_lists_list',true); + /** * https://dev.twitter.com/docs/api/1/get/statuses/friends * This function is deprecated by Twitter @@ -2052,7 +2194,7 @@ if ($max_id > 0) $sql_extra .= ' AND `mail`.`id` <= '.intval($max_id); - $r = q("SELECT `mail`.*, `contact`.`nurl` AS `contact-url` FROM `mail`,`contact` WHERE `mail`.`contact-id` = `contact`.`id` AND `mail`.`uid`=%d AND $sql_extra AND `mail`.`id` > %d ORDER BY `mail`.`created` DESC LIMIT %d,%d", + $r = q("SELECT `mail`.*, `contact`.`nurl` AS `contact-url` FROM `mail`,`contact` WHERE `mail`.`contact-id` = `contact`.`id` AND `mail`.`uid`=%d AND $sql_extra AND `mail`.`id` > %d ORDER BY `mail`.`id` DESC LIMIT %d,%d", intval(api_user()), intval($since_id), intval($start), intval($count) diff --git a/include/bbcode.php b/include/bbcode.php index bede60e23a..ee066f05a8 100644 --- a/include/bbcode.php +++ b/include/bbcode.php @@ -554,7 +554,7 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal $Text = preg_replace("/\[(\w*)\](\s*)/ism", '$2[$1]', $Text); $Text = preg_replace("/(\s*)\[\/(\w*)\]/ism", '[/$2]$1', $Text); - // Extract the private images which use data url's since preg has issues with + // Extract the private images which use data urls since preg has issues with // large data sizes. Stash them away while we do bbcode conversion, and then put them back // in after we've done all the regex matching. We cannot use any preg functions to do this. @@ -618,18 +618,23 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal // Set up the parameters for a MAIL search string $MAILSearchString = $URLSearchString; + if ($simplehtml == 5) + $Text = preg_replace("/[^#@]\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", '[url]$1[/url]', $Text); // Perform URL Search + if ($tryoembed) + $Text = preg_replace_callback("/\[bookmark\=([^\]]*)\].*?\[\/bookmark\]/ism",'tryoembed',$Text); + + if ($simplehtml == 5) + $Text = preg_replace("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism",'[url]$1[/url]',$Text); + else + $Text = preg_replace("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism",'[url=$1]$2[/url]',$Text); + // if the HTML is used to generate plain text, then don't do this search, but replace all URL of that kind to text if (!$forplaintext) $Text = preg_replace("/([^\]\='".'"'."]|^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism", '$1$2', $Text); else - $Text = preg_replace("(\[url\](.*?)\[\/url\])ism","$1",$Text); - - if ($tryoembed) - $Text = preg_replace_callback("/\[bookmark\=([^\]]*)\].*?\[\/bookmark\]/ism",'tryoembed',$Text); - - $Text = preg_replace("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism",'[url=$1]$2[/url]',$Text); + $Text = preg_replace("(\[url\](.*?)\[\/url\])ism"," $1 ",$Text); if ($tryoembed) $Text = preg_replace_callback("/\[url\]([$URLSearchString]*)\[\/url\]/ism",'tryoembed',$Text); @@ -786,7 +791,7 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal $Text = preg_replace_callback("/\[share(.*?)\](.*?)\[\/share\]/ism","bb_ShareAttributes",$Text); elseif ($simplehtml == 1) $Text = preg_replace_callback("/\[share(.*?)\](.*?)\[\/share\]/ism","bb_ShareAttributesSimple",$Text); - elseif ($simplehtml == 2) + elseif (($simplehtml == 2) OR ($simplehtml == 5)) $Text = preg_replace_callback("/\[share(.*?)\](.*?)\[\/share\]/ism","bb_ShareAttributesSimple2",$Text); elseif ($simplehtml == 3) $Text = preg_replace_callback("/(.*?)\[share(.*?)\](.*?)\[\/share\]/ism","bb_ShareAttributesDiaspora",$Text); diff --git a/include/html2plain.php b/include/html2plain.php index bcdf89c2cc..445a4ab347 100644 --- a/include/html2plain.php +++ b/include/html2plain.php @@ -191,9 +191,9 @@ function html2plain($html, $wraplength = 75, $compact = false) //node2bbcode($doc, 'img', array('title'=>'/(.+)/'), '$1', ''); //node2bbcode($doc, 'img', array(), '', ''); if (!$compact) - node2bbcode($doc, 'img', array('src'=>'/(.+)/'), '[img]$1', '[/img]'); + node2bbcode($doc, 'img', array('src'=>'/(.+)/'), ' [img]$1', '[/img] '); else - node2bbcode($doc, 'img', array('src'=>'/(.+)/'), '', ''); + node2bbcode($doc, 'img', array('src'=>'/(.+)/'), ' ', ' '); node2bbcode($doc, 'iframe', array('src'=>'/(.+)/'), ' $1 ', '', true);