friendica-addons/discourse/discourse.php

344 lines
11 KiB
PHP
Raw Normal View History

2019-11-24 00:43:59 +01:00
<?php
/**
* Name: Discourse Mail Connector
* Description: Improves mails from Discourse in mailing list mode
* Version: 0.1
* Author: Michael Vogel <http://pirati.ca/profile/heluecht>
*
*/
2019-11-24 00:43:59 +01:00
use Friendica\App;
use Friendica\Content\Text\Markdown;
2019-11-24 00:43:59 +01:00
use Friendica\Core\Hook;
use Friendica\Core\Logger;
use Friendica\Core\Protocol;
use Friendica\Core\Renderer;
use Friendica\Database\DBA;
use Friendica\DI;
use Friendica\Model\Contact;
use Friendica\Util\DateTimeFormat;
use Friendica\Util\Strings;
2019-11-24 00:43:59 +01:00
2019-11-26 08:21:49 +01:00
/* Todo:
* - Obtaining API tokens to be able to read non-public posts as well
* - Handling duplicates (possibly using some non-visible marker)
* - Fetching missing posts
* - Fetch topic information
* - Support mail-free mode when write tokens are available
* - Fix incomplete (relative) links (hosts are missing)
*/
2019-11-24 00:43:59 +01:00
/**
 * Registers the hook handlers provided by this addon.
 */
function discourse_install()
{
    // Hook name => handler function defined in this file
    $handlers = [
        'email_getmessage'        => 'discourse_email_getmessage',
        'connector_settings'      => 'discourse_settings',
        'connector_settings_post' => 'discourse_settings_post',
    ];

    foreach ($handlers as $hook => $function) {
        Hook::register($hook, __FILE__, $function);
    }
}
/**
 * Handler for the 'connector_settings' hook.
 *
 * Does nothing when there is no local user. Otherwise $data is overwritten
 * with this addon's connector entry: identifier, title, image, the stored
 * "enabled" flag and the rendered settings form.
 *
 * @param array $data Hook data, replaced by the connector description
 */
function discourse_settings(array &$data)
{
    $uid = DI::userSession()->getLocalUserId();
    if (!$uid) {
        return;
    }

    $enabled = intval(DI::pConfig()->get($uid, 'discourse', 'enabled'));

    $template = Renderer::getMarkupTemplate('connector_settings.tpl', 'addon/discourse/');

    $enabledField = [
        'enabled',
        DI::l10n()->t('Enable processing of Discourse mailing list mails'),
        $enabled,
        DI::l10n()->t('If enabled, incoming mails from Discourse will be improved so they look much better. To make it work, you have to configure the e-mail settings in Friendica. You also have to enable the mailing list mode in Discourse. Then you have to add the Discourse mail account as contact.'),
    ];

    $html = Renderer::replaceMacros($template, ['$enabled' => $enabledField]);

    $data = [
        'connector' => 'discourse',
        'title'     => DI::l10n()->t('Discourse'),
        'image'     => 'images/discourse.png',
        'enabled'   => $enabled,
        'html'      => $html,
    ];
}
/**
 * Handler for the 'connector_settings_post' hook.
 *
 * Stores the submitted "enabled" flag for the local user. Nothing is saved
 * when there is no local user or when the form was not submitted via the
 * 'discourse-submit' field.
 */
function discourse_settings_post()
{
    $uid = DI::userSession()->getLocalUserId();
    if (!$uid || empty($_POST['discourse-submit'])) {
        return;
    }

    // The 'enabled' field can be absent from the request (for example an
    // unchecked checkbox is not submitted by browsers); treat that as "off"
    // instead of reading an undefined array key.
    $enabled = intval($_POST['enabled'] ?? 0);

    DI::pConfig()->set($uid, 'discourse', 'enabled', $enabled);
}
/**
 * Handler for the 'email_getmessage' hook.
 *
 * Tries, in this order, to enrich the message with data from Discourse:
 *  1. the message URI looks like "topic/<topic>/<post>@<host>": fetch the post via the API,
 *  2. the message URI looks like "topic/<topic>@<host>": fetch the first post of the topic,
 *  3. a permalink found in the text part: fetch that post from the topic,
 *  4. fallback: extract body and author from the HTML part of the mail.
 *
 * Nothing happens when the item has no uid or the owner has not enabled the addon.
 *
 * @param array $message Message data from the mail import, modified in place
 */
function discourse_email_getmessage(&$message)
{
    if (empty($message['item']['uid'])) {
        return;
    }

    $isEnabled = DI::pConfig()->get($message['item']['uid'], 'discourse', 'enabled');
    if (!$isEnabled) {
        return;
    }

    // We do assume that all Discourse servers are running with SSL
    if (preg_match('=topic/(.*\d)/(.*\d)@(.*)=', $message['item']['uri'], $comment)) {
        if (discourse_fetch_post_from_api($message, $comment[2], $comment[3])) {
            Logger::info('Fetched comment via API (message-id mode)', ['host' => $comment[3], 'topic' => $comment[1], 'post' => $comment[2]]);
            return;
        }
    }

    if (preg_match('=topic/(.*\d)@(.*)=', $message['item']['uri'], $starter)) {
        if (discourse_fetch_topic_from_api($message, 'https://' . $starter[2], $starter[1], 1)) {
            Logger::info('Fetched starting post via API (message-id mode)', ['host' => $starter[2], 'topic' => $starter[1]]);
            return;
        }
    }

    // Search in the text part for the link to the discourse entry and the text body
    if (!empty($message['text'])) {
        $message = discourse_get_text($message);
    }

    $hasDiscourseLink = !empty($message['item']['plink'])
        && preg_match('=(http.*)/t/.*/(.*\d)/(.*\d)=', $message['item']['plink'], $link);
    if (!$hasDiscourseLink) {
        Logger::info('This is no Discourse post');
        return;
    }

    if (discourse_fetch_topic_from_api($message, $link[1], $link[2], $link[3])) {
        Logger::info('Fetched post via API (plink mode)', ['host' => $link[1], 'topic' => $link[2], 'id' => $link[3]]);
        return;
    }

    Logger::info('Fallback mode', ['plink' => $message['item']['plink']]);

    // Search in the HTML part for the discourse entry and the author profile
    if (!empty($message['html'])) {
        $message = discourse_get_html($message);
    }

    // Remove the title on comments, they don't serve any purpose there
    $isComment = $message['item']['thr-parent'] != $message['item']['uri'];
    if ($isComment) {
        unset($message['item']['title']);
    }
}
/**
 * Fetches a single post of a topic from "<host>/t/<topic>/<pid>.json".
 *
 * @param string     $host  Base address of the Discourse server (including scheme)
 * @param int|string $topic Topic id
 * @param int|string $pid   Post number inside the topic
 *
 * @return array|false The matching entry of "post_stream.posts", or false when
 *                     the request failed, the response could not be used, or
 *                     the post was not part of the response
 */
function discourse_fetch_post($host, $topic, $pid)
{
    $url = $host . '/t/' . $topic . '/' . $pid . '.json';

    $curlResult = DI::httpClient()->get($url);
    if (!$curlResult->isSuccess()) {
        Logger::info('No success', ['url' => $url]);
        return false;
    }

    $data = json_decode($curlResult->getBodyString(), true);

    // Invalid JSON or an unexpected structure must not reach the loop below,
    // otherwise foreach would be fed null.
    $posts = $data['post_stream']['posts'] ?? null;
    if (!is_array($posts)) {
        Logger::info('No posts in response', ['url' => $url]);
        return false;
    }

    foreach ($posts as $post) {
        // Loose comparison on purpose: $pid may be a numeric string
        if (!isset($post['post_number']) || $post['post_number'] != $pid) {
            /// @todo Possibly fetch missing posts here
            continue;
        }

        Logger::info('Got post data from topic', $post);
        return $post;
    }

    Logger::info('Post not found', ['host' => $host, 'topic' => $topic, 'pid' => $pid]);
    return false;
}
/**
 * Fetches a post by topic id and post number and merges it into the message.
 *
 * @param array      $message Message data, replaced by the processed message on success
 * @param string     $host    Base address of the Discourse server (including scheme)
 * @param int|string $topic   Topic id
 * @param int|string $pid     Post number inside the topic
 *
 * @return bool true when the post was fetched and applied, false otherwise
 */
function discourse_fetch_topic_from_api(&$message, $host, $topic, $pid)
{
    $post = discourse_fetch_post($host, $topic, $pid);

    $found = !empty($post);
    if ($found) {
        $message = discourse_process_post($message, $post, $host);
    }

    return $found;
}
2019-11-24 00:43:59 +01:00
/**
 * Fetches a post from "https://<host>/posts/<post>.json" and merges it into the message.
 *
 * @param array      $message Message data, replaced by the processed message on success
 * @param int|string $post    Post id
 * @param string     $host    Host name of the Discourse server (without scheme)
 *
 * @return bool true when the post was fetched and applied, false otherwise
 */
function discourse_fetch_post_from_api(&$message, $post, $host)
{
    $baseUrl = 'https://' . $host;

    $response = DI::httpClient()->get($baseUrl . '/posts/' . $post . '.json');
    if ($response->isSuccess()) {
        $decoded = json_decode($response->getBodyString(), true);
        if (!empty($decoded)) {
            $message = discourse_process_post($message, $decoded, $baseUrl);
            Logger::info('Got API data', $message);
            return true;
        }
    }

    return false;
}
/**
 * Builds a contact record for the author of a Discourse post and looks up its contact id.
 *
 * When a contact id is returned by the lookup, the contact row is updated with
 * the collected fields (without the photo) and the avatar is updated separately.
 *
 * @param array  $post     Post data as returned by the Discourse API
 * @param string $hostaddr Base address of the Discourse server (including scheme)
 *
 * @return array Contact fields, including 'id' as returned by the lookup
 */
function discourse_get_user($post, $hostaddr)
{
    $host = parse_url($hostaddr, PHP_URL_HOST);

    // Currently unused contact fields:
    // - display_username
    // - user_id
    $nick = $post['username'];

    // The display name falls back to the user name
    $name = empty($post['name']) ? $nick : $post['name'];

    $about = $post['user_title'];

    // Avatar templates without a scheme are relative to the server address
    $photo = str_replace('{size}', '300', $post['avatar_template']);
    if (!parse_url($post['avatar_template'], PHP_URL_SCHEME)) {
        $photo = $hostaddr . $photo;
    }

    $url = $hostaddr . '/u/' . $nick;

    $contact = [
        'uid'          => 0,
        'network'      => Protocol::DISCOURSE,
        'nick'         => $nick,
        'name'         => $name,
        'about'        => $about,
        'photo'        => $photo,
        'addr'         => $nick . '@' . $host,
        'contact-type' => Contact::TYPE_PERSON,
        'url'          => $url,
        'nurl'         => Strings::normaliseLink($url),
        'baseurl'      => $hostaddr,
    ];

    Logger::info('Contact', $contact);

    $contact['id'] = Contact::getIdForURL($url, 0, false, $contact);
    if (empty($contact['id'])) {
        return $contact;
    }

    // The photo is not written with the other fields, it is handled by updateAvatar()
    $fields = $contact;
    unset($fields['photo']);
    DBA::update('contact', $fields, ['id' => $contact['id']]);
    Contact::updateAvatar($contact['id'], $photo);

    return $contact;
}
/**
 * Applies the data of a Discourse post to a mail message.
 *
 * Sets the HTML body, the author fields, creation date, permalink and the
 * "topic/<topic>[/<post>]@<host>" style uri / parent-uri values.
 *
 * @param array  $message  Message data from the mail import
 * @param array  $post     Post data as returned by the Discourse API
 * @param string $hostaddr Base address of the Discourse server (including scheme)
 *
 * @return array The updated message
 */
function discourse_process_post($message, $post, $hostaddr)
{
    $host = parse_url($hostaddr, PHP_URL_HOST);

    $message['html'] = $post['cooked'];

    $contact = discourse_get_user($post, $hostaddr);
    $message['item']['author-id']     = $contact['id'];
    $message['item']['author-link']   = $contact['url'];
    $message['item']['author-name']   = $contact['name'];
    $message['item']['author-avatar'] = $contact['photo'];

    $message['item']['created'] = DateTimeFormat::utc($post['created_at']);
    $message['item']['plink']   = $hostaddr . '/t/' . $post['topic_slug'] . '/' . $post['topic_id'] . '/' . $post['post_number'];

    // URI of the starting post of the topic
    $topicUri = 'topic/' . $post['topic_id'] . '@' . $host;

    if ($post['post_number'] == 1) {
        $message['item']['parent-uri'] = $message['item']['uri'] = $topicUri;

        // Remove the Discourse forum name from the subject
        $pattern = '=\[.*\].*\s(\[.*\].*)=';
        if (!empty($message['item']['title']) && preg_match($pattern, $message['item']['title'])) {
            $message['item']['title'] = preg_replace($pattern, '$1', $message['item']['title']);
        }

        /// @ToDo Fetch thread information
    } else {
        $message['item']['uri'] = 'topic/' . $post['topic_id'] . '/' . $post['id'] . '@' . $host;
        unset($message['item']['title']);

        $parentUri = $topicUri;

        if (!empty($post['reply_to_post_number']) && $post['reply_to_post_number'] != 1) {
            $reply = discourse_fetch_post($hostaddr, $post['topic_id'], $post['reply_to_post_number']);

            // discourse_fetch_post() returns false on failure. Without this check
            // the parent would become the malformed "topic/<id>/@<host>", so the
            // post is attached to the starting post of the topic instead.
            if (!empty($reply['id'])) {
                $parentUri = 'topic/' . $post['topic_id'] . '/' . $reply['id'] . '@' . $host;
            } else {
                Logger::info('Replied post not found, using topic as parent', ['topic' => $post['topic_id'], 'reply_to' => $post['reply_to_post_number']]);
            }
        }

        $message['item']['parent-uri'] = $parentUri;
    }

    return $message;
}
/**
 * Fallback processing of the HTML part of a Discourse mail.
 *
 * Reduces the HTML body to the first 'div' preceding an 'hr' and, when an
 * author profile can be detected in the mail, sets the author fields.
 * When no such 'div' exists the HTML body is left unchanged.
 *
 * @param array $message Message data from the mail import ('html' must be set)
 *
 * @return array The updated message
 */
function discourse_get_html($message)
{
    $doc = new DOMDocument();
    $doc->preserveWhiteSpace = false;

    // NOTE(review): the 'HTML-ENTITIES' mbstring encoding is deprecated as of
    // PHP 8.2 - kept here to preserve the existing conversion behaviour.
    $html = mb_convert_encoding($message['html'], 'HTML-ENTITIES', "UTF-8");
    @$doc->loadHTML($html, LIBXML_HTML_NODEFDTD);

    $xpath = new DOMXPath($doc);

    // Fetch the first 'div' before the 'hr' - hopefully this fits for all systems
    $result = $xpath->query("//hr//preceding::div[1]");
    $body   = ($result !== false) ? $result->item(0) : null;

    if ($body !== null) {
        $doc2 = new DOMDocument();
        $doc2->appendChild($doc2->importNode($body, true));
        $message['html'] = $doc2->saveHTML();
        Logger::info('Found html body', ['html' => $message['html']]);
    } else {
        // item(0) is null for mails without that structure; importNode() would
        // fail on it, so the original HTML is kept.
        Logger::info('No html body found');
    }

    $profile = discourse_get_profile($xpath);
    if (!empty($profile['url'])) {
        Logger::info('Found profile', $profile);
        $message['item']['author-id']   = Contact::getIdForURL($profile['url'], 0, false, $profile);
        $message['item']['author-link'] = $profile['url'];

        // Only overwrite name and avatar with values that were really detected
        if (!empty($profile['name'])) {
            $message['item']['author-name'] = $profile['name'];
        }
        if (!empty($profile['photo'])) {
            $message['item']['author-avatar'] = $profile['photo'];
        }
    }

    return $message;
}
/**
 * Splits the text part of a Discourse mail into body and footer.
 *
 * The part before the "\n---\n" separator becomes the message text (converted
 * from Markdown to BBCode); a permalink found in the footer is stored as the
 * item's 'plink'. Without a separator the message is returned unchanged.
 *
 * @param array $message Message data from the mail import ('text' must be set)
 *
 * @return array The updated message
 */
function discourse_get_text($message)
{
    $text = str_replace("\r", '', $message['text']);

    $separatorPos = strpos($text, "\n---\n");

    // Covers both "not found" (false) and a separator at the very beginning (0)
    if (!$separatorPos) {
        Logger::info('No separator found', ['text' => $text]);
        return $message;
    }

    $body = trim(substr($text, 0, $separatorPos));
    Logger::info('Found text body', ['text' => $body]);
    $message['text'] = Markdown::toBBCode($body);

    $footer = substr($text, $separatorPos);
    Logger::info('Found footer', ['text' => $footer]);

    if (preg_match('=\((http.*/t/.*/.*\d/.*\d)\)=', $footer, $link)) {
        $message['item']['plink'] = $link[1];
        Logger::info('Found plink', ['plink' => $message['item']['plink']]);
    }

    return $message;
}
/**
 * Tries to detect the author profile in the HTML part of a Discourse mail.
 *
 * The avatar is the first image after a table cell that has 'src', 'title',
 * 'width' and 'height' attributes with equal width and height; its title is
 * used as the author name. The profile URL is the first link with text whose
 * 'href' contains "/<name>".
 *
 * @param DOMXPath $xpath XPath object for the parsed mail HTML
 *
 * @return array Empty when no avatar was found, otherwise 'photo' and 'name'
 *               plus 'url' when a matching link exists
 */
function discourse_get_profile($xpath)
{
    $profile = [];

    foreach ($xpath->query("//td//following::img") as $image) {
        $src    = $image->getAttribute('src');
        $title  = $image->getAttribute('title');
        $width  = $image->getAttribute('width');
        $height = $image->getAttribute('height');

        if (!empty($src) && !empty($title) && !empty($width) && !empty($height) && ($width == $height)) {
            $profile = ['photo' => $src, 'name' => $title];
            break;
        }
    }

    // Without a name there is nothing to match links against. Searching for
    // just "/" would accept nearly every link and yield a profile URL that
    // has neither name nor photo.
    if (empty($profile['name'])) {
        return $profile;
    }

    $needle = '/' . $profile['name'];

    foreach ($xpath->query("//td//following::a") as $link) {
        if (empty(trim($link->textContent))) {
            continue;
        }

        $href = $link->getAttribute('href');

        // Strict check so that a match at offset 0 is not mistaken for "not found"
        if (!empty($href) && strpos($href, $needle) !== false) {
            $profile['url'] = $href;
            break;
        }
    }

    return $profile;
}