2019-11-24 00:43:59 +01:00
< ? php
/**
* Name : Discourse Mail Connector
* Description : Improves mails from Discourse in mailing list mode
* Version : 0.1
* Author : Michael Vogel <http://pirati.ca/profile/heluecht>
*
*/
2020-03-04 22:07:04 +01:00
2019-11-24 00:43:59 +01:00
use Friendica\App ;
2020-03-04 22:07:04 +01:00
use Friendica\Content\Text\Markdown ;
2019-11-24 00:43:59 +01:00
use Friendica\Core\Hook ;
use Friendica\Core\Logger ;
2019-11-25 12:14:22 +01:00
use Friendica\Core\Protocol ;
2020-03-04 22:07:04 +01:00
use Friendica\Core\Renderer ;
2019-11-25 12:14:22 +01:00
use Friendica\Database\DBA ;
2020-01-18 16:50:56 +01:00
use Friendica\DI ;
2019-11-25 12:14:22 +01:00
use Friendica\Model\Contact ;
2020-03-04 22:07:04 +01:00
use Friendica\Util\DateTimeFormat ;
2019-11-25 12:14:22 +01:00
use Friendica\Util\Strings ;
2019-11-24 00:43:59 +01:00
2019-11-26 08:21:49 +01:00
/* Todo:
 * - Obtain API tokens to be able to read non-public posts as well
 * - Handle duplicates (possibly using some non-visible marker)
 * - Fetch missing posts
 * - Fetch topic information
 * - Support a mail-free mode when write tokens are available
 * - Fix incomplete (relative) links (hosts are missing)
 */
2019-11-24 00:43:59 +01:00
/**
 * Registers the hook callbacks of this addon.
 *
 * Each hook name is mapped to the function in this file that handles it.
 */
function discourse_install()
{
	$callbacks = [
		'email_getmessage'        => 'discourse_email_getmessage',
		'connector_settings'      => 'discourse_settings',
		'connector_settings_post' => 'discourse_settings_post',
	];

	foreach ($callbacks as $hook => $function) {
		Hook::register($hook, __FILE__, $function);
	}
}
2019-11-25 23:58:01 +01:00
/**
 * Appends the Discourse settings form to the connector settings output.
 *
 * Does nothing when no local user is logged in.
 *
 * @param App    $a Application object (not used in this function)
 * @param string $s Settings output, passed by reference; the rendered form is appended
 */
function discourse_settings(App $a, &$s)
{
	if (!local_user()) {
		return;
	}

	// Stored per-user flag, normalised to an integer for the template
	$enabled = intval(DI::pConfig()->get(local_user(), 'discourse', 'enabled'));

	$template = Renderer::getMarkupTemplate('settings.tpl', 'addon/discourse/');

	$macros = [
		'$title'   => DI::l10n()->t('Discourse'),
		'$enabled' => [
			'enabled',
			DI::l10n()->t('Enable processing of Discourse mailing list mails'),
			$enabled,
			DI::l10n()->t('If enabled, incoming mails from Discourse will be improved so they look much better. To make it work, you have to configure the e-mail settings in Friendica. You also have to enable the mailing list mode in Discourse. Then you have to add the Discourse mail account as contact.'),
		],
		'$submit'  => DI::l10n()->t('Save Settings'),
	];

	$s = $s . Renderer::replaceMacros($template, $macros);
}
2019-11-25 23:58:01 +01:00
/**
 * Stores the submitted Discourse settings for the local user.
 *
 * Does nothing when no local user is logged in or when the request does not
 * carry the 'discourse-submit' field.
 *
 * @param App $a Application object (not used in this function)
 */
function discourse_settings_post(App $a)
{
	if (!local_user() || empty($_POST['discourse-submit'])) {
		return;
	}

	// The 'enabled' key may be missing from the POST data (browsers usually
	// omit unchecked checkboxes), so it must not be read unconditionally.
	$enabled = empty($_POST['enabled']) ? 0 : intval($_POST['enabled']);

	DI::pConfig()->set(local_user(), 'discourse', 'enabled', $enabled);
}
/**
 * Hook callback for 'email_getmessage': tries to replace the content of a
 * Discourse mailing list mail with data fetched from the Discourse server.
 *
 * The lookup is attempted in this order:
 *  1. message-id of a comment ("topic/<topic>/<post>@<host>"),
 *  2. message-id of a starting post ("topic/<topic>@<host>"),
 *  3. the permalink found in the text part of the mail,
 *  4. fallback: extraction of body and author from the HTML part.
 *
 * @param App   $a       Application object (not used in this function)
 * @param array $message Mail data with the keys 'item', 'text' and 'html' as
 *                       used below; passed by reference and modified in place
 */
function discourse_email_getmessage(App $a, &$message)
{
	// Only act for known users that have enabled the addon
	if (empty($message['item']['uid']) || !DI::pConfig()->get($message['item']['uid'], 'discourse', 'enabled')) {
		return;
	}

	// We do assume that all Discourse servers are running with SSL
	if (preg_match('=topic/(.*\d)/(.*\d)@(.*)=', $message['item']['uri'], $matches)) {
		list(, $topic, $post, $host) = $matches;
		if (discourse_fetch_post_from_api($message, $post, $host)) {
			Logger::info('Fetched comment via API (message-id mode)', ['host' => $host, 'topic' => $topic, 'post' => $post]);
			return;
		}
	}

	if (preg_match('=topic/(.*\d)@(.*)=', $message['item']['uri'], $matches)) {
		list(, $topic, $host) = $matches;
		if (discourse_fetch_topic_from_api($message, 'https://' . $host, $topic, 1)) {
			Logger::info('Fetched starting post via API (message-id mode)', ['host' => $host, 'topic' => $topic]);
			return;
		}
	}

	// Search in the text part for the link to the discourse entry and the text body
	if (!empty($message['text'])) {
		$message = discourse_get_text($message);
	}

	$has_plink = !empty($message['item']['plink'])
		&& preg_match('=(http.*)/t/.*/(.*\d)/(.*\d)=', $message['item']['plink'], $matches);
	if (!$has_plink) {
		Logger::info('This is no Discourse post');
		return;
	}

	list(, $host, $topic, $pid) = $matches;
	if (discourse_fetch_topic_from_api($message, $host, $topic, $pid)) {
		Logger::info('Fetched post via API (plink mode)', ['host' => $host, 'topic' => $topic, 'id' => $pid]);
		return;
	}

	Logger::info('Fallback mode', ['plink' => $message['item']['plink']]);

	// Search in the HTML part for the discourse entry and the author profile
	if (!empty($message['html'])) {
		$message = discourse_get_html($message);
	}

	// Remove the title on comments, they don't serve any purpose there
	$is_comment = ($message['item']['parent-uri'] != $message['item']['uri']);
	if ($is_comment) {
		unset($message['item']['title']);
	}
}
2019-11-25 12:14:22 +01:00
/**
 * Fetches a single post of a topic from the JSON endpoint
 * "<host>/t/<topic>/<pid>.json".
 *
 * @param string     $host  Base address of the Discourse server, including the scheme
 * @param string|int $topic Topic id
 * @param string|int $pid   Post number inside the topic
 *
 * @return array|false The post entry whose 'post_number' equals $pid, or
 *                     false when the request fails, the response carries no
 *                     usable post list, or the post is not in the list
 */
function discourse_fetch_post($host, $topic, $pid)
{
	$url = $host . '/t/' . $topic . '/' . $pid . '.json';

	$curlResult = DI::httpRequest()->get($url);
	if (!$curlResult->isSuccess()) {
		Logger::info('No success', ['url' => $url]);
		return false;
	}

	$data = json_decode($curlResult->getBody(), true);

	// json_decode() returns null on invalid JSON, and a valid document may
	// lack the post list; iterating over that would raise a warning.
	if (!is_array($data) || empty($data['post_stream']['posts']) || !is_array($data['post_stream']['posts'])) {
		Logger::info('No post data in response', ['url' => $url]);
		return false;
	}

	foreach ($data['post_stream']['posts'] as $post) {
		if (!isset($post['post_number']) || $post['post_number'] != $pid) {
			/// @todo Possibly fetch missing posts here
			continue;
		}

		Logger::info('Got post data from topic', $post);
		return $post;
	}

	Logger::info('Post not found', ['host' => $host, 'topic' => $topic, 'pid' => $pid]);
	return false;
}
2019-11-25 12:14:22 +01:00
/**
 * Fetches a post via the topic endpoint and merges its data into the message.
 *
 * @param array      $message Mail data, passed by reference; replaced by the
 *                            processed message when the post was fetched
 * @param string     $host    Base address of the Discourse server, including the scheme
 * @param string|int $topic   Topic id
 * @param string|int $pid     Post number inside the topic
 *
 * @return bool true when the post was fetched and processed, false otherwise
 */
function discourse_fetch_topic_from_api(&$message, $host, $topic, $pid)
{
	$post = discourse_fetch_post($host, $topic, $pid);

	$found = !empty($post);
	if ($found) {
		$message = discourse_process_post($message, $post, $host);
	}

	return $found;
}
2019-11-24 00:43:59 +01:00
/**
 * Fetches a post by its id from "https://<host>/posts/<post>.json" and merges
 * its data into the message.
 *
 * @param array      $message Mail data, passed by reference; replaced by the
 *                            processed message when the post was fetched
 * @param string|int $post    Post id
 * @param string     $host    Host name of the Discourse server (without scheme)
 *
 * @return bool true when the post was fetched and processed, false otherwise
 */
function discourse_fetch_post_from_api(&$message, $post, $host)
{
	$hostaddr = 'https://' . $host;

	$curlResult = DI::httpRequest()->get($hostaddr . '/posts/' . $post . '.json');
	if ($curlResult->isSuccess()) {
		$data = json_decode($curlResult->getBody(), true);
		if (!empty($data)) {
			$message = discourse_process_post($message, $data, $hostaddr);
			Logger::info('Got API data', $message);
			return true;
		}
	}

	return false;
}
2019-11-25 12:14:22 +01:00
/**
 * Builds the contact fields for the author of a Discourse post and looks up
 * the matching contact id.
 *
 * When Contact::getIdForURL() yields an id, the contact row is updated with
 * the collected fields (without 'photo') and the avatar is updated separately.
 *
 * Currently unused fields of the post data:
 * - display_username
 * - user_id
 *
 * @param array  $post     Post data; reads 'username', 'name', 'user_title'
 *                         and 'avatar_template'
 * @param string $hostaddr Base address of the Discourse server, including the scheme
 *
 * @return array Contact fields, including 'id' and 'photo'
 */
function discourse_get_user($post, $hostaddr)
{
	$host = parse_url($hostaddr, PHP_URL_HOST);

	// The avatar template is either absolute or relative to the server
	$photo = str_replace('{size}', '300', $post['avatar_template']);
	if (!parse_url($post['avatar_template'], PHP_URL_SCHEME)) {
		$photo = $hostaddr . $photo;
	}

	$contact = [];
	$contact['uid'] = 0;
	$contact['network'] = Protocol::DISCOURSE;
	// The user name is the default display name, the real name wins if present
	$contact['name'] = $contact['nick'] = $post['username'];
	if (!empty($post['name'])) {
		$contact['name'] = $post['name'];
	}
	$contact['about'] = $post['user_title'];
	$contact['photo'] = $photo;
	$contact['addr'] = $contact['nick'] . '@' . $host;
	$contact['contact-type'] = Contact::TYPE_PERSON;
	$contact['url'] = $hostaddr . '/u/' . $contact['nick'];
	$contact['nurl'] = Strings::normaliseLink($contact['url']);
	$contact['baseurl'] = $hostaddr;

	Logger::info('Contact', $contact);

	$contact['id'] = Contact::getIdForURL($contact['url'], 0, false, $contact);
	if (empty($contact['id'])) {
		return $contact;
	}

	// 'photo' is kept out of the row update and handled by updateAvatar()
	$avatar = $contact['photo'];
	unset($contact['photo']);
	DBA::update('contact', $contact, ['id' => $contact['id']]);
	Contact::updateAvatar($contact['id'], $avatar);
	$contact['photo'] = $avatar;

	return $contact;
}
/**
 * Merges the data of a fetched Discourse post into the mail message.
 *
 * Sets the HTML body, the author fields, the creation date, the permalink and
 * the 'uri' / 'parent-uri' of the item. Starting posts (post number 1) keep
 * their title, with a leading forum name removed; comments lose their title.
 *
 * @param array  $message  Mail data with the keys 'item' and 'html'
 * @param array  $post     Post data; reads 'cooked', 'created_at',
 *                         'topic_slug', 'topic_id', 'post_number', 'id' and
 *                         'reply_to_post_number'
 * @param string $hostaddr Base address of the Discourse server, including the scheme
 *
 * @return array The updated message
 */
function discourse_process_post($message, $post, $hostaddr)
{
	$host = parse_url($hostaddr, PHP_URL_HOST);

	// URI of the starting post of the topic
	$topic_uri = 'topic/' . $post['topic_id'] . '@' . $host;

	$message['html'] = $post['cooked'];

	$contact = discourse_get_user($post, $hostaddr);
	$message['item']['author-id'] = $contact['id'];
	$message['item']['author-link'] = $contact['url'];
	$message['item']['author-name'] = $contact['name'];
	$message['item']['author-avatar'] = $contact['photo'];
	$message['item']['created'] = DateTimeFormat::utc($post['created_at']);
	$message['item']['plink'] = $hostaddr . '/t/' . $post['topic_slug'] . '/' . $post['topic_id'] . '/' . $post['post_number'];

	if ($post['post_number'] == 1) {
		$message['item']['parent-uri'] = $message['item']['uri'] = $topic_uri;

		// Remove the Discourse forum name from the subject
		$pattern = '=\[.*\].*\s(\[.*\].*)=';
		if (!empty($message['item']['title']) && preg_match($pattern, $message['item']['title'])) {
			$message['item']['title'] = preg_replace($pattern, '$1', $message['item']['title']);
		}
		/// @ToDo Fetch thread information
	} else {
		$message['item']['uri'] = 'topic/' . $post['topic_id'] . '/' . $post['id'] . '@' . $host;
		unset($message['item']['title']);

		// Default: the comment hangs directly below the starting post
		$parent_uri = $topic_uri;

		if (!empty($post['reply_to_post_number']) && $post['reply_to_post_number'] != 1) {
			$reply = discourse_fetch_post($hostaddr, $post['topic_id'], $post['reply_to_post_number']);
			if (!empty($reply['id'])) {
				$parent_uri = 'topic/' . $post['topic_id'] . '/' . $reply['id'] . '@' . $host;
			} else {
				// discourse_fetch_post() returns false on failure; building
				// the URI from it would give "topic/<id>/@<host>".
				Logger::info('Parent post not found, using the topic as parent', ['topic' => $post['topic_id'], 'reply_to' => $post['reply_to_post_number']]);
			}
		}

		$message['item']['parent-uri'] = $parent_uri;
	}

	return $message;
}
2019-11-25 12:14:22 +01:00
/**
 * Fallback extraction: takes the post body and the author profile from the
 * HTML part of the mail.
 *
 * The body is the first 'div' preceding an 'hr'. When no such element exists
 * the HTML part is left unchanged. Author fields are only set when
 * discourse_get_profile() returns a profile with a 'url'.
 *
 * @param array $message Mail data with the keys 'html' and 'item'
 *
 * @return array The updated message
 */
function discourse_get_html($message)
{
	$doc = new DOMDocument();
	$doc->preserveWhiteSpace = false;

	// Convert to entities so that loadHTML() does not misinterpret UTF-8 input
	$html = mb_convert_encoding($message['html'], 'HTML-ENTITIES', 'UTF-8');
	@$doc->loadHTML($html, LIBXML_HTML_NODEFDTD);

	$xpath = new DOMXPath($doc);

	// Fetch the first 'div' before the 'hr' - hopefully this fits for all systems
	$result = $xpath->query('//hr//preceding::div[1]');
	if (($result instanceof DOMNodeList) && ($result->length > 0)) {
		$doc2 = new DOMDocument();
		$div = $doc2->importNode($result->item(0), true);
		$doc2->appendChild($div);

		$message['html'] = $doc2->saveHTML();
		Logger::info('Found html body', ['html' => $message['html']]);
	} else {
		// Without a matching node, item(0) is null and importNode() would fail
		Logger::info('No html body found');
	}

	$profile = discourse_get_profile($xpath);
	if (!empty($profile['url'])) {
		Logger::info('Found profile', $profile);
		$message['item']['author-id'] = Contact::getIdForURL($profile['url'], 0, false, $profile);
		$message['item']['author-link'] = $profile['url'];
		$message['item']['author-name'] = $profile['name'];
		$message['item']['author-avatar'] = $profile['photo'];
	}

	return $message;
}
2019-11-25 12:14:22 +01:00
/**
 * Splits the text part of the mail into the post body and the footer.
 *
 * The separator is a line that only contains "---". The part before it is
 * converted from Markdown to BBCode and stored as the new text; the footer
 * is searched for the permalink of the post, which is stored in
 * $message['item']['plink'].
 *
 * @param array $message Mail data with the keys 'text' and 'item'
 *
 * @return array The updated message; unchanged when no separator follows a
 *               text body
 */
function discourse_get_text($message)
{
	// Normalise line endings so the separator search is independent of CRLF
	$text = str_replace("\r", '', $message['text']);

	// strpos() returns false when nothing is found; a separator at offset 0
	// means there is no body in front of it. Both cases leave the message as is.
	$pos = strpos($text, "\n---\n");
	if ($pos === false || $pos === 0) {
		Logger::info('No separator found', ['text' => $text]);
		return $message;
	}

	$message['text'] = trim(substr($text, 0, $pos));
	Logger::info('Found text body', ['text' => $message['text']]);

	$message['text'] = Markdown::toBBCode($message['text']);

	$footer = substr($text, $pos);
	Logger::info('Found footer', ['text' => $footer]);

	// The permalink is a Markdown link target: "(http.../t/<slug>/<topic>/<post>)"
	if (preg_match('=\((http.*/t/.*/.*\d/.*\d)\)=', $footer, $link)) {
		$message['item']['plink'] = $link[1];
		Logger::info('Found plink', ['plink' => $message['item']['plink']]);
	}

	return $message;
}
/**
 * Extracts the author profile from the HTML part of a Discourse mail.
 *
 * The avatar is the first 'img' following a 'td' that has 'src' and 'title'
 * attributes and equal 'width' and 'height'. The profile link is the first
 * 'a' following a 'td' that has text content and whose 'href' contains
 * "/<name>" after its first character.
 *
 * @param DOMXPath $xpath XPath object of the parsed HTML part
 *
 * @return array Empty when no avatar was found; otherwise 'photo' and 'name',
 *               plus 'url' when a matching profile link exists
 */
function discourse_get_profile($xpath)
{
	// Collects the attributes of a node as name => value pairs
	$attributes = function ($node) {
		$attr = [];
		foreach ($node->attributes as $attribute) {
			$attr[$attribute->name] = $attribute->value;
		}
		return $attr;
	};

	$profile = [];

	foreach ($xpath->query('//td//following::img') as $node) {
		$attr = $attributes($node);

		if (!empty($attr['src']) && !empty($attr['title'])
			&& !empty($attr['width']) && !empty($attr['height'])
			&& ($attr['width'] == $attr['height'])) {
			$profile = ['photo' => $attr['src'], 'name' => $attr['title']];
			break;
		}
	}

	// Without a name the link search below would look for a bare '/' and
	// accept the first link with text as profile address.
	if (empty($profile['name'])) {
		return $profile;
	}

	$needle = '/' . $profile['name'];

	foreach ($xpath->query('//td//following::a') as $node) {
		if (empty(trim($node->textContent)) || !$node->attributes->length) {
			continue;
		}

		$attr = $attributes($node);
		if (empty($attr['href'])) {
			continue;
		}

		// A match at offset 0 (href starting with "/<name>") is not accepted
		$pos = strpos($attr['href'], $needle);
		if (($pos !== false) && ($pos > 0)) {
			$profile['url'] = $attr['href'];
			break;
		}
	}

	return $profile;
}