From f6320f3319a8009c7bdf51035b226f0ea688d8bd Mon Sep 17 00:00:00 2001 From: Michael Date: Fri, 24 Feb 2012 07:11:26 +0100 Subject: [PATCH] Encoding for incoming mails. Fixed invalid message id (with side effect at the moment). Sending mail as text mail. Encoding name when importing mail contact. --- include/Scrape.php | 15 +++- include/delivery.php | 41 ++++++--- include/email.php | 10 ++- include/html2plain.php | 180 +++++++++++++++++++++++++++++++++++++++ include/notifier.php | 26 ++++-- include/poller.php | 30 ++++++- include/quoteconvert.php | 132 ++++++++++++++++++++++++++++ 7 files changed, 411 insertions(+), 23 deletions(-) create mode 100644 include/html2plain.php create mode 100644 include/quoteconvert.php diff --git a/include/Scrape.php b/include/Scrape.php index 52405ae2..e98978f3 100755 --- a/include/Scrape.php +++ b/include/Scrape.php @@ -445,10 +445,19 @@ function probe_url($url, $mode = PROBE_NORMAL) { $adr = imap_rfc822_parse_adrlist($x->to,''); if(isset($adr)) { foreach($adr as $feadr) { - if((strcasecmp($feadr->mailbox,$name) == 0) - &&(strcasecmp($feadr->host,$phost) == 0) + if((strcasecmp($feadr->mailbox,$name) == 0) + &&(strcasecmp($feadr->host,$phost) == 0) && (strlen($feadr->personal))) { - $vcard['fn'] = notags($feadr->personal); + + $personal = imap_mime_header_decode($feadr->personal); + $vcard['fn'] = ""; + foreach($personal as $perspart) + if ($perspart->charset != "default") + $vcard['fn'] .= iconv($perspart->charset, 'UTF-8//IGNORE', $perspart->text); + else + $vcard['fn'] .= $perspart->text; + + $vcard['fn'] = notags($vcard['fn']); } } } diff --git a/include/delivery.php b/include/delivery.php index 677d8938..c7c2e8a3 100755 --- a/include/delivery.php +++ b/include/delivery.php @@ -1,6 +1,7 @@ ' . "\n"; + + // for testing purposes: Collect exported mails + $file = tempnam("/tmp/friendica/", "mail-out-"); + file_put_contents($file, json_encode($it)); + + $headers .= 'Message-Id: <' . cleanupmessageid($it['uri']). '>' . "\n"; + + //logger("Mail: uri: ".$it['uri']." parent-uri ".$it['parent-uri'], LOGGER_DEBUG); + //logger("Mail: Data: ".print_r($it, true), LOGGER_DEBUG); + //logger("Mail: Data: ".print_r($it, true), LOGGER_DATA); + if($it['uri'] !== $it['parent-uri']) { - $header .= 'References: <' . $it['parent-uri'] . '>' . "\n"; + $headers .= 'References: <' . cleanupmessageid($it['parent-uri']) . '>' . "\n"; if(! strlen($it['title'])) { $r = q("SELECT `title` FROM `item` WHERE `parent-uri` = '%s' LIMIT 1", dbesc($it['parent-uri']) @@ -451,10 +462,12 @@ function delivery_run($argv, $argc){ } } $headers .= 'MIME-Version: 1.0' . "\n"; - $headers .= 'Content-Type: text/html; charset=UTF-8' . "\n"; + //$headers .= 'Content-Type: text/html; charset=UTF-8' . "\n"; + $headers .= 'Content-Type: text/plain; charset=UTF-8' . "\n"; $headers .= 'Content-Transfer-Encoding: 8bit' . "\n\n"; $html = prepare_body($it); - $message = '' . $html . ''; + //$message = '' . $html . ''; + $message = html2plain($html); logger('notifier: email delivery to ' . $addr); mail($addr, $subject, $message, $headers); } @@ -473,7 +486,7 @@ function delivery_run($argv, $argc){ if((! $contact['pubkey']) && (! $public_message)) break; - + if($target_item['verb'] === ACTIVITY_DISLIKE) { // unsupported break; @@ -514,6 +527,14 @@ function delivery_run($argv, $argc){ return; } +function cleanupmessageid($messageid) { + global $a; + + if (!strpos($messageid, '@')) + $messageid = str_replace(":", ".", $messageid).'@'.$a->get_hostname(); + return($messageid); +} + if (array_search(__file__,get_included_files())===0){ delivery_run($argv,$argc); killme(); diff --git a/include/email.php b/include/email.php index 3e6a8186..45268226 100755 --- a/include/email.php +++ b/include/email.php @@ -79,6 +79,10 @@ function email_get_msg($mbox,$uid) { if(! $struc) return $ret; + // for testing purposes: Collect imported mails + // $file = tempnam("/tmp/friendica2/", "mail-in-"); + // file_put_contents($file, json_encode($struc)); + if(! $struc->parts) { $ret['body'] = email_get_part($mbox,$uid,$struc,0); } @@ -99,13 +103,17 @@ function email_get_part($mbox,$uid,$p,$partno) { // $partno = '1', '2', '2.1', '2.1.3', etc for multipart, 0 if simple global $htmlmsg,$plainmsg,$charset,$attachments; - echo $partno; + //echo $partno; // DECODE DATA $data = ($partno) ? @imap_fetchbody($mbox,$uid,$partno, FT_UID|FT_PEEK) : @imap_body($mbox,$uid,FT_UID|FT_PEEK); + // for testing purposes: Collect imported mails + // $file = tempnam("/tmp/friendica2/", "mail-body-"); + // file_put_contents($file, $data); + // Any part may be encoded, even plain text messages, so check everything. if ($p->encoding==4) $data = quoted_printable_decode($data); diff --git a/include/html2plain.php b/include/html2plain.php new file mode 100644 index 00000000..2a4cf663 --- /dev/null +++ b/include/html2plain.php @@ -0,0 +1,180 @@ + 0) and strlen($line) > $wraplen) { + $newline = trim(substr($line, 0, $pos)); + if ($level > 0) + $newline = str_repeat(">", $level).' '.$newline; + + $newlines[] = $newline." "; + $line = substr($line, $pos+1); + } + + } while ((strlen($line) > $wraplen) and !($oldline == $line)); + + if ($level > 0) + $line = str_repeat(">", $level).' '.$line; + + $newlines[] = $line; + + + return(implode($newlines, "\n")); +} + +function quotelevel($message) +{ + $lines = explode("\n", $message); + + $newlines = array(); + $level = 0; + foreach($lines as $line) {; + $line = trim($line); + $startquote = false; + while (strpos("*".$line, '[quote]') > 0) { + $level++; + $pos = strpos($line, '[quote]'); + $line = substr($line, 0, $pos).substr($line, $pos+7); + $startquote = true; + } + + $currlevel = $level; + + while (strpos("*".$line, '[/quote]') > 0) { + $level--; + if ($level < 0) + $level = 0; + + $pos = strpos($line, '[/quote]'); + $line = substr($line, 0, $pos).substr($line, $pos+8); + } + + if (!$startquote or ($line != '')) + $newlines[] = breaklines($line, $currlevel); + } + return(implode($newlines, "\n")); +} + +function html2plain($html) +{ + global $lang; + + $message = str_replace("\r", "", $html); + + $doc = new DOMDocument(); + $doc->preserveWhiteSpace = false; + + $message = mb_convert_encoding($message, 'HTML-ENTITIES', "UTF-8"); + + @$doc->loadHTML($message); + + $xpath = new DomXPath($doc); + $list = $xpath->query("//pre"); + foreach ($list as $node) { + $node->nodeValue = str_replace("\n", "\r", $node->nodeValue); + } + + $message = $doc->saveHTML(); + $message = str_replace(array("\n<", ">\n", "\r", "\n", "\xC3\x82\xC2\xA0"), array("<", ">", "
", " ", ""), $message); + $message = preg_replace('= [\s]*=i', " ", $message); + + // nach ... suchen, die ... miteinander vergleichen und bei Gleichheit durch ein einzelnes ... ersetzen. + $pattern = '/(.*?)<\/a>/is'; + preg_match_all($pattern, $message, $result, PREG_SET_ORDER); + + foreach ($result as $treffer) { + if ($treffer[1] == $treffer[2]) { + $search = ''.$treffer[1].''; + $message = str_replace($search, $treffer[1], $message); + } + } + @$doc->loadHTML($message); + + node2bbcode($doc, 'html', array(), '', ''); + node2bbcode($doc, 'body', array(), '', ''); + + // MyBB-Auszeichnungen + node2bbcode($doc, 'span', array('style'=>'text-decoration: underline;'), '_', '_'); + node2bbcode($doc, 'span', array('style'=>'font-style: italic;'), '/', '/'); + node2bbcode($doc, 'span', array('style'=>'font-weight: bold;'), '*', '*'); + + node2bbcode($doc, 'strong', array(), '*', '*'); + node2bbcode($doc, 'b', array(), '*', '*'); + node2bbcode($doc, 'i', array(), '/', '/'); + node2bbcode($doc, 'u', array(), '_', '_'); + + node2bbcode($doc, 'blockquote', array(), '[quote]', "[/quote]\n"); + + node2bbcode($doc, 'br', array(), "\n", ''); + + node2bbcode($doc, 'span', array(), "", ""); + node2bbcode($doc, 'pre', array(), "", ""); + node2bbcode($doc, 'div', array(), "\r", "\r"); + node2bbcode($doc, 'p', array(), "\n", "\n"); + + //node2bbcode($doc, 'ul', array(), "\n[list]", "[/list]\n"); + //node2bbcode($doc, 'ol', array(), "\n[list=1]", "[/list]\n"); + node2bbcode($doc, 'li', array(), "\n* ", "\n"); + + node2bbcode($doc, 'hr', array(), str_repeat("-", 70), ""); + + node2bbcode($doc, 'tr', array(), "\n", ""); + node2bbcode($doc, 'td', array(), "\t", ""); + + node2bbcode($doc, 'h1', array(), "\n\n*", "*\n"); + node2bbcode($doc, 'h2', array(), "\n\n*", "*\n"); + node2bbcode($doc, 'h3', array(), "\n\n*", "*\n"); + node2bbcode($doc, 'h4', array(), "\n\n*", "*\n"); + node2bbcode($doc, 'h5', array(), "\n\n*", "*\n"); + node2bbcode($doc, 'h6', array(), "\n\n*", "*\n"); + + node2bbcode($doc, 'a', array('href'=>'/(.+)/'), ' $1', '', true); + node2bbcode($doc, 'img', array('alt'=>'/(.+)/'), '$1', ''); + node2bbcode($doc, 'img', array('title'=>'/(.+)/'), '$1', ''); + node2bbcode($doc, 'img', array(), '', ''); + node2bbcode($doc, 'img', array('src'=>'/(.+)/'), '[img]$1', '[/img]'); + + $message = $doc->saveHTML(); + + $message = str_replace("[img]", "", $message); + $message = str_replace("[/img]", "", $message); + + // was ersetze ich da? + // Irgendein stoerrisches UTF-Zeug + $message = str_replace(chr(194).chr(160), ' ', $message); + + $message = str_replace(" ", " ", $message); + + // Aufeinanderfolgende DIVs + $message = preg_replace('=\r *\r=i', "\n", $message); + $message = str_replace("\r", "\n", $message); + + $message = strip_tags($message); + + $message = html_entity_decode($message, ENT_QUOTES, 'UTF-8'); + + do { + $oldmessage = $message; + $message = str_replace("\n\n\n", "\n\n", $message); + } while ($oldmessage != $message); + + $message = quotelevel(trim($message)); + + return(trim($message)); +} +?> diff --git a/include/notifier.php b/include/notifier.php index 37bc6dae..3ad433af 100755 --- a/include/notifier.php +++ b/include/notifier.php @@ -2,6 +2,7 @@ require_once("boot.php"); require_once('include/queue_fn.php'); +require_once('include/html2plain.php'); /* * This file was at one time responsible for doing all deliveries, but this caused @@ -633,7 +634,7 @@ function notifier_run($argv, $argc){ ); if($r1 && $r1[0]['reply_to']) $reply_to = $r1[0]['reply_to']; - + $subject = (($it['title']) ? email_header_encode($it['title'],'UTF-8') : t("\x28no subject\x29")) ; // only expose our real email address to true friends @@ -646,10 +647,14 @@ function notifier_run($argv, $argc){ if($reply_to) $headers .= 'Reply-to: ' . $reply_to . "\n"; - $headers .= 'Message-id: <' . $it['uri'] . '>' . "\n"; + // for testing purposes: Collect exported mails + $file = tempnam("/tmp/friendica/", "mail-out2-"); + file_put_contents($file, json_encode($it)); + + $headers .= 'Message-Id: <' . cleanupmessageid($it['uri']) . '>' . "\n"; if($it['uri'] !== $it['parent-uri']) { - $header .= 'References: <' . $it['parent-uri'] . '>' . "\n"; + $headers .= 'References: <' . cleanupmessageid($it['parent-uri']) . '>' . "\n"; if(! strlen($it['title'])) { $r = q("SELECT `title` FROM `item` WHERE `parent-uri` = '%s' LIMIT 1", dbesc($it['parent-uri']) @@ -667,10 +672,12 @@ function notifier_run($argv, $argc){ } $headers .= 'MIME-Version: 1.0' . "\n"; - $headers .= 'Content-Type: text/html; charset=UTF-8' . "\n"; + //$headers .= 'Content-Type: text/html; charset=UTF-8' . "\n"; + $headers .= 'Content-Type: text/plain; charset=UTF-8' . "\n"; $headers .= 'Content-Transfer-Encoding: 8bit' . "\n\n"; $html = prepare_body($it); - $message = '' . $html . ''; + //$message = '' . $html . ''; + $message = html2plain($html); logger('notifier: email delivery to ' . $addr); mail($addr, $subject, $message, $headers); } @@ -834,6 +841,15 @@ function notifier_run($argv, $argc){ return; } +function cleanupmessageid($messageid) { + global $a; + + if (!strpos($messageid, '@')) + $messageid = str_replace(":", ".", $messageid).'@'.$a->get_hostname(); + + return($messageid); +} + if (array_search(__file__,get_included_files())===0){ notifier_run($argv,$argc); killme(); diff --git a/include/poller.php b/include/poller.php index 3e7a1e9b..ad15ec24 100755 --- a/include/poller.php +++ b/include/poller.php @@ -1,6 +1,7 @@ subject)); + + // Decoding the header + $subject = imap_mime_header_decode($meta->subject); + $datarray['title'] = ""; + foreach($subject as $subpart) + if ($subpart->charset != "default") + $datarray['title'] .= iconv($subpart->charset, 'UTF-8//IGNORE', $subpart->text); + else + $datarray['title'] .= $subpart->text; + + $datarray['title'] = notags(trim($datarray['title'])); + + //$datarray['title'] = notags(trim($meta->subject)); $datarray['created'] = datetime_convert('UTC','UTC',$meta->date); $r = email_get_msg($mbox,$msg_uid); @@ -463,15 +476,24 @@ function poller_run($argv, $argc){ logger("Mail: can't fetch msg ".$msg_uid); continue; } - $datarray['body'] = escape_tags($r['body']); + $datarray['body'] = escape_tags(convertquote($r['body'], false)); logger("Mail: Importing ".$msg_uid); // some mailing lists have the original author as 'from' - add this sender info to msg body. // todo: adding a gravatar for the original author would be cool - if(! stristr($meta->from,$contact['addr'])) - $datarray['body'] = t('From: ') . escape_tags($meta->from) . "\n\n" . $datarray['body']; + if(! stristr($meta->from,$contact['addr'])) { + $from = imap_mime_header_decode($meta->from); + $fromdecoded = ""; + foreach($from as $frompart) + if ($frompart->charset != "default") + $fromdecoded .= iconv($frompart->charset, 'UTF-8//IGNORE', $frompart->text); + else + $fromdecoded .= $frompart->text; + + $datarray['body'] = "[b]".t('From: ') . escape_tags($fromdecoded) . "[/b]\n\n" . $datarray['body']; + } $datarray['uid'] = $importer_uid; $datarray['contact-id'] = $contact['id']; diff --git a/include/quoteconvert.php b/include/quoteconvert.php new file mode 100644 index 00000000..3aee9323 --- /dev/null +++ b/include/quoteconvert.php @@ -0,0 +1,132 @@ +0) and ((substr($quoteline, 0, 1) == '>') + or (substr($quoteline, 0, 1) == ' '))) { + if (substr($quoteline, 0, 1) == '>') + $quotelevel++; + + $quoteline = ltrim(substr($quoteline, 1)); + } + + //echo $quotelevel.'*'.$quoteline."\r\n"; + + $arrlevel[$i] = $quotelevel; + $arrbody[$i] = $quoteline; + } + + $quotelevel = 0; + $previousquote = 0; + $arrbodyquoted = array(); + + for ($i = 0; $i < count($arrbody); $i++) { + + $previousquote = $quotelevel; + $quotelevel = $arrlevel[$i]; + $currline = $arrbody[$i]; + + while ($previousquote < $quotelevel) { + if ($sender != '') { + $quote = "[quote title=$sender]"; + $sender = ''; + } else + $quote = "[quote]"; + + $arrbody[$i] = $quote.$arrbody[$i]; + $previousquote++; + } + + while ($previousquote > $quotelevel) { + $arrbody[$i] = '[/quote]'.$arrbody[$i]; + $previousquote--; + } + + $arrbodyquoted[] = $arrbody[$i]; + } + while ($quotelevel > 0) { + $arrbodyquoted[] = '[/quote]'; + $quotelevel--; + } + + $body = implode("\n", $arrbodyquoted); + + if (strlen($body) > 0) + $body = $body."\n\n"; + + if ($reply) + $body = removetofu($body); + + return($body); +} + +function removetofu($message) +{ + $message = trim($message); + + do { + $oldmessage = $message; + $message = preg_replace('=\[/quote\][\s](.*?)\[quote\]=i', '$1', $message); + $message = str_replace("[/quote][quote]", "", $message); + } while ($message != $oldmessage); + + $quotes = array(); + + $startquotes = 0; + + $start = 0; + + while(($pos = strpos($message, '[quote', $start)) > 0) { + $quotes[$pos] = -1; + $start = $pos + 7; + $startquotes++; + } + + $endquotes = 0; + $start = 0; + + while(($pos = strpos($message, '[/quote]', $start)) > 0) { + $start = $pos + 7; + $endquotes++; + } + + while ($endquotes < $startquotes) { + $message .= '[/quote]'; + ++$endquotes; + } + + $start = 0; + + while(($pos = strpos($message, '[/quote]', $start)) > 0) { + $quotes[$pos] = 1; + $start = $pos + 7; + } + + if (strtolower(substr($message, -8)) != '[/quote]') + return($message); + + krsort($quotes); + + $quotelevel = 0; + $quotestart = 0; + foreach ($quotes as $index => $quote) { + $quotelevel += $quote; + + if (($quotelevel == 0) and ($quotestart == 0)) + $quotestart = $index; + } + + if ($quotestart != 0) { + $message = trim(substr($message, 0, $quotestart))."\n[collapsed]\n".substr($message, $quotestart+7, -8).'[/collapsed]'; + } + + return($message); +} +?>