[Xfce4-commits] <postler:master> Encode headers and message body to UTF-8, fix hex decoding

Christian Dywan noreply at xfce.org
Sun May 30 05:34:01 CEST 2010


Updating branch refs/heads/master
         to 8bbd9b49ee148b16e83ad843c086557aad2df017 (commit)
       from ba6e00a03588bb77d1af7fde0907be0750f5cde9 (commit)

commit 8bbd9b49ee148b16e83ad843c086557aad2df017
Author: Christian Dywan <christian at twotoasts.de>
Date:   Sun May 30 04:57:03 2010 +0200

    Encode headers and message body to UTF-8, fix hex decoding
    
    Individual headers, or even parts of a single header, may each
    be encoded with a different character set, so the only safe
    option is to always convert everything to UTF-8.
    
    Hexadecimal escapes (=XX) in quoted-printable text are now decoded
    to the characters they represent.

 postler/postler-content.vala  |   34 +++++++++++++-------
 postler/postler-messages.vala |   69 ++++++++++++++++++++++++++++++-----------
 2 files changed, 73 insertions(+), 30 deletions(-)

diff --git a/postler/postler-content.vala b/postler/postler-content.vala
index a9fec22..3f8e1e5 100644
--- a/postler/postler-content.vala
+++ b/postler/postler-content.vala
@@ -129,6 +129,10 @@ public class Postler.Content : WebKit.WebView {
         return quoted;
     }
 
+    static string parse_encoded (string quoted, out string charset) {
+        return Postler.Messages.parse_encoded (quoted, out charset);
+    }
+
     string format_header (string header, string data) {
         if (data != "")
             return "<b>%s</b> %s<br>".printf (header, data);
@@ -188,14 +192,11 @@ public class Postler.Content : WebKit.WebView {
                     content_type = parts[1].strip ();
                 else if (field == "content-transfer-encoding")
                     content_encoding = parts[1].strip ();
-                else if (field == "subject") {
-                    subject = parts[1].strip ();
-                    subject = Postler.Messages.parse_encoded (subject, out charset);
-                }
+                else if (field == "subject")
+                    subject = parse_encoded (parts[1], out charset);
                 else if (field == "from") {
-                    from = parts[1].strip ();
                     string from_charset = null;
-                    from = Postler.Messages.parse_encoded (from, out from_charset);
+                    from = parse_encoded (parts[1], out from_charset);
                 }
                 else if (field == "date") {
                     time_t timestamp;
@@ -203,10 +204,14 @@ public class Postler.Content : WebKit.WebView {
                 }
                 else if (field == "to")
                     recipient = parts[1].strip ();
-                else if (field == "cc")
-                    carbon_copy = parts[1];
-                else if (field == "reply-to")
-                    reply = parts[1] + "<br>";
+                else if (field == "cc") {
+                    string cc_charset = null;
+                    carbon_copy = parse_encoded (parts[1], out cc_charset);
+                }
+                else if (field == "reply-to") {
+                    string reply_charset = null;
+                    reply = parse_encoded (parts[1], out reply_charset);
+                }
                 else if (field == "organization")
                     organization = parts[1];
                 else if (field == "x-mailer")
@@ -235,7 +240,7 @@ public class Postler.Content : WebKit.WebView {
                 parts = content_type.split ("; charset=");
                 if (parts != null && parts[0] != null && parts[1] != null) {
                     mime_type = parts[0];
-                    charset = parts[1];
+                    charset = parts[1].replace ("\"", " ").strip ();
                 }
             }
 
@@ -253,6 +258,11 @@ public class Postler.Content : WebKit.WebView {
                     line = quoted_printable_decode (line);
                 else if (content_encoding == "base64")
                     line = (string)GLib.Base64.decode (line);
+                try {
+                    if (charset != null)
+                        line = GLib.convert (line, -1, "UTF-8", charset, null);
+                }
+                catch (GLib.ConvertError error) { }
                 /* TODO: Encoding, attachments */
                 /* TODO: Can we parse and localize quoting, such as this?
                    > Em Quinta-feira 20 Maio 2010, =E0s 17:20:09, Pablo escreveu:
@@ -324,7 +334,7 @@ public class Postler.Content : WebKit.WebView {
                         format_header (_("Application:"), x_mailer),
                         plain_text ? "font-family: Monospace;" : "",
                         body_chunk),
-                mime_type, charset, "about:blank");
+                mime_type, "UTF-8", "about:blank");
             last_location = location;
         } catch (GLib.Error contents_error) {
             load_string ("""
diff --git a/postler/postler-messages.vala b/postler/postler-messages.vala
index c461dbe..61d6301 100644
--- a/postler/postler-messages.vala
+++ b/postler/postler-messages.vala
@@ -73,31 +73,37 @@ public class Postler.Messages : Gtk.TreeView {
             new Gtk.CellRendererText (), "text", Columns.SIZE, null);
     }
 
+    static bool evaluate_hex (GLib.MatchInfo     match_info,
+                              GLib.StringBuilder result,
+                              void*              user_data) {
+        string match = "0x" + match_info.fetch (2);
+        result.append_printf ("%c", (int)match.to_ulong (null));
+        return false;
+    }
+
     internal static string quoted_printable_decode (string quoted) {
-        return quoted.replace ("_", " ").replace ("=20", " ");
-        /* FIXME: Evaluate hexa-decimal values
         try {
             var regex = new GLib.Regex ("([=]([0-9A-F][0-9A-F]))");
-            return regex.replace (quoted, -1, 0, "\\x\\2").replace ("_", " ");
+            return regex.replace_eval (quoted, -1, 0, 0,
+                                       evaluate_hex, null).replace ("_", " ");
         }
         catch (GLib.RegexError error) {
             GLib.critical (_("Failed to decode string \"%s\": %s"),
                            quoted, error.message);
         }
-        return quoted; */
+        return quoted;
     }
 
-    internal static string parse_encoded (string encoded, out string charset) {
-        /* format "=?charset?encoding?encoded?=",
-           if in doubt, bail out and take the raw data */
-        if (encoded[0] != '=' || encoded[1] != '?')
+    static string decode_piece (string encoded, out string charset) {
+        if (!(encoded[0] == '=' && encoded[1] == '?'))
             return encoded;
         int token = 2;
         while (encoded[token] != '?')
             token++;
-        if (encoded[token] != '?')
-            return encoded;
-        charset = encoded[2:token];
+        charset = encoded[2:token].up ();
+        /* Encoding aliases */
+        if (charset == "KS_C_5601-1987")
+            charset = "CP949";
         token++;
         unichar encoding = encoded[token];
         if (encoding != 'Q' && encoding != 'B')
@@ -105,12 +111,37 @@ public class Postler.Messages : Gtk.TreeView {
         token++;
         if (encoded[token] != '?')
             return encoded;
-        /* We assume the last character is = */
+        token++;
+        string[] pieces = encoded.slice (token, encoded.length).split ("?=");
+        if (pieces == null || pieces[0] == null)
+            return encoded;
+        string unquoted;
         if (encoding == 'Q')
-            return quoted_printable_decode (encoded[token:-2]);
-        else if (encoding == 'B')
-            return (string)GLib.Base64.decode (encoded[token:-2]);
-        return encoded[token:-2];
+            unquoted = quoted_printable_decode (pieces[0]);
+        else if (encoding == 'B') 
+            unquoted = (string)GLib.Base64.decode (pieces[0]);
+        else
+            unquoted = pieces[0];
+        try {
+            return GLib.convert (unquoted, -1, "UTF-8", charset, null) +
+                (pieces[1] != null ? pieces[1] : "");
+        }
+        catch (GLib.ConvertError error) {
+            GLib.message (_("Failed to convert \"%s\": %s"), encoded, error.message);
+            return pieces[0] + (pieces[1] != null ? pieces[1] : "");
+        }
+    }
+
+    internal static string parse_encoded (string encoded, out string charset) {
+        /* format "=?charset?encoding?encoded?=",
+           if in doubt, bail out and take the raw data */
+        string[] pieces = encoded.strip ().split ("?=");
+        if (pieces == null || pieces[0] == null)
+            return encoded;
+        var decoded = new GLib.StringBuilder ();
+        foreach (var piece in pieces)
+            decoded.append (decode_piece (piece, out charset));
+        return decoded.str;
     }
 
     internal static string[] parse_address (string address)
@@ -228,8 +259,10 @@ public class Postler.Messages : Gtk.TreeView {
                             string field = parts[0].down ();
                             if (field == "subject")
                                 subject = parts[1].strip ();
-                            else if (field == "from")
-                                from = parts[1];
+                            else if (field == "from") {
+                                string from_charset = null;
+                                from = parse_encoded (parts[1], out from_charset);
+                            }
                             else if (field == "date")
                                 date = format_date (parts[1], out timestamp);
                         }



More information about the Xfce4-commits mailing list