[Xfce4-commits] <postler:master> Encode headers and message body to UTF-8, fix hex decoding
Christian Dywan
noreply at xfce.org
Sun May 30 05:34:01 CEST 2010
Updating branch refs/heads/master
to 8bbd9b49ee148b16e83ad843c086557aad2df017 (commit)
from ba6e00a03588bb77d1af7fde0907be0750f5cde9 (commit)
commit 8bbd9b49ee148b16e83ad843c086557aad2df017
Author: Christian Dywan <christian at twotoasts.de>
Date: Sun May 30 04:57:03 2010 +0200
Encode headers and message body to UTF-8, fix hex decoding
Individual headers, or even separate parts of a single header,
may each be encoded with a different character set, so the only
safe option is to always convert everything to UTF-8.
Hexadecimal escapes (=XX) in quoted-printable text are now decoded instead of only handling "_" and "=20".
postler/postler-content.vala | 34 +++++++++++++-------
postler/postler-messages.vala | 69 ++++++++++++++++++++++++++++++-----------
2 files changed, 73 insertions(+), 30 deletions(-)
diff --git a/postler/postler-content.vala b/postler/postler-content.vala
index a9fec22..3f8e1e5 100644
--- a/postler/postler-content.vala
+++ b/postler/postler-content.vala
@@ -129,6 +129,10 @@ public class Postler.Content : WebKit.WebView {
return quoted;
}
+ static string parse_encoded (string quoted, out string charset) {
+ return Postler.Messages.parse_encoded (quoted, out charset);
+ }
+
string format_header (string header, string data) {
if (data != "")
return "<b>%s</b> %s<br>".printf (header, data);
@@ -188,14 +192,11 @@ public class Postler.Content : WebKit.WebView {
content_type = parts[1].strip ();
else if (field == "content-transfer-encoding")
content_encoding = parts[1].strip ();
- else if (field == "subject") {
- subject = parts[1].strip ();
- subject = Postler.Messages.parse_encoded (subject, out charset);
- }
+ else if (field == "subject")
+ subject = parse_encoded (parts[1], out charset);
else if (field == "from") {
- from = parts[1].strip ();
string from_charset = null;
- from = Postler.Messages.parse_encoded (from, out from_charset);
+ from = parse_encoded (parts[1], out from_charset);
}
else if (field == "date") {
time_t timestamp;
@@ -203,10 +204,14 @@ public class Postler.Content : WebKit.WebView {
}
else if (field == "to")
recipient = parts[1].strip ();
- else if (field == "cc")
- carbon_copy = parts[1];
- else if (field == "reply-to")
- reply = parts[1] + "<br>";
+ else if (field == "cc") {
+ string cc_charset = null;
+ carbon_copy = parse_encoded (parts[1], out cc_charset);
+ }
+ else if (field == "reply-to") {
+ string reply_charset = null;
+ reply = parse_encoded (parts[1], out reply_charset);
+ }
else if (field == "organization")
organization = parts[1];
else if (field == "x-mailer")
@@ -235,7 +240,7 @@ public class Postler.Content : WebKit.WebView {
parts = content_type.split ("; charset=");
if (parts != null && parts[0] != null && parts[1] != null) {
mime_type = parts[0];
- charset = parts[1];
+ charset = parts[1].replace ("\"", " ").strip ();
}
}
@@ -253,6 +258,11 @@ public class Postler.Content : WebKit.WebView {
line = quoted_printable_decode (line);
else if (content_encoding == "base64")
line = (string)GLib.Base64.decode (line);
+ try {
+ if (charset != null)
+ line = GLib.convert (line, -1, "UTF-8", charset, null);
+ }
+ catch (GLib.ConvertError error) { }
/* TODO: Encoding, attachments */
/* TODO: Can we parse and localize quoting, such as this?
> Em Quinta-feira 20 Maio 2010, =E0s 17:20:09, Pablo escreveu:
@@ -324,7 +334,7 @@ public class Postler.Content : WebKit.WebView {
format_header (_("Application:"), x_mailer),
plain_text ? "font-family: Monospace;" : "",
body_chunk),
- mime_type, charset, "about:blank");
+ mime_type, "UTF-8", "about:blank");
last_location = location;
} catch (GLib.Error contents_error) {
load_string ("""
diff --git a/postler/postler-messages.vala b/postler/postler-messages.vala
index c461dbe..61d6301 100644
--- a/postler/postler-messages.vala
+++ b/postler/postler-messages.vala
@@ -73,31 +73,37 @@ public class Postler.Messages : Gtk.TreeView {
new Gtk.CellRendererText (), "text", Columns.SIZE, null);
}
+ static bool evaluate_hex (GLib.MatchInfo match_info,
+ GLib.StringBuilder result,
+ void* user_data) {
+ string match = "0x" + match_info.fetch (2);
+ result.append_printf ("%c", (int)match.to_ulong (null));
+ return false;
+ }
+
internal static string quoted_printable_decode (string quoted) {
- return quoted.replace ("_", " ").replace ("=20", " ");
- /* FIXME: Evaluate hexa-decimal values
try {
var regex = new GLib.Regex ("([=]([0-9A-F][0-9A-F]))");
- return regex.replace (quoted, -1, 0, "\\x\\2").replace ("_", " ");
+ return regex.replace_eval (quoted, -1, 0, 0,
+ evaluate_hex, null).replace ("_", " ");
}
catch (GLib.RegexError error) {
GLib.critical (_("Failed to decode string \"%s\": %s"),
quoted, error.message);
}
- return quoted; */
+ return quoted;
}
- internal static string parse_encoded (string encoded, out string charset) {
- /* format "=?charset?encoding?encoded?=",
- if in doubt, bail out and take the raw data */
- if (encoded[0] != '=' || encoded[1] != '?')
+ static string decode_piece (string encoded, out string charset) {
+ if (!(encoded[0] == '=' && encoded[1] == '?'))
return encoded;
int token = 2;
while (encoded[token] != '?')
token++;
- if (encoded[token] != '?')
- return encoded;
- charset = encoded[2:token];
+ charset = encoded[2:token].up ();
+ /* Encoding aliases */
+ if (charset == "KS_C_5601-1987")
+ charset = "CP949";
token++;
unichar encoding = encoded[token];
if (encoding != 'Q' && encoding != 'B')
@@ -105,12 +111,37 @@ public class Postler.Messages : Gtk.TreeView {
token++;
if (encoded[token] != '?')
return encoded;
- /* We assume the last character is = */
+ token++;
+ string[] pieces = encoded.slice (token, encoded.length).split ("?=");
+ if (pieces == null || pieces[0] == null)
+ return encoded;
+ string unquoted;
if (encoding == 'Q')
- return quoted_printable_decode (encoded[token:-2]);
- else if (encoding == 'B')
- return (string)GLib.Base64.decode (encoded[token:-2]);
- return encoded[token:-2];
+ unquoted = quoted_printable_decode (pieces[0]);
+ else if (encoding == 'B')
+ unquoted = (string)GLib.Base64.decode (pieces[0]);
+ else
+ unquoted = pieces[0];
+ try {
+ return GLib.convert (unquoted, -1, "UTF-8", charset, null) +
+ (pieces[1] != null ? pieces[1] : "");
+ }
+ catch (GLib.ConvertError error) {
+ GLib.message (_("Failed to convert \"%s\": %s"), encoded, error.message);
+ return pieces[0] + (pieces[1] != null ? pieces[1] : "");
+ }
+ }
+
+ internal static string parse_encoded (string encoded, out string charset) {
+ /* format "=?charset?encoding?encoded?=",
+ if in doubt, bail out and take the raw data */
+ string[] pieces = encoded.strip ().split ("?=");
+ if (pieces == null || pieces[0] == null)
+ return encoded;
+ var decoded = new GLib.StringBuilder ();
+ foreach (var piece in pieces)
+ decoded.append (decode_piece (piece, out charset));
+ return decoded.str;
}
internal static string[] parse_address (string address)
@@ -228,8 +259,10 @@ public class Postler.Messages : Gtk.TreeView {
string field = parts[0].down ();
if (field == "subject")
subject = parts[1].strip ();
- else if (field == "from")
- from = parts[1];
+ else if (field == "from") {
+ string from_charset = null;
+ from = parse_encoded (parts[1], out from_charset);
+ }
else if (field == "date")
date = format_date (parts[1], out timestamp);
}
More information about the Xfce4-commits
mailing list