// RFC 2047 "Q" encoding of non US-ASCII header values.
//
// Provenance: patch 99bc1ec8671ae14c26a110e5157c0bf82a53d190 by Inigo Lopez de
// Heredia (Wed, 19 Nov 2014 19:41:38 +0000), "'Q' encode non US-ASCII header
// values(RFC 2047)", touching email.go and email_test.go:
//   - read the field value and determine whether an encoding is needed;
//   - "Q" encode UTF-8 values;
//   - add a test function.
//
// In email.go the patch also changes headerToBytes so that each value is
// written with
//
//	buff.Write(encodeHeader(field, subval))
//
// instead of io.WriteString(buff, subval).

const (
	// qEncPrefix and qEncSuffix delimit every encoded-word produced here:
	// charset UTF-8, encoding "Q".
	qEncPrefix = "=?UTF-8?Q?"
	qEncSuffix = "?="

	// qEncMaxWordLen is the longest encoded-word RFC 2047 section 2 allows,
	// counting charset, encoding, encoded text and all delimiters. Keeping
	// words at 75 also keeps folded continuation lines (one leading space
	// plus the word) within 76 characters.
	qEncMaxWordLen = 75

	// qEncHex supplies the digits for "=XX" escapes (upper case, matching
	// the "%02X" formatting this code used previously).
	qEncHex = "0123456789ABCDEF"
)

// encodeHeader checks whether the header value needs to be encoded, and
// returns the header-safe byte stream.
//
// The value is returned as is when field is "Content-Type" or
// "Content-Disposition" (compared case-sensitively), or when every byte of
// value lies in the printable US-ASCII range ' '..'~'. Otherwise the whole
// value is "Q" encoded as one or more UTF-8 encoded-words, folded with
// "\r\n " between words.
func encodeHeader(field string, value string) []byte {
	if field == "Content-Type" || field == "Content-Disposition" {
		return []byte(value)
	}
	for i := 0; i < len(value); i++ {
		if c := value[i]; c < ' ' || c > '~' {
			// First byte outside printable ASCII: encode the entire value.
			var b bytes.Buffer
			encodeText(&b, value, true)
			return b.Bytes()
		}
	}
	return []byte(value)
}

// encodeText performs a UTF-8 "Q" encoding on the given string, according to
// RFC 2047. Output bytes are written to buff.
//
// If the encoded-word for s would exceed qEncMaxWordLen, s is split at its
// middle rune and each half is encoded recursively, so no multi-byte
// character is divided between two words. first must be true for the word
// that directly follows the header name; every other word is preceded by
// "\r\n " so that it starts a folded continuation line.
//
// The length budget covers the encoded-word only; the "Field: " prefix that
// headerToBytes writes before the first word is not counted here.
func encodeText(buff *bytes.Buffer, s string, first bool) {
	if qEncodedLen(s) > qEncMaxWordLen {
		// Exceeding 75 encoded bytes needs at least 22 input bytes, hence at
		// least 6 runes, so both halves are non-empty and recursion ends.
		head, tail := splitAtMiddleRune(s)
		encodeText(buff, head, first)
		encodeText(buff, tail, false)
		return
	}

	if !first {
		buff.WriteString("\r\n ")
	}
	buff.WriteString(qEncPrefix)
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case c == ' ':
			buff.WriteByte('_')
		case qEncSafe(c):
			buff.WriteByte(c)
		default:
			buff.WriteByte('=')
			buff.WriteByte(qEncHex[c>>4])
			buff.WriteByte(qEncHex[c&0x0F])
		}
	}
	buff.WriteString(qEncSuffix)
}

// qEncSafe reports whether c may appear literally inside a "Q" encoded-word.
// Per RFC 2047 section 4.2 that is printable ASCII other than '=' (escape
// introducer), '?' (encoded-word delimiter) and '_' (stand-in for space).
// Space itself is not safe here: encodeText writes it as '_'. Control bytes,
// DEL and bytes >= 0x80 are never safe and are escaped as "=XX".
func qEncSafe(c byte) bool {
	switch c {
	case '=', '?', '_':
		return false
	}
	return c > ' ' && c <= '~'
}

// qEncodedLen returns the length of the single encoded-word that encodeText
// would emit for s: one byte per space or safe byte, three per escaped byte,
// plus the delimiters.
func qEncodedLen(s string) int {
	n := len(qEncPrefix) + len(qEncSuffix)
	for i := 0; i < len(s); i++ {
		if c := s[i]; c == ' ' || qEncSafe(c) {
			n++
		} else {
			n += 3
		}
	}
	return n
}

// splitAtMiddleRune cuts s in front of its rune number len/2 (counting runes
// the way a range loop does, where each invalid byte is one rune). It slices
// the original bytes instead of converting through []rune, so invalid UTF-8
// is passed on unchanged rather than replaced by U+FFFD.
func splitAtMiddleRune(s string) (head, tail string) {
	runes := 0
	for range s {
		runes++
	}
	seen := 0
	for i := range s {
		if seen == runes/2 {
			return s[:i], s[i:]
		}
		seen++
	}
	return s, ""
}

// Test_encodeHeader is the test the patch adds to email_test.go.
func Test_encodeHeader(t *testing.T) {
	// Plain ASCII (unchanged).
	subject := "Plain ASCII email subject, !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
	expected := []byte("Plain ASCII email subject, !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~")

	b := encodeHeader("Subject", subject)
	if !bytes.Equal(b, expected) {
		t.Errorf("encodeHeader generated incorrect results: %#q != %#q", b, expected)
	}

	// UTF-8 ('q' encoded).
	subject = "UTF-8 email subject. It can contain é, ñ, or £. Long subject headers will be split in multiple lines!"
	expected = []byte("=?UTF-8?Q?UTF-8_email_subject._It_c?=\r\n" +
		" =?UTF-8?Q?an_contain_=C3=A9,_=C3=B1,_or_=C2=A3._Lo?=\r\n" +
		" =?UTF-8?Q?ng_subject_headers_will_be_split_in_multiple_lines!?=")

	b = encodeHeader("Subject", subject)
	if !bytes.Equal(b, expected) {
		t.Errorf("encodeHeader generated incorrect results: %#q != %#q", b, expected)
	}
}