jhillyerd · May 17, 2022 · May 17, 2022 · May 17, 2022 · May 26, 2022 · Jun 2, 2022
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,7 +5,19 @@ All notable changes to this project will be documented in this file.
 This project adheres to [Semantic Versioning](http://semver.org/).
 
 
-## [Unreleased]
+## [0.10.0] - 2022-07-20
+
+### Added
+- Support for parser options! (#248)
+- Option to skip parsing of malformed parts (#248)
+- Envelope.Date() method for parsing date (#253)
+- Option to handle missing multipart boundaries (#257)
+
+### Fixed
+- Remove trailing HTML tags (#252)
+- Buffer overrun in quoted-printable (#254)
+- Corrected quoted-printable equals counting (#255)
+- Improve splitting inside quoted text (#256)
 
 
 ## [0.9.4] - 2022-05-16

diff --git a/detect.go b/detect.go
@@ -8,14 +8,21 @@ import (
 )
 
 // detectMultipartMessage returns true if the message has a recognized multipart Content-Type header
-func detectMultipartMessage(root *Part) bool {
+func detectMultipartMessage(root *Part, multipartWOBoundaryAsSinglepart bool) bool {
 	// Parse top-level multipart
 	ctype := root.Header.Get(hnContentType)
-	mtype, _, _, err := mediatype.Parse(ctype)
+	mtype, params, _, err := mediatype.Parse(ctype)
+	if err != nil {
+		return false
+	}
+
+	if boundary := params[hpBoundary]; multipartWOBoundaryAsSinglepart && boundary == "" {
+		return false
+	}
 
 	// According to rfc2046#section-5.1.7 all other multipart should
 	// be treated as multipart/mixed
-	return err == nil && strings.HasPrefix(mtype, ctMultipartPrefix)
+	return strings.HasPrefix(mtype, ctMultipartPrefix)
 }
 
 // detectAttachmentHeader returns true, if the given header defines an attachment. First it checks

diff --git a/detect_test.go b/detect_test.go
@@ -14,7 +14,7 @@ func TestDetectSinglePart(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	if detectMultipartMessage(msg) {
+	if detectMultipartMessage(msg, false) {
 		t.Error("Failed to identify non-multipart message")
 	}
 }
@@ -26,7 +26,7 @@ func TestDetectMultiPart(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	if !detectMultipartMessage(msg) {
+	if !detectMultipartMessage(msg, false) {
 		t.Error("Failed to identify multipart MIME message")
 	}
 }
@@ -38,11 +38,27 @@ func TestDetectUnknownMultiPart(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	if !detectMultipartMessage(msg) {
+	if !detectMultipartMessage(msg, false) {
 		t.Error("Failed to identify multipart MIME message of unknown type")
 	}
 }
 
+func TestDetectMultipartWithoutBoundary(t *testing.T) {
+	r, _ := os.Open(filepath.Join("testdata", "mail", "multipart-wo-boundary.raw"))
+	msg, err := ReadParts(r)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if !detectMultipartMessage(msg, false) {
+		t.Error("Failed to identify multipart MIME message")
+	}
+
+	if detectMultipartMessage(msg, true) {
+		t.Error("Failed to identify multipart MIME message without boundaries as single-part")
+	}
+}
+
 func TestDetectBinaryBody(t *testing.T) {
 	ttable := []struct {
 		filename    string

diff --git a/envelope.go b/envelope.go
@@ -7,6 +7,7 @@ import (
 	"net/mail"
 	"net/textproto"
 	"strings"
+	"time"
 
 	"github.com/jaytaylor/html2text"
 	"github.com/jhillyerd/enmime/internal/coding"
@@ -114,6 +115,15 @@ func (e *Envelope) AddressList(key string) ([]*mail.Address, error) {
 	return ParseAddressList(e.header.Get(key))
 }
 
+// Date parses the Date header field, returning mail.ErrHeaderNotPresent when the header value is empty.
+func (e *Envelope) Date() (time.Time, error) {
+	hdr := e.GetHeader("Date")
+	if hdr == "" {
+		return time.Time{}, mail.ErrHeaderNotPresent
+	}
+	return mail.ParseDate(hdr)
+}
+
 // Clone returns a clone of the current Envelope
 func (e *Envelope) Clone() *Envelope {
 	if e == nil {
@@ -137,25 +147,36 @@ func (e *Envelope) Clone() *Envelope {
 // provided reader into an Envelope, downconverting HTML to plain text if needed, and sorting the
 // attachments, inlines and other parts into their respective slices. Errors are collected from all
 // Parts and placed into the Envelope.Errors slice.
+// It uses the default parser.
 func ReadEnvelope(r io.Reader) (*Envelope, error) {
+	return defaultParser.ReadEnvelope(r)
+}
+
+// ReadEnvelope is the same as the package-level ReadEnvelope function, but respects the parser's configuration.
+func (p Parser) ReadEnvelope(r io.Reader) (*Envelope, error) {
 	// Read MIME parts from reader
-	root, err := ReadParts(r)
+	root, err := p.ReadParts(r)
 	if err != nil {
 		return nil, errors.WithMessage(err, "Failed to ReadParts")
 	}
-	return EnvelopeFromPart(root)
+	return p.EnvelopeFromPart(root)
 }
 
 // EnvelopeFromPart uses the provided Part tree to build an Envelope, downconverting HTML to plain
 // text if needed, and sorting the attachments, inlines and other parts into their respective
 // slices.  Errors are collected from all Parts and placed into the Envelopes Errors slice.
 func EnvelopeFromPart(root *Part) (*Envelope, error) {
+	return defaultParser.EnvelopeFromPart(root)
+}
+
+// EnvelopeFromPart is the same as the package-level EnvelopeFromPart function, but respects the parser's configuration.
+func (p Parser) EnvelopeFromPart(root *Part) (*Envelope, error) {
 	e := &Envelope{
 		Root:   root,
 		header: &root.Header,
 	}
 
-	if detectMultipartMessage(root) {
+	if detectMultipartMessage(root, p.multipartWOBoundaryAsSinglePart) {
 		// Multi-part message (message with attachments, etc)
 		if err := parseMultiPartBody(root, e); err != nil {
 			return nil, err

diff --git a/envelope_test.go b/envelope_test.go
@@ -7,6 +7,7 @@ import (
 	"sort"
 	"strings"
 	"testing"
+	"time"
 
 	"github.com/go-test/deep"
 	"github.com/jhillyerd/enmime"
@@ -34,6 +35,12 @@ func TestParseHeaderOnly(t *testing.T) {
 	if len(e.Root.Header) != 7 {
 		t.Errorf("Expected 7 headers, got %d", len(e.Root.Header))
 	}
+	dt, err := e.Date()
+	if err != nil {
+		t.Errorf("Failed to parse Date header: %v", err)
+	} else if !dt.Equal(time.Date(2012, time.October, 19, 5, 48, 39, 0, time.UTC)) {
+		t.Errorf("Incorrect date parsed: %s", dt)
+	}
 }
 
 func TestParseNonMime(t *testing.T) {
@@ -582,6 +589,31 @@ func TestParseHTMLOnlyCharsetInHeaderOnly(t *testing.T) {
 	}
 }
 
+func TestParseMultipartWOBoundaryFails(t *testing.T) {
+	r := test.OpenTestData("mail", "multipart-wo-boundary.raw")
+	_, err := enmime.ReadEnvelope(r)
+	if err == nil {
+		t.Fatal("Expecting parsing to fail")
+	}
+
+	if !strings.Contains(err.Error(), "unable to locate boundary param in Content-Type header") {
+		t.Fatal("Expecting for unable to locate boundary error")
+	}
+}
+
+func TestParseMultipartWOBoundaryAsSinglepart(t *testing.T) {
+	r := test.OpenTestData("mail", "multipart-wo-boundary.raw")
+	p := enmime.NewParser(enmime.MultipartWOBoundaryAsSinglePart(true))
+	e, err := p.ReadEnvelope(r)
+	if err != nil {
+		t.Fatal("Failed to parse MIME:", err)
+	}
+
+	if !bytes.Contains(e.Root.Content, []byte(`I'm  multipart message without boundary`)) {
+		t.Fatal("Expecting multipart without boundary to be parsed")
+	}
+}
+
 func TestEnvelopeGetHeader(t *testing.T) {
 	// Test empty header
 	e := &enmime.Envelope{}

diff --git a/error.go b/error.go
@@ -23,6 +23,8 @@ const (
 	ErrorCharsetDeclaration = "Character Set Declaration Mismatch"
 	// ErrorMissingRecipient name.
 	ErrorMissingRecipient = "no recipients (to, cc, bcc) set"
+	// ErrorMalformedChildPart name.
+	ErrorMalformedChildPart = "Malformed child part"
 )
 
 // MaxPartErrors limits number of part parsing errors, errors after the limit are ignored. 0 means unlimited.

diff --git a/internal/coding/quotedprint.go b/internal/coding/quotedprint.go
@@ -52,6 +52,17 @@ func (qp *QPCleaner) Read(dest []byte) (n int, err error) {
 		qp.lineLen++
 	}
 
+	// safeWriteByte outputs a single byte, storing overflow for next read. Updates counters.
+	safeWriteByte := func(in byte) {
+		if n < destLen {
+			dest[n] = in
+			n++
+		} else {
+			qp.overflow = append(qp.overflow, in)
+		}
+		qp.lineLen++
+	}
+
 	// writeBytes outputs multiple bytes, storing overflow for next read. Updates counters.
 	writeBytes := func(in []byte) {
 		nc := copy(dest[n:], in)
@@ -100,8 +111,7 @@ func (qp *QPCleaner) Read(dest []byte) (n int, err error) {
 				return 0, err
 			}
 			if validHexBytes(hexBytes) {
-				dest[n] = b
-				n++
+				safeWriteByte(b)
 			} else {
 				writeBytes(escapedEquals)
 			}

diff --git a/internal/coding/quotedprint_test.go b/internal/coding/quotedprint_test.go
@@ -148,7 +148,41 @@ func TestQPCleanerLineBreakBufferFull(t *testing.T) {
 		t.Fatal(err)
 	}
 	if n != 1025 {
-		t.Errorf("Unexpected result length: %d", n)
+		t.Error("got:", n, "want:", 1025)
+	}
+}
+
+func TestQPCleanerEqualSignOverflow(t *testing.T) {
+	// Test processing of a =3D quoted-printable token at the end of a line
+	// in combination with an almost full buffer.
+	// The QP cleaner should add =\r\n and then output =3D without overflowing the buffer.
+
+	input := append(bytes.Repeat([]byte("abc"), 341), []byte("=3D")...)
+	inbuf := bytes.NewBuffer(input)
+	qp := coding.NewQPCleaner(inbuf)
+
+	output := make([]byte, 1024)
+	n, err := qp.Read(output)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if n != 1024 {
+		t.Error("got:", n, "want:", 1024)
+	}
+	if string(output[1020:]) != "abc=" {
+		t.Error("got:", string(output[1020:]), "want:", "abc=")
+	}
+
+	n, err = qp.Read(output)
+	if err != io.EOF {
+		t.Fatal(err)
+	}
+	if n != 5 {
+		t.Error("got:", n, "want:", 5)
+	}
+	output = output[:n]
+	if string(output) != "\r\n=3D" {
+		t.Error("got:", string(output), "want:", "\r\n=3D")
 	}
 }
 
@@ -171,6 +205,39 @@ func TestQPPeekError(t *testing.T) {
 	}
 }
 
+func TestQPCleanerQuotedLineLength(t *testing.T) {
+	input := strings.Repeat("=BC", 700) // ~ two lines of tokens
+	inr := strings.NewReader(input)
+	qp := coding.NewQPCleaner(inr)
+
+	// Check line length is counted properly even for quoted-printable encoded chars
+	longLineLen := coding.MaxQPLineLen + 2
+	output := make([]byte, longLineLen)
+	n, err := qp.Read(output)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if n != longLineLen {
+		t.Error("got:", n, "want:", longLineLen)
+	}
+	if string(output[longLineLen-2:]) != "\r\n" {
+		t.Error("got:", string(output[longLineLen-2:]), "want:", "\r\n")
+	}
+
+	// Check line length is also correct when the overflow buffer of QPCleaner is used
+	output = make([]byte, longLineLen)
+	n, err = qp.Read(output)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if n != longLineLen {
+		t.Error("got:", n, "want:", longLineLen)
+	}
+	if string(output[longLineLen-2:]) != "\r\n" {
+		t.Error("got:", string(output[longLineLen-2:]), "want:", "\r\n")
+	}
+}
+
 var result int
 
 func BenchmarkQPCleaner(b *testing.B) {

diff --git a/internal/stringutil/find.go b/internal/stringutil/find.go
@@ -0,0 +1,39 @@
+package stringutil
+
+// FindUnquoted returns the byte indexes of every instance of v in s, or an empty slice if v is not present in s.
+// It ignores instances of v inside quoted runs; an unterminated quote does not hide the instances that follow it.
+func FindUnquoted(s string, v rune, quote rune) []int {
+	escaped := false
+	quoted := false
+	indexes := make([]int, 0)
+	quotedIndexes := make([]int, 0)
+
+	for i := 0; i < len(s); i++ {
+		switch rune(s[i]) {
+		case escape:
+			escaped = !escaped // a second consecutive escape cancels the first.
+		case quote:
+			if escaped {
+				escaped = false
+				continue
+			}
+
+			quoted = !quoted
+			if !quoted {
+				quotedIndexes = quotedIndexes[:0] // the quote closed, so indexes collected inside it are discarded
+			}
+		case v:
+			escaped = false
+			if quoted {
+				quotedIndexes = append(quotedIndexes, i)
+			} else {
+				indexes = append(indexes, i)
+			}
+		default:
+			escaped = false
+		}
+
+	}
+
+	return append(indexes, quotedIndexes...)
+}