Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: jhillyerd/enmime
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: v0.9.4
Choose a base ref
...
head repository: jhillyerd/enmime
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: v0.10.0
Choose a head ref
  • 10 commits
  • 20 files changed
  • 6 contributors

Commits on May 17, 2022

  1. feat: add option skip malformed parts (#248)

    * feat: add option skip malformed parts
    
    This commit introduces Parser, which allows to configure how MIME will
    be parsed. Default parser behavior isn't changed - ReadEnvelope and
    ReadParts will work as before.
    
    Currently only adding option to skip malformed parts.
    
    Co-authored-by: Dmytro Kasianenko <dmytro.kasianeneko@outreach.io>
    2 people authored and jhillyerd committed May 17, 2022
    Copy the full SHA
    1cec25e View commit details
  2. Copy the full SHA
    7331d07 View commit details
  3. Update CHANGELOG.md

    jhillyerd committed May 17, 2022
    Copy the full SHA
    390998f View commit details

Commits on May 26, 2022

  1. fix(mediatype): remove trailing HTML tags (#252)

    Co-authored-by: Dmytro Kasianenko <dmytro.kasianeneko@outreach.io>
    dmytrokasianenko-outreach and Dmytro Kasianenko authored May 26, 2022

    Verified

    This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
    Copy the full SHA
    f8aba51 View commit details

Commits on Jun 2, 2022

  1. Verified

    This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
    Copy the full SHA
    e143fa3 View commit details

Commits on Jun 5, 2022

  1. fix: buffer overrun when quoted printable is at the end of line and o…

    …utput buffer (#254)
    
    * fix: buffer overrun when quoted printable is at the end of line and output buffer
    
    Co-authored-by: Pavel Bazika <pavel.bazika@icewarp.com>
    pavelbazika and Pavel Bazika authored Jun 5, 2022

    Verified

    This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
    Copy the full SHA
    d29462e View commit details

Commits on Jun 7, 2022

  1. fix: QPCleaner line length equal sign counting (#255)

    Co-authored-by: Pavel Bazika <pavel.bazika@icewarp.com>
    pavelbazika and Pavel Bazika authored Jun 7, 2022

    Verified

    This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
    Copy the full SHA
    4906981 View commit details

Commits on Jun 15, 2022

  1. fix(mediatype): quoted semicolon (#256)

    * feat(stringutil): add find quoted
    
    * feat(stringutil): add split after quoted
    
    * fix(mediatype): fix splitting with `;` inside quoted text
    
    * fix: rename Quoted -> Unquoted
    
    Co-authored-by: Dmytro Kasianenko <dmytro.kasianeneko@outreach.io>
    dmytrokasianenko-outreach and Dmytro Kasianenko authored Jun 15, 2022

    Verified

    This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
    Copy the full SHA
    48f4e03 View commit details

Commits on Jul 4, 2022

  1. feat: add option to treat multipart w/o boundary as single-part (#257)

    * feat(detect): add possibility to detect multipart w/o boundary as singlepart
    * feat(parser): add option to treat multipart w/o boundary as singlepart
    
    Co-authored-by: Dmytro Kasianenko <dmytro.kasianeneko@outreach.io>
    dmytrokasianenko-outreach and Dmytro Kasianenko authored Jul 4, 2022

    Verified

    This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
    Copy the full SHA
    5376447 View commit details

Commits on Jul 21, 2022

  1. Copy the full SHA
    bde17c3 View commit details
14 changes: 13 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -5,7 +5,19 @@ All notable changes to this project will be documented in this file.
This project adheres to [Semantic Versioning](http://semver.org/).


## [Unreleased]
## [0.10.0] - 2022-07-20

### Added
- Support for parser options! (#248)
- Option to skip parsing of malformed parts (#248)
- Envelope.Date() method for parsing date (#253)
- Option to handle missing multipart boundaries (#257)

### Fixed
- Remove trailing HTML tags (#252)
- Buffer overrun in quoted-printable (#254)
- Corrected quoted-printable equals counting (#255)
- Improve splitting inside quoted text (#256)


## [0.9.4] - 2022-05-16
13 changes: 10 additions & 3 deletions detect.go
Original file line number Diff line number Diff line change
@@ -8,14 +8,21 @@ import (
)

// detectMultipartMessage returns true if the message has a recognized multipart Content-Type header
func detectMultipartMessage(root *Part) bool {
func detectMultipartMessage(root *Part, multipartWOBoundaryAsSinglepart bool) bool {
// Parse top-level multipart
ctype := root.Header.Get(hnContentType)
mtype, _, _, err := mediatype.Parse(ctype)
mtype, params, _, err := mediatype.Parse(ctype)
if err != nil {
return false
}

if boundary := params[hpBoundary]; multipartWOBoundaryAsSinglepart && boundary == "" {
return false
}

// According to rfc2046#section-5.1.7 all other multipart should
// be treated as multipart/mixed
return err == nil && strings.HasPrefix(mtype, ctMultipartPrefix)
return strings.HasPrefix(mtype, ctMultipartPrefix)
}

// detectAttachmentHeader returns true, if the given header defines an attachment. First it checks
22 changes: 19 additions & 3 deletions detect_test.go
Original file line number Diff line number Diff line change
@@ -14,7 +14,7 @@ func TestDetectSinglePart(t *testing.T) {
t.Fatal(err)
}

if detectMultipartMessage(msg) {
if detectMultipartMessage(msg, false) {
t.Error("Failed to identify non-multipart message")
}
}
@@ -26,7 +26,7 @@ func TestDetectMultiPart(t *testing.T) {
t.Fatal(err)
}

if !detectMultipartMessage(msg) {
if !detectMultipartMessage(msg, false) {
t.Error("Failed to identify multipart MIME message")
}
}
@@ -38,11 +38,27 @@ func TestDetectUnknownMultiPart(t *testing.T) {
t.Fatal(err)
}

if !detectMultipartMessage(msg) {
if !detectMultipartMessage(msg, false) {
t.Error("Failed to identify multipart MIME message of unknown type")
}
}

// TestDetectMultipartWithoutBoundary checks detectMultipartMessage against a message whose
// multipart Content-Type carries no boundary parameter: it must be reported as multipart by
// default, and as single-part when multipartWOBoundaryAsSinglepart is enabled.
func TestDetectMultipartWithoutBoundary(t *testing.T) {
	// Fail right away if the fixture cannot be opened instead of handing a nil file to ReadParts.
	r, err := os.Open(filepath.Join("testdata", "mail", "multipart-wo-boundary.raw"))
	if err != nil {
		t.Fatal(err)
	}
	defer r.Close()

	msg, err := ReadParts(r)
	if err != nil {
		t.Fatal(err)
	}

	if !detectMultipartMessage(msg, false) {
		t.Error("Failed to identify multipart MIME message")
	}

	if detectMultipartMessage(msg, true) {
		t.Error("Failed to identify multipart MIME message without boundaries as single-part")
	}
}

func TestDetectBinaryBody(t *testing.T) {
ttable := []struct {
filename string
27 changes: 24 additions & 3 deletions envelope.go
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@ import (
"net/mail"
"net/textproto"
"strings"
"time"

"github.com/jaytaylor/html2text"
"github.com/jhillyerd/enmime/internal/coding"
@@ -114,6 +115,15 @@ func (e *Envelope) AddressList(key string) ([]*mail.Address, error) {
return ParseAddressList(e.header.Get(key))
}

// Date returns the Date header parsed by mail.ParseDate. When the header is empty it returns
// the zero time.Time together with mail.ErrHeaderNotPresent.
func (e *Envelope) Date() (time.Time, error) {
	if raw := e.GetHeader("Date"); raw != "" {
		return mail.ParseDate(raw)
	}
	return time.Time{}, mail.ErrHeaderNotPresent
}

// Clone returns a clone of the current Envelope
func (e *Envelope) Clone() *Envelope {
if e == nil {
@@ -137,25 +147,36 @@ func (e *Envelope) Clone() *Envelope {
// provided reader into an Envelope, downconverting HTML to plain text if needed, and sorting the
// attachments, inlines and other parts into their respective slices. Errors are collected from all
// Parts and placed into the Envelope.Errors slice.
// Uses default parser.
func ReadEnvelope(r io.Reader) (*Envelope, error) {
return defaultParser.ReadEnvelope(r)
}

// ReadEnvelope is the same as the package-level ReadEnvelope, but respects parser configurations.
func (p Parser) ReadEnvelope(r io.Reader) (*Envelope, error) {
// Read MIME parts from reader
root, err := ReadParts(r)
root, err := p.ReadParts(r)
if err != nil {
return nil, errors.WithMessage(err, "Failed to ReadParts")
}
return EnvelopeFromPart(root)
return p.EnvelopeFromPart(root)
}

// EnvelopeFromPart uses the provided Part tree to build an Envelope, downconverting HTML to plain
// text if needed, and sorting the attachments, inlines and other parts into their respective
// slices. Errors are collected from all Parts and placed into the Envelopes Errors slice.
func EnvelopeFromPart(root *Part) (*Envelope, error) {
return defaultParser.EnvelopeFromPart(root)
}

// EnvelopeFromPart is the same as the package-level EnvelopeFromPart, but respects parser configurations.
func (p Parser) EnvelopeFromPart(root *Part) (*Envelope, error) {
e := &Envelope{
Root: root,
header: &root.Header,
}

if detectMultipartMessage(root) {
if detectMultipartMessage(root, p.multipartWOBoundaryAsSinglePart) {
// Multi-part message (message with attachments, etc)
if err := parseMultiPartBody(root, e); err != nil {
return nil, err
32 changes: 32 additions & 0 deletions envelope_test.go
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@ import (
"sort"
"strings"
"testing"
"time"

"github.com/go-test/deep"
"github.com/jhillyerd/enmime"
@@ -34,6 +35,12 @@ func TestParseHeaderOnly(t *testing.T) {
if len(e.Root.Header) != 7 {
t.Errorf("Expected 7 headers, got %d", len(e.Root.Header))
}
dt, err := e.Date()
if err != nil {
t.Errorf("Failed to parse Date header: %v", err)
} else if !dt.Equal(time.Date(2012, time.October, 19, 5, 48, 39, 0, time.UTC)) {
t.Errorf("Incorrect date parsed: %s", dt)
}
}

func TestParseNonMime(t *testing.T) {
@@ -582,6 +589,31 @@ func TestParseHTMLOnlyCharsetInHeaderOnly(t *testing.T) {
}
}

// TestParseMultipartWOBoundaryFails verifies that the default ReadEnvelope rejects a multipart
// message lacking a boundary parameter, and that the error names the missing boundary.
func TestParseMultipartWOBoundaryFails(t *testing.T) {
	const wantMsg = "unable to locate boundary param in Content-Type header"

	r := test.OpenTestData("mail", "multipart-wo-boundary.raw")
	_, err := enmime.ReadEnvelope(r)

	switch {
	case err == nil:
		t.Fatal("Expecting parsing to fail")
	case !strings.Contains(err.Error(), wantMsg):
		t.Fatal("Expecting for unable to locate boundary error")
	}
}

// TestParseMultipartWOBoundaryAsSinglepart verifies that a parser created with
// MultipartWOBoundaryAsSinglePart(true) reads a boundary-less multipart message and exposes
// its body as the root part's content.
func TestParseMultipartWOBoundaryAsSinglepart(t *testing.T) {
	r := test.OpenTestData("mail", "multipart-wo-boundary.raw")
	parser := enmime.NewParser(enmime.MultipartWOBoundaryAsSinglePart(true))

	env, err := parser.ReadEnvelope(r)
	if err != nil {
		t.Fatal("Failed to parse MIME:", err)
	}

	wantBody := []byte(`I'm multipart message without boundary`)
	if bytes.Index(env.Root.Content, wantBody) < 0 {
		t.Fatal("Expecting multipart without boundary to be parsed")
	}
}

func TestEnvelopeGetHeader(t *testing.T) {
// Test empty header
e := &enmime.Envelope{}
2 changes: 2 additions & 0 deletions error.go
Original file line number Diff line number Diff line change
@@ -23,6 +23,8 @@ const (
ErrorCharsetDeclaration = "Character Set Declaration Mismatch"
// ErrorMissingRecipient name.
ErrorMissingRecipient = "no recipients (to, cc, bcc) set"
// ErrorMalformedChildPart name.
ErrorMalformedChildPart = "Malformed child part"
)

// MaxPartErrors limits number of part parsing errors, errors after the limit are ignored. 0 means unlimited.
14 changes: 12 additions & 2 deletions internal/coding/quotedprint.go
Original file line number Diff line number Diff line change
@@ -52,6 +52,17 @@ func (qp *QPCleaner) Read(dest []byte) (n int, err error) {
qp.lineLen++
}

// safeWriteByte outputs a single byte, storing overflow for next read. Updates counters.
safeWriteByte := func(in byte) {
if n < destLen {
dest[n] = in
n++
} else {
qp.overflow = append(qp.overflow, in)
}
qp.lineLen++
}

// writeBytes outputs multiple bytes, storing overflow for next read. Updates counters.
writeBytes := func(in []byte) {
nc := copy(dest[n:], in)
@@ -100,8 +111,7 @@ func (qp *QPCleaner) Read(dest []byte) (n int, err error) {
return 0, err
}
if validHexBytes(hexBytes) {
dest[n] = b
n++
safeWriteByte(b)
} else {
writeBytes(escapedEquals)
}
69 changes: 68 additions & 1 deletion internal/coding/quotedprint_test.go
Original file line number Diff line number Diff line change
@@ -148,7 +148,41 @@ func TestQPCleanerLineBreakBufferFull(t *testing.T) {
t.Fatal(err)
}
if n != 1025 {
t.Errorf("Unexpected result length: %d", n)
t.Error("got:", n, "want:", 1025)
}
}

// TestQPCleanerEqualSignOverflow reads an input of 341 "abc" repetitions followed by "=3D"
// through a 1024-byte destination buffer. The first read is expected to fill the buffer
// exactly, ending in "abc="; the second is expected to deliver the remaining "\r\n=3D" together
// with io.EOF, i.e. the token is emitted after a soft line break without overrunning dest.
func TestQPCleanerEqualSignOverflow(t *testing.T) {
	input := append(bytes.Repeat([]byte("abc"), 341), []byte("=3D")...)
	inbuf := bytes.NewBuffer(input)
	qp := coding.NewQPCleaner(inbuf)

	output := make([]byte, 1024)
	n, err := qp.Read(output)
	if err != nil {
		t.Fatal(err)
	}
	if n != 1024 {
		t.Error("got:", n, "want:", 1024)
	}
	if string(output[1020:]) != "abc=" {
		t.Error("got:", string(output[1020:]), "want:", "abc=")
	}

	n, err = qp.Read(output)
	if err != io.EOF {
		// Report got/want explicitly: err may be nil here, which t.Fatal(err) would print
		// as a bare "<nil>".
		t.Fatalf("got error: %v, want: %v", err, io.EOF)
	}
	if n != 5 {
		t.Error("got:", n, "want:", 5)
	}
	output = output[:n]
	if string(output) != "\r\n=3D" {
		t.Error("got:", string(output), "want:", "\r\n=3D")
	}
}

@@ -171,6 +205,39 @@ func TestQPPeekError(t *testing.T) {
}
}

// TestQPCleanerQuotedLineLength feeds 700 "=BC" tokens (roughly two lines' worth) through
// QPCleaner and reads them back in chunks of MaxQPLineLen+2 bytes, expecting every chunk to be
// completely filled and to end in "\r\n".
func TestQPCleanerQuotedLineLength(t *testing.T) {
	qp := coding.NewQPCleaner(strings.NewReader(strings.Repeat("=BC", 700)))

	// The first pass checks that line length is counted properly even for quoted-printable
	// encoded chars; the second checks it is still correct when the overflow buffer of
	// QPCleaner is used.
	wantLen := coding.MaxQPLineLen + 2
	for pass := 0; pass < 2; pass++ {
		chunk := make([]byte, wantLen)
		n, err := qp.Read(chunk)
		if err != nil {
			t.Fatal(err)
		}
		if n != wantLen {
			t.Error("got:", n, "want:", wantLen)
		}
		if tail := string(chunk[wantLen-2:]); tail != "\r\n" {
			t.Error("got:", tail, "want:", "\r\n")
		}
	}
}

var result int

func BenchmarkQPCleaner(b *testing.B) {
39 changes: 39 additions & 0 deletions internal/stringutil/find.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package stringutil

// FindUnquoted returns the byte indexes of every occurrence of v in s that lies outside a
// quoted run, or an empty (non-nil) slice if there is none. Quoted runs are delimited by quote;
// a quote preceded by an unescaped escape character neither opens nor closes a run.
//
// Occurrences of v found after an opening quote that is never closed are reported too, after
// the unquoted ones, because an unterminated quote does not form a quoted run.
//
// s is decoded as UTF-8, so v and quote may be any rune, and the individual bytes of a
// multi-byte sequence are never mistaken for v, quote or the escape character.
func FindUnquoted(s string, v rune, quote rune) []int {
	escaped := false
	quoted := false
	indexes := make([]int, 0)
	// quotedIndexes holds matches seen since the currently open quote; they are discarded
	// once that quote is closed and kept if it never is.
	quotedIndexes := make([]int, 0)

	// Range over runes rather than bytes: converting one byte of a multi-byte sequence to a
	// rune would compare it as if it were a Latin-1 code point.
	for i, r := range s {
		switch r {
		case escape:
			escaped = !escaped // escape can escape itself.
		case quote:
			if escaped {
				// An escaped quote is literal text; it only consumes the pending escape.
				escaped = false
				continue
			}

			quoted = !quoted
			if !quoted {
				quotedIndexes = quotedIndexes[:0] // drop possible indices inside quoted segment
			}
		case v:
			escaped = false
			if quoted {
				quotedIndexes = append(quotedIndexes, i)
			} else {
				indexes = append(indexes, i)
			}
		default:
			escaped = false
		}
	}

	return append(indexes, quotedIndexes...)
}
Loading