diff --git a/ctl.go b/ctl.go index 9709e40..3e90951 100644 --- a/ctl.go +++ b/ctl.go @@ -800,7 +800,7 @@ func servectlcmd(ctx context.Context, ctl *ctl, shutdown func()) { m.Size = correctSize mr := acc.MessageReader(m) - part, err := message.EnsurePart(mr, m.Size) + part, err := message.EnsurePart(log, false, mr, m.Size) if err != nil { _, werr := fmt.Fprintf(w, "parsing message %d again: %v (continuing)\n", m.ID, err) ctl.xcheck(werr, "write") @@ -890,7 +890,7 @@ func servectlcmd(ctx context.Context, ctl *ctl, shutdown func()) { return q.ForEach(func(m store.Message) error { lastID = m.ID mr := acc.MessageReader(m) - p, err := message.EnsurePart(mr, m.Size) + p, err := message.EnsurePart(log, false, mr, m.Size) if err != nil { _, err := fmt.Fprintf(w, "parsing message %d: %v (continuing)\n", m.ID, err) ctl.xcheck(err, "write") diff --git a/dmarcrpt/parse.go b/dmarcrpt/parse.go index 5106065..a727bb3 100644 --- a/dmarcrpt/parse.go +++ b/dmarcrpt/parse.go @@ -13,6 +13,7 @@ import ( "strings" "github.com/mjl-/mox/message" + "github.com/mjl-/mox/mlog" "github.com/mjl-/mox/moxio" ) @@ -33,17 +34,17 @@ func ParseReport(r io.Reader) (*Feedback, error) { // ParseMessageReport parses an aggregate feedback report from a mail message. The // maximum message size is 15MB, the maximum report size after decompression is // 20MB. -func ParseMessageReport(r io.ReaderAt) (*Feedback, error) { +func ParseMessageReport(log *mlog.Log, r io.ReaderAt) (*Feedback, error) { // ../rfc/7489:1801 - p, err := message.Parse(&moxio.LimitAtReader{R: r, Limit: 15 * 1024 * 1024}) + p, err := message.Parse(log, true, &moxio.LimitAtReader{R: r, Limit: 15 * 1024 * 1024}) if err != nil { return nil, fmt.Errorf("parsing mail message: %s", err) } - return parseMessageReport(p) + return parseMessageReport(log, p) } -func parseMessageReport(p message.Part) (*Feedback, error) { +func parseMessageReport(log *mlog.Log, p message.Part) (*Feedback, error) { // Pretty much any mime structure is allowed. ../rfc/7489:1861 // In practice, some parties will send the report as the only (non-multipart) // content of the message. @@ -53,14 +54,14 @@ func parseMessageReport(p message.Part) (*Feedback, error) { } for { - sp, err := p.ParseNextPart() + sp, err := p.ParseNextPart(log) if err == io.EOF { return nil, ErrNoReport } if err != nil { return nil, err } - report, err := parseMessageReport(*sp) + report, err := parseMessageReport(log, *sp) if err == ErrNoReport { continue } else if err != nil || report != nil { diff --git a/dmarcrpt/parse_test.go b/dmarcrpt/parse_test.go index 96f1ed1..a93d3c0 100644 --- a/dmarcrpt/parse_test.go +++ b/dmarcrpt/parse_test.go @@ -5,8 +5,12 @@ import ( "reflect" "strings" "testing" + + "github.com/mjl-/mox/mlog" ) +var xlog = mlog.New("dmarcrpt") + const reportExample = ` @@ -130,7 +134,7 @@ func TestParseMessageReport(t *testing.T) { if err != nil { t.Fatalf("open %q: %s", p, err) } - _, err = ParseMessageReport(f) + _, err = ParseMessageReport(xlog, f) if err != nil { t.Fatalf("ParseMessageReport: %q: %s", p, err) } @@ -138,7 +142,7 @@ func TestParseMessageReport(t *testing.T) { } // No report in a non-multipart message. - _, err = ParseMessageReport(strings.NewReader("From: \r\n\r\nNo report.\r\n")) + _, err = ParseMessageReport(xlog, strings.NewReader("From: \r\n\r\nNo report.\r\n")) if err != ErrNoReport { t.Fatalf("message without report, got err %#v, expected ErrNoreport", err) } @@ -164,7 +168,7 @@ MIME-Version: 1.0 --===============5735553800636657282==-- `, "\n", "\r\n") - _, err = ParseMessageReport(strings.NewReader(multipartNoreport)) + _, err = ParseMessageReport(xlog, strings.NewReader(multipartNoreport)) if err != ErrNoReport { t.Fatalf("message without report, got err %#v, expected ErrNoreport", err) } diff --git a/dsn/dsn_test.go b/dsn/dsn_test.go index 9d431eb..01bda47 100644 --- a/dsn/dsn_test.go +++ b/dsn/dsn_test.go @@ -19,6 +19,8 @@ import ( "github.com/mjl-/mox/smtp" ) +var xlog = mlog.New("dsn") + func xparseDomain(s string) dns.Domain { d, err := dns.ParseDomain(s) if err != nil { @@ -33,7 +35,7 @@ func xparseIPDomain(s string) dns.IPDomain { func tparseMessage(t *testing.T, data []byte, nparts int) (*Message, *message.Part) { t.Helper() - m, p, err := Parse(bytes.NewReader(data)) + m, p, err := Parse(xlog, bytes.NewReader(data)) if err != nil { t.Fatalf("parsing dsn: %v", err) } diff --git a/dsn/parse.go b/dsn/parse.go index c2ab695..161508b 100644 --- a/dsn/parse.go +++ b/dsn/parse.go @@ -11,6 +11,7 @@ import ( "github.com/mjl-/mox/dns" "github.com/mjl-/mox/message" + "github.com/mjl-/mox/mlog" "github.com/mjl-/mox/smtp" ) @@ -22,17 +23,17 @@ import ( // The first return value is the machine-parsed DSN message. The second value is // the entire MIME multipart message. Use its Parts field to access the // human-readable text and optional original message/headers. -func Parse(r io.ReaderAt) (*Message, *message.Part, error) { +func Parse(log *mlog.Log, r io.ReaderAt) (*Message, *message.Part, error) { // DSNs can mix and match subtypes with and without utf-8. ../rfc/6533:441 - part, err := message.Parse(r) + part, err := message.Parse(log, false, r) if err != nil { return nil, nil, fmt.Errorf("parsing message: %v", err) } if part.MediaType != "MULTIPART" || part.MediaSubType != "REPORT" { return nil, nil, fmt.Errorf(`message has content-type %q, must have "message/report"`, strings.ToLower(part.MediaType+"/"+part.MediaSubType)) } - err = part.Walk(nil) + err = part.Walk(log, nil) if err != nil { return nil, nil, fmt.Errorf("parsing message parts: %v", err) } diff --git a/imapserver/append_test.go b/imapserver/append_test.go index d74b408..fbde472 100644 --- a/imapserver/append_test.go +++ b/imapserver/append_test.go @@ -55,7 +55,7 @@ func TestAppend(t *testing.T) { tc3.transactf("ok", "noop") tc3.xuntagged() // Inbox is not selected, nothing to report. - tc2.transactf("ok", "append inbox (\\Seen) \" 1-Jan-2022 10:10:00 +0100\" UTF8 ({34+}\r\ncontent-type: text/plain;;\r\n\r\ntest)") + tc2.transactf("ok", "append inbox (\\Seen) \" 1-Jan-2022 10:10:00 +0100\" UTF8 ({47+}\r\ncontent-type: just completely invalid;;\r\n\r\ntest)") tc2.xuntagged(imapclient.UntaggedExists(2)) tc2.xcodeArg(imapclient.CodeAppendUID{UIDValidity: 1, UID: 2}) diff --git a/import.go b/import.go index 5e8f878..b3d42fa 100644 --- a/import.go +++ b/import.go @@ -325,7 +325,7 @@ func importctl(ctx context.Context, ctl *ctl, mbox bool) { mb.Add(m.MailboxCounts()) // Parse message and store parsed information for later fast retrieval. - p, err := message.EnsurePart(msgf, m.Size) + p, err := message.EnsurePart(ctl.log, false, msgf, m.Size) if err != nil { ctl.log.Infox("parsing message, continuing", err, mlog.Field("path", origPath)) } diff --git a/junk.go b/junk.go index 83ba273..409a9e3 100644 --- a/junk.go +++ b/junk.go @@ -309,6 +309,8 @@ func cmdJunkPlay(c *cmd) { var nbad, nnodate, nham, nspam, nsent int + jlog := mlog.New("junkplay") + scanDir := func(dir string, ham, sent bool) { for _, name := range listDir(dir) { path := dir + "/" + name @@ -316,7 +318,7 @@ func cmdJunkPlay(c *cmd) { xcheckf(err, "open %q", path) fi, err := mf.Stat() xcheckf(err, "stat %q", path) - p, err := message.EnsurePart(mf, fi.Size()) + p, err := message.EnsurePart(jlog, false, mf, fi.Size()) if err != nil { nbad++ if err := mf.Close(); err != nil { @@ -396,7 +398,7 @@ func cmdJunkPlay(c *cmd) { }() fi, err := mf.Stat() xcheckf(err, "stat %q", path) - p, err := message.EnsurePart(mf, fi.Size()) + p, err := message.EnsurePart(jlog, false, mf, fi.Size()) if err != nil { log.Printf("bad sent message %q: %s", path, err) return diff --git a/junk/filter.go b/junk/filter.go index afceeee..b96a21f 100644 --- a/junk/filter.go +++ b/junk/filter.go @@ -501,7 +501,7 @@ func (f *Filter) ClassifyMessagePath(ctx context.Context, path string) (probabil } func (f *Filter) ClassifyMessageReader(ctx context.Context, mf io.ReaderAt, size int64) (probability float64, words map[string]struct{}, nham, nspam int, rerr error) { - m, err := message.EnsurePart(mf, size) + m, err := message.EnsurePart(f.log, false, mf, size) if err != nil && errors.Is(err, message.ErrBadContentType) { // Invalid content-type header is a sure sign of spam. //f.log.Infox("parsing content", err) @@ -567,7 +567,7 @@ func (f *Filter) Train(ctx context.Context, ham bool, words map[string]struct{}) } func (f *Filter) TrainMessage(ctx context.Context, r io.ReaderAt, size int64, ham bool) error { - p, _ := message.EnsurePart(r, size) + p, _ := message.EnsurePart(f.log, false, r, size) words, err := f.ParseMessage(p) if err != nil { return fmt.Errorf("parsing mail contents: %v", err) @@ -576,7 +576,7 @@ func (f *Filter) TrainMessage(ctx context.Context, r io.ReaderAt, size int64, ha } func (f *Filter) UntrainMessage(ctx context.Context, r io.ReaderAt, size int64, ham bool) error { - p, _ := message.EnsurePart(r, size) + p, _ := message.EnsurePart(f.log, false, r, size) words, err := f.ParseMessage(p) if err != nil { return fmt.Errorf("parsing mail contents: %v", err) diff --git a/junk/parse.go b/junk/parse.go index c480444..d5b8859 100644 --- a/junk/parse.go +++ b/junk/parse.go @@ -31,7 +31,7 @@ func (f *Filter) tokenizeMail(path string) (bool, map[string]struct{}, error) { if err != nil { return false, nil, err } - p, _ := message.EnsurePart(mf, fi.Size()) + p, _ := message.EnsurePart(f.log, false, mf, fi.Size()) words, err := f.ParseMessage(p) return true, words, err } diff --git a/main.go b/main.go index 9cf7acf..e4903a4 100644 --- a/main.go +++ b/main.go @@ -1443,11 +1443,13 @@ headers prepended. c.Usage() } + clog := mlog.New("dkimsign") + msgf, err := os.Open(args[0]) xcheckf(err, "open message") defer msgf.Close() - p, err := message.Parse(msgf) + p, err := message.Parse(clog, true, msgf) xcheckf(err, "parsing message") if len(p.Envelope.From) != 1 { @@ -1591,7 +1593,7 @@ can be found in message headers. data, err := io.ReadAll(os.Stdin) xcheckf(err, "read message") - dmarcFrom, _, err := message.From(bytes.NewReader(data)) + dmarcFrom, _, err := message.From(mlog.New("dmarcverify"), false, bytes.NewReader(data)) xcheckf(err, "extract dmarc from message") const ignoreTestMode = false @@ -1621,10 +1623,12 @@ understand email deliverability problems. c.Usage() } + clog := mlog.New("dmarcparsereportmsg") + for _, arg := range args { f, err := os.Open(arg) xcheckf(err, "open %q", arg) - feedback, err := dmarcrpt.ParseMessageReport(f) + feedback, err := dmarcrpt.ParseMessageReport(clog, f) xcheckf(err, "parse report in %q", arg) meta := feedback.ReportMetadata fmt.Printf("Report: period %s-%s, organisation %q, reportID %q, %s\n", time.Unix(meta.DateRange.Begin, 0).UTC().String(), time.Unix(meta.DateRange.End, 0).UTC().String(), meta.OrgName, meta.ReportID, meta.Email) @@ -1673,9 +1677,11 @@ func cmdDMARCDBAddReport(c *cmd) { mustLoadConfig() + clog := mlog.New("dmarcdbaddreport") + fromdomain := xparseDomain(args[0], "domain") fmt.Fprintln(os.Stderr, "reading report message from stdin") - report, err := dmarcrpt.ParseMessageReport(os.Stdin) + report, err := dmarcrpt.ParseMessageReport(clog, os.Stdin) xcheckf(err, "parse message") err = dmarcdb.AddReport(context.Background(), report, fromdomain) xcheckf(err, "add dmarc report") @@ -1712,10 +1718,12 @@ The report is printed in formatted JSON. c.Usage() } + clog := mlog.New("tlsrptparsereportmsg") + for _, arg := range args { f, err := os.Open(arg) xcheckf(err, "open %q", arg) - report, err := tlsrpt.ParseMessage(f) + report, err := tlsrpt.ParseMessage(clog, f) xcheckf(err, "parse report in %q", arg) // todo future: only print the highlights? enc := json.NewEncoder(os.Stdout) @@ -1855,11 +1863,13 @@ func cmdTLSRPTDBAddReport(c *cmd) { mustLoadConfig() + clog := mlog.New("tlsrptdbaddreport") + // First read message, to get the From-header. Then parse it as TLSRPT. fmt.Fprintln(os.Stderr, "reading report message from stdin") buf, err := io.ReadAll(os.Stdin) xcheckf(err, "reading message") - part, err := message.Parse(bytes.NewReader(buf)) + part, err := message.Parse(clog, true, bytes.NewReader(buf)) xcheckf(err, "parsing message") if part.Envelope == nil || len(part.Envelope.From) != 1 { log.Fatalf("message must have one From-header") @@ -1867,7 +1877,7 @@ func cmdTLSRPTDBAddReport(c *cmd) { from := part.Envelope.From[0] domain := xparseDomain(from.Host, "domain") - report, err := tlsrpt.ParseMessage(bytes.NewReader(buf)) + report, err := tlsrpt.ParseMessage(clog, bytes.NewReader(buf)) xcheckf(err, "parsing tls report in message") mailfrom := from.User + "@" + from.Host // todo future: should escape and such @@ -2276,6 +2286,8 @@ func cmdEnsureParsed(c *cmd) { c.Usage() } + clog := mlog.New("ensureparsed") + mustLoadConfig() a, err := store.OpenAccount(args[0]) xcheckf(err, "open account") @@ -2298,7 +2310,7 @@ func cmdEnsureParsed(c *cmd) { } for _, m := range l { mr := a.MessageReader(m) - p, err := message.EnsurePart(mr, m.Size) + p, err := message.EnsurePart(clog, false, mr, m.Size) if err != nil { log.Printf("parsing message %d: %v (continuing)", m.ID, err) } @@ -2351,13 +2363,15 @@ func cmdMessageParse(c *cmd) { c.Usage() } + clog := mlog.New("messageparse") + f, err := os.Open(args[0]) xcheckf(err, "open") defer f.Close() - part, err := message.Parse(f) + part, err := message.Parse(clog, false, f) xcheckf(err, "parsing message") - err = part.Walk(nil) + err = part.Walk(clog, nil) xcheckf(err, "parsing nested parts") enc := json.NewEncoder(os.Stdout) enc.SetIndent("", "\t") diff --git a/message/from.go b/message/from.go index 9550479..66a3c19 100644 --- a/message/from.go +++ b/message/from.go @@ -6,6 +6,7 @@ import ( "net/textproto" "github.com/mjl-/mox/dns" + "github.com/mjl-/mox/mlog" "github.com/mjl-/mox/smtp" ) @@ -16,12 +17,12 @@ import ( // From headers may be present. From returns an error if there is not exactly // one address. This address can be used for evaluating a DMARC policy against // SPF and DKIM results. -func From(r io.ReaderAt) (raddr smtp.Address, header textproto.MIMEHeader, rerr error) { +func From(log *mlog.Log, strict bool, r io.ReaderAt) (raddr smtp.Address, header textproto.MIMEHeader, rerr error) { // ../rfc/7489:1243 // todo: only allow utf8 if enabled in session/message? - p, err := Parse(r) + p, err := Parse(log, strict, r) if err != nil { // todo: should we continue with p, perhaps headers can be parsed? return raddr, nil, fmt.Errorf("parsing message: %v", err) diff --git a/message/part.go b/message/part.go index f193bc5..b5894a8 100644 --- a/message/part.go +++ b/message/part.go @@ -7,7 +7,6 @@ package message // todo: should we allow base64 messages where a line starts with a space? and possibly more whitespace. is happening in messages. coreutils base64 accepts it, encoding/base64 does not. // todo: handle comments in headers? // todo: should we just always store messages with \n instead of \r\n? \r\n seems easier for use with imap. -// todo: is a header always \r\n\r\n-separated? or is \r\n enough at the beginning of a file? because what would this mean: "\r\ndata"? data isn't a header. // todo: can use a cleanup import ( @@ -32,8 +31,6 @@ import ( "github.com/mjl-/mox/smtp" ) -var xlog = mlog.New("message") - var ( ErrBadContentType = errors.New("bad content-type") ) @@ -83,6 +80,7 @@ type Part struct { lastBoundOffset int64 // Start of header of last/previous part. Used to skip a part if ParseNextPart is called and nextBoundOffset is -1. parent *Part // Parent part, for getting bound from, and setting nextBoundOffset when a part has finished reading. Only for subparts, not top-level parts. bound []byte // Only set if valid multipart with boundary, includes leading --, excludes \r\n. + strict bool // If set, valid crlf line endings are verified when reading body. } // todo: have all Content* fields in Part? @@ -112,18 +110,24 @@ type Address struct { // Parse reads the headers of the mail message and returns a part. // A part provides access to decoded and raw contents of a message and its multiple parts. -func Parse(r io.ReaderAt) (Part, error) { - return newPart(r, 0, nil) +// +// If strict is set, fewer attempts are made to continue parsing when errors are +// encountered, such as with invalid content-type headers or bare carriage returns. +func Parse(log *mlog.Log, strict bool, r io.ReaderAt) (Part, error) { + return newPart(log, strict, r, 0, nil) } // EnsurePart parses a part as with Parse, but ensures a usable part is always // returned, even if error is non-nil. If a parse error occurs, the message is // returned as application/octet-stream, and headers can still be read if they // were valid. -func EnsurePart(r io.ReaderAt, size int64) (Part, error) { - p, err := Parse(r) +// +// If strict is set, fewer attempts are made to continue parsing when errors are +// encountered, such as with invalid content-type headers or bare carriage returns. +func EnsurePart(log *mlog.Log, strict bool, r io.ReaderAt, size int64) (Part, error) { + p, err := Parse(log, strict, r) if err == nil { - err = p.Walk(nil) + err = p.Walk(log, nil) } if err != nil { np, err2 := fallbackPart(p, r, size) @@ -183,7 +187,7 @@ func (p *Part) SetMessageReaderAt() error { } // Walk through message, decoding along the way, and collecting mime part offsets and sizes, and line counts. -func (p *Part) Walk(parent *Part) error { +func (p *Part) Walk(log *mlog.Log, parent *Part) error { if len(p.bound) == 0 { if p.MediaType == "MESSAGE" && (p.MediaSubType == "RFC822" || p.MediaSubType == "GLOBAL") { // todo: don't read whole submessage in memory... @@ -192,11 +196,11 @@ func (p *Part) Walk(parent *Part) error { return err } br := bytes.NewReader(buf) - mp, err := Parse(br) + mp, err := Parse(log, p.strict, br) if err != nil { return fmt.Errorf("parsing embedded message: %w", err) } - if err := mp.Walk(nil); err != nil { + if err := mp.Walk(log, nil); err != nil { // If this is a DSN and we are not in pedantic mode, accept unexpected end of // message. This is quite common because MTA's sometimes just truncate the original // message in a place that makes the message invalid. @@ -218,14 +222,14 @@ func (p *Part) Walk(parent *Part) error { } for { - pp, err := p.ParseNextPart() + pp, err := p.ParseNextPart(log) if err == io.EOF { return nil } if err != nil { return err } - if err := pp.Walk(p); err != nil { + if err := pp.Walk(log, p); err != nil { return err } } @@ -239,7 +243,7 @@ func (p *Part) String() string { // newPart parses a new part, which can be the top-level message. // offset is the bound offset for parts, and the start of message for top-level messages. parent indicates if this is a top-level message or sub-part. // If an error occurs, p's exported values can still be relevant. EnsurePart uses these values. -func newPart(r io.ReaderAt, offset int64, parent *Part) (p Part, rerr error) { +func newPart(log *mlog.Log, strict bool, r io.ReaderAt, offset int64, parent *Part) (p Part, rerr error) { if r == nil { panic("nil reader") } @@ -248,9 +252,10 @@ func newPart(r io.ReaderAt, offset int64, parent *Part) (p Part, rerr error) { EndOffset: -1, r: r, parent: parent, + strict: strict, } - b := &bufAt{r: r, offset: offset} + b := &bufAt{strict: strict, r: r, offset: offset} if parent != nil { p.BoundaryOffset = offset @@ -297,16 +302,46 @@ func newPart(r io.ReaderAt, offset int64, parent *Part) (p Part, rerr error) { ct := p.header.Get("Content-Type") mt, params, err := mime.ParseMediaType(ct) if err != nil && ct != "" { - return p, fmt.Errorf("%w: %s: %q", ErrBadContentType, err, ct) - } - if mt != "" { + if moxvar.Pedantic || strict { + return p, fmt.Errorf("%w: %s: %q", ErrBadContentType, err, ct) + } + + // Try parsing just a content-type, ignoring parameters. + // ../rfc/2045:628 + ct = strings.TrimSpace(strings.SplitN(ct, ";", 2)[0]) + t := strings.SplitN(ct, "/", 2) + isToken := func(s string) bool { + const separators = `()<>@,;:\\"/[]?= ` // ../rfc/2045:663 + for _, c := range s { + if c < 0x20 || c >= 0x80 || strings.ContainsRune(separators, c) { + return false + } + } + return len(s) > 0 + } + // We cannot recover content-type of multipart, we won't have a boundary. + if len(t) == 2 && isToken(t[0]) && !strings.EqualFold(t[0], "multipart") && isToken(t[1]) { + p.MediaType = strings.ToUpper(t[0]) + p.MediaSubType = strings.ToUpper(t[1]) + } else { + p.MediaType = "APPLICATION" + p.MediaSubType = "OCTET-STREAM" + } + log.Debugx("malformed content-type, attempting to recover and continuing", err, mlog.Field("contenttype", p.header.Get("Content-Type")), mlog.Field("mediatype", p.MediaType), mlog.Field("mediasubtype", p.MediaSubType)) + } else if mt != "" { t := strings.SplitN(strings.ToUpper(mt), "/", 2) if len(t) != 2 { - return p, fmt.Errorf("bad content-type: %q (content-type %q)", mt, ct) + if moxvar.Pedantic || strict { + return p, fmt.Errorf("bad content-type: %q (content-type %q)", mt, ct) + } + log.Debug("malformed media-type, ignoring and continuing", mlog.Field("type", mt)) + p.MediaType = "APPLICATION" + p.MediaSubType = "OCTET-STREAM" + } else { + p.MediaType = t[0] + p.MediaSubType = t[1] + p.ContentTypeParams = params } - p.MediaType = t[0] - p.MediaSubType = t[1] - p.ContentTypeParams = params } p.ContentID = p.header.Get("Content-Id") @@ -314,7 +349,7 @@ func newPart(r io.ReaderAt, offset int64, parent *Part) (p Part, rerr error) { p.ContentTransferEncoding = strings.ToUpper(p.header.Get("Content-Transfer-Encoding")) if parent == nil { - p.Envelope, err = parseEnvelope(mail.Header(p.header)) + p.Envelope, err = parseEnvelope(log, mail.Header(p.header)) if err != nil { return p, err } @@ -411,7 +446,7 @@ var wordDecoder = mime.WordDecoder{ }, } -func parseEnvelope(h mail.Header) (*Envelope, error) { +func parseEnvelope(log *mlog.Log, h mail.Header) (*Envelope, error) { date, _ := h.Date() // We currently marshal this field to JSON. But JSON cannot represent all @@ -433,19 +468,19 @@ func parseEnvelope(h mail.Header) (*Envelope, error) { env := &Envelope{ date, subject, - parseAddressList(h, "from"), - parseAddressList(h, "sender"), - parseAddressList(h, "reply-to"), - parseAddressList(h, "to"), - parseAddressList(h, "cc"), - parseAddressList(h, "bcc"), + parseAddressList(log, h, "from"), + parseAddressList(log, h, "sender"), + parseAddressList(log, h, "reply-to"), + parseAddressList(log, h, "to"), + parseAddressList(log, h, "cc"), + parseAddressList(log, h, "bcc"), h.Get("In-Reply-To"), h.Get("Message-Id"), } return env, nil } -func parseAddressList(h mail.Header, k string) []Address { +func parseAddressList(log *mlog.Log, h mail.Header, k string) []Address { l, err := h.AddressList(k) if err != nil { return nil @@ -457,7 +492,7 @@ func parseAddressList(h mail.Header, k string) []Address { addr, err := smtp.ParseAddress(a.Address) if err != nil { // todo: pass a ctx to this function so we can log with cid. - xlog.Infox("parsing address", err, mlog.Field("address", a.Address)) + log.Infox("parsing address (continuing)", err, mlog.Field("address", a.Address)) } else { user = addr.Localpart.String() host = addr.Domain.ASCII @@ -470,7 +505,7 @@ func parseAddressList(h mail.Header, k string) []Address { // ParseNextPart parses the next (sub)part of this multipart message. // ParseNextPart returns io.EOF and a nil part when there are no more parts. // Only used for initial parsing of message. Once parsed, use p.Parts. -func (p *Part) ParseNextPart() (*Part, error) { +func (p *Part) ParseNextPart(log *mlog.Log) (*Part, error) { if len(p.bound) == 0 { return nil, errNotMultipart } @@ -479,7 +514,7 @@ func (p *Part) ParseNextPart() (*Part, error) { panic("access not sequential") } // Set nextBoundOffset by fully reading the last part. - last, err := newPart(p.r, p.lastBoundOffset, p) + last, err := newPart(log, p.strict, p.r, p.lastBoundOffset, p) if err != nil { return nil, err } @@ -490,7 +525,7 @@ func (p *Part) ParseNextPart() (*Part, error) { return nil, fmt.Errorf("internal error: reading part did not set nextBoundOffset") } } - b := &bufAt{r: p.r, offset: p.nextBoundOffset} + b := &bufAt{strict: p.strict, r: p.r, offset: p.nextBoundOffset} // todo: should we require a crlf on final closing bound? we don't require it because some message/rfc822 don't have a crlf after their closing boundary, so those messages don't end in crlf. line, crlf, err := b.ReadLine(false) if err != nil { @@ -523,7 +558,7 @@ func (p *Part) ParseNextPart() (*Part, error) { boundOffset := p.nextBoundOffset p.lastBoundOffset = boundOffset p.nextBoundOffset = -1 - np, err := newPart(p.r, boundOffset, p) + np, err := newPart(log, p.strict, p.r, boundOffset, p) if err != nil { return nil, err } @@ -623,13 +658,37 @@ func (p *Part) RawReader() io.Reader { panic("missing reader") } if p.EndOffset >= 0 { - return io.NewSectionReader(p.r, p.BodyOffset, p.EndOffset-p.BodyOffset) + return &crlfReader{strict: p.strict, r: io.NewSectionReader(p.r, p.BodyOffset, p.EndOffset-p.BodyOffset)} } p.RawLineCount = 0 if p.parent == nil { - return &offsetReader{p, p.BodyOffset, true, false} + return &offsetReader{p, p.BodyOffset, p.strict, true, false} } - return &boundReader{p: p, b: &bufAt{r: p.r, offset: p.BodyOffset}, prevlf: true} + return &boundReader{p: p, b: &bufAt{strict: p.strict, r: p.r, offset: p.BodyOffset}, prevlf: true} +} + +// crlfReader verifies there are no bare newlines and optionally no bare carriage returns. +type crlfReader struct { + r io.Reader + strict bool + prevcr bool +} + +func (r *crlfReader) Read(buf []byte) (int, error) { + n, err := r.r.Read(buf) + if err == nil || err == io.EOF { + for _, b := range buf[:n] { + if b == '\n' && !r.prevcr { + err = errBareLF + break + } else if b != '\n' && r.prevcr && (r.strict || moxvar.Pedantic) { + err = errBareCR + break + } + r.prevcr = b == '\r' + } + } + return n, err } // bufAt is a buffered reader on an underlying ReaderAt. @@ -637,6 +696,7 @@ func (p *Part) RawReader() io.Reader { type bufAt struct { offset int64 // Offset in r currently consumed, i.e. not including any buffered data. + strict bool r io.ReaderAt buf []byte // Buffered data. nbuf int // Valid bytes in buf. @@ -698,7 +758,7 @@ func (b *bufAt) line(consume, requirecrlf bool) (buf []byte, crlf bool, err erro } i++ if i >= b.nbuf || b.buf[i] != '\n' { - if moxvar.Pedantic { + if b.strict || moxvar.Pedantic { return nil, false, errBareCR } continue @@ -743,6 +803,7 @@ func (b *bufAt) PeekByte() (byte, error) { type offsetReader struct { p *Part offset int64 + strict bool prevlf bool prevcr bool } @@ -759,7 +820,7 @@ func (r *offsetReader) Read(buf []byte) (int, error) { if err == nil || err == io.EOF { if c == '\n' && !r.prevcr { err = errBareLF - } else if c != '\n' && r.prevcr && moxvar.Pedantic { + } else if c != '\n' && r.prevcr && (r.strict || moxvar.Pedantic) { err = errBareCR } } @@ -784,7 +845,6 @@ type boundReader struct { nbuf int // Number of valid bytes in buf. crlf []byte // Possible crlf, to be returned if we do not yet encounter a boundary. prevlf bool // If last char returned was a newline. For counting lines. - prevcr bool } func (b *boundReader) Read(buf []byte) (count int, rerr error) { @@ -795,15 +855,7 @@ func (b *boundReader) Read(buf []byte) (count int, rerr error) { if b.prevlf { b.p.RawLineCount++ } - if rerr == nil || rerr == io.EOF { - if c == '\n' && !b.prevcr { - rerr = errBareLF - } else if c != '\n' && b.prevcr && moxvar.Pedantic { - rerr = errBareCR - } - } b.prevlf = c == '\n' - b.prevcr = c == '\r' } } }() diff --git a/message/part_test.go b/message/part_test.go index 9e92460..0d19ff9 100644 --- a/message/part_test.go +++ b/message/part_test.go @@ -11,9 +11,12 @@ import ( "strings" "testing" + "github.com/mjl-/mox/mlog" "github.com/mjl-/mox/moxvar" ) +var xlog = mlog.New("message") + func tcheck(t *testing.T, err error, msg string) { t.Helper() if err != nil { @@ -37,7 +40,7 @@ func tfail(t *testing.T, err, expErr error) { func TestEmptyHeader(t *testing.T) { s := "\r\nx" - p, err := EnsurePart(strings.NewReader(s), int64(len(s))) + p, err := EnsurePart(xlog, true, strings.NewReader(s), int64(len(s))) tcheck(t, err, "parse empty headers") buf, err := io.ReadAll(p.Reader()) tcheck(t, err, "read") @@ -48,15 +51,85 @@ func TestEmptyHeader(t *testing.T) { } func TestBadContentType(t *testing.T) { + expBody := "test" + + // Pedantic is like strict. + moxvar.Pedantic = true s := "content-type: text/html;;\r\n\r\ntest" - p, err := EnsurePart(strings.NewReader(s), int64(len(s))) + p, err := EnsurePart(xlog, false, strings.NewReader(s), int64(len(s))) tfail(t, err, ErrBadContentType) buf, err := io.ReadAll(p.Reader()) tcheck(t, err, "read") - expBody := "test" tcompare(t, string(buf), expBody) tcompare(t, p.MediaType, "APPLICATION") tcompare(t, p.MediaSubType, "OCTET-STREAM") + moxvar.Pedantic = false + + // Strict + s = "content-type: text/html;;\r\n\r\ntest" + p, err = EnsurePart(xlog, true, strings.NewReader(s), int64(len(s))) + tfail(t, err, ErrBadContentType) + buf, err = io.ReadAll(p.Reader()) + tcheck(t, err, "read") + tcompare(t, string(buf), expBody) + tcompare(t, p.MediaType, "APPLICATION") + tcompare(t, p.MediaSubType, "OCTET-STREAM") + + // Non-strict but unrecoverable content-type. + s = "content-type: not a content type;;\r\n\r\ntest" + p, err = EnsurePart(xlog, false, strings.NewReader(s), int64(len(s))) + tcheck(t, err, "parsing message with bad but recoverable content-type") + buf, err = io.ReadAll(p.Reader()) + tcheck(t, err, "read") + tcompare(t, string(buf), expBody) + tcompare(t, p.MediaType, "APPLICATION") + tcompare(t, p.MediaSubType, "OCTET-STREAM") + + // We try to use only the content-type, typically better than application/octet-stream. + s = "content-type: text/html;;\r\n\r\ntest" + p, err = EnsurePart(xlog, false, strings.NewReader(s), int64(len(s))) + tcheck(t, err, "parsing message with bad but recoverable content-type") + buf, err = io.ReadAll(p.Reader()) + tcheck(t, err, "read") + tcompare(t, string(buf), expBody) + tcompare(t, p.MediaType, "TEXT") + tcompare(t, p.MediaSubType, "HTML") + + // Not recovering multipart, we won't have a boundary. + s = "content-type: multipart/mixed;;\r\n\r\ntest" + p, err = EnsurePart(xlog, false, strings.NewReader(s), int64(len(s))) + tcheck(t, err, "parsing message with bad but recoverable content-type") + buf, err = io.ReadAll(p.Reader()) + tcheck(t, err, "read") + tcompare(t, string(buf), expBody) + tcompare(t, p.MediaType, "APPLICATION") + tcompare(t, p.MediaSubType, "OCTET-STREAM") +} + +func TestBareCR(t *testing.T) { + s := "content-type: text/html\r\n\r\nbare\rcr\r\n" + expBody := "bare\rcr\r\n" + + // Pedantic is like strict. + moxvar.Pedantic = true + p, err := EnsurePart(xlog, false, strings.NewReader(s), int64(len(s))) + tfail(t, err, errBareCR) + _, err = io.ReadAll(p.Reader()) + tfail(t, err, errBareCR) + moxvar.Pedantic = false + + // Strict. + p, err = EnsurePart(xlog, true, strings.NewReader(s), int64(len(s))) + tfail(t, err, errBareCR) + _, err = io.ReadAll(p.Reader()) + tcheck(t, err, "read fallback part without error") + + // Non-strict allows bare cr. + p, err = EnsurePart(xlog, false, strings.NewReader(s), int64(len(s))) + tcheck(t, err, "parse") + buf, err := io.ReadAll(p.Reader()) + tcheck(t, err, "read") + tcompare(t, string(buf), expBody) } var basicMsg = strings.ReplaceAll(`From: @@ -68,7 +141,7 @@ aGkK func TestBasic(t *testing.T) { r := strings.NewReader(basicMsg) - p, err := Parse(r) + p, err := Parse(xlog, true, r) tcheck(t, err, "new reader") buf, err := io.ReadAll(p.RawReader()) @@ -103,7 +176,7 @@ Hello Joe, do you think we can meet at 3:30 tomorrow? func TestBasic2(t *testing.T) { r := strings.NewReader(basicMsg2) - p, err := Parse(r) + p, err := Parse(xlog, true, r) tcheck(t, err, "new reader") buf, err := io.ReadAll(p.RawReader()) @@ -123,9 +196,9 @@ func TestBasic2(t *testing.T) { } r = strings.NewReader(basicMsg2) - p, err = Parse(r) + p, err = Parse(xlog, true, r) tcheck(t, err, "new reader") - err = p.Walk(nil) + err = p.Walk(xlog, nil) tcheck(t, err, "walk") if p.RawLineCount != 2 { t.Fatalf("basic message, got %d lines, expected 2", p.RawLineCount) @@ -164,25 +237,25 @@ This is the epilogue. It is also to be ignored. func TestMime(t *testing.T) { // from ../rfc/2046:1148 r := strings.NewReader(mimeMsg) - p, err := Parse(r) + p, err := Parse(xlog, true, r) tcheck(t, err, "new reader") if len(p.bound) == 0 { t.Fatalf("got no bound, expected bound for mime message") } - pp, err := p.ParseNextPart() + pp, err := p.ParseNextPart(xlog) tcheck(t, err, "next part") buf, err := io.ReadAll(pp.Reader()) tcheck(t, err, "read all") tcompare(t, string(buf), "This is implicitly typed plain US-ASCII text.\r\nIt does NOT end with a linebreak.") - pp, err = p.ParseNextPart() + pp, err = p.ParseNextPart(xlog) tcheck(t, err, "next part") buf, err = io.ReadAll(pp.Reader()) tcheck(t, err, "read all") tcompare(t, string(buf), "This is explicitly typed plain US-ASCII text.\r\nIt DOES end with a linebreak.\r\n") - _, err = p.ParseNextPart() + _, err = p.ParseNextPart(xlog) tcompare(t, err, io.EOF) if len(p.Parts) != 2 { @@ -201,33 +274,38 @@ func TestLongLine(t *testing.T) { for i := range line { line[i] = 'a' } - _, err := Parse(bytes.NewReader(line)) + _, err := Parse(xlog, true, bytes.NewReader(line)) tfail(t, err, errLineTooLong) } -func TestHalfCrLf(t *testing.T) { - parse := func(s string) error { - p, err := Parse(strings.NewReader(s)) +func TestBareCrLf(t *testing.T) { + parse := func(strict bool, s string) error { + p, err := Parse(xlog, strict, strings.NewReader(s)) if err != nil { return err } - return p.Walk(nil) + return p.Walk(xlog, nil) } - err := parse("subject: test\ntest\r\n") + err := parse(false, "subject: test\ntest\r\n") tfail(t, err, errBareLF) - err = parse("\r\ntest\ntest\r\n") + err = parse(false, "\r\ntest\ntest\r\n") tfail(t, err, errBareLF) moxvar.Pedantic = true - err = parse("subject: test\rtest\r\n") + err = parse(false, "subject: test\rtest\r\n") tfail(t, err, errBareCR) - err = parse("\r\ntest\rtest\r\n") + err = parse(false, "\r\ntest\rtest\r\n") tfail(t, err, errBareCR) moxvar.Pedantic = false - err = parse("subject: test\rtest\r\n") + err = parse(true, "subject: test\rtest\r\n") + tfail(t, err, errBareCR) + err = parse(true, "\r\ntest\rtest\r\n") + tfail(t, err, errBareCR) + + err = parse(false, "subject: test\rtest\r\n") tcheck(t, err, "header with bare cr") - err = parse("\r\ntest\rtest\r\n") + err = parse(false, "\r\ntest\rtest\r\n") tcheck(t, err, "body with bare cr") } @@ -238,25 +316,25 @@ func TestMissingClosingBoundary(t *testing.T) { test `, "\n", "\r\n") - msg, err := Parse(strings.NewReader(message)) + msg, err := Parse(xlog, false, strings.NewReader(message)) tcheck(t, err, "new reader") err = walkmsg(&msg) tfail(t, err, errMissingClosingBoundary) - msg, _ = Parse(strings.NewReader(message)) - err = msg.Walk(nil) + msg, _ = Parse(xlog, false, strings.NewReader(message)) + err = msg.Walk(xlog, nil) tfail(t, err, errMissingClosingBoundary) } func TestHeaderEOF(t *testing.T) { message := "header: test" - _, err := Parse(strings.NewReader(message)) + _, err := Parse(xlog, false, strings.NewReader(message)) tfail(t, err, errUnexpectedEOF) } func TestBodyEOF(t *testing.T) { message := "header: test\r\n\r\ntest" - msg, err := Parse(strings.NewReader(message)) + msg, err := Parse(xlog, true, strings.NewReader(message)) tcheck(t, err, "new reader") buf, err := io.ReadAll(msg.Reader()) tcheck(t, err, "read body") @@ -287,7 +365,7 @@ test `, "\n", "\r\n") - msg, err := Parse(strings.NewReader(message)) + msg, err := Parse(xlog, false, strings.NewReader(message)) tcheck(t, err, "new reader") enforceSequential = true defer func() { @@ -296,8 +374,8 @@ test err = walkmsg(&msg) tcheck(t, err, "walkmsg") - msg, _ = Parse(strings.NewReader(message)) - err = msg.Walk(nil) + msg, _ = Parse(xlog, false, strings.NewReader(message)) + err = msg.Walk(xlog, nil) tcheck(t, err, "msg.Walk") } @@ -374,7 +452,7 @@ Content-Transfer-Encoding: Quoted-printable --unique-boundary-1-- `, "\n", "\r\n") - msg, err := Parse(strings.NewReader(nestedMessage)) + msg, err := Parse(xlog, true, strings.NewReader(nestedMessage)) tcheck(t, err, "new reader") enforceSequential = true defer func() { @@ -399,8 +477,8 @@ Content-Transfer-Encoding: Quoted-printable t.Fatalf("got %q, expected %q", buf, exp) } - msg, _ = Parse(strings.NewReader(nestedMessage)) - err = msg.Walk(nil) + msg, _ = Parse(xlog, false, strings.NewReader(nestedMessage)) + err = msg.Walk(xlog, nil) tcheck(t, err, "msg.Walk") } @@ -440,7 +518,7 @@ func walk(path string) error { return err } defer r.Close() - msg, err := Parse(r) + msg, err := Parse(xlog, false, r) if err != nil { return err } @@ -460,7 +538,7 @@ func walkmsg(msg *Part) error { } if msg.MediaType == "MESSAGE" && (msg.MediaSubType == "RFC822" || msg.MediaSubType == "GLOBAL") { - mp, err := Parse(bytes.NewReader(buf)) + mp, err := Parse(xlog, false, bytes.NewReader(buf)) if err != nil { return err } @@ -488,7 +566,7 @@ func walkmsg(msg *Part) error { } for { - pp, err := msg.ParseNextPart() + pp, err := msg.ParseNextPart(xlog) if err == io.EOF { return nil } @@ -507,7 +585,7 @@ func TestEmbedded(t *testing.T) { tcheck(t, err, "open") fi, err := f.Stat() tcheck(t, err, "stat") - _, err = EnsurePart(f, fi.Size()) + _, err = EnsurePart(xlog, false, f, fi.Size()) tcheck(t, err, "parse") } @@ -516,6 +594,6 @@ func TestEmbedded2(t *testing.T) { tcheck(t, err, "readfile") buf = bytes.ReplaceAll(buf, []byte("\n"), []byte("\r\n")) - _, err = EnsurePart(bytes.NewReader(buf), int64(len(buf))) + _, err = EnsurePart(xlog, false, bytes.NewReader(buf), int64(len(buf))) tfail(t, err, nil) } diff --git a/smtpserver/analyze.go b/smtpserver/analyze.go index 3c05867..294e010 100644 --- a/smtpserver/analyze.go +++ b/smtpserver/analyze.go @@ -141,7 +141,7 @@ func analyze(ctx context.Context, log *mlog.Log, resolver dns.Resolver, d delive // Messages with DMARC aggregate reports must have a dmarc pass. ../rfc/7489:1866 if d.dmarcResult.Status != dmarc.StatusPass { log.Info("received dmarc report without dmarc pass, not processing as dmarc report") - } else if report, err := dmarcrpt.ParseMessageReport(store.FileMsgReader(d.m.MsgPrefix, d.dataFile)); err != nil { + } else if report, err := dmarcrpt.ParseMessageReport(log, store.FileMsgReader(d.m.MsgPrefix, d.dataFile)); err != nil { log.Infox("parsing dmarc report", err) } else if d, err := dns.ParseDomain(report.PolicyPublished.Domain); err != nil { log.Infox("parsing domain in dmarc report", err) @@ -173,7 +173,7 @@ func analyze(ctx context.Context, log *mlog.Log, resolver dns.Resolver, d delive if !ok { log.Info("received mail to tlsrpt without acceptable DKIM signature, not processing as tls report") - } else if report, err := tlsrpt.ParseMessage(store.FileMsgReader(d.m.MsgPrefix, d.dataFile)); err != nil { + } else if report, err := tlsrpt.ParseMessage(log, store.FileMsgReader(d.m.MsgPrefix, d.dataFile)); err != nil { log.Infox("parsing tls report", err) } else { var known bool @@ -274,7 +274,7 @@ func analyze(ctx context.Context, log *mlog.Log, resolver dns.Resolver, d delive log.Errorx("get key for verifying subject token", err) return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonSubjectpassError) } - err = subjectpass.Verify(d.dataFile, []byte(subjectpassKey), conf.SubjectPass.Period) + err = subjectpass.Verify(log, d.dataFile, []byte(subjectpassKey), conf.SubjectPass.Period) pass := err == nil log.Infox("pass by subject token", err, mlog.Field("pass", pass)) if pass { diff --git a/smtpserver/rejects.go b/smtpserver/rejects.go index 45ef6b7..0ff9273 100644 --- a/smtpserver/rejects.go +++ b/smtpserver/rejects.go @@ -18,7 +18,7 @@ import ( // rejectPresent returns whether the message is already present in the rejects mailbox. func rejectPresent(log *mlog.Log, acc *store.Account, rejectsMailbox string, m *store.Message, f *os.File) (present bool, msgID string, hash []byte, rerr error) { - if p, err := message.Parse(store.FileMsgReader(m.MsgPrefix, f)); err != nil { + if p, err := message.Parse(log, false, store.FileMsgReader(m.MsgPrefix, f)); err != nil { log.Infox("parsing reject message for message-id", err) } else if header, err := p.Header(); err != nil { log.Infox("parsing reject message header for message-id", err) diff --git a/smtpserver/server.go b/smtpserver/server.go index f23ba9b..b12c063 100644 --- a/smtpserver/server.go +++ b/smtpserver/server.go @@ -1548,9 +1548,9 @@ func (c *conn) cmdData(p *parser) { if Localserve { // Require that message can be parsed fully. - p, err := message.Parse(dataFile) + p, err := message.Parse(c.log, false, dataFile) if err == nil { - err = p.Walk(nil) + err = p.Walk(c.log, nil) } if err != nil { // ../rfc/6409:541 @@ -1661,7 +1661,7 @@ func (c *conn) submit(ctx context.Context, recvHdrFor func(string) string, msgWr // for other users. // We don't check the Sender field, there is no expectation of verification, ../rfc/7489:2948 // and with Resent headers it seems valid to have someone else as Sender. ../rfc/5322:1578 - msgFrom, header, err := message.From(dataFile) + msgFrom, header, err := message.From(c.log, true, dataFile) if err != nil { metricSubmission.WithLabelValues("badmessage").Inc() c.log.Infox("parsing message From address", err, mlog.Field("user", c.username)) @@ -1854,7 +1854,7 @@ func (c *conn) deliver(ctx context.Context, recvHdrFor func(string) string, msgW // todo: in decision making process, if we run into (some) temporary errors, attempt to continue. if we decide to accept, all good. if we decide to reject, we'll make it a temporary reject. - msgFrom, headers, err := message.From(dataFile) + msgFrom, headers, err := message.From(c.log, false, dataFile) if err != nil { c.log.Infox("parsing message for From address", err) } @@ -2388,7 +2388,7 @@ func (c *conn) deliver(ctx context.Context, recvHdrFor func(string) string, msgW // Gather the message-id before we deliver and the file may be consumed. if !parsedMessageID { - if p, err := message.Parse(store.FileMsgReader(m.MsgPrefix, dataFile)); err != nil { + if p, err := message.Parse(c.log, false, store.FileMsgReader(m.MsgPrefix, dataFile)); err != nil { log.Infox("parsing message for message-id", err) } else if header, err := p.Header(); err != nil { log.Infox("parsing message header for message-id", err) diff --git a/store/account.go b/store/account.go index aecea0d..559c014 100644 --- a/store/account.go +++ b/store/account.go @@ -1045,7 +1045,7 @@ func (a *Account) DeliverMessage(log *mlog.Log, tx *bstore.Tx, m *Message, msgFi var part *message.Part if m.ParsedBuf == nil { mr := FileMsgReader(m.MsgPrefix, msgFile) // We don't close, it would close the msgFile. - p, err := message.EnsurePart(mr, m.Size) + p, err := message.EnsurePart(log, false, mr, m.Size) if err != nil { log.Infox("parsing delivered message", err, mlog.Field("parse", ""), mlog.Field("message", m.ID)) // We continue, p is still valid. @@ -1365,7 +1365,7 @@ func MessageRuleset(log *mlog.Log, dest config.Destination, m *Message, msgPrefi } mr := FileMsgReader(msgPrefix, msgFile) // We don't close, it would close the msgFile. - p, err := message.Parse(mr) + p, err := message.Parse(log, false, mr) if err != nil { log.Errorx("parsing message for evaluating rulesets, continuing with headers", err, mlog.Field("parse", "")) // note: part is still set. diff --git a/subjectpass/subjectpass.go b/subjectpass/subjectpass.go index dcb5958..5caa29b 100644 --- a/subjectpass/subjectpass.go +++ b/subjectpass/subjectpass.go @@ -76,7 +76,7 @@ func Generate(mailFrom smtp.Address, key []byte, tm time.Time) string { // Verify parses "message" and checks if it includes a subjectpass token in its // Subject header that is still valid (within "period") and signed with "key". -func Verify(r io.ReaderAt, key []byte, period time.Duration) (rerr error) { +func Verify(log *mlog.Log, r io.ReaderAt, key []byte, period time.Duration) (rerr error) { var token string defer func() { @@ -89,7 +89,7 @@ func Verify(r io.ReaderAt, key []byte, period time.Duration) (rerr error) { log.Debugx("subjectpass verify result", rerr, mlog.Field("token", token), mlog.Field("period", period)) }() - p, err := message.Parse(r) + p, err := message.Parse(log, true, r) if err != nil { return fmt.Errorf("%w: parse message: %s", ErrMessage, err) } diff --git a/subjectpass/subjectpass_test.go b/subjectpass/subjectpass_test.go index b9948f7..a50a628 100644 --- a/subjectpass/subjectpass_test.go +++ b/subjectpass/subjectpass_test.go @@ -7,26 +7,29 @@ import ( "testing" "time" + "github.com/mjl-/mox/mlog" "github.com/mjl-/mox/smtp" ) +var xlog = mlog.New("subjectpass") + func TestSubjectPass(t *testing.T) { key := []byte("secret token") addr, _ := smtp.ParseAddress("mox@mox.example") sig := Generate(addr, key, time.Now()) message := fmt.Sprintf("From: \r\nSubject: let me in %s\r\n\r\nthe message", sig) - if err := Verify(strings.NewReader(message), key, time.Hour); err != nil { + if err := Verify(xlog, strings.NewReader(message), key, time.Hour); err != nil { t.Fatalf("verifyPassToken: %s", err) } - if err := Verify(strings.NewReader(message), []byte("bad key"), time.Hour); err == nil { + if err := Verify(xlog, strings.NewReader(message), []byte("bad key"), time.Hour); err == nil { t.Fatalf("verifyPassToken did not fail") } sig = Generate(addr, key, time.Now().Add(-time.Hour-257)) message = fmt.Sprintf("From: \r\nSubject: let me in %s\r\n\r\nthe message", sig) - if err := Verify(strings.NewReader(message), key, time.Hour); !errors.Is(err, ErrExpired) { + if err := Verify(xlog, strings.NewReader(message), key, time.Hour); !errors.Is(err, ErrExpired) { t.Fatalf("verifyPassToken should have expired") } } diff --git a/tlsrpt/report.go b/tlsrpt/report.go index beb6e4a..d3bb891 100644 --- a/tlsrpt/report.go +++ b/tlsrpt/report.go @@ -10,6 +10,7 @@ import ( "time" "github.com/mjl-/mox/message" + "github.com/mjl-/mox/mlog" "github.com/mjl-/mox/moxio" ) @@ -137,9 +138,9 @@ func Parse(r io.Reader) (*Report, error) { // ParseMessage parses a Report from a mail message. // The maximum size of the message is 15MB, the maximum size of the // decompressed report is 20MB. -func ParseMessage(r io.ReaderAt) (*Report, error) { +func ParseMessage(log *mlog.Log, r io.ReaderAt) (*Report, error) { // ../rfc/8460:905 - p, err := message.Parse(&moxio.LimitAtReader{R: r, Limit: 15 * 1024 * 1024}) + p, err := message.Parse(log, true, &moxio.LimitAtReader{R: r, Limit: 15 * 1024 * 1024}) if err != nil { return nil, fmt.Errorf("parsing mail message: %s", err) } @@ -147,10 +148,10 @@ func ParseMessage(r io.ReaderAt) (*Report, error) { // Using multipart appears optional, and similar to DMARC someone may decide to // send it like that, so accept a report if it's the entire message. const allow = true - return parseMessageReport(p, allow) + return parseMessageReport(log, p, allow) } -func parseMessageReport(p message.Part, allow bool) (*Report, error) { +func parseMessageReport(log *mlog.Log, p message.Part, allow bool) (*Report, error) { if p.MediaType != "MULTIPART" { if !allow { return nil, ErrNoReport @@ -159,7 +160,7 @@ func parseMessageReport(p message.Part, allow bool) (*Report, error) { } for { - sp, err := p.ParseNextPart() + sp, err := p.ParseNextPart(log) if err == io.EOF { return nil, ErrNoReport } @@ -169,7 +170,7 @@ func parseMessageReport(p message.Part, allow bool) (*Report, error) { if p.MediaSubType == "REPORT" && p.ContentTypeParams["report-type"] != "tlsrpt" { return nil, fmt.Errorf("unknown report-type parameter %q", p.ContentTypeParams["report-type"]) } - report, err := parseMessageReport(*sp, p.MediaSubType == "REPORT") + report, err := parseMessageReport(log, *sp, p.MediaSubType == "REPORT") if err == ErrNoReport { continue } else if err != nil || report != nil { diff --git a/tlsrpt/report_test.go b/tlsrpt/report_test.go index e0102a2..f5f507f 100644 --- a/tlsrpt/report_test.go +++ b/tlsrpt/report_test.go @@ -109,19 +109,19 @@ func TestReport(t *testing.T) { t.Fatalf("parsing report: %s", err) } - if _, err := ParseMessage(strings.NewReader(tlsrptMessage)); err != nil { + if _, err := ParseMessage(xlog, strings.NewReader(tlsrptMessage)); err != nil { t.Fatalf("parsing TLSRPT from message: %s", err) } - if _, err := ParseMessage(strings.NewReader(tlsrptMessage2)); err != nil { + if _, err := ParseMessage(xlog, strings.NewReader(tlsrptMessage2)); err != nil { t.Fatalf("parsing TLSRPT from message: %s", err) } - if _, err := ParseMessage(strings.NewReader(strings.ReplaceAll(tlsrptMessage, "multipart/report", "multipart/related"))); err != ErrNoReport { + if _, err := ParseMessage(xlog, strings.NewReader(strings.ReplaceAll(tlsrptMessage, "multipart/report", "multipart/related"))); err != ErrNoReport { t.Fatalf("got err %v, expected ErrNoReport", err) } - if _, err := ParseMessage(strings.NewReader(strings.ReplaceAll(tlsrptMessage, "application/tlsrpt+json", "application/json"))); err != ErrNoReport { + if _, err := ParseMessage(xlog, strings.NewReader(strings.ReplaceAll(tlsrptMessage, "application/tlsrpt+json", "application/json"))); err != ErrNoReport { t.Fatalf("got err %v, expected ErrNoReport", err) } @@ -134,7 +134,7 @@ func TestReport(t *testing.T) { if err != nil { t.Fatalf("open %q: %s", file, err) } - if _, err := ParseMessage(f); err != nil { + if _, err := ParseMessage(xlog, f); err != nil { t.Fatalf("parsing TLSRPT from message %q: %s", file.Name(), err) } f.Close() @@ -144,6 +144,6 @@ func TestReport(t *testing.T) { func FuzzParseMessage(f *testing.F) { f.Add(tlsrptMessage) f.Fuzz(func(t *testing.T, s string) { - ParseMessage(strings.NewReader(s)) + ParseMessage(xlog, strings.NewReader(s)) }) } diff --git a/tlsrptdb/db_test.go b/tlsrptdb/db_test.go index 2e66a22..4e3d2a3 100644 --- a/tlsrptdb/db_test.go +++ b/tlsrptdb/db_test.go @@ -88,7 +88,7 @@ func TestReport(t *testing.T) { if err != nil { t.Fatalf("open %q: %s", file, err) } - report, err := tlsrpt.ParseMessage(f) + report, err := tlsrpt.ParseMessage(xlog, f) f.Close() if err != nil { t.Fatalf("parsing TLSRPT from message %q: %s", file.Name(), err) diff --git a/webaccount/import.go b/webaccount/import.go index 629778c..a886863 100644 --- a/webaccount/import.go +++ b/webaccount/import.go @@ -505,7 +505,7 @@ func importMessages(ctx context.Context, log *mlog.Log, token string, acc *store } // Parse message and store parsed information for later fast retrieval. - p, err := message.EnsurePart(f, m.Size) + p, err := message.EnsurePart(log, false, f, m.Size) if err != nil { problemf("parsing message %s: %s (continuing)", pos, err) }