improve message parsing: allow bare carriage return (unless in pedantic mode), allow empty header, and no longer treat a message with only headers as a message with only a body

This commit is contained in:
Mechiel Lukkien 2023-08-11 14:07:49 +02:00
parent 79d06184ab
commit 48eb530b1f
No known key found for this signature in database
10 changed files with 129 additions and 79 deletions

9
ctl.go
View file

@ -327,21 +327,16 @@ func servectlcmd(ctx context.Context, ctl *ctl, shutdown func()) {
log.Check(err, "closing temporary message file")
}
}()
mw := &message.Writer{Writer: msgFile}
mw := message.NewWriter(msgFile)
ctl.xwriteok()
ctl.xstreamto(mw)
err = msgFile.Sync()
ctl.xcheck(err, "syncing message to storage")
msgPrefix := []byte{}
if !mw.HaveHeaders {
msgPrefix = []byte("\r\n\r\n")
}
m := &store.Message{
Received: time.Now(),
Size: int64(len(msgPrefix)) + mw.Size,
MsgPrefix: msgPrefix,
Size: mw.Size,
}
a.WithWLock(func() {

View file

@ -2683,7 +2683,7 @@ func (c *conn) cmdAppend(tag, cmd string, p *parser) {
}
}()
defer c.xtrace(mlog.LevelTracedata)()
mw := &message.Writer{Writer: msgFile}
mw := message.NewWriter(msgFile)
msize, err := io.Copy(mw, io.LimitReader(c.br, size))
c.xtrace(mlog.LevelTrace) // Restore.
if err != nil {
@ -2693,11 +2693,6 @@ func (c *conn) cmdAppend(tag, cmd string, p *parser) {
if msize != size {
xserverErrorf("read %d bytes for message, expected %d (%w)", msize, size, errIO)
}
msgPrefix := []byte{}
// todo: should we treat the message as body? i believe headers are required in messages, and bodies are optional. so would make more sense to treat the data as headers. perhaps only if the headers are valid?
if !mw.HaveHeaders {
msgPrefix = []byte("\r\n")
}
if utf8 {
line := c.readline(false)
@ -2736,8 +2731,7 @@ func (c *conn) cmdAppend(tag, cmd string, p *parser) {
Received: tm,
Flags: storeFlags,
Keywords: keywords,
Size: size + int64(len(msgPrefix)),
MsgPrefix: msgPrefix,
Size: size,
}
mb.Add(m.MailboxCounts())

View file

@ -25,10 +25,10 @@ func TestStatus(t *testing.T) {
// Again, now with a message in the mailbox.
tc.transactf("ok", "append inbox {4+}\r\ntest")
tc.transactf("ok", "status inbox (messages uidnext uidvalidity unseen deleted size recent appendlimit)")
tc.xuntagged(imapclient.UntaggedStatus{Mailbox: "Inbox", Attrs: map[string]int64{"MESSAGES": 1, "UIDVALIDITY": 1, "UIDNEXT": 2, "UNSEEN": 1, "DELETED": 0, "SIZE": 6, "RECENT": 0, "APPENDLIMIT": 0}})
tc.xuntagged(imapclient.UntaggedStatus{Mailbox: "Inbox", Attrs: map[string]int64{"MESSAGES": 1, "UIDVALIDITY": 1, "UIDNEXT": 2, "UNSEEN": 1, "DELETED": 0, "SIZE": 4, "RECENT": 0, "APPENDLIMIT": 0}})
tc.client.Select("inbox")
tc.client.StoreFlagsSet("1", true, `\Deleted`)
tc.transactf("ok", "status inbox (messages uidnext uidvalidity unseen deleted size recent appendlimit)")
tc.xuntagged(imapclient.UntaggedStatus{Mailbox: "Inbox", Attrs: map[string]int64{"MESSAGES": 1, "UIDVALIDITY": 1, "UIDNEXT": 2, "UNSEEN": 1, "DELETED": 1, "SIZE": 6, "RECENT": 0, "APPENDLIMIT": 0}})
tc.xuntagged(imapclient.UntaggedStatus{Mailbox: "Inbox", Attrs: map[string]int64{"MESSAGES": 1, "UIDVALIDITY": 1, "UIDNEXT": 2, "UNSEEN": 1, "DELETED": 1, "SIZE": 4, "RECENT": 0, "APPENDLIMIT": 0}})
}

View file

@ -4,7 +4,6 @@ package message
// todo: allow more invalid content-type values, we now stop parsing on: empty media type (eg "content-type: ; name=..."), empty value for property (eg "charset=", missing quotes for characters that should be quoted (eg boundary containing "=" but without quotes), duplicate properties (two charsets), empty pairs (eg "text/html;;").
// todo: what should our max line length be? rfc says 1000. messages exceed that. we should enforce 1000 for outgoing messages.
// todo: should we be forgiving when closing boundary in multipart message is missing? seems like spam messages do this...
// todo: allow bare \r (without \n)? this does happen in messages.
// todo: should we allow base64 messages where a line starts with a space? and possibly more whitespace. is happening in messages. coreutils base64 accepts it, encoding/base64 does not.
// todo: handle comments in headers?
// todo: should we just always store messages with \n instead of \r\n? \r\n seems easier for use with imap.
@ -45,7 +44,8 @@ var (
errLineTooLong = errors.New("line too long")
errMissingBoundaryParam = errors.New("missing/empty boundary content-type parameter")
errMissingClosingBoundary = errors.New("eof without closing boundary")
errHalfLineSep = errors.New("invalid CR or LF without the other")
errBareLF = errors.New("invalid bare line feed")
errBareCR = errors.New("invalid bare carriage return")
errUnexpectedEOF = errors.New("unexpected eof")
)
@ -269,8 +269,12 @@ func newPart(r io.ReaderAt, offset int64, parent *Part) (p Part, rerr error) {
hb := &bytes.Buffer{}
for {
line, _, err := b.ReadLine(true)
if err == io.EOF {
// No body is valid.
break
}
if err != nil {
return p, err
return p, fmt.Errorf("reading header line: %w", err)
}
hb.Write(line)
if len(line) == 2 {
@ -279,13 +283,18 @@ func newPart(r io.ReaderAt, offset int64, parent *Part) (p Part, rerr error) {
}
p.BodyOffset = b.offset
// Don't attempt to parse empty header, mail.ReadMessage doesn't like it.
if p.HeaderOffset == p.BodyOffset {
p.header = textproto.MIMEHeader{}
} else {
h, err := parseHeader(hb)
if err != nil {
return p, fmt.Errorf("parsing header: %w", err)
}
p.header = h
}
ct := h.Get("Content-Type")
ct := p.header.Get("Content-Type")
mt, params, err := mime.ParseMediaType(ct)
if err != nil && ct != "" {
return p, fmt.Errorf("%w: %s: %q", ErrBadContentType, err, ct)
@ -300,12 +309,12 @@ func newPart(r io.ReaderAt, offset int64, parent *Part) (p Part, rerr error) {
p.ContentTypeParams = params
}
p.ContentID = h.Get("Content-Id")
p.ContentDescription = h.Get("Content-Description")
p.ContentTransferEncoding = strings.ToUpper(h.Get("Content-Transfer-Encoding"))
p.ContentID = p.header.Get("Content-Id")
p.ContentDescription = p.header.Get("Content-Description")
p.ContentTransferEncoding = strings.ToUpper(p.header.Get("Content-Transfer-Encoding"))
if parent == nil {
p.Envelope, err = parseEnvelope(mail.Header(h))
p.Envelope, err = parseEnvelope(mail.Header(p.header))
if err != nil {
return p, err
}
@ -347,6 +356,10 @@ func (p *Part) Header() (textproto.MIMEHeader, error) {
if p.header != nil {
return p.header, nil
}
if p.HeaderOffset == p.BodyOffset {
p.header = textproto.MIMEHeader{}
return p.header, nil
}
h, err := parseHeader(p.HeaderReader())
p.header = h
return h, err
@ -357,6 +370,7 @@ func (p *Part) HeaderReader() io.Reader {
return io.NewSectionReader(p.r, p.HeaderOffset, p.BodyOffset-p.HeaderOffset)
}
// parse a header, only call this on non-empty input (even though that is a valid header).
func parseHeader(r io.Reader) (textproto.MIMEHeader, error) {
// We read using mail.ReadMessage instead of textproto.ReadMIMEHeaders because the
// first handles email messages properly, while the second only works for HTTP
@ -444,7 +458,7 @@ func parseAddressList(h mail.Header, k string) []Address {
// ParseNextPart parses the next (sub)part of this multipart message.
// ParseNextPart returns io.EOF and a nil part when there are no more parts.
// Only use for initial parsing of message. Once parsed, use p.Parts.
// Only used for initial parsing of message. Once parsed, use p.Parts.
func (p *Part) ParseNextPart() (*Part, error) {
if len(p.bound) == 0 {
return nil, errNotMultipart
@ -602,14 +616,15 @@ func (p *Part) RawReader() io.Reader {
}
p.RawLineCount = 0
if p.parent == nil {
return &offsetReader{p, p.BodyOffset, true}
return &offsetReader{p, p.BodyOffset, true, false}
}
return &boundReader{p: p, b: &bufAt{r: p.r, offset: p.BodyOffset}, lastnewline: true}
return &boundReader{p: p, b: &bufAt{r: p.r, offset: p.BodyOffset}, prevlf: true}
}
// bufAt is a buffered reader on an underlying ReaderAt.
// bufAt verifies that lines end with crlf.
type bufAt struct {
offset int64 // Offset in r currently consumed, i.e. ignoring any buffered data.
offset int64 // Offset in r currently consumed, i.e. not including any buffered data.
r io.ReaderAt
buf []byte // Buffered data.
@ -649,7 +664,7 @@ func (b *bufAt) ensure() error {
}
// ReadLine reads a line until \r\n is found, returning the line including \r\n.
// If not found, or a single \r or \n is encountered, ReadLine returns an error, e.g. io.EOF.
// If not found, or a bare \n is encountered, or a bare \r is enountered in pedantic mode, ReadLine returns an error.
func (b *bufAt) ReadLine(requirecrlf bool) (buf []byte, crlf bool, err error) {
return b.line(true, requirecrlf)
}
@ -664,14 +679,18 @@ func (b *bufAt) line(consume, requirecrlf bool) (buf []byte, crlf bool, err erro
}
for i, c := range b.buf[:b.nbuf] {
if c == '\n' {
return nil, false, errHalfLineSep
// Should have seen a \r, which should have been handled below.
return nil, false, errBareLF
}
if c != '\r' {
continue
}
i++
if i >= b.nbuf || b.buf[i] != '\n' {
return nil, false, errHalfLineSep
if moxvar.Pedantic {
return nil, false, errBareCR
}
continue
}
b.scratch = b.scratch[:i+1]
copy(b.scratch, b.buf[:i+1])
@ -708,10 +727,13 @@ func (b *bufAt) PeekByte() (byte, error) {
return b.buf[0], nil
}
// offsetReader reads from p.r starting from offset, and RawLineCount on p.
// offsetReader validates lines end with \r\n.
type offsetReader struct {
p *Part
offset int64
lastnewline bool
prevlf bool
prevcr bool
}
func (r *offsetReader) Read(buf []byte) (int, error) {
@ -720,10 +742,18 @@ func (r *offsetReader) Read(buf []byte) (int, error) {
r.offset += int64(n)
for _, c := range buf[:n] {
if r.lastnewline {
if r.prevlf {
r.p.RawLineCount++
}
r.lastnewline = c == '\n'
if err == nil || err == io.EOF {
if c == '\n' && !r.prevcr {
err = errBareLF
} else if c != '\n' && r.prevcr && moxvar.Pedantic {
err = errBareCR
}
}
r.prevlf = c == '\n'
r.prevcr = c == '\r'
}
}
if err == io.EOF {
@ -735,13 +765,15 @@ func (r *offsetReader) Read(buf []byte) (int, error) {
var crlf = []byte("\r\n")
// boundReader is a reader that stops at a closing multipart boundary.
// boundReader ensures lines end with crlf through its use of bufAt.
type boundReader struct {
p *Part
b *bufAt
buf []byte // Data from previous line, to be served first.
nbuf int // Number of valid bytes in buf.
crlf []byte // Possible crlf, to be returned if we do not yet encounter a boundary.
lastnewline bool // If last char return was a newline. For counting lines.
prevlf bool // If last char returned was a newline. For counting lines.
prevcr bool
}
func (b *boundReader) Read(buf []byte) (count int, rerr error) {
@ -749,10 +781,18 @@ func (b *boundReader) Read(buf []byte) (count int, rerr error) {
defer func() {
if count > 0 {
for _, c := range origBuf[:count] {
if b.lastnewline {
if b.prevlf {
b.p.RawLineCount++
}
b.lastnewline = c == '\n'
if rerr == nil || rerr == io.EOF {
if c == '\n' && !b.prevcr {
rerr = errBareLF
} else if c != '\n' && b.prevcr && moxvar.Pedantic {
rerr = errBareCR
}
}
b.prevlf = c == '\n'
b.prevcr = c == '\r'
}
}
}()

View file

@ -10,6 +10,8 @@ import (
"reflect"
"strings"
"testing"
"github.com/mjl-/mox/moxvar"
)
func tcheck(t *testing.T, err error, msg string) {
@ -204,11 +206,29 @@ func TestLongLine(t *testing.T) {
}
func TestHalfCrLf(t *testing.T) {
_, err := Parse(strings.NewReader("test\rtest"))
tfail(t, err, errHalfLineSep)
parse := func(s string) error {
p, err := Parse(strings.NewReader(s))
if err != nil {
return err
}
return p.Walk(nil)
}
err := parse("subject: test\ntest\r\n")
tfail(t, err, errBareLF)
err = parse("\r\ntest\ntest\r\n")
tfail(t, err, errBareLF)
_, err = Parse(strings.NewReader("test\ntest"))
tfail(t, err, errHalfLineSep)
moxvar.Pedantic = true
err = parse("subject: test\rtest\r\n")
tfail(t, err, errBareCR)
err = parse("\r\ntest\rtest\r\n")
tfail(t, err, errBareCR)
moxvar.Pedantic = false
err = parse("subject: test\rtest\r\n")
tcheck(t, err, "header with bare cr")
err = parse("\r\ntest\rtest\r\n")
tcheck(t, err, "body with bare cr")
}
func TestMissingClosingBoundary(t *testing.T) {

View file

@ -7,17 +7,24 @@ import (
// Writer is a write-through helper, collecting properties about the written
// message.
type Writer struct {
Writer io.Writer
HaveHeaders bool
writer io.Writer
HaveBody bool // Body is optional. ../rfc/5322:343
Has8bit bool // Whether a byte with the high/8bit has been read. So whether this is 8BITMIME instead of 7BIT.
Size int64
tail [3]byte // For detecting crlfcrlf.
tail [3]byte // For detecting header/body-separating crlf.
// todo: should be parsing headers here, as we go
}
func NewWriter(w io.Writer) *Writer {
// Pretend we already saw \r\n, for handling empty header.
return &Writer{writer: w, tail: [3]byte{0, '\r', '\n'}}
}
// Write implements io.Writer.
func (w *Writer) Write(buf []byte) (int, error) {
if !w.HaveHeaders && len(buf) > 0 {
if !w.HaveBody && len(buf) > 0 {
get := func(i int) byte {
if i < 0 {
return w.tail[3+i]
@ -27,7 +34,7 @@ func (w *Writer) Write(buf []byte) (int, error) {
for i, b := range buf {
if b == '\n' && get(i-3) == '\r' && get(i-2) == '\n' && get(i-1) == '\r' {
w.HaveHeaders = true
w.HaveBody = true
break
}
}
@ -47,7 +54,7 @@ func (w *Writer) Write(buf []byte) (int, error) {
}
}
}
n, err := w.Writer.Write(buf)
n, err := w.writer.Write(buf)
if n > 0 {
w.Size += int64(n)
}

View file

@ -10,23 +10,23 @@ func TestMsgWriter(t *testing.T) {
t.Helper()
b := &strings.Builder{}
mw := &Writer{Writer: b}
mw := NewWriter(b)
if _, err := mw.Write([]byte(data)); err != nil {
t.Fatalf("write for message %q: %s", data, err)
}
if mw.HaveHeaders != want {
t.Fatalf("got %v, expected %v, for message %q", mw.HaveHeaders, want, data)
if mw.HaveBody != want {
t.Fatalf("got %v, expected %v, for message %q", mw.HaveBody, want, data)
}
b = &strings.Builder{}
mw = &Writer{Writer: b}
mw = NewWriter(b)
for i := range data {
if _, err := mw.Write([]byte(data[i : i+1])); err != nil {
t.Fatalf("write for message %q: %s", data, err)
}
}
if mw.HaveHeaders != want {
t.Fatalf("got %v, expected %v, for message %q", mw.HaveHeaders, want, data)
if mw.HaveBody != want {
t.Fatalf("got %v, expected %v, for message %q", mw.HaveBody, want, data)
}
}
@ -38,4 +38,5 @@ func TestMsgWriter(t *testing.T) {
check("key: value\r\rbody", false)
check("\r\n\r\n", true)
check("\r\n\r\nbody", true)
check("\r\nbody", true)
}

View file

@ -162,7 +162,7 @@ func queueDSN(log *mlog.Log, m Msg, remoteMTA dsn.NameIP, secodeOpt, errmsg stri
}
}()
msgWriter := &message.Writer{Writer: msgFile}
msgWriter := message.NewWriter(msgFile)
if _, err := msgWriter.Write(msgData); err != nil {
qlog("writing dsn message", err)
return

View file

@ -1505,7 +1505,7 @@ func (c *conn) cmdData(p *parser) {
c.log.Check(err, "removing temporary message file")
}
}()
msgWriter := &message.Writer{Writer: dataFile}
msgWriter := message.NewWriter(dataFile)
dr := smtp.NewDataReader(c.r)
n, err := io.Copy(&limitWriter{maxSize: c.maxMessageSize, w: msgWriter}, dr)
c.xtrace(mlog.LevelTrace) // Restore.
@ -1534,7 +1534,7 @@ func (c *conn) cmdData(p *parser) {
// Basic sanity checks on messages before we send them out to the world. Just
// trying to be strict in what we do to others and liberal in what we accept.
if c.submission {
if !msgWriter.HaveHeaders {
if !msgWriter.HaveBody {
// ../rfc/6409:541
xsmtpUserErrorf(smtp.C554TransactionFailed, smtp.SeMsg6Other0, "message requires both header and body section")
}
@ -1771,10 +1771,6 @@ func (c *conn) submit(ctx context.Context, recvHdrFor func(string) string, msgWr
}
xmsgPrefix := append([]byte(recvHdrFor(rcptAcc.rcptTo.String())), msgPrefix...)
// todo: don't convert the headers to a body? it seems the body part is optional. does this have consequences for us in other places? ../rfc/5322:343
if !msgWriter.HaveHeaders {
xmsgPrefix = append(xmsgPrefix, "\r\n"...)
}
msgSize := int64(len(xmsgPrefix)) + msgWriter.Size
if _, err := queue.Add(ctx, c.log, c.account.Name, *c.mailFrom, rcptAcc.rcptTo, msgWriter.Has8bit, c.smtputf8, msgSize, messageID, xmsgPrefix, dataFile, nil, i == len(c.recipients)-1); err != nil {
@ -2283,9 +2279,6 @@ func (c *conn) deliver(ctx context.Context, recvHdrFor func(string) string, msgW
receivedSPF.Header() +
recvHdrFor(rcptAcc.rcptTo.String()),
)
if !msgWriter.HaveHeaders {
msgPrefix = append(msgPrefix, "\r\n"...)
}
m := &store.Message{
Received: time.Now(),

View file

@ -45,7 +45,7 @@ func TestMailbox(t *testing.T) {
t.Fatalf("creating temp msg file: %s", err)
}
defer msgFile.Close()
msgWriter := &message.Writer{Writer: msgFile}
msgWriter := message.NewWriter(msgFile)
if _, err := msgWriter.Write([]byte(" message")); err != nil {
t.Fatalf("writing to temp message: %s", err)
}