From ecf60568b4a313543648774e5ac22433aea44b87 Mon Sep 17 00:00:00 2001 From: Mechiel Lukkien Date: Thu, 8 Feb 2024 12:33:19 +0100 Subject: [PATCH] fix: don't insert spurious \r when fixing up crlf line endings when writing a message message.Writer.Write() adds missing \r's, but the buffer of "last bytes written" was only being updated while writing the message headers, not while writing the body. so for Write()'s in the body section (depending on buffering), we were compensating based on the "last bytes written" as set during the last write in the header section. that could cause a spurious \r to be added when a Write starts with \n while the previous Write did properly end with \r. for issue #117, thanks haraldrudell for reporting and investigating --- message/writer.go | 33 ++++++++++++++++++++------------- message/writer_test.go | 26 ++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 13 deletions(-) diff --git a/message/writer.go b/message/writer.go index 021150f..a75ac38 100644 --- a/message/writer.go +++ b/message/writer.go @@ -13,7 +13,9 @@ type Writer struct { Has8bit bool // Whether a byte with the high/8bit has been read. So whether this needs SMTP 8BITMIME instead of 7BIT. Size int64 // Number of bytes written, may be different from bytes read due to LF to CRLF conversion. - tail [3]byte // For detecting header/body-separating crlf. + // For detecting header/body-separating crlf and fixing up bare lf. These are the + // incoming bytes, not the fixed up bytes. So CRs may be missing from tail. + tail [3]byte // todo: should be parsing headers here, as we go } @@ -26,9 +28,16 @@ func NewWriter(w io.Writer) *Writer { // io.Writer. It converts bare new lines (LF) to carriage returns with new lines // (CRLF). func (w *Writer) Write(buf []byte) (int, error) { - origtail := w.tail + if !w.Has8bit { + for _, b := range buf { + if b >= 0x80 { + w.Has8bit = true + break + } + } + } - if !w.HaveBody && len(buf) > 0 { + if !w.HaveBody { get := func(i int) byte { if i < 0 { return w.tail[3+i] @@ -42,29 +51,27 @@ func (w *Writer) Write(buf []byte) (int, error) { break } } + } + // Update w.tail after having written. Regardless of error, writers can't expect + // subsequent writes to work again properly anyway. + defer func() { n := len(buf) if n > 3 { n = 3 } copy(w.tail[:], w.tail[n:]) copy(w.tail[3-n:], buf[len(buf)-n:]) - } - if !w.Has8bit { - for _, b := range buf { - if b&0x80 != 0 { - w.Has8bit = true - break - } - } - } + }() wrote := 0 o := 0 Top: for o < len(buf) { + // Look for bare newline. If present, write up to that position while adding the + // missing carriage return. Then start the loop again. for i := o; i < len(buf); i++ { - if buf[i] == '\n' && (i > 0 && buf[i-1] != '\r' || i == 0 && origtail[2] != '\r') { + if buf[i] == '\n' && (i > 0 && buf[i-1] != '\r' || i == 0 && w.tail[2] != '\r') { // Write buffer leading up to missing \r. if i > o { n, err := w.writer.Write(buf[o:i]) diff --git a/message/writer_test.go b/message/writer_test.go index ff18b49..4f10f5e 100644 --- a/message/writer_test.go +++ b/message/writer_test.go @@ -53,3 +53,29 @@ func TestMsgWriter(t *testing.T) { t.Fatalf("got %q, expected %q", got, exp) } } + +func TestMsgWriterIssue117(t *testing.T) { + var b strings.Builder + mw := NewWriter(&b) + + // Write header and header/body separator, but with CR missing. + _, err := mw.Write([]byte("a: b\n\n")) + tcheck(t, err, "write") + + // Write start of a line. The newline follows in a second write. Just because of buffering. + _, err = mw.Write([]byte("body\r")) + tcheck(t, err, "write") + + // Finish the line. The bug is that w.tail was only updated while writing headers, + // not while writing message data for the body. That makes the code think this \n + // wasn't preceded by a \r, causing it to add a \r. And we end up with \r\r\n in + // the file. + _, err = mw.Write([]byte("\n")) + tcheck(t, err, "write") + + got := b.String() + exp := "a: b\r\n\r\nbody\r\n" + if got != exp { + t.Fatalf("got %q, expected %q", got, exp) + } +}