From 317dc78397cc24c69a699a8e8af34d2810dfe0b5 Mon Sep 17 00:00:00 2001 From: Mechiel Lukkien Date: Sun, 12 Mar 2023 15:16:01 +0100 Subject: [PATCH] add pedantic mode (used by localserve) that refuses some behaviour that is invalid according to specifications and that we normally accept for compatibility --- README.md | 4 +- config/config.go | 1 + config/doc.go | 4 ++ dkim/parser.go | 28 +++++--- doc.go | 2 +- dsn/parse.go | 2 +- localserve.go | 1 + main.go | 35 +++++++++- message/part.go | 70 ++++++++++++------- message/part_test.go | 10 +-- mox-/config.go | 3 + moxvar/pedantic.go | 4 ++ smtp/address.go | 5 +- smtpserver/parse.go | 5 +- smtpserver/server.go | 7 +- .../message/message-rfc822-multipart2.eml | 2 +- 16 files changed, 127 insertions(+), 56 deletions(-) create mode 100644 moxvar/pedantic.go diff --git a/README.md b/README.md index 1d25e97..a39633d 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ See Quickstart below to get started. proxy), so port 443 can also be used to serve websites. - Prometheus metrics and structured logging for operational insight. - "localserve" subcommand for running mox locally for email-related - testing/developing. + testing/developing, including pedantic mode. Mox is available under the MIT-license and was created by Mechiel Lukkien, mechiel@ueber.net. Mox includes the Public Suffix List by Mozilla, under Mozilla @@ -109,8 +109,6 @@ The code is heavily cross-referenced with the RFCs for readability/maintainabili ## Roadmap -- Strict vs lax mode, defaulting to lax when receiving from the internet, and - strict when sending. - Rate limiting and spam detection for submitted/outgoing messages, to reduce impact when an account gets compromised. - Privilege separation, isolating parts of the application to more restricted diff --git a/config/config.go b/config/config.go index 4a1f80c..1a84bd7 100644 --- a/config/config.go +++ b/config/config.go @@ -38,6 +38,7 @@ type Static struct { Hostname string `sconf-doc:"Full hostname of system, e.g. mail."` HostnameDomain dns.Domain `sconf:"-" json:"-"` // Parsed form of hostname. CheckUpdates bool `sconf:"optional" sconf-doc:"If enabled, a single DNS TXT lookup of _updates.xmox.nl is done every 24h to check for a new release. Each time a new release is found, a changelog is fetched from https://updates.xmox.nl and delivered to the postmaster mailbox."` + Pedantic bool `sconf:"optional" sconf-doc:"In pedantic mode protocol violations (that happen in the wild) for SMTP/IMAP/etc result in errors instead of accepting such behaviour."` TLS struct { CA *struct { AdditionalToSystem bool `sconf:"optional"` diff --git a/config/doc.go b/config/doc.go index d424b94..c6b40d2 100644 --- a/config/doc.go +++ b/config/doc.go @@ -49,6 +49,10 @@ describe-static" and "mox config describe-domains": # (optional) CheckUpdates: false + # In pedantic mode protocol violations (that happen in the wild) for SMTP/IMAP/etc + # result in errors instead of accepting such behaviour. (optional) + Pedantic: false + # Global TLS configuration, e.g. for additional Certificate Authorities. Used for # outgoing SMTP connections, HTTPS requests. (optional) TLS: diff --git a/dkim/parser.go b/dkim/parser.go index 79fce0b..12cf2ee 100644 --- a/dkim/parser.go +++ b/dkim/parser.go @@ -7,6 +7,7 @@ import ( "strings" "github.com/mjl-/mox/dns" + "github.com/mjl-/mox/moxvar" "github.com/mjl-/mox/smtp" ) @@ -194,11 +195,12 @@ func (p *parser) xcanonical() string { return s } -func (p *parser) xdomain() dns.Domain { +func (p *parser) xdomainselector(isselector bool) dns.Domain { subdomain := func(c rune, i int) bool { // domain names must always be a-labels, ../rfc/6376:1115 ../rfc/6376:1187 ../rfc/6376:1303 - // todo: add a "lax" mode where underscore is allowed if this is a selector? seen in the wild, but invalid: ../rfc/6376:581 ../rfc/5321:2303 - return isalphadigit(c) || (i > 0 && c == '-' && p.o+1 < len(p.s)) + // dkim selectors with underscores happen in the wild, accept them when not in + // pedantic mode. ../rfc/6376:581 ../rfc/5321:2303 + return isalphadigit(c) || (i > 0 && (c == '-' || isselector && !moxvar.Pedantic && c == '_') && p.o+1 < len(p.s)) } s := p.xtakefn1(false, subdomain) for p.hasPrefix(".") { @@ -206,11 +208,23 @@ func (p *parser) xdomain() dns.Domain { } d, err := dns.ParseDomain(s) if err != nil { + // ParseDomain does not allow underscore, work around it. + if strings.Contains(s, "_") && isselector && !moxvar.Pedantic { + return dns.Domain{ASCII: strings.ToLower(s)} + } p.xerrorf("parsing domain %q: %s", s, err) } return d } +func (p *parser) xdomain() dns.Domain { + return p.xdomainselector(false) +} + +func (p *parser) xselector() dns.Domain { + return p.xdomainselector(true) +} + func (p *parser) xhdrName(ignoreFWS bool) string { // ../rfc/6376:473 // ../rfc/5322:1689 @@ -258,8 +272,8 @@ func (p *parser) xlocalpart() smtp.Localpart { s += "." + p.xatom() } } - // todo: have a strict parser that only allows the actual max of 64 bytes. some services have large localparts because of generated (bounce) addresses. - if len(s) > 128 { + // In the wild, some services use large localparts for generated (bounce) addresses. + if moxvar.Pedantic && len(s) > 64 || len(s) > 128 { // ../rfc/5321:3486 p.xerrorf("localpart longer than 64 octets") } @@ -458,10 +472,6 @@ func (p *parser) xqp(pipeEncoded, colonEncoded, ignoreFWS bool) string { return s } -func (p *parser) xselector() dns.Domain { - return p.xdomain() -} - func (p *parser) xtimestamp() int64 { // ../rfc/6376:1325 ../rfc/6376:1358 return p.xnumber(12) diff --git a/doc.go b/doc.go index 87d52ec..3af054b 100644 --- a/doc.go +++ b/doc.go @@ -12,7 +12,7 @@ low-maintenance self-hosted email. # Commands - mox [-config config/mox.conf] ... + mox [-config config/mox.conf] [-pedantic] ... mox serve mox quickstart [-existing-webserver] [-hostname host] user@domain [user | uid] mox stop diff --git a/dsn/parse.go b/dsn/parse.go index 33fe3b8..ebf793a 100644 --- a/dsn/parse.go +++ b/dsn/parse.go @@ -32,7 +32,7 @@ func Parse(r io.ReaderAt) (*Message, *message.Part, error) { if part.MediaType != "MULTIPART" || part.MediaSubType != "REPORT" { return nil, nil, fmt.Errorf(`message has content-type %q, must have "message/report"`, strings.ToLower(part.MediaType+"/"+part.MediaSubType)) } - err = part.Walk() + err = part.Walk(nil) if err != nil { return nil, nil, fmt.Errorf("parsing message parts: %v", err) } diff --git a/localserve.go b/localserve.go index 3ba5fa3..a782443 100644 --- a/localserve.go +++ b/localserve.go @@ -296,6 +296,7 @@ func writeLocalConfig(log *mlog.Log, dir string) (rerr error) { Hostname: "localhost", User: fmt.Sprintf("%d", os.Getuid()), AdminPasswordFile: "adminpasswd", + Pedantic: true, Listeners: map[string]config.Listener{ "local": local, }, diff --git a/main.go b/main.go index 3afc2d2..4da26fd 100644 --- a/main.go +++ b/main.go @@ -143,6 +143,7 @@ var commands = []struct { {"bumpuidvalidity", cmdBumpUIDValidity}, {"dmarcdb addreport", cmdDMARCDBAddReport}, {"ensureparsed", cmdEnsureParsed}, + {"message parse", cmdMessageParse}, {"tlsrptdb addreport", cmdTLSRPTDBAddReport}, {"updates addsigned", cmdUpdatesAddSigned}, {"updates genkey", cmdUpdatesGenkey}, @@ -326,7 +327,7 @@ Used to generate documentation. func usage(l []cmd, unlisted bool) { var lines []string if !unlisted { - lines = append(lines, "mox [-config config/mox.conf] ...") + lines = append(lines, "mox [-config config/mox.conf] [-pedantic] ...") } for _, c := range l { c.gather() @@ -352,6 +353,7 @@ func usage(l []cmd, unlisted bool) { } var loglevel string +var pedantic bool // subcommands that are not "serve" should use this function to load the config, it // restores any loglevel specified on the command-line, instead of using the @@ -362,6 +364,9 @@ func mustLoadConfig() { mox.Conf.Log[""] = level mlog.SetConfig(mox.Conf.Log) } + if pedantic { + moxvar.Pedantic = true + } } func main() { @@ -380,6 +385,7 @@ func main() { flag.StringVar(&mox.ConfigStaticPath, "config", envString("MOXCONF", "config/mox.conf"), "configuration file, other config files are looked up in the same directory, defaults to $MOXCONF with a fallback to mox.conf") flag.StringVar(&loglevel, "loglevel", "", "if non-empty, this log level is set early in startup") + flag.BoolVar(&pedantic, "pedantic", false, "protocol violations result in errors instead of accepting/working around them") flag.Usage = func() { usage(cmds, false) } flag.Parse() @@ -387,6 +393,9 @@ func main() { if len(args) == 0 { usage(cmds, false) } + if pedantic { + moxvar.Pedantic = true + } mox.ConfigDynamicPath = filepath.Join(filepath.Dir(mox.ConfigStaticPath), "domains.conf") if level, ok := mlog.Levels[loglevel]; ok && loglevel != "" { @@ -1898,6 +1907,30 @@ func cmdEnsureParsed(c *cmd) { fmt.Printf("%d messages updated\n", n) } +func cmdMessageParse(c *cmd) { + c.unlisted = true + c.params = "message.eml" + c.help = "Parse message, print JSON representation." + + args := c.Parse() + if len(args) != 1 { + c.Usage() + } + + f, err := os.Open(args[0]) + xcheckf(err, "open") + defer f.Close() + + part, err := message.Parse(f) + xcheckf(err, "parsing message") + err = part.Walk(nil) + xcheckf(err, "parsing nested parts") + enc := json.NewEncoder(os.Stdout) + enc.SetIndent("", "\t") + err = enc.Encode(part) + xcheckf(err, "write") +} + func cmdBumpUIDValidity(c *cmd) { c.unlisted = true c.params = "account mailbox" diff --git a/message/part.go b/message/part.go index 0633f62..bac7bae 100644 --- a/message/part.go +++ b/message/part.go @@ -26,6 +26,7 @@ import ( "time" "github.com/mjl-/mox/mlog" + "github.com/mjl-/mox/moxvar" "github.com/mjl-/mox/smtp" ) @@ -115,36 +116,41 @@ func Parse(r io.ReaderAt) (Part, error) { func EnsurePart(r io.ReaderAt, size int64) (Part, error) { p, err := Parse(r) if err == nil { - err = p.Walk() + err = p.Walk(nil) } if err != nil { - np := Part{ - HeaderOffset: p.HeaderOffset, - BodyOffset: p.BodyOffset, - EndOffset: size, - MediaType: "APPLICATION", - MediaSubType: "OCTET-STREAM", - ContentTypeParams: p.ContentTypeParams, - ContentID: p.ContentID, - ContentDescription: p.ContentDescription, - ContentTransferEncoding: p.ContentTransferEncoding, - Envelope: p.Envelope, - // We don't keep: - // - BoundaryOffset: irrelevant for top-level message. - // - RawLineCount and DecodedSize: set below. - // - Parts: we are not treating this as a multipart message. - } - p = np - p.SetReaderAt(r) - // By reading body, the number of lines and decoded size will be set. - _, err2 := io.Copy(io.Discard, p.Reader()) + np, err2 := fallbackPart(p, r, size) if err2 != nil { err = err2 } + p = np } return p, err } +func fallbackPart(p Part, r io.ReaderAt, size int64) (Part, error) { + np := Part{ + HeaderOffset: p.HeaderOffset, + BodyOffset: p.BodyOffset, + EndOffset: size, + MediaType: "APPLICATION", + MediaSubType: "OCTET-STREAM", + ContentTypeParams: p.ContentTypeParams, + ContentID: p.ContentID, + ContentDescription: p.ContentDescription, + ContentTransferEncoding: p.ContentTransferEncoding, + Envelope: p.Envelope, + // We don't keep: + // - BoundaryOffset: irrelevant for top-level message. + // - RawLineCount and DecodedSize: set below. + // - Parts: we are not treating this as a multipart message. + } + np.SetReaderAt(r) + // By reading body, the number of lines and decoded size will be set. + _, err := io.Copy(io.Discard, np.Reader()) + return np, err +} + // SetReaderAt sets r as reader for this part and all its sub parts, recursively. // No reader is set for any Message subpart, see SetMessageReaderAt. func (p *Part) SetReaderAt(r io.ReaderAt) { @@ -170,7 +176,7 @@ func (p *Part) SetMessageReaderAt() error { } // Walk through message, decoding along the way, and collecting mime part offsets and sizes, and line counts. -func (p *Part) Walk() error { +func (p *Part) Walk(parent *Part) error { if len(p.bound) == 0 { if p.MediaType == "MESSAGE" && (p.MediaSubType == "RFC822" || p.MediaSubType == "GLOBAL") { // todo: don't read whole submessage in memory... @@ -178,13 +184,23 @@ func (p *Part) Walk() error { if err != nil { return err } - mp, err := Parse(bytes.NewReader(buf)) + br := bytes.NewReader(buf) + mp, err := Parse(br) if err != nil { return fmt.Errorf("parsing embedded message: %w", err) } - // todo: if this is a DSN, we should have a lax parser that doesn't fail on unexpected end of file. this is quite common because MTA's can just truncate the original message. - if err := mp.Walk(); err != nil { - return fmt.Errorf("parsing parts of embedded message: %w", err) + if err := mp.Walk(nil); err != nil { + // If this is a DSN and we are not in pedantic mode, accept unexpected end of + // message. This is quite common because MTA's sometimes just truncate the original + // message in a place that makes the message invalid. + if errors.Is(err, errUnexpectedEOF) && !moxvar.Pedantic && parent != nil && len(parent.Parts) >= 3 && p == &parent.Parts[2] && parent.MediaType == "MULTIPART" && parent.MediaSubType == "REPORT" { + mp, err = fallbackPart(mp, br, int64(len(buf))) + if err != nil { + return fmt.Errorf("parsing invalid embedded message: %w", err) + } + } else { + return fmt.Errorf("parsing parts of embedded message: %w", err) + } } // todo: if mp does not contain any non-identity content-transfer-encoding, we should set an offsetReader of p.r on mp, recursively. p.Message = &mp @@ -202,7 +218,7 @@ func (p *Part) Walk() error { if err != nil { return err } - if err := pp.Walk(); err != nil { + if err := pp.Walk(p); err != nil { return err } } diff --git a/message/part_test.go b/message/part_test.go index e6b29e7..c3fc621 100644 --- a/message/part_test.go +++ b/message/part_test.go @@ -123,7 +123,7 @@ func TestBasic2(t *testing.T) { r = strings.NewReader(basicMsg2) p, err = Parse(r) tcheck(t, err, "new reader") - err = p.Walk() + err = p.Walk(nil) tcheck(t, err, "walk") if p.RawLineCount != 2 { t.Fatalf("basic message, got %d lines, expected 2", p.RawLineCount) @@ -224,7 +224,7 @@ test tfail(t, err, errMissingClosingBoundary) msg, _ = Parse(strings.NewReader(message)) - err = msg.Walk() + err = msg.Walk(nil) tfail(t, err, errMissingClosingBoundary) } @@ -277,7 +277,7 @@ test tcheck(t, err, "walkmsg") msg, _ = Parse(strings.NewReader(message)) - err = msg.Walk() + err = msg.Walk(nil) tcheck(t, err, "msg.Walk") } @@ -380,7 +380,7 @@ Content-Transfer-Encoding: Quoted-printable } msg, _ = Parse(strings.NewReader(nestedMessage)) - err = msg.Walk() + err = msg.Walk(nil) tcheck(t, err, "msg.Walk") } @@ -497,5 +497,5 @@ func TestEmbedded2(t *testing.T) { buf = bytes.ReplaceAll(buf, []byte("\n"), []byte("\r\n")) _, err = EnsurePart(bytes.NewReader(buf), int64(len(buf))) - tfail(t, err, errUnexpectedEOF) // todo: be able to parse this without an error? truncate message/rfc822 in dsn. + tfail(t, err, nil) } diff --git a/mox-/config.go b/mox-/config.go index 3eb826b..007744e 100644 --- a/mox-/config.go +++ b/mox-/config.go @@ -32,6 +32,7 @@ import ( "github.com/mjl-/mox/dns" "github.com/mjl-/mox/mlog" "github.com/mjl-/mox/moxio" + "github.com/mjl-/mox/moxvar" "github.com/mjl-/mox/mtasts" "github.com/mjl-/mox/smtp" ) @@ -352,6 +353,8 @@ func SetConfig(c *Config) { RootCAs: Conf.Static.TLS.CertPool, } } + + moxvar.Pedantic = c.Static.Pedantic } // ParseConfig parses the static config at path p. If checkOnly is true, no changes diff --git a/moxvar/pedantic.go b/moxvar/pedantic.go new file mode 100644 index 0000000..fbabaff --- /dev/null +++ b/moxvar/pedantic.go @@ -0,0 +1,4 @@ +package moxvar + +// Pendantic mode, in moxvar to prevent cyclic imports. +var Pedantic bool diff --git a/smtp/address.go b/smtp/address.go index c33dd74..68de0bc 100644 --- a/smtp/address.go +++ b/smtp/address.go @@ -7,6 +7,7 @@ import ( "strings" "github.com/mjl-/mox/dns" + "github.com/mjl-/mox/moxvar" ) var ErrBadAddress = errors.New("invalid email address") @@ -256,8 +257,8 @@ func (p *parser) xlocalpart() Localpart { s += "." + p.xatom() } } - // todo: have a strict parser that only allows the actual max of 64 bytes. some services have large localparts because of generated (bounce) addresses. - if len(s) > 128 { + // In the wild, some services use large localparts for generated (bounce) addresses. + if moxvar.Pedantic && len(s) > 64 || len(s) > 128 { // ../rfc/5321:3486 p.xerrorf("localpart longer than 64 octets") } diff --git a/smtpserver/parse.go b/smtpserver/parse.go index 7f614cf..d9ba1f2 100644 --- a/smtpserver/parse.go +++ b/smtpserver/parse.go @@ -8,6 +8,7 @@ import ( "strings" "github.com/mjl-/mox/dns" + "github.com/mjl-/mox/moxvar" "github.com/mjl-/mox/smtp" ) @@ -325,8 +326,8 @@ func (p *parser) xlocalpart() smtp.Localpart { s += "." + p.xatom(true) } } - // todo: have a strict parser that only allows the actual max of 64 bytes. some services have large localparts because of generated (bounce) addresses. - if len(s) > 128 { + // In the wild, some services use large localparts for generated (bounce) addresses. + if moxvar.Pedantic && len(s) > 64 || len(s) > 128 { // ../rfc/5321:3486 p.xerrorf("localpart longer than 64 octets") } diff --git a/smtpserver/server.go b/smtpserver/server.go index f1617e5..6223efb 100644 --- a/smtpserver/server.go +++ b/smtpserver/server.go @@ -284,8 +284,6 @@ type conn struct { ncmds int // Number of commands processed. Used to abort connection when first incoming command is unknown/invalid. dnsBLs []dns.Domain - // todo future: add a flag for "pedantic" mode, causing us to be strict. e.g. interpreting some SHOULD as MUST. ../rfc/5321:4076 - // If non-zero, taken into account during Read and Write. Set while processing DATA // command, we don't want the entire delivery to take too long. deadline time.Time @@ -1509,8 +1507,9 @@ func (c *conn) cmdData(p *parser) { // ../rfc/6409:541 xsmtpUserErrorf(smtp.C554TransactionFailed, smtp.SeMsg6Other0, "message requires both header and body section") } - // todo: check disabled because ios mail will attempt to send smtputf8 with non-ascii in message from localpart without using 8bitmime. we should have a non-lax mode that disallows this behaviour. - if false && msgWriter.Has8bit && !c.has8bitmime { + // Check only for pedantic mode because ios mail will attempt to send smtputf8 with + // non-ascii in message from localpart without using 8bitmime. + if moxvar.Pedantic && msgWriter.Has8bit && !c.has8bitmime { // ../rfc/5321:906 xsmtpUserErrorf(smtp.C500BadSyntax, smtp.SeMsg6Other0, "message with non-us-ascii requires 8bitmime extension") } diff --git a/testdata/message/message-rfc822-multipart2.eml b/testdata/message/message-rfc822-multipart2.eml index 5e7e02c..a1a7f4f 100644 --- a/testdata/message/message-rfc822-multipart2.eml +++ b/testdata/message/message-rfc822-multipart2.eml @@ -1,7 +1,7 @@ Return-Path: <> Date: Sun, 06 Apr 2008 09:38:06 +0900 MIME-Version: 1.0 -Content-Type: Multipart/Mixed; +Content-Type: multipart/report; boundary="------------Boundary-00=_JFOVO7CXFVB00L32QL80" --------------Boundary-00=_JFOVO7CXFVB00L32QL80