diff --git a/dkim/sig.go b/dkim/sig.go index f868e40..321152b 100644 --- a/dkim/sig.go +++ b/dkim/sig.go @@ -147,7 +147,7 @@ func (s *Sig) Header() (string, error) { w.Addf(" ", "b=") if len(s.Signature) > 0 { - w.AddWrap([]byte(base64.StdEncoding.EncodeToString(s.Signature))) + w.AddWrap([]byte(base64.StdEncoding.EncodeToString(s.Signature)), false) } w.Add("\r\n") return w.String(), nil diff --git a/junk/filter.go b/junk/filter.go index 48f8ca3..387ea49 100644 --- a/junk/filter.go +++ b/junk/filter.go @@ -343,21 +343,23 @@ func loadWords(ctx context.Context, db *bstore.DB, l []string, dst map[string]wo return nil } -// ClassifyWords returns the spam probability for the given words, and number of recognized ham and spam words. -func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) (probability float64, nham, nspam int, rerr error) { - if f.closed { - return 0, 0, 0, errClosed - } +// WordScore is a word with its score as used in classifications, based on +// (historic) training. +type WordScore struct { + Word string + Score float64 // 0 is ham, 1 is spam. +} - type xword struct { - Word string - R float64 +// ClassifyWords returns the spam probability for the given words, and number of recognized ham and spam words. +func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) (probability float64, hams, spams []WordScore, rerr error) { + if f.closed { + return 0, nil, nil, errClosed } var hamHigh float64 = 0 var spamLow float64 = 1 - var topHam []xword - var topSpam []xword + var topHam []WordScore + var topSpam []WordScore // Find words that should be in the database. 
lookupWords := []string{} @@ -389,7 +391,7 @@ func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) ( fetched := map[string]word{} if len(lookupWords) > 0 { if err := loadWords(ctx, f.db, lookupWords, fetched); err != nil { - return 0, 0, 0, err + return 0, nil, nil, err } for w, c := range fetched { delete(expect, w) @@ -432,7 +434,7 @@ func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) ( if len(topHam) >= f.TopWords && r > hamHigh { continue } - topHam = append(topHam, xword{w, r}) + topHam = append(topHam, WordScore{w, r}) if r > hamHigh { hamHigh = r } @@ -440,7 +442,7 @@ func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) ( if len(topSpam) >= f.TopWords && r < spamLow { continue } - topSpam = append(topSpam, xword{w, r}) + topSpam = append(topSpam, WordScore{w, r}) if r < spamLow { spamLow = r } @@ -449,24 +451,24 @@ func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) ( sort.Slice(topHam, func(i, j int) bool { a, b := topHam[i], topHam[j] - if a.R == b.R { + if a.Score == b.Score { return len(a.Word) > len(b.Word) } - return a.R < b.R + return a.Score < b.Score }) sort.Slice(topSpam, func(i, j int) bool { a, b := topSpam[i], topSpam[j] - if a.R == b.R { + if a.Score == b.Score { return len(a.Word) > len(b.Word) } - return a.R > b.R + return a.Score > b.Score }) - nham = f.TopWords + nham := f.TopWords if nham > len(topHam) { nham = len(topHam) } - nspam = f.TopWords + nspam := f.TopWords if nspam > len(topSpam) { nspam = len(topSpam) } @@ -475,27 +477,27 @@ func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) ( var eta float64 for _, x := range topHam { - eta += math.Log(1-x.R) - math.Log(x.R) + eta += math.Log(1-x.Score) - math.Log(x.Score) } for _, x := range topSpam { - eta += math.Log(1-x.R) - math.Log(x.R) + eta += math.Log(1-x.Score) - math.Log(x.Score) } f.log.Debug("top words", slog.Any("hams", topHam), slog.Any("spams", 
topSpam)) prob := 1 / (1 + math.Pow(math.E, eta)) - return prob, len(topHam), len(topSpam), nil + return prob, topHam, topSpam, nil } // ClassifyMessagePath is a convenience wrapper for calling ClassifyMessage on a file. -func (f *Filter) ClassifyMessagePath(ctx context.Context, path string) (probability float64, words map[string]struct{}, nham, nspam int, rerr error) { +func (f *Filter) ClassifyMessagePath(ctx context.Context, path string) (probability float64, words map[string]struct{}, hams, spams []WordScore, rerr error) { if f.closed { - return 0, nil, 0, 0, errClosed + return 0, nil, nil, nil, errClosed } mf, err := os.Open(path) if err != nil { - return 0, nil, 0, 0, err + return 0, nil, nil, nil, err } defer func() { err := mf.Close() @@ -503,33 +505,33 @@ func (f *Filter) ClassifyMessagePath(ctx context.Context, path string) (probabil }() fi, err := mf.Stat() if err != nil { - return 0, nil, 0, 0, err + return 0, nil, nil, nil, err } return f.ClassifyMessageReader(ctx, mf, fi.Size()) } -func (f *Filter) ClassifyMessageReader(ctx context.Context, mf io.ReaderAt, size int64) (probability float64, words map[string]struct{}, nham, nspam int, rerr error) { +func (f *Filter) ClassifyMessageReader(ctx context.Context, mf io.ReaderAt, size int64) (probability float64, words map[string]struct{}, hams, spams []WordScore, rerr error) { m, err := message.EnsurePart(f.log.Logger, false, mf, size) if err != nil && errors.Is(err, message.ErrBadContentType) { // Invalid content-type header is a sure sign of spam. //f.log.Infox("parsing content", err) - return 1, nil, 0, 0, nil + return 1, nil, nil, nil, nil } return f.ClassifyMessage(ctx, m) } // ClassifyMessage parses the mail message in r and returns the spam probability // (between 0 and 1), along with the tokenized words found in the message, and the -// number of recognized ham and spam words. 
-func (f *Filter) ClassifyMessage(ctx context.Context, m message.Part) (probability float64, words map[string]struct{}, nham, nspam int, rerr error) { +// ham and spam words and their scores used. +func (f *Filter) ClassifyMessage(ctx context.Context, m message.Part) (probability float64, words map[string]struct{}, hams, spams []WordScore, rerr error) { var err error words, err = f.ParseMessage(m) if err != nil { - return 0, nil, 0, 0, err + return 0, nil, nil, nil, err } - probability, nham, nspam, err = f.ClassifyWords(ctx, words) - return probability, words, nham, nspam, err + probability, hams, spams, err = f.ClassifyWords(ctx, words) + return probability, words, hams, spams, err } // Train adds the words of a single message to the filter. diff --git a/message/headerwriter.go b/message/headerwriter.go index 96c4424..89f050a 100644 --- a/message/headerwriter.go +++ b/message/headerwriter.go @@ -1,6 +1,7 @@ package message import ( + "bytes" "fmt" "strings" ) @@ -39,12 +40,20 @@ func (w *HeaderWriter) Add(separator string, texts ...string) { } } -// AddWrap adds data, folding anywhere in the buffer. E.g. for base64 data. -func (w *HeaderWriter) AddWrap(buf []byte) { +// AddWrap adds data. If text is set, wrapping happens at space/tab, otherwise +// anywhere in the buffer (e.g. for base64 data). 
+func (w *HeaderWriter) AddWrap(buf []byte, text bool) { for len(buf) > 0 { line := buf n := 78 - w.lineLen if len(buf) > n { + if text { + if i := bytes.LastIndexAny(buf[:n], " \t"); i > 0 { + n = i + } else if i = bytes.IndexAny(buf, " \t"); i > 0 { + n = i + } + } line, buf = buf[:n], buf[n:] } else { buf = nil diff --git a/smtpserver/analyze.go b/smtpserver/analyze.go index 973910d..62e0e43 100644 --- a/smtpserver/analyze.go +++ b/smtpserver/analyze.go @@ -45,6 +45,7 @@ type delivery struct { dmarcResult dmarc.Result dkimResults []dkim.Result iprevStatus iprev.Status + smtputf8 bool } type analysis struct { @@ -58,7 +59,8 @@ type analysis struct { err error // For our own logging, not sent to remote. dmarcReport *dmarcrpt.Feedback // Validated DMARC aggregate report, not yet stored. tlsReport *tlsrpt.Report // Validated TLS report, not yet stored. - reason string // If non-empty, reason for this decision. Can be one of reputationMethod and a few other tokens. + reason string // If non-empty, reason for this decision. Values from reputationMethod and reason* below. + reasonText []string // Additional details for reason, human-readable, added to X-Mox-Reason header. dmarcOverrideReason string // If set, one of dmarcrpt.PolicyOverride // Additional headers to add during delivery. Used for reasons a message to a // dmarc/tls reporting address isn't processed. @@ -99,6 +101,12 @@ func isListDomain(d delivery, ld dns.Domain) bool { func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d delivery) analysis { var headers string + var reasonText []string + addReasonText := func(format string, args ...any) { + s := fmt.Sprintf(format, args...) + reasonText = append(reasonText, s) + } + // We don't want to let a single IP or network deliver too many messages to an // account. They may fill up the mailbox, either with messages that have to be // purged, or by filling the disk. We check both cases for IP's and networks. 
@@ -175,11 +183,13 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver if err != nil && !rateError { log.Errorx("checking delivery rates", err) metricDelivery.WithLabelValues("checkrates", "").Inc() - return analysis{d, false, "", smtp.C451LocalErr, smtp.SeSys3Other0, false, "error processing", err, nil, nil, reasonReputationError, "", headers} + addReasonText("checking delivery rates: %v", err) + return analysis{d, false, "", smtp.C451LocalErr, smtp.SeSys3Other0, false, "error processing", err, nil, nil, reasonReputationError, reasonText, "", headers} } else if err != nil { log.Debugx("refusing due to high delivery rate", err) metricDelivery.WithLabelValues("highrate", "").Inc() - return analysis{d, false, "", smtp.C452StorageFull, smtp.SeMailbox2Full2, true, err.Error(), err, nil, nil, reasonHighRate, "", headers} + addReasonText("high delivery rate") + return analysis{d, false, "", smtp.C452StorageFull, smtp.SeMailbox2Full2, true, err.Error(), err, nil, nil, reasonHighRate, reasonText, "", headers} } mailbox := d.destination.Mailbox @@ -196,8 +206,17 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver if rs != nil && !rs.ListAllowDNSDomain.IsZero() { // todo: on temporary failures, reject temporarily? 
if isListDomain(d, rs.ListAllowDNSDomain) { + addReasonText("validated message from a configured mailing list") d.m.IsMailingList = true - return analysis{d: d, accept: true, mailbox: mailbox, reason: reasonListAllow, dmarcOverrideReason: string(dmarcrpt.PolicyOverrideMailingList), headers: headers} + return analysis{ + d: d, + accept: true, + mailbox: mailbox, + reason: reasonListAllow, + reasonText: reasonText, + dmarcOverrideReason: string(dmarcrpt.PolicyOverrideMailingList), + headers: headers, + } } } @@ -226,6 +245,7 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver d.m.DKIMDomains = dkimdoms dmarcOverrideReason = string(dmarcrpt.PolicyOverrideForwarded) log.Info("forwarded message, clearing identifying signals of forwarding mail server") + addReasonText("ruleset indicates forwarded message") } assignMailbox := func(tx *bstore.Tx) error { @@ -266,7 +286,8 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver }) }) if mberr != nil { - return analysis{d, false, mailbox, smtp.C451LocalErr, smtp.SeSys3Other0, false, "error processing", err, nil, nil, reasonReputationError, dmarcOverrideReason, headers} + addReasonText("error setting original destination mailbox for rejected message: %v", mberr) + return analysis{d, false, mailbox, smtp.C451LocalErr, smtp.SeSys3Other0, false, "error processing", err, nil, nil, reasonReputationError, reasonText, dmarcOverrideReason, headers} } d.m.MailboxID = 0 // We plan to reject, no need to set intended MailboxID. } @@ -279,12 +300,18 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver // Don't draw attention, but don't go so far as to mark as junk. 
d.m.Seen = true log.Info("accepting reject to configured mailbox due to ruleset") + addReasonText("accepting reject to mailbox due to ruleset") } - return analysis{d, accept, mailbox, code, secode, err == nil, errmsg, err, nil, nil, reason, dmarcOverrideReason, headers} + return analysis{d, accept, mailbox, code, secode, err == nil, errmsg, err, nil, nil, reason, reasonText, dmarcOverrideReason, headers} } if d.dmarcUse && d.dmarcResult.Reject { + addReasonText("message does not pass domain dmarc policy which asks to reject") return reject(smtp.C550MailboxUnavail, smtp.SePol7MultiAuthFails26, "rejecting per dmarc policy", nil, reasonDMARCPolicy) + } else if !d.dmarcUse { + addReasonText("not using any dmarc result") + } else { + addReasonText("dmarc ok") } // todo: should we also reject messages that have a dmarc pass but an spf record "v=spf1 -all"? suggested by m3aawg best practices. @@ -381,13 +408,28 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver return err } - isjunk, conclusive, method, err = reputation(tx, log, d.m) + var text string + isjunk, conclusive, method, text, err = reputation(tx, log, d.m, d.smtputf8) reason = string(method) + s := "address/dkim/spf/ip-based reputation (" + if isjunk != nil && *isjunk { + s += "junk, " + } else if isjunk != nil && !*isjunk { + s += "nonjunk, " + } + if conclusive { + s += "conclusive" + } else { + s += "inconclusive" + } + s += ", " + text + ")" + addReasonText("%s", s) return err }) }) if err != nil { log.Infox("determining reputation", err, slog.Any("message", d.m)) + addReasonText("determining reputation: %v", err) return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonReputationError) } log.Info("reputation analyzed", @@ -396,12 +438,33 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver slog.String("method", string(method))) if conclusive { if !*isjunk { - return analysis{d: d, accept: true, mailbox: mailbox, 
dmarcReport: dmarcReport, tlsReport: tlsReport, reason: reason, dmarcOverrideReason: dmarcOverrideReason, headers: headers} + return analysis{ + d: d, + accept: true, + mailbox: mailbox, + dmarcReport: dmarcReport, + tlsReport: tlsReport, + reason: reason, + reasonText: reasonText, + dmarcOverrideReason: dmarcOverrideReason, + headers: headers, + } } return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, string(method)) } else if dmarcReport != nil || tlsReport != nil { log.Info("accepting message with dmarc aggregate report or tls report without reputation") - return analysis{d: d, accept: true, mailbox: mailbox, dmarcReport: dmarcReport, tlsReport: tlsReport, reason: reasonReporting, dmarcOverrideReason: dmarcOverrideReason, headers: headers} + addReasonText("message inconclusive reputation but with dmarc or tls report") + return analysis{ + d: d, + accept: true, + mailbox: mailbox, + dmarcReport: dmarcReport, + tlsReport: tlsReport, + reason: reasonReporting, + reasonText: reasonText, + dmarcOverrideReason: dmarcOverrideReason, + headers: headers, + } } // If there was no previous message from sender or its domain, and we have an SPF // (soft)fail, reject the message. 
@@ -409,6 +472,7 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver case methodDKIMSPF, methodIP1, methodIP2, methodIP3, methodNone: switch d.m.MailFromValidation { case store.ValidationFail, store.ValidationSoftfail: + addReasonText("no previous message from sender domain and spf result is (soft)fail") return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", nil, reasonSPFPolicy) } } @@ -419,9 +483,13 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver case methodDKIMSPF, methodIP1, methodIP2, methodIP3, methodNone: suspiciousIPrevFail = d.iprevStatus != iprev.StatusPass } + if suspiciousIPrevFail { + addReasonText("suspicious iprev failure") + } // With already a mild junk signal, an iprev fail on top is enough to reject. if suspiciousIPrevFail && isjunk != nil && *isjunk { + addReasonText("message has a mild junk signal and mismatching reverse ip") return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", nil, reasonIPrev) } @@ -431,13 +499,23 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver subjectpassKey, err = d.acc.Subjectpass(d.canonicalAddress) if err != nil { log.Errorx("get key for verifying subject token", err) + addReasonText("subject pass error: %v", err) return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonSubjectpassError) } err = subjectpass.Verify(log.Logger, d.dataFile, []byte(subjectpassKey), conf.SubjectPass.Period) pass := err == nil log.Infox("pass by subject token", err, slog.Bool("pass", pass)) if pass { - return analysis{d: d, accept: true, mailbox: mailbox, reason: reasonSubjectpass, dmarcOverrideReason: dmarcOverrideReason, headers: headers} + addReasonText("message has valid subjectpass token in subject") + return analysis{ + d: d, + accept: true, + mailbox: mailbox, + reason: reasonSubjectpass, + reasonText: reasonText, + dmarcOverrideReason: dmarcOverrideReason, + headers: 
headers, + } } } @@ -450,9 +528,10 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver err := f.Close() log.Check(err, "closing junkfilter") }() - contentProb, _, _, _, err := f.ClassifyMessageReader(ctx, store.FileMsgReader(d.m.MsgPrefix, d.dataFile), d.m.Size) + contentProb, _, hams, spams, err := f.ClassifyMessageReader(ctx, store.FileMsgReader(d.m.MsgPrefix, d.dataFile), d.m.Size) if err != nil { log.Errorx("testing for spam", err) + addReasonText("classify message error: %v", err) return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonJunkClassifyError) } // todo: if isjunk is not nil (i.e. there was inconclusive reputation), use it in the probability calculation. give reputation a score of 0.25 or .75 perhaps? @@ -487,14 +566,17 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver // todo: some of these checks should also apply for reputation-based analysis with a weak signal, e.g. verified dkim/spf signal from new domain. // With an iprev fail, non-TLS connection or our address not in To/Cc header, we set a higher bar for content. reason = reasonJunkContent + var thresholdRemark string if suspiciousIPrevFail && threshold > 0.25 { threshold = 0.25 log.Info("setting junk threshold due to iprev fail", slog.Float64("threshold", threshold)) reason = reasonJunkContentStrict + thresholdRemark = " (stricter due to reverse ip mismatch)" } else if !d.tls && threshold > 0.25 { threshold = 0.25 log.Info("setting junk threshold due to plaintext smtp", slog.Float64("threshold", threshold)) reason = reasonJunkContentStrict + thresholdRemark = " (stricter due to missing tls)" } else if (rs == nil || !rs.IsForward) && threshold > 0.25 && !rcptToMatch(d.msgTo) && !rcptToMatch(d.msgCc) { // A common theme in junk messages is your recipient address not being in the To/Cc // headers. We may be in Bcc, but that's unusual for first-time senders. 
Some @@ -503,6 +585,7 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver threshold = 0.25 log.Info("setting junk threshold due to smtp rcpt to and message to/cc address mismatch", slog.Float64("threshold", threshold)) reason = reasonJunkContentStrict + thresholdRemark = " (stricter due to recipient address not in to/cc header)" } accept = contentProb <= threshold junkSubjectpass = contentProb < threshold-0.2 @@ -510,9 +593,44 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver slog.Bool("accept", accept), slog.Float64("contentprob", contentProb), slog.Bool("subjectpass", junkSubjectpass)) + + s := "content: " + if accept { + s += "not junk" + } else { + s += "junk" + } + s += fmt.Sprintf(", spamscore %.2f, threshold %.2f%s", contentProb, threshold, thresholdRemark) + s += "(ham words: " + for i, w := range hams { + if i > 0 { + s += ", " + } + word := w.Word + if !d.smtputf8 && !isASCII(word) { + word = "(non-ascii)" + } + s += fmt.Sprintf("%s %.3f", word, w.Score) + } + s += "), (spam words: " + for i, w := range spams { + if i > 0 { + s += ", " + } + word := w.Word + if !d.smtputf8 && !isASCII(word) { + word = "(non-ascii)" + } + s += fmt.Sprintf("%s %.3f", word, w.Score) + } + s += ")" + addReasonText("%s", s) } else if err != store.ErrNoJunkFilter { log.Errorx("open junkfilter", err) + addReasonText("open junkfilter: %v", err) return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonJunkFilterError) + } else { + addReasonText("no junk filter configured") } // If content looks good, we'll still look at DNS block lists for a reason to @@ -545,20 +663,43 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver accept = false dnsblocklisted = true reason = reasonDNSBlocklisted + addReasonText("dnsbl: ip %s listed in dnsbl %s", d.m.RemoteIP, zone.XName(d.smtputf8)) break } } + if !dnsblocklisted && len(d.dnsBLs) > 0 { + addReasonText("remote ip not 
blocklisted") + } } if accept { - return analysis{d: d, accept: true, mailbox: mailbox, reason: reasonNoBadSignals, dmarcOverrideReason: dmarcOverrideReason, headers: headers} + addReasonText("no known reputation and no bad signals") + return analysis{ + d: d, + accept: true, + mailbox: mailbox, + reason: reasonNoBadSignals, + reasonText: reasonText, + dmarcOverrideReason: dmarcOverrideReason, + headers: headers, + } } if subjectpassKey != "" && d.dmarcResult.Status == dmarc.StatusPass && method == methodNone && (dnsblocklisted || junkSubjectpass) { log.Info("permanent reject with subjectpass hint of moderately spammy email without reputation") pass := subjectpass.Generate(log.Logger, d.msgFrom, []byte(subjectpassKey), time.Now()) + addReasonText("reject with request to try again with subjectpass token in subject") return reject(smtp.C550MailboxUnavail, smtp.SePol7DeliveryUnauth1, subjectpass.Explanation+pass, nil, reasonGiveSubjectpass) } return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", nil, reason) } + +func isASCII(s string) bool { + for _, b := range []byte(s) { + if b >= 0x80 { + return false + } + } + return true +} diff --git a/smtpserver/reputation.go b/smtpserver/reputation.go index 631235f..1dff331 100644 --- a/smtpserver/reputation.go +++ b/smtpserver/reputation.go @@ -4,10 +4,12 @@ import ( "errors" "fmt" "log/slog" + "strings" "time" "github.com/mjl-/bstore" + "github.com/mjl-/mox/dns" "github.com/mjl-/mox/mlog" "github.com/mjl-/mox/smtp" "github.com/mjl-/mox/store" @@ -97,7 +99,7 @@ const ( // ../rfc/6376:1915 // ../rfc/6376:3716 // ../rfc/7208:2167 -func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rconclusive bool, rmethod reputationMethod, rerr error) { +func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message, smtputf8 bool) (rjunk *bool, rconclusive bool, rmethod reputationMethod, reasonText string, rerr error) { boolptr := func(v bool) *bool { return &v } @@ -179,7 +181,7 @@ func 
reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco // todo: we may want to look at dkim/spf in this case. spam := msgs[0].Junk && (len(msgs) == 1 || msgs[1].Junk) conclusive := m.MsgFromValidated - return &spam, conclusive, methodMsgfromFull, nil + return &spam, conclusive, methodMsgfromFull, "reputation of exact message-from address", nil } if !m.MsgFromValidated { // Look for historic messages that were validated. If present, this is likely spam. @@ -189,7 +191,7 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco msgs = xmessageList(q, "msgfromfull-validated") if len(msgs) > 0 { spam := msgs[0].Junk && (len(msgs) == 1 || msgs[1].Junk) - return xtrue, spam, methodMsgfromFull, nil + return xtrue, spam, methodMsgfromFull, "unvalidated message with validated historic messages with exact message-from address", nil } } @@ -199,21 +201,23 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco qr.FilterEqual("Domain", m.MsgFromDomain) qr.FilterGreaterEqual("Sent", now.Add(-3*year)) if xrecipientExists(qr) { - return xfalse, true, methodMsgtoFull, nil + return xfalse, true, methodMsgtoFull, "exact message-from address was earlier message recipient", nil } // Look for domain match, then for organizational domain match. 
for _, orgdomain := range []bool{false, true} { qm := store.Message{} var method reputationMethod - var descr string + var source, descr string if orgdomain { qm.MsgFromOrgDomain = m.MsgFromOrgDomain method = methodMsgfromOrgDomain + source = "organizational domain of message-from address" descr = "msgfromorgdomain" } else { qm.MsgFromDomain = m.MsgFromDomain method = methodMsgfromDomain + source = "exact domain of message-from address" descr = "msgfromdomain" } @@ -228,7 +232,8 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco } } if 100*nonjunk/len(msgs) > 80 { - return xfalse, true, method, nil + reasonText = fmt.Sprintf("positive reputation with %s based on %d messages", source, len(msgs)) + return xfalse, true, method, reasonText, nil } if nonjunk == 0 { // Only conclusive with at least 3 different localparts. @@ -236,13 +241,16 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco for _, m := range msgs { localparts[m.MsgFromLocalpart] = struct{}{} if len(localparts) == 3 { - return xtrue, true, method, nil + reasonText = fmt.Sprintf("negative reputation of at least 3 addresses with %s based on %d messages", source, len(msgs)) + return xtrue, true, method, reasonText, nil } } - return xtrue, false, method, nil + reasonText = fmt.Sprintf("negative reputation with %s based on %d messages", source, len(msgs)) + return xtrue, false, method, reasonText, nil } // Mixed signals from domain. We don't want to block a new sender. - return nil, false, method, nil + reasonText = fmt.Sprintf("mixed signals with %s based on %d messages", source, len(msgs)) + return nil, false, method, reasonText, nil } if !m.MsgFromValidated { // Look for historic messages that were validated. If present, this is likely spam. 
@@ -253,7 +261,8 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco msgs = xmessageList(q, descr+"-validated") if len(msgs) > 0 { spam := msgs[0].Junk && (len(msgs) == 1 || msgs[1].Junk) - return xtrue, spam, method, nil + reasonText = fmt.Sprintf("unvalidated message with %s while we have validated messages from that source", source) + return xtrue, spam, method, reasonText, nil } } @@ -262,13 +271,16 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco if orgdomain { qr.FilterEqual("OrgDomain", m.MsgFromOrgDomain) method = methodMsgtoOrgDomain + source = "organizational domain of message-from address" } else { qr.FilterEqual("Domain", m.MsgFromDomain) method = methodMsgtoDomain + source = "exact domain of message-from address" } qr.FilterGreaterEqual("Sent", now.Add(-2*year)) if xrecipientExists(qr) { - return xfalse, true, method, nil + reasonText = fmt.Sprintf("%s was recipient address", source) + return xfalse, true, method, reasonText, nil } } } @@ -277,6 +289,7 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco // We only use identities that passed validation. Failed identities are ignored. ../rfc/6376:2447 // todo future: we could do something with the DKIM identity (i=) field if it is more specific than just the domain (d=). 
dkimspfsignals := []float64{} + dkimspfreasondoms := []string{} dkimspfmsgs := 0 for _, dom := range m.DKIMDomains { q := messageQuery(nil, year/2, 50) @@ -291,12 +304,15 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco } pspam := float64(nspam) / float64(len(msgs)) dkimspfsignals = append(dkimspfsignals, pspam) + dkimspfreasondoms = append(dkimspfreasondoms, dom) dkimspfmsgs = len(msgs) } } if m.MailFromValidated || m.EHLOValidated { + var dom string var msgs []store.Message if m.MailFromValidated && m.MailFromDomain != "" { + dom = m.MailFromDomain q := messageQuery(&store.Message{MailFromLocalpart: m.MailFromLocalpart, MailFromDomain: m.MailFromDomain}, year/2, 50) msgs = xmessageList(q, "mailfrom") if len(msgs) == 0 { @@ -305,6 +321,7 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco } } if len(msgs) == 0 && m.EHLOValidated && m.EHLODomain != "" { + dom = m.EHLODomain q := messageQuery(&store.Message{EHLODomain: m.EHLODomain}, year/2, 50) msgs = xmessageList(q, "ehlodomain") } @@ -317,6 +334,7 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco } pspam := float64(nspam) / float64(len(msgs)) dkimspfsignals = append(dkimspfsignals, pspam) + dkimspfreasondoms = append(dkimspfreasondoms, dom) if len(msgs) > dkimspfmsgs { dkimspfmsgs = len(msgs) } @@ -324,20 +342,27 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco } if len(dkimspfsignals) > 0 { var nham, nspam int - for _, p := range dkimspfsignals { + var hamdoms, spamdoms []string + for i, p := range dkimspfsignals { + d, _ := dns.ParseDomain(dkimspfreasondoms[i]) if p < .1 { nham++ + hamdoms = append(hamdoms, d.XName(smtputf8)) } else if p > .9 { nspam++ + spamdoms = append(spamdoms, d.XName(smtputf8)) } } if nham > 0 && nspam == 0 { - return xfalse, true, methodDKIMSPF, nil + reasonText = fmt.Sprintf("positive dkim/spf reputation for domain(s) %s", strings.Join(hamdoms, ",")) + 
return xfalse, true, methodDKIMSPF, reasonText, nil } if nspam > 0 && nham == 0 { - return xtrue, dkimspfmsgs > 1, methodDKIMSPF, nil + reasonText = fmt.Sprintf("negative dkim/spf reputation for domain(s) %s", strings.Join(spamdoms, ",")) + return xtrue, dkimspfmsgs > 1, methodDKIMSPF, reasonText, nil } - return nil, false, methodDKIMSPF, nil + reasonText = fmt.Sprintf("mixed dkim/spf reputation, positive for %s, negative for %s", strings.Join(hamdoms, ","), strings.Join(spamdoms, ",")) + return nil, false, methodDKIMSPF, reasonText, nil } // IP-based. A wider mask needs more messages to be conclusive. @@ -345,23 +370,27 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco var msgs []store.Message var need int var method reputationMethod + var ip string if m.RemoteIPMasked1 != "" { q := messageQuery(&store.Message{RemoteIPMasked1: m.RemoteIPMasked1}, year/4, 50) msgs = xmessageList(q, "ip1") need = 2 method = methodIP1 + ip = m.RemoteIPMasked1 } if len(msgs) == 0 && m.RemoteIPMasked2 != "" { q := messageQuery(&store.Message{RemoteIPMasked2: m.RemoteIPMasked2}, year/4, 50) msgs = xmessageList(q, "ip2") need = 5 method = methodIP2 + ip = m.RemoteIPMasked2 } if len(msgs) == 0 && m.RemoteIPMasked3 != "" { q := messageQuery(&store.Message{RemoteIPMasked3: m.RemoteIPMasked3}, year/4, 50) msgs = xmessageList(q, "ip3") need = 10 method = methodIP3 + ip = m.RemoteIPMasked3 } if len(msgs) > 0 { nspam := 0 @@ -378,8 +407,24 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco spam = xtrue } conclusive := len(msgs) >= need && (pspam <= 0.1 || pspam >= 0.9) - return spam, conclusive, method, nil + v6 := strings.Contains(m.RemoteIP, ":") + reasonText = fmt.Sprintf("reputation for ip %s%s, spam score %.2f", ip, maskclasses[classmask{v6, method}], pspam) + return spam, conclusive, method, reasonText, nil } - return nil, false, methodNone, nil + return nil, false, methodNone, "no address/spf/dkim/ip reputation", nil +} + 
+type classmask struct { + v6 bool + method reputationMethod +} + +var maskclasses = map[classmask]string{ + {false, methodIP1}: "/32", + {false, methodIP2}: "/26", + {false, methodIP3}: "/21", + {true, methodIP1}: "/64", + {true, methodIP2}: "/48", + {true, methodIP3}: "/32", } diff --git a/smtpserver/reputation_test.go b/smtpserver/reputation_test.go index 0b85783..de3cc78 100644 --- a/smtpserver/reputation_test.go +++ b/smtpserver/reputation_test.go @@ -148,7 +148,7 @@ func TestReputation(t *testing.T) { var method reputationMethod err = db.Read(ctxbg, func(tx *bstore.Tx) error { var err error - isjunk, conclusive, method, err = reputation(tx, pkglog, &m) + isjunk, conclusive, method, _, err = reputation(tx, pkglog, &m, false) return err }) tcheck(t, err, "read tx") diff --git a/smtpserver/server.go b/smtpserver/server.go index 1649276..e51d0f6 100644 --- a/smtpserver/server.go +++ b/smtpserver/server.go @@ -2750,7 +2750,7 @@ func (c *conn) deliver(ctx context.Context, recvHdrFor func(string) string, msgW msgTo = envelope.To msgCc = envelope.CC } - d := delivery{c.tls, &m, dataFile, smtpRcptTo, deliverTo, destination, canonicalAddr, acc, msgTo, msgCc, msgFrom, c.dnsBLs, dmarcUse, dmarcResult, dkimResults, iprevStatus} + d := delivery{c.tls, &m, dataFile, smtpRcptTo, deliverTo, destination, canonicalAddr, acc, msgTo, msgCc, msgFrom, c.dnsBLs, dmarcUse, dmarcResult, dkimResults, iprevStatus, c.smtputf8} r := analyze(ctx, log, c.resolver, d) return &r, nil @@ -2862,10 +2862,25 @@ func (c *conn) deliver(ctx context.Context, recvHdrFor func(string) string, msgW rcptAuthResults.Methods = append([]message.AuthMethod{}, authResults.Methods...) rcptAuthResults.Methods = append(rcptAuthResults.Methods, rcptDMARCMethod) - // Prepend reason as message header, for easy display in mail clients. + // Prepend reason as message header, for easy viewing in mail clients. 
var xmox string if a0.reason != "" { - xmox = "X-Mox-Reason: " + a0.reason + "\r\n" + hw := &message.HeaderWriter{} + hw.Add(" ", "X-Mox-Reason:") + hw.Add(" ", a0.reason) + for i, s := range a0.reasonText { + if i == 0 { + s = "; " + s + } else { + hw.Newline() + } + // Just in case any of the strings has a newline, replace it with space to not break the message. + s = strings.ReplaceAll(s, "\n", " ") + s = strings.ReplaceAll(s, "\r", " ") + s += ";" + hw.AddWrap([]byte(s), true) + } + xmox = hw.String() } xmox += a0.headers