add more details to x-mox-reason message header added during delivery, for understanding why a message is accepted/rejected

We add various information while analysing an incoming message, such as the dkim/spf/ip reputation, the content-based junk filter threshold and result, and the ham/spam words used. For issue #179 by Fell and #157 by mattfbacon.
2024-12-26 00:13:47 +03:00 · 2024-10-04 16:01:30 +02:00 · 2024-10-04 16:01:30 +02:00 · 32b549b260
commit 32b549b260
parent 98d0ff22bb
7 changed files with 281 additions and 69 deletions
--- a/dkim/sig.go
+++ b/dkim/sig.go
@ -147,7 +147,7 @@ func (s *Sig) Header() (string, error) {

 	w.Addf(" ", "b=")
 	if len(s.Signature) > 0 {
-		w.AddWrap([]byte(base64.StdEncoding.EncodeToString(s.Signature)))
+		w.AddWrap([]byte(base64.StdEncoding.EncodeToString(s.Signature)), false)
 	}
 	w.Add("\r\n")
 	return w.String(), nil
--- a/junk/filter.go
+++ b/junk/filter.go
@ -343,21 +343,23 @@ func loadWords(ctx context.Context, db *bstore.DB, l []string, dst map[string]wo
 	return nil
 }

-// ClassifyWords returns the spam probability for the given words, and number of recognized ham and spam words.
-func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) (probability float64, nham, nspam int, rerr error) {
-	if f.closed {
-		return 0, 0, 0, errClosed
-	}
+// WordScore is a word with its score as used in classifications, based on
+// (historic) training.
+type WordScore struct {
+	Word  string
+	Score float64 // 0 is ham, 1 is spam.
+}

-	type xword struct {
-		Word string
-		R    float64
+// ClassifyWords returns the spam probability for the given words, along with the recognized ham and spam words, with their scores, that were used in the calculation.
+func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) (probability float64, hams, spams []WordScore, rerr error) {
+	if f.closed {
+		return 0, nil, nil, errClosed
 	}

 	var hamHigh float64 = 0
 	var spamLow float64 = 1
-	var topHam []xword
-	var topSpam []xword
+	var topHam []WordScore
+	var topSpam []WordScore

 	// Find words that should be in the database.
 	lookupWords := []string{}
@ -389,7 +391,7 @@ func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) (
 	fetched := map[string]word{}
 	if len(lookupWords) > 0 {
 		if err := loadWords(ctx, f.db, lookupWords, fetched); err != nil {
-			return 0, 0, 0, err
+			return 0, nil, nil, err
 		}
 		for w, c := range fetched {
 			delete(expect, w)
@ -432,7 +434,7 @@ func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) (
 			if len(topHam) >= f.TopWords && r > hamHigh {
 				continue
 			}
-			topHam = append(topHam, xword{w, r})
+			topHam = append(topHam, WordScore{w, r})
 			if r > hamHigh {
 				hamHigh = r
 			}
@ -440,7 +442,7 @@ func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) (
 			if len(topSpam) >= f.TopWords && r < spamLow {
 				continue
 			}
-			topSpam = append(topSpam, xword{w, r})
+			topSpam = append(topSpam, WordScore{w, r})
 			if r < spamLow {
 				spamLow = r
 			}
@ -449,24 +451,24 @@ func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) (

 	sort.Slice(topHam, func(i, j int) bool {
 		a, b := topHam[i], topHam[j]
-		if a.R == b.R {
+		if a.Score == b.Score {
 			return len(a.Word) > len(b.Word)
 		}
-		return a.R < b.R
+		return a.Score < b.Score
 	})
 	sort.Slice(topSpam, func(i, j int) bool {
 		a, b := topSpam[i], topSpam[j]
-		if a.R == b.R {
+		if a.Score == b.Score {
 			return len(a.Word) > len(b.Word)
 		}
-		return a.R > b.R
+		return a.Score > b.Score
 	})

-	nham = f.TopWords
+	nham := f.TopWords
 	if nham > len(topHam) {
 		nham = len(topHam)
 	}
-	nspam = f.TopWords
+	nspam := f.TopWords
 	if nspam > len(topSpam) {
 		nspam = len(topSpam)
 	}
@ -475,27 +477,27 @@ func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) (

 	var eta float64
 	for _, x := range topHam {
-		eta += math.Log(1-x.R) - math.Log(x.R)
+		eta += math.Log(1-x.Score) - math.Log(x.Score)
 	}
 	for _, x := range topSpam {
-		eta += math.Log(1-x.R) - math.Log(x.R)
+		eta += math.Log(1-x.Score) - math.Log(x.Score)
 	}

 	f.log.Debug("top words", slog.Any("hams", topHam), slog.Any("spams", topSpam))

 	prob := 1 / (1 + math.Pow(math.E, eta))
-	return prob, len(topHam), len(topSpam), nil
+	return prob, topHam, topSpam, nil
 }

 // ClassifyMessagePath is a convenience wrapper for calling ClassifyMessage on a file.
-func (f *Filter) ClassifyMessagePath(ctx context.Context, path string) (probability float64, words map[string]struct{}, nham, nspam int, rerr error) {
+func (f *Filter) ClassifyMessagePath(ctx context.Context, path string) (probability float64, words map[string]struct{}, hams, spams []WordScore, rerr error) {
 	if f.closed {
-		return 0, nil, 0, 0, errClosed
+		return 0, nil, nil, nil, errClosed
 	}

 	mf, err := os.Open(path)
 	if err != nil {
-		return 0, nil, 0, 0, err
+		return 0, nil, nil, nil, err
 	}
 	defer func() {
 		err := mf.Close()
@ -503,33 +505,33 @@ func (f *Filter) ClassifyMessagePath(ctx context.Context, path string) (probabil
 	}()
 	fi, err := mf.Stat()
 	if err != nil {
-		return 0, nil, 0, 0, err
+		return 0, nil, nil, nil, err
 	}
 	return f.ClassifyMessageReader(ctx, mf, fi.Size())
 }

-func (f *Filter) ClassifyMessageReader(ctx context.Context, mf io.ReaderAt, size int64) (probability float64, words map[string]struct{}, nham, nspam int, rerr error) {
+func (f *Filter) ClassifyMessageReader(ctx context.Context, mf io.ReaderAt, size int64) (probability float64, words map[string]struct{}, hams, spams []WordScore, rerr error) {
 	m, err := message.EnsurePart(f.log.Logger, false, mf, size)
 	if err != nil && errors.Is(err, message.ErrBadContentType) {
 		// Invalid content-type header is a sure sign of spam.
 		//f.log.Infox("parsing content", err)
-		return 1, nil, 0, 0, nil
+		return 1, nil, nil, nil, nil
 	}
 	return f.ClassifyMessage(ctx, m)
 }

 // ClassifyMessage parses the mail message in r and returns the spam probability
 // (between 0 and 1), along with the tokenized words found in the message, and the
-// number of recognized ham and spam words.
-func (f *Filter) ClassifyMessage(ctx context.Context, m message.Part) (probability float64, words map[string]struct{}, nham, nspam int, rerr error) {
+// ham and spam words and their scores used.
+func (f *Filter) ClassifyMessage(ctx context.Context, m message.Part) (probability float64, words map[string]struct{}, hams, spams []WordScore, rerr error) {
 	var err error
 	words, err = f.ParseMessage(m)
 	if err != nil {
-		return 0, nil, 0, 0, err
+		return 0, nil, nil, nil, err
 	}

-	probability, nham, nspam, err = f.ClassifyWords(ctx, words)
-	return probability, words, nham, nspam, err
+	probability, hams, spams, err = f.ClassifyWords(ctx, words)
+	return probability, words, hams, spams, err
 }

 // Train adds the words of a single message to the filter.
--- a/message/headerwriter.go
+++ b/message/headerwriter.go
@ -1,6 +1,7 @@
 package message

 import (
+	"bytes"
 	"fmt"
 	"strings"
 )
@ -39,12 +40,20 @@ func (w *HeaderWriter) Add(separator string, texts ...string) {
 	}
 }

-// AddWrap adds data, folding anywhere in the buffer. E.g. for base64 data.
-func (w *HeaderWriter) AddWrap(buf []byte) {
+// AddWrap adds data. If text is set, wrapping happens at space/tab, otherwise
+// anywhere in the buffer (e.g. for base64 data).
+func (w *HeaderWriter) AddWrap(buf []byte, text bool) {
 	for len(buf) > 0 {
 		line := buf
 		n := 78 - w.lineLen
 		if len(buf) > n {
+			if text {
+				if i := bytes.LastIndexAny(buf[:n], " \t"); i > 0 {
+					n = i
+				} else if i = bytes.IndexAny(buf, " \t"); i > 0 {
+					n = i
+				}
+			}
 			line, buf = buf[:n], buf[n:]
 		} else {
 			buf = nil
--- a/smtpserver/analyze.go
+++ b/smtpserver/analyze.go
@ -45,6 +45,7 @@ type delivery struct {
 	dmarcResult      dmarc.Result
 	dkimResults      []dkim.Result
 	iprevStatus      iprev.Status
+	smtputf8         bool
 }

 type analysis struct {
@ -58,7 +59,8 @@ type analysis struct {
 	err                 error              // For our own logging, not sent to remote.
 	dmarcReport         *dmarcrpt.Feedback // Validated DMARC aggregate report, not yet stored.
 	tlsReport           *tlsrpt.Report     // Validated TLS report, not yet stored.
-	reason              string             // If non-empty, reason for this decision. Can be one of reputationMethod and a few other tokens.
+	reason              string             // If non-empty, reason for this decision. Values from reputationMethod and reason* below.
+	reasonText          []string           // Additional details for reason, human-readable, added to X-Mox-Reason header.
 	dmarcOverrideReason string             // If set, one of dmarcrpt.PolicyOverride
 	// Additional headers to add during delivery. Used for reasons a message to a
 	// dmarc/tls reporting address isn't processed.
@ -99,6 +101,12 @@ func isListDomain(d delivery, ld dns.Domain) bool {
 func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d delivery) analysis {
 	var headers string

+	var reasonText []string
+	addReasonText := func(format string, args ...any) {
+		s := fmt.Sprintf(format, args...)
+		reasonText = append(reasonText, s)
+	}
+
 	// We don't want to let a single IP or network deliver too many messages to an
 	// account. They may fill up the mailbox, either with messages that have to be
 	// purged, or by filling the disk. We check both cases for IP's and networks.
@ -175,11 +183,13 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
 	if err != nil && !rateError {
 		log.Errorx("checking delivery rates", err)
 		metricDelivery.WithLabelValues("checkrates", "").Inc()
-		return analysis{d, false, "", smtp.C451LocalErr, smtp.SeSys3Other0, false, "error processing", err, nil, nil, reasonReputationError, "", headers}
+		addReasonText("checking delivery rates: %v", err)
+		return analysis{d, false, "", smtp.C451LocalErr, smtp.SeSys3Other0, false, "error processing", err, nil, nil, reasonReputationError, reasonText, "", headers}
 	} else if err != nil {
 		log.Debugx("refusing due to high delivery rate", err)
 		metricDelivery.WithLabelValues("highrate", "").Inc()
-		return analysis{d, false, "", smtp.C452StorageFull, smtp.SeMailbox2Full2, true, err.Error(), err, nil, nil, reasonHighRate, "", headers}
+		addReasonText("high delivery rate")
+		return analysis{d, false, "", smtp.C452StorageFull, smtp.SeMailbox2Full2, true, err.Error(), err, nil, nil, reasonHighRate, reasonText, "", headers}
 	}

 	mailbox := d.destination.Mailbox
@ -196,8 +206,17 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
 	if rs != nil && !rs.ListAllowDNSDomain.IsZero() {
 		// todo: on temporary failures, reject temporarily?
 		if isListDomain(d, rs.ListAllowDNSDomain) {
+			addReasonText("validated message from a configured mailing list")
 			d.m.IsMailingList = true
-			return analysis{d: d, accept: true, mailbox: mailbox, reason: reasonListAllow, dmarcOverrideReason: string(dmarcrpt.PolicyOverrideMailingList), headers: headers}
+			return analysis{
+				d:                   d,
+				accept:              true,
+				mailbox:             mailbox,
+				reason:              reasonListAllow,
+				reasonText:          reasonText,
+				dmarcOverrideReason: string(dmarcrpt.PolicyOverrideMailingList),
+				headers:             headers,
+			}
 		}
 	}

@ -226,6 +245,7 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
 		d.m.DKIMDomains = dkimdoms
 		dmarcOverrideReason = string(dmarcrpt.PolicyOverrideForwarded)
 		log.Info("forwarded message, clearing identifying signals of forwarding mail server")
+		addReasonText("ruleset indicates forwarded message")
 	}

 	assignMailbox := func(tx *bstore.Tx) error {
@ -266,7 +286,8 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
 				})
 			})
 			if mberr != nil {
-				return analysis{d, false, mailbox, smtp.C451LocalErr, smtp.SeSys3Other0, false, "error processing", err, nil, nil, reasonReputationError, dmarcOverrideReason, headers}
+				addReasonText("error setting original destination mailbox for rejected message: %v", mberr)
+				return analysis{d, false, mailbox, smtp.C451LocalErr, smtp.SeSys3Other0, false, "error processing", err, nil, nil, reasonReputationError, reasonText, dmarcOverrideReason, headers}
 			}
 			d.m.MailboxID = 0 // We plan to reject, no need to set intended MailboxID.
 		}
@ -279,12 +300,18 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
 			// Don't draw attention, but don't go so far as to mark as junk.
 			d.m.Seen = true
 			log.Info("accepting reject to configured mailbox due to ruleset")
+			addReasonText("accepting reject to mailbox due to ruleset")
 		}
-		return analysis{d, accept, mailbox, code, secode, err == nil, errmsg, err, nil, nil, reason, dmarcOverrideReason, headers}
+		return analysis{d, accept, mailbox, code, secode, err == nil, errmsg, err, nil, nil, reason, reasonText, dmarcOverrideReason, headers}
 	}

 	if d.dmarcUse && d.dmarcResult.Reject {
+		addReasonText("message does not pass domain dmarc policy which asks to reject")
 		return reject(smtp.C550MailboxUnavail, smtp.SePol7MultiAuthFails26, "rejecting per dmarc policy", nil, reasonDMARCPolicy)
+	} else if !d.dmarcUse {
+		addReasonText("not using any dmarc result")
+	} else {
+		addReasonText("dmarc ok")
 	}
 	// todo: should we also reject messages that have a dmarc pass but an spf record "v=spf1 -all"? suggested by m3aawg best practices.

@ -381,13 +408,28 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
 				return err
 			}

-			isjunk, conclusive, method, err = reputation(tx, log, d.m)
+			var text string
+			isjunk, conclusive, method, text, err = reputation(tx, log, d.m, d.smtputf8)
 			reason = string(method)
+			s := "address/dkim/spf/ip-based reputation ("
+			if isjunk != nil && *isjunk {
+				s += "junk, "
+			} else if isjunk != nil && !*isjunk {
+				s += "nonjunk, "
+			}
+			if conclusive {
+				s += "conclusive"
+			} else {
+				s += "inconclusive"
+			}
+			s += ", " + text + ")"
+			addReasonText("%s", s)
 			return err
 		})
 	})
 	if err != nil {
 		log.Infox("determining reputation", err, slog.Any("message", d.m))
+		addReasonText("determining reputation: %v", err)
 		return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonReputationError)
 	}
 	log.Info("reputation analyzed",
@ -396,12 +438,33 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
 		slog.String("method", string(method)))
 	if conclusive {
 		if !*isjunk {
-			return analysis{d: d, accept: true, mailbox: mailbox, dmarcReport: dmarcReport, tlsReport: tlsReport, reason: reason, dmarcOverrideReason: dmarcOverrideReason, headers: headers}
+			return analysis{
+				d:                   d,
+				accept:              true,
+				mailbox:             mailbox,
+				dmarcReport:         dmarcReport,
+				tlsReport:           tlsReport,
+				reason:              reason,
+				reasonText:          reasonText,
+				dmarcOverrideReason: dmarcOverrideReason,
+				headers:             headers,
+			}
 		}
 		return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, string(method))
 	} else if dmarcReport != nil || tlsReport != nil {
 		log.Info("accepting message with dmarc aggregate report or tls report without reputation")
-		return analysis{d: d, accept: true, mailbox: mailbox, dmarcReport: dmarcReport, tlsReport: tlsReport, reason: reasonReporting, dmarcOverrideReason: dmarcOverrideReason, headers: headers}
+		addReasonText("message inconclusive reputation but with dmarc or tls report")
+		return analysis{
+			d:                   d,
+			accept:              true,
+			mailbox:             mailbox,
+			dmarcReport:         dmarcReport,
+			tlsReport:           tlsReport,
+			reason:              reasonReporting,
+			reasonText:          reasonText,
+			dmarcOverrideReason: dmarcOverrideReason,
+			headers:             headers,
+		}
 	}
 	// If there was no previous message from sender or its domain, and we have an SPF
 	// (soft)fail, reject the message.
@ -409,6 +472,7 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
 	case methodDKIMSPF, methodIP1, methodIP2, methodIP3, methodNone:
 		switch d.m.MailFromValidation {
 		case store.ValidationFail, store.ValidationSoftfail:
+			addReasonText("no previous message from sender domain and spf result is (soft)fail")
 			return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", nil, reasonSPFPolicy)
 		}
 	}
@ -419,9 +483,13 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
 	case methodDKIMSPF, methodIP1, methodIP2, methodIP3, methodNone:
 		suspiciousIPrevFail = d.iprevStatus != iprev.StatusPass
 	}
+	if suspiciousIPrevFail {
+		addReasonText("suspicious iprev failure")
+	}

 	// With already a mild junk signal, an iprev fail on top is enough to reject.
 	if suspiciousIPrevFail && isjunk != nil && *isjunk {
+		addReasonText("message has a mild junk signal and mismatching reverse ip")
 		return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", nil, reasonIPrev)
 	}

@ -431,13 +499,23 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
 		subjectpassKey, err = d.acc.Subjectpass(d.canonicalAddress)
 		if err != nil {
 			log.Errorx("get key for verifying subject token", err)
+			addReasonText("subject pass error: %v", err)
 			return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonSubjectpassError)
 		}
 		err = subjectpass.Verify(log.Logger, d.dataFile, []byte(subjectpassKey), conf.SubjectPass.Period)
 		pass := err == nil
 		log.Infox("pass by subject token", err, slog.Bool("pass", pass))
 		if pass {
-			return analysis{d: d, accept: true, mailbox: mailbox, reason: reasonSubjectpass, dmarcOverrideReason: dmarcOverrideReason, headers: headers}
+			addReasonText("message has valid subjectpass token in subject")
+			return analysis{
+				d:                   d,
+				accept:              true,
+				mailbox:             mailbox,
+				reason:              reasonSubjectpass,
+				reasonText:          reasonText,
+				dmarcOverrideReason: dmarcOverrideReason,
+				headers:             headers,
+			}
 		}
 	}

@ -450,9 +528,10 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
 			err := f.Close()
 			log.Check(err, "closing junkfilter")
 		}()
-		contentProb, _, _, _, err := f.ClassifyMessageReader(ctx, store.FileMsgReader(d.m.MsgPrefix, d.dataFile), d.m.Size)
+		contentProb, _, hams, spams, err := f.ClassifyMessageReader(ctx, store.FileMsgReader(d.m.MsgPrefix, d.dataFile), d.m.Size)
 		if err != nil {
 			log.Errorx("testing for spam", err)
+			addReasonText("classify message error: %v", err)
 			return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonJunkClassifyError)
 		}
 		// todo: if isjunk is not nil (i.e. there was inconclusive reputation), use it in the probability calculation. give reputation a score of 0.25 or .75 perhaps?
@ -487,14 +566,17 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
 		// todo: some of these checks should also apply for reputation-based analysis with a weak signal, e.g. verified dkim/spf signal from new domain.
 		// With an iprev fail, non-TLS connection or our address not in To/Cc header, we set a higher bar for content.
 		reason = reasonJunkContent
+		var thresholdRemark string
 		if suspiciousIPrevFail && threshold > 0.25 {
 			threshold = 0.25
 			log.Info("setting junk threshold due to iprev fail", slog.Float64("threshold", threshold))
 			reason = reasonJunkContentStrict
+			thresholdRemark = " (stricter due to reverse ip mismatch)"
 		} else if !d.tls && threshold > 0.25 {
 			threshold = 0.25
 			log.Info("setting junk threshold due to plaintext smtp", slog.Float64("threshold", threshold))
 			reason = reasonJunkContentStrict
+			thresholdRemark = " (stricter due to missing tls)"
 		} else if (rs == nil || !rs.IsForward) && threshold > 0.25 && !rcptToMatch(d.msgTo) && !rcptToMatch(d.msgCc) {
 			// A common theme in junk messages is your recipient address not being in the To/Cc
 			// headers. We may be in Bcc, but that's unusual for first-time senders. Some
@ -503,6 +585,7 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
 			threshold = 0.25
 			log.Info("setting junk threshold due to smtp rcpt to and message to/cc address mismatch", slog.Float64("threshold", threshold))
 			reason = reasonJunkContentStrict
+			thresholdRemark = " (stricter due to recipient address not in to/cc header)"
 		}
 		accept = contentProb <= threshold
 		junkSubjectpass = contentProb < threshold-0.2
@ -510,9 +593,44 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
 			slog.Bool("accept", accept),
 			slog.Float64("contentprob", contentProb),
 			slog.Bool("subjectpass", junkSubjectpass))
+
+		s := "content: "
+		if accept {
+			s += "not junk"
+		} else {
+			s += "junk"
+		}
+		s += fmt.Sprintf(", spamscore %.2f, threshold %.2f%s", contentProb, threshold, thresholdRemark)
+		s += "(ham words: "
+		for i, w := range hams {
+			if i > 0 {
+				s += ", "
+			}
+			word := w.Word
+			if !d.smtputf8 && !isASCII(word) {
+				word = "(non-ascii)"
+			}
+			s += fmt.Sprintf("%s %.3f", word, w.Score)
+		}
+		s += "), (spam words: "
+		for i, w := range spams {
+			if i > 0 {
+				s += ", "
+			}
+			word := w.Word
+			if !d.smtputf8 && !isASCII(word) {
+				word = "(non-ascii)"
+			}
+			s += fmt.Sprintf("%s %.3f", word, w.Score)
+		}
+		s += ")"
+		addReasonText("%s", s)
 	} else if err != store.ErrNoJunkFilter {
 		log.Errorx("open junkfilter", err)
+		addReasonText("open junkfilter: %v", err)
 		return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonJunkFilterError)
+	} else {
+		addReasonText("no junk filter configured")
 	}

 	// If content looks good, we'll still look at DNS block lists for a reason to
@ -545,20 +663,43 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
 				accept = false
 				dnsblocklisted = true
 				reason = reasonDNSBlocklisted
+				addReasonText("dnsbl: ip %s listed in dnsbl %s", d.m.RemoteIP, zone.XName(d.smtputf8))
 				break
 			}
 		}
+		if !dnsblocklisted && len(d.dnsBLs) > 0 {
+			addReasonText("remote ip not blocklisted")
+		}
 	}

 	if accept {
-		return analysis{d: d, accept: true, mailbox: mailbox, reason: reasonNoBadSignals, dmarcOverrideReason: dmarcOverrideReason, headers: headers}
+		addReasonText("no known reputation and no bad signals")
+		return analysis{
+			d:                   d,
+			accept:              true,
+			mailbox:             mailbox,
+			reason:              reasonNoBadSignals,
+			reasonText:          reasonText,
+			dmarcOverrideReason: dmarcOverrideReason,
+			headers:             headers,
+		}
 	}

 	if subjectpassKey != "" && d.dmarcResult.Status == dmarc.StatusPass && method == methodNone && (dnsblocklisted || junkSubjectpass) {
 		log.Info("permanent reject with subjectpass hint of moderately spammy email without reputation")
 		pass := subjectpass.Generate(log.Logger, d.msgFrom, []byte(subjectpassKey), time.Now())
+		addReasonText("reject with request to try again with subjectpass token in subject")
 		return reject(smtp.C550MailboxUnavail, smtp.SePol7DeliveryUnauth1, subjectpass.Explanation+pass, nil, reasonGiveSubjectpass)
 	}

 	return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", nil, reason)
 }
+
+// isASCII returns whether s contains only 7-bit ASCII bytes. The empty string
+// is ASCII.
+func isASCII(s string) bool {
+	for _, b := range []byte(s) {
+		if b >= 0x80 {
+			// High bit set: byte is part of a non-ASCII character.
+			return false
+		}
+	}
+	return true
+}
--- a/smtpserver/reputation.go
+++ b/smtpserver/reputation.go
@ -4,10 +4,12 @@ import (
 	"errors"
 	"fmt"
 	"log/slog"
+	"strings"
 	"time"

 	"github.com/mjl-/bstore"

+	"github.com/mjl-/mox/dns"
 	"github.com/mjl-/mox/mlog"
 	"github.com/mjl-/mox/smtp"
 	"github.com/mjl-/mox/store"
@ -97,7 +99,7 @@ const (
 // ../rfc/6376:1915
 // ../rfc/6376:3716
 // ../rfc/7208:2167
-func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rconclusive bool, rmethod reputationMethod, rerr error) {
+func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message, smtputf8 bool) (rjunk *bool, rconclusive bool, rmethod reputationMethod, reasonText string, rerr error) {
 	boolptr := func(v bool) *bool {
 		return &v
 	}
@ -179,7 +181,7 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
 			// todo: we may want to look at dkim/spf in this case.
 			spam := msgs[0].Junk && (len(msgs) == 1 || msgs[1].Junk)
 			conclusive := m.MsgFromValidated
-			return &spam, conclusive, methodMsgfromFull, nil
+			return &spam, conclusive, methodMsgfromFull, "reputation of exact message-from address", nil
 		}
 		if !m.MsgFromValidated {
 			// Look for historic messages that were validated. If present, this is likely spam.
@ -189,7 +191,7 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
 			msgs = xmessageList(q, "msgfromfull-validated")
 			if len(msgs) > 0 {
 				spam := msgs[0].Junk && (len(msgs) == 1 || msgs[1].Junk)
-				return xtrue, spam, methodMsgfromFull, nil
+				return xtrue, spam, methodMsgfromFull, "unvalidated message with validated historic messages with exact message-from address", nil
 			}
 		}

@ -199,21 +201,23 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
 		qr.FilterEqual("Domain", m.MsgFromDomain)
 		qr.FilterGreaterEqual("Sent", now.Add(-3*year))
 		if xrecipientExists(qr) {
-			return xfalse, true, methodMsgtoFull, nil
+			return xfalse, true, methodMsgtoFull, "exact message-from address was earlier message recipient", nil
 		}

 		// Look for domain match, then for organizational domain match.
 		for _, orgdomain := range []bool{false, true} {
 			qm := store.Message{}
 			var method reputationMethod
-			var descr string
+			var source, descr string
 			if orgdomain {
 				qm.MsgFromOrgDomain = m.MsgFromOrgDomain
 				method = methodMsgfromOrgDomain
+				source = "organizational domain of message-from address"
 				descr = "msgfromorgdomain"
 			} else {
 				qm.MsgFromDomain = m.MsgFromDomain
 				method = methodMsgfromDomain
+				source = "exact domain of message-from address"
 				descr = "msgfromdomain"
 			}

@ -228,7 +232,8 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
 					}
 				}
 				if 100*nonjunk/len(msgs) > 80 {
-					return xfalse, true, method, nil
+					reasonText = fmt.Sprintf("positive reputation with %s based on %d messages", source, len(msgs))
+					return xfalse, true, method, reasonText, nil
 				}
 				if nonjunk == 0 {
 					// Only conclusive with at least 3 different localparts.
@ -236,13 +241,16 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
 					for _, m := range msgs {
 						localparts[m.MsgFromLocalpart] = struct{}{}
 						if len(localparts) == 3 {
-							return xtrue, true, method, nil
+							reasonText = fmt.Sprintf("negative reputation of at least 3 addresses with %s based on %d messages", source, len(msgs))
+							return xtrue, true, method, reasonText, nil
 						}
 					}
-					return xtrue, false, method, nil
+					reasonText = fmt.Sprintf("negative reputation with %s based on %d messages", source, len(msgs))
+					return xtrue, false, method, reasonText, nil
 				}
 				// Mixed signals from domain. We don't want to block a new sender.
-				return nil, false, method, nil
+				reasonText = fmt.Sprintf("mixed signals with %s based on %d messages", source, len(msgs))
+				return nil, false, method, reasonText, nil
 			}
 			if !m.MsgFromValidated {
 				// Look for historic messages that were validated. If present, this is likely spam.
@ -253,7 +261,8 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
 				msgs = xmessageList(q, descr+"-validated")
 				if len(msgs) > 0 {
 					spam := msgs[0].Junk && (len(msgs) == 1 || msgs[1].Junk)
-					return xtrue, spam, method, nil
+					reasonText = fmt.Sprintf("unvalidated message with %s while we have validated messages from that source", source)
+					return xtrue, spam, method, reasonText, nil
 				}
 			}

@ -262,13 +271,16 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
 			if orgdomain {
 				qr.FilterEqual("OrgDomain", m.MsgFromOrgDomain)
 				method = methodMsgtoOrgDomain
+				source = "organizational domain of message-from address"
 			} else {
 				qr.FilterEqual("Domain", m.MsgFromDomain)
 				method = methodMsgtoDomain
+				source = "exact domain of message-from address"
 			}
 			qr.FilterGreaterEqual("Sent", now.Add(-2*year))
 			if xrecipientExists(qr) {
-				return xfalse, true, method, nil
+				reasonText = fmt.Sprintf("%s was recipient address", source)
+				return xfalse, true, method, reasonText, nil
 			}
 		}
 	}
@ -277,6 +289,7 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
 	// We only use identities that passed validation. Failed identities are ignored. ../rfc/6376:2447
 	// todo future: we could do something with the DKIM identity (i=) field if it is more specific than just the domain (d=).
 	dkimspfsignals := []float64{}
+	dkimspfreasondoms := []string{}
 	dkimspfmsgs := 0
 	for _, dom := range m.DKIMDomains {
 		q := messageQuery(nil, year/2, 50)
@ -291,12 +304,15 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
 			}
 			pspam := float64(nspam) / float64(len(msgs))
 			dkimspfsignals = append(dkimspfsignals, pspam)
+			dkimspfreasondoms = append(dkimspfreasondoms, dom)
 			dkimspfmsgs = len(msgs)
 		}
 	}
 	if m.MailFromValidated || m.EHLOValidated {
+		var dom string
 		var msgs []store.Message
 		if m.MailFromValidated && m.MailFromDomain != "" {
+			dom = m.MailFromDomain
 			q := messageQuery(&store.Message{MailFromLocalpart: m.MailFromLocalpart, MailFromDomain: m.MailFromDomain}, year/2, 50)
 			msgs = xmessageList(q, "mailfrom")
 			if len(msgs) == 0 {
@ -305,6 +321,7 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
 			}
 		}
 		if len(msgs) == 0 && m.EHLOValidated && m.EHLODomain != "" {
+			dom = m.EHLODomain
 			q := messageQuery(&store.Message{EHLODomain: m.EHLODomain}, year/2, 50)
 			msgs = xmessageList(q, "ehlodomain")
 		}
@ -317,6 +334,7 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
 			}
 			pspam := float64(nspam) / float64(len(msgs))
 			dkimspfsignals = append(dkimspfsignals, pspam)
+			dkimspfreasondoms = append(dkimspfreasondoms, dom)
 			if len(msgs) > dkimspfmsgs {
 				dkimspfmsgs = len(msgs)
 			}
@ -324,20 +342,27 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
 	}
 	if len(dkimspfsignals) > 0 {
 		var nham, nspam int
-		for _, p := range dkimspfsignals {
+		var hamdoms, spamdoms []string
+		// Signals below .1 count as ham, above .9 as spam, anything in between is
+		// ignored. The domain for each signal is kept for the reason text.
+		for i, p := range dkimspfsignals {
+			d, _ := dns.ParseDomain(dkimspfreasondoms[i])
 			if p < .1 {
 				nham++
+				hamdoms = append(hamdoms, d.XName(smtputf8))
 			} else if p > .9 {
 				nspam++
+				spamdoms = append(spamdoms, d.XName(smtputf8))
 			}
 		}
 		if nham > 0 && nspam == 0 {
-			return xfalse, true, methodDKIMSPF, nil
+			reasonText = fmt.Sprintf("positive dkim/spf reputation for domain(s) %s", strings.Join(hamdoms, ","))
+			return xfalse, true, methodDKIMSPF, reasonText, nil
 		}
 		if nspam > 0 && nham == 0 {
-			return xtrue, dkimspfmsgs > 1, methodDKIMSPF, nil
+			// hamdoms is empty in this branch, the domains with a negative reputation are in spamdoms.
+			reasonText = fmt.Sprintf("negative dkim/spf reputation for domain(s) %s", strings.Join(spamdoms, ","))
+			return xtrue, dkimspfmsgs > 1, methodDKIMSPF, reasonText, nil
 		}
-		return nil, false, methodDKIMSPF, nil
+		reasonText = fmt.Sprintf("mixed dkim/spf reputation, positive for %s, negative for %s", strings.Join(hamdoms, ","), strings.Join(spamdoms, ","))
+		return nil, false, methodDKIMSPF, reasonText, nil
 	}

 	// IP-based. A wider mask needs more messages to be conclusive.
@ -345,23 +370,27 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
 	var msgs []store.Message
 	var need int
 	var method reputationMethod
+	var ip string
 	if m.RemoteIPMasked1 != "" {
 		q := messageQuery(&store.Message{RemoteIPMasked1: m.RemoteIPMasked1}, year/4, 50)
 		msgs = xmessageList(q, "ip1")
 		need = 2
 		method = methodIP1
+		ip = m.RemoteIPMasked1
 	}
 	if len(msgs) == 0 && m.RemoteIPMasked2 != "" {
 		q := messageQuery(&store.Message{RemoteIPMasked2: m.RemoteIPMasked2}, year/4, 50)
 		msgs = xmessageList(q, "ip2")
 		need = 5
 		method = methodIP2
+		ip = m.RemoteIPMasked2
 	}
 	if len(msgs) == 0 && m.RemoteIPMasked3 != "" {
 		q := messageQuery(&store.Message{RemoteIPMasked3: m.RemoteIPMasked3}, year/4, 50)
 		msgs = xmessageList(q, "ip3")
 		need = 10
 		method = methodIP3
+		ip = m.RemoteIPMasked3
 	}
 	if len(msgs) > 0 {
 		nspam := 0
@ -378,8 +407,24 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
 			spam = xtrue
 		}
 		conclusive := len(msgs) >= need && (pspam <= 0.1 || pspam >= 0.9)
-		return spam, conclusive, method, nil
+		v6 := strings.Contains(m.RemoteIP, ":")
+		reasonText = fmt.Sprintf("reputation for ip %s%s, spam score %.2f", ip, maskclasses[classmask{v6, method}], pspam)
+		return spam, conclusive, method, reasonText, nil
 	}

-	return nil, false, methodNone, nil
+	return nil, false, methodNone, "no address/spf/dkim/ip reputation", nil
+}
+
+type classmask struct {
+	v6     bool
+	method reputationMethod
+}
+
+var maskclasses = map[classmask]string{
+	{false, methodIP1}: "/32",
+	{false, methodIP2}: "/26",
+	{false, methodIP3}: "/21",
+	{true, methodIP1}:  "/64",
+	{true, methodIP2}:  "/48",
+	{true, methodIP3}:  "/32",
 }
--- a/smtpserver/reputation_test.go
+++ b/smtpserver/reputation_test.go
@ -148,7 +148,7 @@ func TestReputation(t *testing.T) {
 		var method reputationMethod
 		err = db.Read(ctxbg, func(tx *bstore.Tx) error {
 			var err error
-			isjunk, conclusive, method, err = reputation(tx, pkglog, &m)
+			isjunk, conclusive, method, _, err = reputation(tx, pkglog, &m, false)
 			return err
 		})
 		tcheck(t, err, "read tx")
--- a/smtpserver/server.go
+++ b/smtpserver/server.go
@ -2750,7 +2750,7 @@ func (c *conn) deliver(ctx context.Context, recvHdrFor func(string) string, msgW
 			msgTo = envelope.To
 			msgCc = envelope.CC
 		}
-		d := delivery{c.tls, &m, dataFile, smtpRcptTo, deliverTo, destination, canonicalAddr, acc, msgTo, msgCc, msgFrom, c.dnsBLs, dmarcUse, dmarcResult, dkimResults, iprevStatus}
+		d := delivery{c.tls, &m, dataFile, smtpRcptTo, deliverTo, destination, canonicalAddr, acc, msgTo, msgCc, msgFrom, c.dnsBLs, dmarcUse, dmarcResult, dkimResults, iprevStatus, c.smtputf8}

 		r := analyze(ctx, log, c.resolver, d)
 		return &r, nil
@ -2862,10 +2862,25 @@ func (c *conn) deliver(ctx context.Context, recvHdrFor func(string) string, msgW
 		rcptAuthResults.Methods = append([]message.AuthMethod{}, authResults.Methods...)
 		rcptAuthResults.Methods = append(rcptAuthResults.Methods, rcptDMARCMethod)

-		// Prepend reason as message header, for easy display in mail clients.
+		// Prepend reason as message header, for easy viewing in mail clients.
 		var xmox string
 		if a0.reason != "" {
-			xmox = "X-Mox-Reason: " + a0.reason + "\r\n"
+			hw := &message.HeaderWriter{}
+			hw.Add(" ", "X-Mox-Reason:")
+			hw.Add(" ", a0.reason)
+			for i, s := range a0.reasonText {
+				if i == 0 {
+					s = "; " + s
+				} else {
+					hw.Newline()
+				}
+				// Just in case any of the strings has a newline, replace it with space to not break the message.
+				s = strings.ReplaceAll(s, "\n", " ")
+				s = strings.ReplaceAll(s, "\r", " ")
+				s += ";"
+				hw.AddWrap([]byte(s), true)
+			}
+			xmox = hw.String()
 		}
 		xmox += a0.headers