add more details to x-mox-reason message header added during delivery, for understanding why a message is accepted/rejected

we add various information while analysing an incoming message, such as the
dkim/spf/ip reputation, the content-based junk filter threshold/result, and the
ham/spam words used.

for issue #179 by Fell and #157 by mattfbacon
This commit is contained in:
Mechiel Lukkien 2024-10-04 16:01:30 +02:00
parent 98d0ff22bb
commit 32b549b260
No known key found for this signature in database
7 changed files with 281 additions and 69 deletions

View file

@ -147,7 +147,7 @@ func (s *Sig) Header() (string, error) {
w.Addf(" ", "b=")
if len(s.Signature) > 0 {
w.AddWrap([]byte(base64.StdEncoding.EncodeToString(s.Signature)))
w.AddWrap([]byte(base64.StdEncoding.EncodeToString(s.Signature)), false)
}
w.Add("\r\n")
return w.String(), nil

View file

@ -343,21 +343,23 @@ func loadWords(ctx context.Context, db *bstore.DB, l []string, dst map[string]wo
return nil
}
// ClassifyWords returns the spam probability for the given words, and number of recognized ham and spam words.
func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) (probability float64, nham, nspam int, rerr error) {
if f.closed {
return 0, 0, 0, errClosed
}
// WordScore is a word with its score as used in classifications, based on
// (historic) training. ClassifyWords returns the ham and spam words it used as
// WordScore values.
type WordScore struct {
// Word is the word (token) that was scored.
Word string
Score float64 // 0 is ham, 1 is spam.
}
type xword struct {
Word string
R float64
// ClassifyWords returns the spam probability for the given words, along with the
// ham and spam words, with their scores, that were used in the classification.
func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) (probability float64, hams, spams []WordScore, rerr error) {
if f.closed {
return 0, nil, nil, errClosed
}
var hamHigh float64 = 0
var spamLow float64 = 1
var topHam []xword
var topSpam []xword
var topHam []WordScore
var topSpam []WordScore
// Find words that should be in the database.
lookupWords := []string{}
@ -389,7 +391,7 @@ func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) (
fetched := map[string]word{}
if len(lookupWords) > 0 {
if err := loadWords(ctx, f.db, lookupWords, fetched); err != nil {
return 0, 0, 0, err
return 0, nil, nil, err
}
for w, c := range fetched {
delete(expect, w)
@ -432,7 +434,7 @@ func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) (
if len(topHam) >= f.TopWords && r > hamHigh {
continue
}
topHam = append(topHam, xword{w, r})
topHam = append(topHam, WordScore{w, r})
if r > hamHigh {
hamHigh = r
}
@ -440,7 +442,7 @@ func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) (
if len(topSpam) >= f.TopWords && r < spamLow {
continue
}
topSpam = append(topSpam, xword{w, r})
topSpam = append(topSpam, WordScore{w, r})
if r < spamLow {
spamLow = r
}
@ -449,24 +451,24 @@ func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) (
sort.Slice(topHam, func(i, j int) bool {
a, b := topHam[i], topHam[j]
if a.R == b.R {
if a.Score == b.Score {
return len(a.Word) > len(b.Word)
}
return a.R < b.R
return a.Score < b.Score
})
sort.Slice(topSpam, func(i, j int) bool {
a, b := topSpam[i], topSpam[j]
if a.R == b.R {
if a.Score == b.Score {
return len(a.Word) > len(b.Word)
}
return a.R > b.R
return a.Score > b.Score
})
nham = f.TopWords
nham := f.TopWords
if nham > len(topHam) {
nham = len(topHam)
}
nspam = f.TopWords
nspam := f.TopWords
if nspam > len(topSpam) {
nspam = len(topSpam)
}
@ -475,27 +477,27 @@ func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) (
var eta float64
for _, x := range topHam {
eta += math.Log(1-x.R) - math.Log(x.R)
eta += math.Log(1-x.Score) - math.Log(x.Score)
}
for _, x := range topSpam {
eta += math.Log(1-x.R) - math.Log(x.R)
eta += math.Log(1-x.Score) - math.Log(x.Score)
}
f.log.Debug("top words", slog.Any("hams", topHam), slog.Any("spams", topSpam))
prob := 1 / (1 + math.Pow(math.E, eta))
return prob, len(topHam), len(topSpam), nil
return prob, topHam, topSpam, nil
}
// ClassifyMessagePath is a convenience wrapper for calling ClassifyMessage on a file.
func (f *Filter) ClassifyMessagePath(ctx context.Context, path string) (probability float64, words map[string]struct{}, nham, nspam int, rerr error) {
func (f *Filter) ClassifyMessagePath(ctx context.Context, path string) (probability float64, words map[string]struct{}, hams, spams []WordScore, rerr error) {
if f.closed {
return 0, nil, 0, 0, errClosed
return 0, nil, nil, nil, errClosed
}
mf, err := os.Open(path)
if err != nil {
return 0, nil, 0, 0, err
return 0, nil, nil, nil, err
}
defer func() {
err := mf.Close()
@ -503,33 +505,33 @@ func (f *Filter) ClassifyMessagePath(ctx context.Context, path string) (probabil
}()
fi, err := mf.Stat()
if err != nil {
return 0, nil, 0, 0, err
return 0, nil, nil, nil, err
}
return f.ClassifyMessageReader(ctx, mf, fi.Size())
}
func (f *Filter) ClassifyMessageReader(ctx context.Context, mf io.ReaderAt, size int64) (probability float64, words map[string]struct{}, nham, nspam int, rerr error) {
func (f *Filter) ClassifyMessageReader(ctx context.Context, mf io.ReaderAt, size int64) (probability float64, words map[string]struct{}, hams, spams []WordScore, rerr error) {
m, err := message.EnsurePart(f.log.Logger, false, mf, size)
if err != nil && errors.Is(err, message.ErrBadContentType) {
// Invalid content-type header is a sure sign of spam.
//f.log.Infox("parsing content", err)
return 1, nil, 0, 0, nil
return 1, nil, nil, nil, nil
}
return f.ClassifyMessage(ctx, m)
}
// ClassifyMessage parses the mail message m and returns the spam probability
// (between 0 and 1), along with the tokenized words found in the message, and the
// number of recognized ham and spam words.
func (f *Filter) ClassifyMessage(ctx context.Context, m message.Part) (probability float64, words map[string]struct{}, nham, nspam int, rerr error) {
// ham and spam words and their scores used.
func (f *Filter) ClassifyMessage(ctx context.Context, m message.Part) (probability float64, words map[string]struct{}, hams, spams []WordScore, rerr error) {
var err error
words, err = f.ParseMessage(m)
if err != nil {
return 0, nil, 0, 0, err
return 0, nil, nil, nil, err
}
probability, nham, nspam, err = f.ClassifyWords(ctx, words)
return probability, words, nham, nspam, err
probability, hams, spams, err = f.ClassifyWords(ctx, words)
return probability, words, hams, spams, err
}
// Train adds the words of a single message to the filter.

View file

@ -1,6 +1,7 @@
package message
import (
"bytes"
"fmt"
"strings"
)
@ -39,12 +40,20 @@ func (w *HeaderWriter) Add(separator string, texts ...string) {
}
}
// AddWrap adds data, folding anywhere in the buffer. E.g. for base64 data.
func (w *HeaderWriter) AddWrap(buf []byte) {
// AddWrap adds data. If text is set, wrapping happens at space/tab, otherwise
// anywhere in the buffer (e.g. for base64 data).
func (w *HeaderWriter) AddWrap(buf []byte, text bool) {
for len(buf) > 0 {
line := buf
n := 78 - w.lineLen
if len(buf) > n {
if text {
if i := bytes.LastIndexAny(buf[:n], " \t"); i > 0 {
n = i
} else if i = bytes.IndexAny(buf, " \t"); i > 0 {
n = i
}
}
line, buf = buf[:n], buf[n:]
} else {
buf = nil

View file

@ -45,6 +45,7 @@ type delivery struct {
dmarcResult dmarc.Result
dkimResults []dkim.Result
iprevStatus iprev.Status
smtputf8 bool
}
type analysis struct {
@ -58,7 +59,8 @@ type analysis struct {
err error // For our own logging, not sent to remote.
dmarcReport *dmarcrpt.Feedback // Validated DMARC aggregate report, not yet stored.
tlsReport *tlsrpt.Report // Validated TLS report, not yet stored.
reason string // If non-empty, reason for this decision. Can be one of reputationMethod and a few other tokens.
reason string // If non-empty, reason for this decision. Values from reputationMethod and reason* below.
reasonText []string // Additional details for reason, human-readable, added to X-Mox-Reason header.
dmarcOverrideReason string // If set, one of dmarcrpt.PolicyOverride
// Additional headers to add during delivery. Used for reasons a message to a
// dmarc/tls reporting address isn't processed.
@ -99,6 +101,12 @@ func isListDomain(d delivery, ld dns.Domain) bool {
func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d delivery) analysis {
var headers string
var reasonText []string
addReasonText := func(format string, args ...any) {
s := fmt.Sprintf(format, args...)
reasonText = append(reasonText, s)
}
// We don't want to let a single IP or network deliver too many messages to an
// account. They may fill up the mailbox, either with messages that have to be
// purged, or by filling the disk. We check both cases for IP's and networks.
@ -175,11 +183,13 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
if err != nil && !rateError {
log.Errorx("checking delivery rates", err)
metricDelivery.WithLabelValues("checkrates", "").Inc()
return analysis{d, false, "", smtp.C451LocalErr, smtp.SeSys3Other0, false, "error processing", err, nil, nil, reasonReputationError, "", headers}
addReasonText("checking delivery rates: %v", err)
return analysis{d, false, "", smtp.C451LocalErr, smtp.SeSys3Other0, false, "error processing", err, nil, nil, reasonReputationError, reasonText, "", headers}
} else if err != nil {
log.Debugx("refusing due to high delivery rate", err)
metricDelivery.WithLabelValues("highrate", "").Inc()
return analysis{d, false, "", smtp.C452StorageFull, smtp.SeMailbox2Full2, true, err.Error(), err, nil, nil, reasonHighRate, "", headers}
addReasonText("high delivery rate")
return analysis{d, false, "", smtp.C452StorageFull, smtp.SeMailbox2Full2, true, err.Error(), err, nil, nil, reasonHighRate, reasonText, "", headers}
}
mailbox := d.destination.Mailbox
@ -196,8 +206,17 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
if rs != nil && !rs.ListAllowDNSDomain.IsZero() {
// todo: on temporary failures, reject temporarily?
if isListDomain(d, rs.ListAllowDNSDomain) {
addReasonText("validated message from a configured mailing list")
d.m.IsMailingList = true
return analysis{d: d, accept: true, mailbox: mailbox, reason: reasonListAllow, dmarcOverrideReason: string(dmarcrpt.PolicyOverrideMailingList), headers: headers}
return analysis{
d: d,
accept: true,
mailbox: mailbox,
reason: reasonListAllow,
reasonText: reasonText,
dmarcOverrideReason: string(dmarcrpt.PolicyOverrideMailingList),
headers: headers,
}
}
}
@ -226,6 +245,7 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
d.m.DKIMDomains = dkimdoms
dmarcOverrideReason = string(dmarcrpt.PolicyOverrideForwarded)
log.Info("forwarded message, clearing identifying signals of forwarding mail server")
addReasonText("ruleset indicates forwarded message")
}
assignMailbox := func(tx *bstore.Tx) error {
@ -266,7 +286,8 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
})
})
if mberr != nil {
return analysis{d, false, mailbox, smtp.C451LocalErr, smtp.SeSys3Other0, false, "error processing", err, nil, nil, reasonReputationError, dmarcOverrideReason, headers}
addReasonText("error setting original destination mailbox for rejected message: %v", mberr)
return analysis{d, false, mailbox, smtp.C451LocalErr, smtp.SeSys3Other0, false, "error processing", err, nil, nil, reasonReputationError, reasonText, dmarcOverrideReason, headers}
}
d.m.MailboxID = 0 // We plan to reject, no need to set intended MailboxID.
}
@ -279,12 +300,18 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
// Don't draw attention, but don't go so far as to mark as junk.
d.m.Seen = true
log.Info("accepting reject to configured mailbox due to ruleset")
addReasonText("accepting reject to mailbox due to ruleset")
}
return analysis{d, accept, mailbox, code, secode, err == nil, errmsg, err, nil, nil, reason, dmarcOverrideReason, headers}
return analysis{d, accept, mailbox, code, secode, err == nil, errmsg, err, nil, nil, reason, reasonText, dmarcOverrideReason, headers}
}
if d.dmarcUse && d.dmarcResult.Reject {
addReasonText("message does not pass domain dmarc policy which asks to reject")
return reject(smtp.C550MailboxUnavail, smtp.SePol7MultiAuthFails26, "rejecting per dmarc policy", nil, reasonDMARCPolicy)
} else if !d.dmarcUse {
addReasonText("not using any dmarc result")
} else {
addReasonText("dmarc ok")
}
// todo: should we also reject messages that have a dmarc pass but an spf record "v=spf1 -all"? suggested by m3aawg best practices.
@ -381,13 +408,28 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
return err
}
isjunk, conclusive, method, err = reputation(tx, log, d.m)
var text string
isjunk, conclusive, method, text, err = reputation(tx, log, d.m, d.smtputf8)
reason = string(method)
s := "address/dkim/spf/ip-based reputation ("
if isjunk != nil && *isjunk {
s += "junk, "
} else if isjunk != nil && !*isjunk {
s += "nonjunk, "
}
if conclusive {
s += "conclusive"
} else {
s += "inconclusive"
}
s += ", " + text + ")"
addReasonText("%s", s)
return err
})
})
if err != nil {
log.Infox("determining reputation", err, slog.Any("message", d.m))
addReasonText("determining reputation: %v", err)
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonReputationError)
}
log.Info("reputation analyzed",
@ -396,12 +438,33 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
slog.String("method", string(method)))
if conclusive {
if !*isjunk {
return analysis{d: d, accept: true, mailbox: mailbox, dmarcReport: dmarcReport, tlsReport: tlsReport, reason: reason, dmarcOverrideReason: dmarcOverrideReason, headers: headers}
return analysis{
d: d,
accept: true,
mailbox: mailbox,
dmarcReport: dmarcReport,
tlsReport: tlsReport,
reason: reason,
reasonText: reasonText,
dmarcOverrideReason: dmarcOverrideReason,
headers: headers,
}
}
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, string(method))
} else if dmarcReport != nil || tlsReport != nil {
log.Info("accepting message with dmarc aggregate report or tls report without reputation")
return analysis{d: d, accept: true, mailbox: mailbox, dmarcReport: dmarcReport, tlsReport: tlsReport, reason: reasonReporting, dmarcOverrideReason: dmarcOverrideReason, headers: headers}
addReasonText("message inconclusive reputation but with dmarc or tls report")
return analysis{
d: d,
accept: true,
mailbox: mailbox,
dmarcReport: dmarcReport,
tlsReport: tlsReport,
reason: reasonReporting,
reasonText: reasonText,
dmarcOverrideReason: dmarcOverrideReason,
headers: headers,
}
}
// If there was no previous message from sender or its domain, and we have an SPF
// (soft)fail, reject the message.
@ -409,6 +472,7 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
case methodDKIMSPF, methodIP1, methodIP2, methodIP3, methodNone:
switch d.m.MailFromValidation {
case store.ValidationFail, store.ValidationSoftfail:
addReasonText("no previous message from sender domain and spf result is (soft)fail")
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", nil, reasonSPFPolicy)
}
}
@ -419,9 +483,13 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
case methodDKIMSPF, methodIP1, methodIP2, methodIP3, methodNone:
suspiciousIPrevFail = d.iprevStatus != iprev.StatusPass
}
if suspiciousIPrevFail {
addReasonText("suspicious iprev failure")
}
// With already a mild junk signal, an iprev fail on top is enough to reject.
if suspiciousIPrevFail && isjunk != nil && *isjunk {
addReasonText("message has a mild junk signal and mismatching reverse ip")
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", nil, reasonIPrev)
}
@ -431,13 +499,23 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
subjectpassKey, err = d.acc.Subjectpass(d.canonicalAddress)
if err != nil {
log.Errorx("get key for verifying subject token", err)
addReasonText("subject pass error: %v", err)
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonSubjectpassError)
}
err = subjectpass.Verify(log.Logger, d.dataFile, []byte(subjectpassKey), conf.SubjectPass.Period)
pass := err == nil
log.Infox("pass by subject token", err, slog.Bool("pass", pass))
if pass {
return analysis{d: d, accept: true, mailbox: mailbox, reason: reasonSubjectpass, dmarcOverrideReason: dmarcOverrideReason, headers: headers}
addReasonText("message has valid subjectpass token in subject")
return analysis{
d: d,
accept: true,
mailbox: mailbox,
reason: reasonSubjectpass,
reasonText: reasonText,
dmarcOverrideReason: dmarcOverrideReason,
headers: headers,
}
}
}
@ -450,9 +528,10 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
err := f.Close()
log.Check(err, "closing junkfilter")
}()
contentProb, _, _, _, err := f.ClassifyMessageReader(ctx, store.FileMsgReader(d.m.MsgPrefix, d.dataFile), d.m.Size)
contentProb, _, hams, spams, err := f.ClassifyMessageReader(ctx, store.FileMsgReader(d.m.MsgPrefix, d.dataFile), d.m.Size)
if err != nil {
log.Errorx("testing for spam", err)
addReasonText("classify message error: %v", err)
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonJunkClassifyError)
}
// todo: if isjunk is not nil (i.e. there was inconclusive reputation), use it in the probability calculation. give reputation a score of 0.25 or .75 perhaps?
@ -487,14 +566,17 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
// todo: some of these checks should also apply for reputation-based analysis with a weak signal, e.g. verified dkim/spf signal from new domain.
// With an iprev fail, non-TLS connection or our address not in To/Cc header, we set a higher bar for content.
reason = reasonJunkContent
var thresholdRemark string
if suspiciousIPrevFail && threshold > 0.25 {
threshold = 0.25
log.Info("setting junk threshold due to iprev fail", slog.Float64("threshold", threshold))
reason = reasonJunkContentStrict
thresholdRemark = " (stricter due to reverse ip mismatch)"
} else if !d.tls && threshold > 0.25 {
threshold = 0.25
log.Info("setting junk threshold due to plaintext smtp", slog.Float64("threshold", threshold))
reason = reasonJunkContentStrict
thresholdRemark = " (stricter due to missing tls)"
} else if (rs == nil || !rs.IsForward) && threshold > 0.25 && !rcptToMatch(d.msgTo) && !rcptToMatch(d.msgCc) {
// A common theme in junk messages is your recipient address not being in the To/Cc
// headers. We may be in Bcc, but that's unusual for first-time senders. Some
@ -503,6 +585,7 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
threshold = 0.25
log.Info("setting junk threshold due to smtp rcpt to and message to/cc address mismatch", slog.Float64("threshold", threshold))
reason = reasonJunkContentStrict
thresholdRemark = " (stricter due to recipient address not in to/cc header)"
}
accept = contentProb <= threshold
junkSubjectpass = contentProb < threshold-0.2
@ -510,9 +593,44 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
slog.Bool("accept", accept),
slog.Float64("contentprob", contentProb),
slog.Bool("subjectpass", junkSubjectpass))
s := "content: "
if accept {
s += "not junk"
} else {
s += "junk"
}
s += fmt.Sprintf(", spamscore %.2f, threshold %.2f%s", contentProb, threshold, thresholdRemark)
s += "(ham words: "
for i, w := range hams {
if i > 0 {
s += ", "
}
word := w.Word
if !d.smtputf8 && !isASCII(word) {
word = "(non-ascii)"
}
s += fmt.Sprintf("%s %.3f", word, w.Score)
}
s += "), (spam words: "
for i, w := range spams {
if i > 0 {
s += ", "
}
word := w.Word
if !d.smtputf8 && !isASCII(word) {
word = "(non-ascii)"
}
s += fmt.Sprintf("%s %.3f", word, w.Score)
}
s += ")"
addReasonText("%s", s)
} else if err != store.ErrNoJunkFilter {
log.Errorx("open junkfilter", err)
addReasonText("open junkfilter: %v", err)
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonJunkFilterError)
} else {
addReasonText("no junk filter configured")
}
// If content looks good, we'll still look at DNS block lists for a reason to
@ -545,20 +663,43 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
accept = false
dnsblocklisted = true
reason = reasonDNSBlocklisted
addReasonText("dnsbl: ip %s listed in dnsbl %s", d.m.RemoteIP, zone.XName(d.smtputf8))
break
}
}
if !dnsblocklisted && len(d.dnsBLs) > 0 {
addReasonText("remote ip not blocklisted")
}
}
if accept {
return analysis{d: d, accept: true, mailbox: mailbox, reason: reasonNoBadSignals, dmarcOverrideReason: dmarcOverrideReason, headers: headers}
addReasonText("no known reputation and no bad signals")
return analysis{
d: d,
accept: true,
mailbox: mailbox,
reason: reasonNoBadSignals,
reasonText: reasonText,
dmarcOverrideReason: dmarcOverrideReason,
headers: headers,
}
}
if subjectpassKey != "" && d.dmarcResult.Status == dmarc.StatusPass && method == methodNone && (dnsblocklisted || junkSubjectpass) {
log.Info("permanent reject with subjectpass hint of moderately spammy email without reputation")
pass := subjectpass.Generate(log.Logger, d.msgFrom, []byte(subjectpassKey), time.Now())
addReasonText("reject with request to try again with subjectpass token in subject")
return reject(smtp.C550MailboxUnavail, smtp.SePol7DeliveryUnauth1, subjectpass.Explanation+pass, nil, reasonGiveSubjectpass)
}
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", nil, reason)
}
// isASCII reports whether s consists solely of 7-bit ASCII bytes. The empty
// string is considered ASCII.
//
// It is used to decide whether a word may be written as-is into a message
// header when the SMTP transaction did not use SMTPUTF8.
func isASCII(s string) bool {
	for i := 0; i < len(s); i++ {
		// A byte with the high bit set is not ASCII (e.g. part of a multi-byte
		// UTF-8 sequence).
		if s[i] >= 0x80 {
			return false
		}
	}
	return true
}

View file

@ -4,10 +4,12 @@ import (
"errors"
"fmt"
"log/slog"
"strings"
"time"
"github.com/mjl-/bstore"
"github.com/mjl-/mox/dns"
"github.com/mjl-/mox/mlog"
"github.com/mjl-/mox/smtp"
"github.com/mjl-/mox/store"
@ -97,7 +99,7 @@ const (
// ../rfc/6376:1915
// ../rfc/6376:3716
// ../rfc/7208:2167
func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rconclusive bool, rmethod reputationMethod, rerr error) {
func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message, smtputf8 bool) (rjunk *bool, rconclusive bool, rmethod reputationMethod, reasonText string, rerr error) {
boolptr := func(v bool) *bool {
return &v
}
@ -179,7 +181,7 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
// todo: we may want to look at dkim/spf in this case.
spam := msgs[0].Junk && (len(msgs) == 1 || msgs[1].Junk)
conclusive := m.MsgFromValidated
return &spam, conclusive, methodMsgfromFull, nil
return &spam, conclusive, methodMsgfromFull, "reputation of exact message-from address", nil
}
if !m.MsgFromValidated {
// Look for historic messages that were validated. If present, this is likely spam.
@ -189,7 +191,7 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
msgs = xmessageList(q, "msgfromfull-validated")
if len(msgs) > 0 {
spam := msgs[0].Junk && (len(msgs) == 1 || msgs[1].Junk)
return xtrue, spam, methodMsgfromFull, nil
return xtrue, spam, methodMsgfromFull, "unvalidated message with validated historic messages with exact message-from address", nil
}
}
@ -199,21 +201,23 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
qr.FilterEqual("Domain", m.MsgFromDomain)
qr.FilterGreaterEqual("Sent", now.Add(-3*year))
if xrecipientExists(qr) {
return xfalse, true, methodMsgtoFull, nil
return xfalse, true, methodMsgtoFull, "exact message-from address was earlier message recipient", nil
}
// Look for domain match, then for organizational domain match.
for _, orgdomain := range []bool{false, true} {
qm := store.Message{}
var method reputationMethod
var descr string
var source, descr string
if orgdomain {
qm.MsgFromOrgDomain = m.MsgFromOrgDomain
method = methodMsgfromOrgDomain
source = "organizational domain of message-from address"
descr = "msgfromorgdomain"
} else {
qm.MsgFromDomain = m.MsgFromDomain
method = methodMsgfromDomain
source = "exact domain of message-from address"
descr = "msgfromdomain"
}
@ -228,7 +232,8 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
}
}
if 100*nonjunk/len(msgs) > 80 {
return xfalse, true, method, nil
reasonText = fmt.Sprintf("positive reputation with %s based on %d messages", source, len(msgs))
return xfalse, true, method, reasonText, nil
}
if nonjunk == 0 {
// Only conclusive with at least 3 different localparts.
@ -236,13 +241,16 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
for _, m := range msgs {
localparts[m.MsgFromLocalpart] = struct{}{}
if len(localparts) == 3 {
return xtrue, true, method, nil
reasonText = fmt.Sprintf("negative reputation of at least 3 addresses with %s based on %d messages", source, len(msgs))
return xtrue, true, method, reasonText, nil
}
}
return xtrue, false, method, nil
reasonText = fmt.Sprintf("negative reputation with %s based on %d messages", source, len(msgs))
return xtrue, false, method, reasonText, nil
}
// Mixed signals from domain. We don't want to block a new sender.
return nil, false, method, nil
reasonText = fmt.Sprintf("mixed signals with %s based on %d messages", source, len(msgs))
return nil, false, method, reasonText, nil
}
if !m.MsgFromValidated {
// Look for historic messages that were validated. If present, this is likely spam.
@ -253,7 +261,8 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
msgs = xmessageList(q, descr+"-validated")
if len(msgs) > 0 {
spam := msgs[0].Junk && (len(msgs) == 1 || msgs[1].Junk)
return xtrue, spam, method, nil
reasonText = fmt.Sprintf("unvalidated message with %s while we have validated messages from that source", source)
return xtrue, spam, method, reasonText, nil
}
}
@ -262,13 +271,16 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
if orgdomain {
qr.FilterEqual("OrgDomain", m.MsgFromOrgDomain)
method = methodMsgtoOrgDomain
source = "organizational domain of message-from address"
} else {
qr.FilterEqual("Domain", m.MsgFromDomain)
method = methodMsgtoDomain
source = "exact domain of message-from address"
}
qr.FilterGreaterEqual("Sent", now.Add(-2*year))
if xrecipientExists(qr) {
return xfalse, true, method, nil
reasonText = fmt.Sprintf("%s was recipient address", source)
return xfalse, true, method, reasonText, nil
}
}
}
@ -277,6 +289,7 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
// We only use identities that passed validation. Failed identities are ignored. ../rfc/6376:2447
// todo future: we could do something with the DKIM identity (i=) field if it is more specific than just the domain (d=).
dkimspfsignals := []float64{}
dkimspfreasondoms := []string{}
dkimspfmsgs := 0
for _, dom := range m.DKIMDomains {
q := messageQuery(nil, year/2, 50)
@ -291,12 +304,15 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
}
pspam := float64(nspam) / float64(len(msgs))
dkimspfsignals = append(dkimspfsignals, pspam)
dkimspfreasondoms = append(dkimspfreasondoms, dom)
dkimspfmsgs = len(msgs)
}
}
if m.MailFromValidated || m.EHLOValidated {
var dom string
var msgs []store.Message
if m.MailFromValidated && m.MailFromDomain != "" {
dom = m.MailFromDomain
q := messageQuery(&store.Message{MailFromLocalpart: m.MailFromLocalpart, MailFromDomain: m.MailFromDomain}, year/2, 50)
msgs = xmessageList(q, "mailfrom")
if len(msgs) == 0 {
@ -305,6 +321,7 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
}
}
if len(msgs) == 0 && m.EHLOValidated && m.EHLODomain != "" {
dom = m.EHLODomain
q := messageQuery(&store.Message{EHLODomain: m.EHLODomain}, year/2, 50)
msgs = xmessageList(q, "ehlodomain")
}
@ -317,6 +334,7 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
}
pspam := float64(nspam) / float64(len(msgs))
dkimspfsignals = append(dkimspfsignals, pspam)
dkimspfreasondoms = append(dkimspfreasondoms, dom)
if len(msgs) > dkimspfmsgs {
dkimspfmsgs = len(msgs)
}
@ -324,20 +342,27 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
}
if len(dkimspfsignals) > 0 {
var nham, nspam int
for _, p := range dkimspfsignals {
var hamdoms, spamdoms []string
for i, p := range dkimspfsignals {
d, _ := dns.ParseDomain(dkimspfreasondoms[i])
if p < .1 {
nham++
hamdoms = append(hamdoms, d.XName(smtputf8))
} else if p > .9 {
nspam++
spamdoms = append(spamdoms, d.XName(smtputf8))
}
}
if nham > 0 && nspam == 0 {
return xfalse, true, methodDKIMSPF, nil
reasonText = fmt.Sprintf("positive dkim/spf reputation for domain(s) %s", strings.Join(hamdoms, ","))
return xfalse, true, methodDKIMSPF, reasonText, nil
}
if nspam > 0 && nham == 0 {
return xtrue, dkimspfmsgs > 1, methodDKIMSPF, nil
reasonText = fmt.Sprintf("negative dkim/spf reputation for domain(s) %s", strings.Join(hamdoms, ","))
return xtrue, dkimspfmsgs > 1, methodDKIMSPF, reasonText, nil
}
return nil, false, methodDKIMSPF, nil
reasonText = fmt.Sprintf("mixed dkim/spf reputation, positive for %s, negative for %s", strings.Join(hamdoms, ","), strings.Join(spamdoms, ","))
return nil, false, methodDKIMSPF, reasonText, nil
}
// IP-based. A wider mask needs more messages to be conclusive.
@ -345,23 +370,27 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
var msgs []store.Message
var need int
var method reputationMethod
var ip string
if m.RemoteIPMasked1 != "" {
q := messageQuery(&store.Message{RemoteIPMasked1: m.RemoteIPMasked1}, year/4, 50)
msgs = xmessageList(q, "ip1")
need = 2
method = methodIP1
ip = m.RemoteIPMasked1
}
if len(msgs) == 0 && m.RemoteIPMasked2 != "" {
q := messageQuery(&store.Message{RemoteIPMasked2: m.RemoteIPMasked2}, year/4, 50)
msgs = xmessageList(q, "ip2")
need = 5
method = methodIP2
ip = m.RemoteIPMasked2
}
if len(msgs) == 0 && m.RemoteIPMasked3 != "" {
q := messageQuery(&store.Message{RemoteIPMasked3: m.RemoteIPMasked3}, year/4, 50)
msgs = xmessageList(q, "ip3")
need = 10
method = methodIP3
ip = m.RemoteIPMasked3
}
if len(msgs) > 0 {
nspam := 0
@ -378,8 +407,24 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
spam = xtrue
}
conclusive := len(msgs) >= need && (pspam <= 0.1 || pspam >= 0.9)
return spam, conclusive, method, nil
v6 := strings.Contains(m.RemoteIP, ":")
reasonText = fmt.Sprintf("reputation for ip %s%s, spam score %.2f", ip, maskclasses[classmask{v6, method}], pspam)
return spam, conclusive, method, reasonText, nil
}
return nil, false, methodNone, nil
return nil, false, methodNone, "no address/spf/dkim/ip reputation", nil
}
// classmask is the key type of maskclasses: an address family combined with an
// IP-based reputation method.
type classmask struct {
// v6 is true for IPv6; reputation sets it when the remote IP contains a ":".
v6 bool
// method is the reputation method; maskclasses has entries for methodIP1,
// methodIP2 and methodIP3.
method reputationMethod
}
// maskclasses holds, per address family and IP-based reputation method, the
// prefix-length suffix that reputation appends to the masked IP in its reason
// text.
// NOTE(review): presumably these match the prefix lengths used to compute the
// RemoteIPMasked1/2/3 message fields; verify against the store package.
var maskclasses = map[classmask]string{
	// IPv4.
	classmask{v6: false, method: methodIP1}: "/32",
	classmask{v6: false, method: methodIP2}: "/26",
	classmask{v6: false, method: methodIP3}: "/21",
	// IPv6.
	classmask{v6: true, method: methodIP1}: "/64",
	classmask{v6: true, method: methodIP2}: "/48",
	classmask{v6: true, method: methodIP3}: "/32",
}

View file

@ -148,7 +148,7 @@ func TestReputation(t *testing.T) {
var method reputationMethod
err = db.Read(ctxbg, func(tx *bstore.Tx) error {
var err error
isjunk, conclusive, method, err = reputation(tx, pkglog, &m)
isjunk, conclusive, method, _, err = reputation(tx, pkglog, &m, false)
return err
})
tcheck(t, err, "read tx")

View file

@ -2750,7 +2750,7 @@ func (c *conn) deliver(ctx context.Context, recvHdrFor func(string) string, msgW
msgTo = envelope.To
msgCc = envelope.CC
}
d := delivery{c.tls, &m, dataFile, smtpRcptTo, deliverTo, destination, canonicalAddr, acc, msgTo, msgCc, msgFrom, c.dnsBLs, dmarcUse, dmarcResult, dkimResults, iprevStatus}
d := delivery{c.tls, &m, dataFile, smtpRcptTo, deliverTo, destination, canonicalAddr, acc, msgTo, msgCc, msgFrom, c.dnsBLs, dmarcUse, dmarcResult, dkimResults, iprevStatus, c.smtputf8}
r := analyze(ctx, log, c.resolver, d)
return &r, nil
@ -2862,10 +2862,25 @@ func (c *conn) deliver(ctx context.Context, recvHdrFor func(string) string, msgW
rcptAuthResults.Methods = append([]message.AuthMethod{}, authResults.Methods...)
rcptAuthResults.Methods = append(rcptAuthResults.Methods, rcptDMARCMethod)
// Prepend reason as message header, for easy display in mail clients.
// Prepend reason as message header, for easy viewing in mail clients.
var xmox string
if a0.reason != "" {
xmox = "X-Mox-Reason: " + a0.reason + "\r\n"
hw := &message.HeaderWriter{}
hw.Add(" ", "X-Mox-Reason:")
hw.Add(" ", a0.reason)
for i, s := range a0.reasonText {
if i == 0 {
s = "; " + s
} else {
hw.Newline()
}
// Just in case any of the strings has a newline, replace it with space to not break the message.
s = strings.ReplaceAll(s, "\n", " ")
s = strings.ReplaceAll(s, "\r", " ")
s += ";"
hw.AddWrap([]byte(s), true)
}
xmox = hw.String()
}
xmox += a0.headers