mirror of
https://github.com/mjl-/mox.git
synced 2024-12-26 00:13:47 +03:00
add more details to x-mox-reason message header added during delivery, for understanding why a message is accepted/rejected
We add various information while analysing an incoming message, such as dkim/spf/ip reputation, the content-based junk filter threshold/result, and the ham/spam words used. For issue #179 by Fell and #157 by mattfbacon.
This commit is contained in:
parent
98d0ff22bb
commit
32b549b260
7 changed files with 281 additions and 69 deletions
|
@ -147,7 +147,7 @@ func (s *Sig) Header() (string, error) {
|
|||
|
||||
w.Addf(" ", "b=")
|
||||
if len(s.Signature) > 0 {
|
||||
w.AddWrap([]byte(base64.StdEncoding.EncodeToString(s.Signature)))
|
||||
w.AddWrap([]byte(base64.StdEncoding.EncodeToString(s.Signature)), false)
|
||||
}
|
||||
w.Add("\r\n")
|
||||
return w.String(), nil
|
||||
|
|
|
@ -343,21 +343,23 @@ func loadWords(ctx context.Context, db *bstore.DB, l []string, dst map[string]wo
|
|||
return nil
|
||||
}
|
||||
|
||||
// ClassifyWords returns the spam probability for the given words, and number of recognized ham and spam words.
|
||||
func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) (probability float64, nham, nspam int, rerr error) {
|
||||
if f.closed {
|
||||
return 0, 0, 0, errClosed
|
||||
}
|
||||
// WordScore is a word with its score as used in classifications, based on
|
||||
// (historic) training.
|
||||
type WordScore struct {
|
||||
Word string
|
||||
Score float64 // 0 is ham, 1 is spam.
|
||||
}
|
||||
|
||||
type xword struct {
|
||||
Word string
|
||||
R float64
|
||||
// ClassifyWords returns the spam probability for the given words, and the recognized ham and spam words, with their scores, that were used for the classification.
|
||||
func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) (probability float64, hams, spams []WordScore, rerr error) {
|
||||
if f.closed {
|
||||
return 0, nil, nil, errClosed
|
||||
}
|
||||
|
||||
var hamHigh float64 = 0
|
||||
var spamLow float64 = 1
|
||||
var topHam []xword
|
||||
var topSpam []xword
|
||||
var topHam []WordScore
|
||||
var topSpam []WordScore
|
||||
|
||||
// Find words that should be in the database.
|
||||
lookupWords := []string{}
|
||||
|
@ -389,7 +391,7 @@ func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) (
|
|||
fetched := map[string]word{}
|
||||
if len(lookupWords) > 0 {
|
||||
if err := loadWords(ctx, f.db, lookupWords, fetched); err != nil {
|
||||
return 0, 0, 0, err
|
||||
return 0, nil, nil, err
|
||||
}
|
||||
for w, c := range fetched {
|
||||
delete(expect, w)
|
||||
|
@ -432,7 +434,7 @@ func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) (
|
|||
if len(topHam) >= f.TopWords && r > hamHigh {
|
||||
continue
|
||||
}
|
||||
topHam = append(topHam, xword{w, r})
|
||||
topHam = append(topHam, WordScore{w, r})
|
||||
if r > hamHigh {
|
||||
hamHigh = r
|
||||
}
|
||||
|
@ -440,7 +442,7 @@ func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) (
|
|||
if len(topSpam) >= f.TopWords && r < spamLow {
|
||||
continue
|
||||
}
|
||||
topSpam = append(topSpam, xword{w, r})
|
||||
topSpam = append(topSpam, WordScore{w, r})
|
||||
if r < spamLow {
|
||||
spamLow = r
|
||||
}
|
||||
|
@ -449,24 +451,24 @@ func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) (
|
|||
|
||||
sort.Slice(topHam, func(i, j int) bool {
|
||||
a, b := topHam[i], topHam[j]
|
||||
if a.R == b.R {
|
||||
if a.Score == b.Score {
|
||||
return len(a.Word) > len(b.Word)
|
||||
}
|
||||
return a.R < b.R
|
||||
return a.Score < b.Score
|
||||
})
|
||||
sort.Slice(topSpam, func(i, j int) bool {
|
||||
a, b := topSpam[i], topSpam[j]
|
||||
if a.R == b.R {
|
||||
if a.Score == b.Score {
|
||||
return len(a.Word) > len(b.Word)
|
||||
}
|
||||
return a.R > b.R
|
||||
return a.Score > b.Score
|
||||
})
|
||||
|
||||
nham = f.TopWords
|
||||
nham := f.TopWords
|
||||
if nham > len(topHam) {
|
||||
nham = len(topHam)
|
||||
}
|
||||
nspam = f.TopWords
|
||||
nspam := f.TopWords
|
||||
if nspam > len(topSpam) {
|
||||
nspam = len(topSpam)
|
||||
}
|
||||
|
@ -475,27 +477,27 @@ func (f *Filter) ClassifyWords(ctx context.Context, words map[string]struct{}) (
|
|||
|
||||
var eta float64
|
||||
for _, x := range topHam {
|
||||
eta += math.Log(1-x.R) - math.Log(x.R)
|
||||
eta += math.Log(1-x.Score) - math.Log(x.Score)
|
||||
}
|
||||
for _, x := range topSpam {
|
||||
eta += math.Log(1-x.R) - math.Log(x.R)
|
||||
eta += math.Log(1-x.Score) - math.Log(x.Score)
|
||||
}
|
||||
|
||||
f.log.Debug("top words", slog.Any("hams", topHam), slog.Any("spams", topSpam))
|
||||
|
||||
prob := 1 / (1 + math.Pow(math.E, eta))
|
||||
return prob, len(topHam), len(topSpam), nil
|
||||
return prob, topHam, topSpam, nil
|
||||
}
|
||||
|
||||
// ClassifyMessagePath is a convenience wrapper for calling ClassifyMessage on a file.
|
||||
func (f *Filter) ClassifyMessagePath(ctx context.Context, path string) (probability float64, words map[string]struct{}, nham, nspam int, rerr error) {
|
||||
func (f *Filter) ClassifyMessagePath(ctx context.Context, path string) (probability float64, words map[string]struct{}, hams, spams []WordScore, rerr error) {
|
||||
if f.closed {
|
||||
return 0, nil, 0, 0, errClosed
|
||||
return 0, nil, nil, nil, errClosed
|
||||
}
|
||||
|
||||
mf, err := os.Open(path)
|
||||
if err != nil {
|
||||
return 0, nil, 0, 0, err
|
||||
return 0, nil, nil, nil, err
|
||||
}
|
||||
defer func() {
|
||||
err := mf.Close()
|
||||
|
@ -503,33 +505,33 @@ func (f *Filter) ClassifyMessagePath(ctx context.Context, path string) (probabil
|
|||
}()
|
||||
fi, err := mf.Stat()
|
||||
if err != nil {
|
||||
return 0, nil, 0, 0, err
|
||||
return 0, nil, nil, nil, err
|
||||
}
|
||||
return f.ClassifyMessageReader(ctx, mf, fi.Size())
|
||||
}
|
||||
|
||||
func (f *Filter) ClassifyMessageReader(ctx context.Context, mf io.ReaderAt, size int64) (probability float64, words map[string]struct{}, nham, nspam int, rerr error) {
|
||||
func (f *Filter) ClassifyMessageReader(ctx context.Context, mf io.ReaderAt, size int64) (probability float64, words map[string]struct{}, hams, spams []WordScore, rerr error) {
|
||||
m, err := message.EnsurePart(f.log.Logger, false, mf, size)
|
||||
if err != nil && errors.Is(err, message.ErrBadContentType) {
|
||||
// Invalid content-type header is a sure sign of spam.
|
||||
//f.log.Infox("parsing content", err)
|
||||
return 1, nil, 0, 0, nil
|
||||
return 1, nil, nil, nil, nil
|
||||
}
|
||||
return f.ClassifyMessage(ctx, m)
|
||||
}
|
||||
|
||||
// ClassifyMessage parses the mail message in r and returns the spam probability
|
||||
// (between 0 and 1), along with the tokenized words found in the message, and the
|
||||
// number of recognized ham and spam words.
|
||||
func (f *Filter) ClassifyMessage(ctx context.Context, m message.Part) (probability float64, words map[string]struct{}, nham, nspam int, rerr error) {
|
||||
// ham and spam words and their scores used.
|
||||
func (f *Filter) ClassifyMessage(ctx context.Context, m message.Part) (probability float64, words map[string]struct{}, hams, spams []WordScore, rerr error) {
|
||||
var err error
|
||||
words, err = f.ParseMessage(m)
|
||||
if err != nil {
|
||||
return 0, nil, 0, 0, err
|
||||
return 0, nil, nil, nil, err
|
||||
}
|
||||
|
||||
probability, nham, nspam, err = f.ClassifyWords(ctx, words)
|
||||
return probability, words, nham, nspam, err
|
||||
probability, hams, spams, err = f.ClassifyWords(ctx, words)
|
||||
return probability, words, hams, spams, err
|
||||
}
|
||||
|
||||
// Train adds the words of a single message to the filter.
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package message
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
@ -39,12 +40,20 @@ func (w *HeaderWriter) Add(separator string, texts ...string) {
|
|||
}
|
||||
}
|
||||
|
||||
// AddWrap adds data, folding anywhere in the buffer. E.g. for base64 data.
|
||||
func (w *HeaderWriter) AddWrap(buf []byte) {
|
||||
// AddWrap adds data. If text is set, wrapping happens at space/tab, otherwise
|
||||
// anywhere in the buffer (e.g. for base64 data).
|
||||
func (w *HeaderWriter) AddWrap(buf []byte, text bool) {
|
||||
for len(buf) > 0 {
|
||||
line := buf
|
||||
n := 78 - w.lineLen
|
||||
if len(buf) > n {
|
||||
if text {
|
||||
if i := bytes.LastIndexAny(buf[:n], " \t"); i > 0 {
|
||||
n = i
|
||||
} else if i = bytes.IndexAny(buf, " \t"); i > 0 {
|
||||
n = i
|
||||
}
|
||||
}
|
||||
line, buf = buf[:n], buf[n:]
|
||||
} else {
|
||||
buf = nil
|
||||
|
|
|
@ -45,6 +45,7 @@ type delivery struct {
|
|||
dmarcResult dmarc.Result
|
||||
dkimResults []dkim.Result
|
||||
iprevStatus iprev.Status
|
||||
smtputf8 bool
|
||||
}
|
||||
|
||||
type analysis struct {
|
||||
|
@ -58,7 +59,8 @@ type analysis struct {
|
|||
err error // For our own logging, not sent to remote.
|
||||
dmarcReport *dmarcrpt.Feedback // Validated DMARC aggregate report, not yet stored.
|
||||
tlsReport *tlsrpt.Report // Validated TLS report, not yet stored.
|
||||
reason string // If non-empty, reason for this decision. Can be one of reputationMethod and a few other tokens.
|
||||
reason string // If non-empty, reason for this decision. Values from reputationMethod and reason* below.
|
||||
reasonText []string // Additional details for reason, human-readable, added to X-Mox-Reason header.
|
||||
dmarcOverrideReason string // If set, one of dmarcrpt.PolicyOverride
|
||||
// Additional headers to add during delivery. Used for reasons a message to a
|
||||
// dmarc/tls reporting address isn't processed.
|
||||
|
@ -99,6 +101,12 @@ func isListDomain(d delivery, ld dns.Domain) bool {
|
|||
func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d delivery) analysis {
|
||||
var headers string
|
||||
|
||||
var reasonText []string
|
||||
addReasonText := func(format string, args ...any) {
|
||||
s := fmt.Sprintf(format, args...)
|
||||
reasonText = append(reasonText, s)
|
||||
}
|
||||
|
||||
// We don't want to let a single IP or network deliver too many messages to an
|
||||
// account. They may fill up the mailbox, either with messages that have to be
|
||||
// purged, or by filling the disk. We check both cases for IP's and networks.
|
||||
|
@ -175,11 +183,13 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
|
|||
if err != nil && !rateError {
|
||||
log.Errorx("checking delivery rates", err)
|
||||
metricDelivery.WithLabelValues("checkrates", "").Inc()
|
||||
return analysis{d, false, "", smtp.C451LocalErr, smtp.SeSys3Other0, false, "error processing", err, nil, nil, reasonReputationError, "", headers}
|
||||
addReasonText("checking delivery rates: %v", err)
|
||||
return analysis{d, false, "", smtp.C451LocalErr, smtp.SeSys3Other0, false, "error processing", err, nil, nil, reasonReputationError, reasonText, "", headers}
|
||||
} else if err != nil {
|
||||
log.Debugx("refusing due to high delivery rate", err)
|
||||
metricDelivery.WithLabelValues("highrate", "").Inc()
|
||||
return analysis{d, false, "", smtp.C452StorageFull, smtp.SeMailbox2Full2, true, err.Error(), err, nil, nil, reasonHighRate, "", headers}
|
||||
addReasonText("high delivery rate")
|
||||
return analysis{d, false, "", smtp.C452StorageFull, smtp.SeMailbox2Full2, true, err.Error(), err, nil, nil, reasonHighRate, reasonText, "", headers}
|
||||
}
|
||||
|
||||
mailbox := d.destination.Mailbox
|
||||
|
@ -196,8 +206,17 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
|
|||
if rs != nil && !rs.ListAllowDNSDomain.IsZero() {
|
||||
// todo: on temporary failures, reject temporarily?
|
||||
if isListDomain(d, rs.ListAllowDNSDomain) {
|
||||
addReasonText("validated message from a configured mailing list")
|
||||
d.m.IsMailingList = true
|
||||
return analysis{d: d, accept: true, mailbox: mailbox, reason: reasonListAllow, dmarcOverrideReason: string(dmarcrpt.PolicyOverrideMailingList), headers: headers}
|
||||
return analysis{
|
||||
d: d,
|
||||
accept: true,
|
||||
mailbox: mailbox,
|
||||
reason: reasonListAllow,
|
||||
reasonText: reasonText,
|
||||
dmarcOverrideReason: string(dmarcrpt.PolicyOverrideMailingList),
|
||||
headers: headers,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -226,6 +245,7 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
|
|||
d.m.DKIMDomains = dkimdoms
|
||||
dmarcOverrideReason = string(dmarcrpt.PolicyOverrideForwarded)
|
||||
log.Info("forwarded message, clearing identifying signals of forwarding mail server")
|
||||
addReasonText("ruleset indicates forwarded message")
|
||||
}
|
||||
|
||||
assignMailbox := func(tx *bstore.Tx) error {
|
||||
|
@ -266,7 +286,8 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
|
|||
})
|
||||
})
|
||||
if mberr != nil {
|
||||
return analysis{d, false, mailbox, smtp.C451LocalErr, smtp.SeSys3Other0, false, "error processing", err, nil, nil, reasonReputationError, dmarcOverrideReason, headers}
|
||||
addReasonText("error setting original destination mailbox for rejected message: %v", mberr)
|
||||
return analysis{d, false, mailbox, smtp.C451LocalErr, smtp.SeSys3Other0, false, "error processing", err, nil, nil, reasonReputationError, reasonText, dmarcOverrideReason, headers}
|
||||
}
|
||||
d.m.MailboxID = 0 // We plan to reject, no need to set intended MailboxID.
|
||||
}
|
||||
|
@ -279,12 +300,18 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
|
|||
// Don't draw attention, but don't go so far as to mark as junk.
|
||||
d.m.Seen = true
|
||||
log.Info("accepting reject to configured mailbox due to ruleset")
|
||||
addReasonText("accepting reject to mailbox due to ruleset")
|
||||
}
|
||||
return analysis{d, accept, mailbox, code, secode, err == nil, errmsg, err, nil, nil, reason, dmarcOverrideReason, headers}
|
||||
return analysis{d, accept, mailbox, code, secode, err == nil, errmsg, err, nil, nil, reason, reasonText, dmarcOverrideReason, headers}
|
||||
}
|
||||
|
||||
if d.dmarcUse && d.dmarcResult.Reject {
|
||||
addReasonText("message does not pass domain dmarc policy which asks to reject")
|
||||
return reject(smtp.C550MailboxUnavail, smtp.SePol7MultiAuthFails26, "rejecting per dmarc policy", nil, reasonDMARCPolicy)
|
||||
} else if !d.dmarcUse {
|
||||
addReasonText("not using any dmarc result")
|
||||
} else {
|
||||
addReasonText("dmarc ok")
|
||||
}
|
||||
// todo: should we also reject messages that have a dmarc pass but an spf record "v=spf1 -all"? suggested by m3aawg best practices.
|
||||
|
||||
|
@ -381,13 +408,28 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
|
|||
return err
|
||||
}
|
||||
|
||||
isjunk, conclusive, method, err = reputation(tx, log, d.m)
|
||||
var text string
|
||||
isjunk, conclusive, method, text, err = reputation(tx, log, d.m, d.smtputf8)
|
||||
reason = string(method)
|
||||
s := "address/dkim/spf/ip-based reputation ("
|
||||
if isjunk != nil && *isjunk {
|
||||
s += "junk, "
|
||||
} else if isjunk != nil && !*isjunk {
|
||||
s += "nonjunk, "
|
||||
}
|
||||
if conclusive {
|
||||
s += "conclusive"
|
||||
} else {
|
||||
s += "inconclusive"
|
||||
}
|
||||
s += ", " + text + ")"
|
||||
addReasonText("%s", s)
|
||||
return err
|
||||
})
|
||||
})
|
||||
if err != nil {
|
||||
log.Infox("determining reputation", err, slog.Any("message", d.m))
|
||||
addReasonText("determining reputation: %v", err)
|
||||
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonReputationError)
|
||||
}
|
||||
log.Info("reputation analyzed",
|
||||
|
@ -396,12 +438,33 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
|
|||
slog.String("method", string(method)))
|
||||
if conclusive {
|
||||
if !*isjunk {
|
||||
return analysis{d: d, accept: true, mailbox: mailbox, dmarcReport: dmarcReport, tlsReport: tlsReport, reason: reason, dmarcOverrideReason: dmarcOverrideReason, headers: headers}
|
||||
return analysis{
|
||||
d: d,
|
||||
accept: true,
|
||||
mailbox: mailbox,
|
||||
dmarcReport: dmarcReport,
|
||||
tlsReport: tlsReport,
|
||||
reason: reason,
|
||||
reasonText: reasonText,
|
||||
dmarcOverrideReason: dmarcOverrideReason,
|
||||
headers: headers,
|
||||
}
|
||||
}
|
||||
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, string(method))
|
||||
} else if dmarcReport != nil || tlsReport != nil {
|
||||
log.Info("accepting message with dmarc aggregate report or tls report without reputation")
|
||||
return analysis{d: d, accept: true, mailbox: mailbox, dmarcReport: dmarcReport, tlsReport: tlsReport, reason: reasonReporting, dmarcOverrideReason: dmarcOverrideReason, headers: headers}
|
||||
addReasonText("message inconclusive reputation but with dmarc or tls report")
|
||||
return analysis{
|
||||
d: d,
|
||||
accept: true,
|
||||
mailbox: mailbox,
|
||||
dmarcReport: dmarcReport,
|
||||
tlsReport: tlsReport,
|
||||
reason: reasonReporting,
|
||||
reasonText: reasonText,
|
||||
dmarcOverrideReason: dmarcOverrideReason,
|
||||
headers: headers,
|
||||
}
|
||||
}
|
||||
// If there was no previous message from sender or its domain, and we have an SPF
|
||||
// (soft)fail, reject the message.
|
||||
|
@ -409,6 +472,7 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
|
|||
case methodDKIMSPF, methodIP1, methodIP2, methodIP3, methodNone:
|
||||
switch d.m.MailFromValidation {
|
||||
case store.ValidationFail, store.ValidationSoftfail:
|
||||
addReasonText("no previous message from sender domain and spf result is (soft)fail")
|
||||
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", nil, reasonSPFPolicy)
|
||||
}
|
||||
}
|
||||
|
@ -419,9 +483,13 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
|
|||
case methodDKIMSPF, methodIP1, methodIP2, methodIP3, methodNone:
|
||||
suspiciousIPrevFail = d.iprevStatus != iprev.StatusPass
|
||||
}
|
||||
if suspiciousIPrevFail {
|
||||
addReasonText("suspicious iprev failure")
|
||||
}
|
||||
|
||||
// With already a mild junk signal, an iprev fail on top is enough to reject.
|
||||
if suspiciousIPrevFail && isjunk != nil && *isjunk {
|
||||
addReasonText("message has a mild junk signal and mismatching reverse ip")
|
||||
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", nil, reasonIPrev)
|
||||
}
|
||||
|
||||
|
@ -431,13 +499,23 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
|
|||
subjectpassKey, err = d.acc.Subjectpass(d.canonicalAddress)
|
||||
if err != nil {
|
||||
log.Errorx("get key for verifying subject token", err)
|
||||
addReasonText("subject pass error: %v", err)
|
||||
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonSubjectpassError)
|
||||
}
|
||||
err = subjectpass.Verify(log.Logger, d.dataFile, []byte(subjectpassKey), conf.SubjectPass.Period)
|
||||
pass := err == nil
|
||||
log.Infox("pass by subject token", err, slog.Bool("pass", pass))
|
||||
if pass {
|
||||
return analysis{d: d, accept: true, mailbox: mailbox, reason: reasonSubjectpass, dmarcOverrideReason: dmarcOverrideReason, headers: headers}
|
||||
addReasonText("message has valid subjectpass token in subject")
|
||||
return analysis{
|
||||
d: d,
|
||||
accept: true,
|
||||
mailbox: mailbox,
|
||||
reason: reasonSubjectpass,
|
||||
reasonText: reasonText,
|
||||
dmarcOverrideReason: dmarcOverrideReason,
|
||||
headers: headers,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -450,9 +528,10 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
|
|||
err := f.Close()
|
||||
log.Check(err, "closing junkfilter")
|
||||
}()
|
||||
contentProb, _, _, _, err := f.ClassifyMessageReader(ctx, store.FileMsgReader(d.m.MsgPrefix, d.dataFile), d.m.Size)
|
||||
contentProb, _, hams, spams, err := f.ClassifyMessageReader(ctx, store.FileMsgReader(d.m.MsgPrefix, d.dataFile), d.m.Size)
|
||||
if err != nil {
|
||||
log.Errorx("testing for spam", err)
|
||||
addReasonText("classify message error: %v", err)
|
||||
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonJunkClassifyError)
|
||||
}
|
||||
// todo: if isjunk is not nil (i.e. there was inconclusive reputation), use it in the probability calculation. give reputation a score of 0.25 or .75 perhaps?
|
||||
|
@ -487,14 +566,17 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
|
|||
// todo: some of these checks should also apply for reputation-based analysis with a weak signal, e.g. verified dkim/spf signal from new domain.
|
||||
// With an iprev fail, non-TLS connection or our address not in To/Cc header, we set a higher bar for content.
|
||||
reason = reasonJunkContent
|
||||
var thresholdRemark string
|
||||
if suspiciousIPrevFail && threshold > 0.25 {
|
||||
threshold = 0.25
|
||||
log.Info("setting junk threshold due to iprev fail", slog.Float64("threshold", threshold))
|
||||
reason = reasonJunkContentStrict
|
||||
thresholdRemark = " (stricter due to reverse ip mismatch)"
|
||||
} else if !d.tls && threshold > 0.25 {
|
||||
threshold = 0.25
|
||||
log.Info("setting junk threshold due to plaintext smtp", slog.Float64("threshold", threshold))
|
||||
reason = reasonJunkContentStrict
|
||||
thresholdRemark = " (stricter due to missing tls)"
|
||||
} else if (rs == nil || !rs.IsForward) && threshold > 0.25 && !rcptToMatch(d.msgTo) && !rcptToMatch(d.msgCc) {
|
||||
// A common theme in junk messages is your recipient address not being in the To/Cc
|
||||
// headers. We may be in Bcc, but that's unusual for first-time senders. Some
|
||||
|
@ -503,6 +585,7 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
|
|||
threshold = 0.25
|
||||
log.Info("setting junk threshold due to smtp rcpt to and message to/cc address mismatch", slog.Float64("threshold", threshold))
|
||||
reason = reasonJunkContentStrict
|
||||
thresholdRemark = " (stricter due to recipient address not in to/cc header)"
|
||||
}
|
||||
accept = contentProb <= threshold
|
||||
junkSubjectpass = contentProb < threshold-0.2
|
||||
|
@ -510,9 +593,44 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
|
|||
slog.Bool("accept", accept),
|
||||
slog.Float64("contentprob", contentProb),
|
||||
slog.Bool("subjectpass", junkSubjectpass))
|
||||
|
||||
s := "content: "
|
||||
if accept {
|
||||
s += "not junk"
|
||||
} else {
|
||||
s += "junk"
|
||||
}
|
||||
s += fmt.Sprintf(", spamscore %.2f, threshold %.2f%s", contentProb, threshold, thresholdRemark)
|
||||
s += "(ham words: "
|
||||
for i, w := range hams {
|
||||
if i > 0 {
|
||||
s += ", "
|
||||
}
|
||||
word := w.Word
|
||||
if !d.smtputf8 && !isASCII(word) {
|
||||
word = "(non-ascii)"
|
||||
}
|
||||
s += fmt.Sprintf("%s %.3f", word, w.Score)
|
||||
}
|
||||
s += "), (spam words: "
|
||||
for i, w := range spams {
|
||||
if i > 0 {
|
||||
s += ", "
|
||||
}
|
||||
word := w.Word
|
||||
if !d.smtputf8 && !isASCII(word) {
|
||||
word = "(non-ascii)"
|
||||
}
|
||||
s += fmt.Sprintf("%s %.3f", word, w.Score)
|
||||
}
|
||||
s += ")"
|
||||
addReasonText("%s", s)
|
||||
} else if err != store.ErrNoJunkFilter {
|
||||
log.Errorx("open junkfilter", err)
|
||||
addReasonText("open junkfilter: %v", err)
|
||||
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonJunkFilterError)
|
||||
} else {
|
||||
addReasonText("no junk filter configured")
|
||||
}
|
||||
|
||||
// If content looks good, we'll still look at DNS block lists for a reason to
|
||||
|
@ -545,20 +663,43 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
|
|||
accept = false
|
||||
dnsblocklisted = true
|
||||
reason = reasonDNSBlocklisted
|
||||
addReasonText("dnsbl: ip %s listed in dnsbl %s", d.m.RemoteIP, zone.XName(d.smtputf8))
|
||||
break
|
||||
}
|
||||
}
|
||||
if !dnsblocklisted && len(d.dnsBLs) > 0 {
|
||||
addReasonText("remote ip not blocklisted")
|
||||
}
|
||||
}
|
||||
|
||||
if accept {
|
||||
return analysis{d: d, accept: true, mailbox: mailbox, reason: reasonNoBadSignals, dmarcOverrideReason: dmarcOverrideReason, headers: headers}
|
||||
addReasonText("no known reputation and no bad signals")
|
||||
return analysis{
|
||||
d: d,
|
||||
accept: true,
|
||||
mailbox: mailbox,
|
||||
reason: reasonNoBadSignals,
|
||||
reasonText: reasonText,
|
||||
dmarcOverrideReason: dmarcOverrideReason,
|
||||
headers: headers,
|
||||
}
|
||||
}
|
||||
|
||||
if subjectpassKey != "" && d.dmarcResult.Status == dmarc.StatusPass && method == methodNone && (dnsblocklisted || junkSubjectpass) {
|
||||
log.Info("permanent reject with subjectpass hint of moderately spammy email without reputation")
|
||||
pass := subjectpass.Generate(log.Logger, d.msgFrom, []byte(subjectpassKey), time.Now())
|
||||
addReasonText("reject with request to try again with subjectpass token in subject")
|
||||
return reject(smtp.C550MailboxUnavail, smtp.SePol7DeliveryUnauth1, subjectpass.Explanation+pass, nil, reasonGiveSubjectpass)
|
||||
}
|
||||
|
||||
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", nil, reason)
|
||||
}
|
||||
|
||||
// isASCII reports whether s consists solely of ASCII bytes, i.e. no byte has
// its high bit set. The empty string is considered ASCII.
//
// Callers use the negation to decide whether a word must be masked before it
// is written into a message header on a connection without SMTPUTF8, so this
// must return false as soon as any byte >= 0x80 is seen.
func isASCII(s string) bool {
	for i := 0; i < len(s); i++ {
		if s[i] >= 0x80 {
			return false
		}
	}
	return true
}
|
||||
|
|
|
@ -4,10 +4,12 @@ import (
|
|||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/mjl-/bstore"
|
||||
|
||||
"github.com/mjl-/mox/dns"
|
||||
"github.com/mjl-/mox/mlog"
|
||||
"github.com/mjl-/mox/smtp"
|
||||
"github.com/mjl-/mox/store"
|
||||
|
@ -97,7 +99,7 @@ const (
|
|||
// ../rfc/6376:1915
|
||||
// ../rfc/6376:3716
|
||||
// ../rfc/7208:2167
|
||||
func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rconclusive bool, rmethod reputationMethod, rerr error) {
|
||||
func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message, smtputf8 bool) (rjunk *bool, rconclusive bool, rmethod reputationMethod, reasonText string, rerr error) {
|
||||
boolptr := func(v bool) *bool {
|
||||
return &v
|
||||
}
|
||||
|
@ -179,7 +181,7 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
|
|||
// todo: we may want to look at dkim/spf in this case.
|
||||
spam := msgs[0].Junk && (len(msgs) == 1 || msgs[1].Junk)
|
||||
conclusive := m.MsgFromValidated
|
||||
return &spam, conclusive, methodMsgfromFull, nil
|
||||
return &spam, conclusive, methodMsgfromFull, "reputation of exact message-from address", nil
|
||||
}
|
||||
if !m.MsgFromValidated {
|
||||
// Look for historic messages that were validated. If present, this is likely spam.
|
||||
|
@ -189,7 +191,7 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
|
|||
msgs = xmessageList(q, "msgfromfull-validated")
|
||||
if len(msgs) > 0 {
|
||||
spam := msgs[0].Junk && (len(msgs) == 1 || msgs[1].Junk)
|
||||
return xtrue, spam, methodMsgfromFull, nil
|
||||
return xtrue, spam, methodMsgfromFull, "unvalidated message with validated historic messages with exact message-from address", nil
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -199,21 +201,23 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
|
|||
qr.FilterEqual("Domain", m.MsgFromDomain)
|
||||
qr.FilterGreaterEqual("Sent", now.Add(-3*year))
|
||||
if xrecipientExists(qr) {
|
||||
return xfalse, true, methodMsgtoFull, nil
|
||||
return xfalse, true, methodMsgtoFull, "exact message-from address was earlier message recipient", nil
|
||||
}
|
||||
|
||||
// Look for domain match, then for organizational domain match.
|
||||
for _, orgdomain := range []bool{false, true} {
|
||||
qm := store.Message{}
|
||||
var method reputationMethod
|
||||
var descr string
|
||||
var source, descr string
|
||||
if orgdomain {
|
||||
qm.MsgFromOrgDomain = m.MsgFromOrgDomain
|
||||
method = methodMsgfromOrgDomain
|
||||
source = "organizational domain of message-from address"
|
||||
descr = "msgfromorgdomain"
|
||||
} else {
|
||||
qm.MsgFromDomain = m.MsgFromDomain
|
||||
method = methodMsgfromDomain
|
||||
source = "exact domain of message-from address"
|
||||
descr = "msgfromdomain"
|
||||
}
|
||||
|
||||
|
@ -228,7 +232,8 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
|
|||
}
|
||||
}
|
||||
if 100*nonjunk/len(msgs) > 80 {
|
||||
return xfalse, true, method, nil
|
||||
reasonText = fmt.Sprintf("positive reputation with %s based on %d messages", source, len(msgs))
|
||||
return xfalse, true, method, reasonText, nil
|
||||
}
|
||||
if nonjunk == 0 {
|
||||
// Only conclusive with at least 3 different localparts.
|
||||
|
@ -236,13 +241,16 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
|
|||
for _, m := range msgs {
|
||||
localparts[m.MsgFromLocalpart] = struct{}{}
|
||||
if len(localparts) == 3 {
|
||||
return xtrue, true, method, nil
|
||||
reasonText = fmt.Sprintf("negative reputation of at least 3 addresses with %s based on %d messages", source, len(msgs))
|
||||
return xtrue, true, method, reasonText, nil
|
||||
}
|
||||
}
|
||||
return xtrue, false, method, nil
|
||||
reasonText = fmt.Sprintf("negative reputation with %s based on %d messages", source, len(msgs))
|
||||
return xtrue, false, method, reasonText, nil
|
||||
}
|
||||
// Mixed signals from domain. We don't want to block a new sender.
|
||||
return nil, false, method, nil
|
||||
reasonText = fmt.Sprintf("mixed signals with %s based on %d messages", source, len(msgs))
|
||||
return nil, false, method, reasonText, nil
|
||||
}
|
||||
if !m.MsgFromValidated {
|
||||
// Look for historic messages that were validated. If present, this is likely spam.
|
||||
|
@ -253,7 +261,8 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
|
|||
msgs = xmessageList(q, descr+"-validated")
|
||||
if len(msgs) > 0 {
|
||||
spam := msgs[0].Junk && (len(msgs) == 1 || msgs[1].Junk)
|
||||
return xtrue, spam, method, nil
|
||||
reasonText = fmt.Sprintf("unvalidated message with %s while we have validated messages from that source", source)
|
||||
return xtrue, spam, method, reasonText, nil
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -262,13 +271,16 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
|
|||
if orgdomain {
|
||||
qr.FilterEqual("OrgDomain", m.MsgFromOrgDomain)
|
||||
method = methodMsgtoOrgDomain
|
||||
source = "organizational domain of message-from address"
|
||||
} else {
|
||||
qr.FilterEqual("Domain", m.MsgFromDomain)
|
||||
method = methodMsgtoDomain
|
||||
source = "exact domain of message-from address"
|
||||
}
|
||||
qr.FilterGreaterEqual("Sent", now.Add(-2*year))
|
||||
if xrecipientExists(qr) {
|
||||
return xfalse, true, method, nil
|
||||
reasonText = fmt.Sprintf("%s was recipient address", source)
|
||||
return xfalse, true, method, reasonText, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -277,6 +289,7 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
|
|||
// We only use identities that passed validation. Failed identities are ignored. ../rfc/6376:2447
|
||||
// todo future: we could do something with the DKIM identity (i=) field if it is more specific than just the domain (d=).
|
||||
dkimspfsignals := []float64{}
|
||||
dkimspfreasondoms := []string{}
|
||||
dkimspfmsgs := 0
|
||||
for _, dom := range m.DKIMDomains {
|
||||
q := messageQuery(nil, year/2, 50)
|
||||
|
@ -291,12 +304,15 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
|
|||
}
|
||||
pspam := float64(nspam) / float64(len(msgs))
|
||||
dkimspfsignals = append(dkimspfsignals, pspam)
|
||||
dkimspfreasondoms = append(dkimspfreasondoms, dom)
|
||||
dkimspfmsgs = len(msgs)
|
||||
}
|
||||
}
|
||||
if m.MailFromValidated || m.EHLOValidated {
|
||||
var dom string
|
||||
var msgs []store.Message
|
||||
if m.MailFromValidated && m.MailFromDomain != "" {
|
||||
dom = m.MailFromDomain
|
||||
q := messageQuery(&store.Message{MailFromLocalpart: m.MailFromLocalpart, MailFromDomain: m.MailFromDomain}, year/2, 50)
|
||||
msgs = xmessageList(q, "mailfrom")
|
||||
if len(msgs) == 0 {
|
||||
|
@ -305,6 +321,7 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
|
|||
}
|
||||
}
|
||||
if len(msgs) == 0 && m.EHLOValidated && m.EHLODomain != "" {
|
||||
dom = m.EHLODomain
|
||||
q := messageQuery(&store.Message{EHLODomain: m.EHLODomain}, year/2, 50)
|
||||
msgs = xmessageList(q, "ehlodomain")
|
||||
}
|
||||
|
@ -317,6 +334,7 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
|
|||
}
|
||||
pspam := float64(nspam) / float64(len(msgs))
|
||||
dkimspfsignals = append(dkimspfsignals, pspam)
|
||||
dkimspfreasondoms = append(dkimspfreasondoms, dom)
|
||||
if len(msgs) > dkimspfmsgs {
|
||||
dkimspfmsgs = len(msgs)
|
||||
}
|
||||
|
@ -324,20 +342,27 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
|
|||
}
|
||||
if len(dkimspfsignals) > 0 {
|
||||
var nham, nspam int
|
||||
for _, p := range dkimspfsignals {
|
||||
var hamdoms, spamdoms []string
|
||||
for i, p := range dkimspfsignals {
|
||||
d, _ := dns.ParseDomain(dkimspfreasondoms[i])
|
||||
if p < .1 {
|
||||
nham++
|
||||
hamdoms = append(hamdoms, d.XName(smtputf8))
|
||||
} else if p > .9 {
|
||||
nspam++
|
||||
spamdoms = append(spamdoms, d.XName(smtputf8))
|
||||
}
|
||||
}
|
||||
if nham > 0 && nspam == 0 {
|
||||
return xfalse, true, methodDKIMSPF, nil
|
||||
reasonText = fmt.Sprintf("positive dkim/spf reputation for domain(s) %s", strings.Join(hamdoms, ","))
|
||||
return xfalse, true, methodDKIMSPF, reasonText, nil
|
||||
}
|
||||
if nspam > 0 && nham == 0 {
|
||||
return xtrue, dkimspfmsgs > 1, methodDKIMSPF, nil
|
||||
reasonText = fmt.Sprintf("negative dkim/spf reputation for domain(s) %s", strings.Join(spamdoms, ","))
|
||||
return xtrue, dkimspfmsgs > 1, methodDKIMSPF, reasonText, nil
|
||||
}
|
||||
return nil, false, methodDKIMSPF, nil
|
||||
reasonText = fmt.Sprintf("mixed dkim/spf reputation, positive for %s, negative for %s", strings.Join(hamdoms, ","), strings.Join(spamdoms, ","))
|
||||
return nil, false, methodDKIMSPF, reasonText, nil
|
||||
}
|
||||
|
||||
// IP-based. A wider mask needs more messages to be conclusive.
|
||||
|
@ -345,23 +370,27 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
|
|||
var msgs []store.Message
|
||||
var need int
|
||||
var method reputationMethod
|
||||
var ip string
|
||||
if m.RemoteIPMasked1 != "" {
|
||||
q := messageQuery(&store.Message{RemoteIPMasked1: m.RemoteIPMasked1}, year/4, 50)
|
||||
msgs = xmessageList(q, "ip1")
|
||||
need = 2
|
||||
method = methodIP1
|
||||
ip = m.RemoteIPMasked1
|
||||
}
|
||||
if len(msgs) == 0 && m.RemoteIPMasked2 != "" {
|
||||
q := messageQuery(&store.Message{RemoteIPMasked2: m.RemoteIPMasked2}, year/4, 50)
|
||||
msgs = xmessageList(q, "ip2")
|
||||
need = 5
|
||||
method = methodIP2
|
||||
ip = m.RemoteIPMasked2
|
||||
}
|
||||
if len(msgs) == 0 && m.RemoteIPMasked3 != "" {
|
||||
q := messageQuery(&store.Message{RemoteIPMasked3: m.RemoteIPMasked3}, year/4, 50)
|
||||
msgs = xmessageList(q, "ip3")
|
||||
need = 10
|
||||
method = methodIP3
|
||||
ip = m.RemoteIPMasked3
|
||||
}
|
||||
if len(msgs) > 0 {
|
||||
nspam := 0
|
||||
|
@ -378,8 +407,24 @@ func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rco
|
|||
spam = xtrue
|
||||
}
|
||||
conclusive := len(msgs) >= need && (pspam <= 0.1 || pspam >= 0.9)
|
||||
return spam, conclusive, method, nil
|
||||
v6 := strings.Contains(m.RemoteIP, ":")
|
||||
reasonText = fmt.Sprintf("reputation for ip %s%s, spam score %.2f", ip, maskclasses[classmask{v6, method}], pspam)
|
||||
return spam, conclusive, method, reasonText, nil
|
||||
}
|
||||
|
||||
return nil, false, methodNone, nil
|
||||
return nil, false, methodNone, "no address/spf/dkim/ip reputation", nil
|
||||
}
|
||||
|
||||
// classmask is the key type of maskclasses. It pairs the address family of
// the remote IP with the IP-based reputation method that produced a result,
// so the matching network-mask suffix can be looked up for the reason text.
type classmask struct {
|
||||
// v6 is set by the caller to whether the remote IP string contains a ":".
v6 bool
|
||||
// method is the IP reputation method that matched; maskclasses only has
// entries for methodIP1, methodIP2 and methodIP3.
method reputationMethod
|
||||
}
|
||||
|
||||
// maskclasses maps an address family and IP reputation method to the
// prefix-length suffix that is appended directly after the masked IP in the
// "reputation for ip ..." reason text. Keys with v6 false are used for
// addresses without a ":", keys with v6 true for addresses containing one.
// A lookup with any other method yields the empty string, so no suffix is
// added.
//
// NOTE(review): the prefix lengths presumably mirror the masks used to compute
// RemoteIPMasked1/2/3 on the stored message; confirm against where those
// fields are filled in.
var maskclasses = map[classmask]string{
|
||||
{false, methodIP1}: "/32",
|
||||
{false, methodIP2}: "/26",
|
||||
{false, methodIP3}: "/21",
|
||||
{true, methodIP1}: "/64",
|
||||
{true, methodIP2}: "/48",
|
||||
{true, methodIP3}: "/32",
|
||||
}
|
||||
|
|
|
@ -148,7 +148,7 @@ func TestReputation(t *testing.T) {
|
|||
var method reputationMethod
|
||||
err = db.Read(ctxbg, func(tx *bstore.Tx) error {
|
||||
var err error
|
||||
isjunk, conclusive, method, err = reputation(tx, pkglog, &m)
|
||||
isjunk, conclusive, method, _, err = reputation(tx, pkglog, &m, false)
|
||||
return err
|
||||
})
|
||||
tcheck(t, err, "read tx")
|
||||
|
|
|
@ -2750,7 +2750,7 @@ func (c *conn) deliver(ctx context.Context, recvHdrFor func(string) string, msgW
|
|||
msgTo = envelope.To
|
||||
msgCc = envelope.CC
|
||||
}
|
||||
d := delivery{c.tls, &m, dataFile, smtpRcptTo, deliverTo, destination, canonicalAddr, acc, msgTo, msgCc, msgFrom, c.dnsBLs, dmarcUse, dmarcResult, dkimResults, iprevStatus}
|
||||
d := delivery{c.tls, &m, dataFile, smtpRcptTo, deliverTo, destination, canonicalAddr, acc, msgTo, msgCc, msgFrom, c.dnsBLs, dmarcUse, dmarcResult, dkimResults, iprevStatus, c.smtputf8}
|
||||
|
||||
r := analyze(ctx, log, c.resolver, d)
|
||||
return &r, nil
|
||||
|
@ -2862,10 +2862,25 @@ func (c *conn) deliver(ctx context.Context, recvHdrFor func(string) string, msgW
|
|||
rcptAuthResults.Methods = append([]message.AuthMethod{}, authResults.Methods...)
|
||||
rcptAuthResults.Methods = append(rcptAuthResults.Methods, rcptDMARCMethod)
|
||||
|
||||
// Prepend reason as message header, for easy display in mail clients.
|
||||
// Prepend reason as message header, for easy viewing in mail clients.
|
||||
var xmox string
|
||||
if a0.reason != "" {
|
||||
xmox = "X-Mox-Reason: " + a0.reason + "\r\n"
|
||||
hw := &message.HeaderWriter{}
|
||||
hw.Add(" ", "X-Mox-Reason:")
|
||||
hw.Add(" ", a0.reason)
|
||||
for i, s := range a0.reasonText {
|
||||
if i == 0 {
|
||||
s = "; " + s
|
||||
} else {
|
||||
hw.Newline()
|
||||
}
|
||||
// Just in case any of the strings has a newline, replace it with space to not break the message.
|
||||
s = strings.ReplaceAll(s, "\n", " ")
|
||||
s = strings.ReplaceAll(s, "\r", " ")
|
||||
s += ";"
|
||||
hw.AddWrap([]byte(s), true)
|
||||
}
|
||||
xmox = hw.String()
|
||||
}
|
||||
xmox += a0.headers
|
||||
|
||||
|
|
Loading…
Reference in a new issue