mox/smtpserver/analyze.go
Mechiel Lukkien a0f3856e40
when moving a message out of a Rejects mailbox, mark the message as "not seen" so stands out in the destination mailbox (e.g. inbox)
we set the flag both for move in imap and in webmail.

this also ensures the "MailboxDestinedID", used for per-mailbox reputation
analysis, is set in more reject-situations. before this change, some rejects
(such as based on DMARC reject) wouldn't result in reputation being used after
having been moved the message out of the rejects mailbox.

in the future, we need more tests for scenario's like this...

for issue #63 reported by x8x
may also help with issue #64
2023-09-22 15:53:05 +02:00

395 lines
15 KiB
Go

package smtpserver
import (
"context"
"fmt"
"net"
"os"
"time"
"github.com/mjl-/bstore"
"github.com/mjl-/mox/dkim"
"github.com/mjl-/mox/dmarc"
"github.com/mjl-/mox/dmarcrpt"
"github.com/mjl-/mox/dns"
"github.com/mjl-/mox/dnsbl"
"github.com/mjl-/mox/iprev"
"github.com/mjl-/mox/mlog"
"github.com/mjl-/mox/mox-"
"github.com/mjl-/mox/smtp"
"github.com/mjl-/mox/store"
"github.com/mjl-/mox/subjectpass"
"github.com/mjl-/mox/tlsrpt"
)
type delivery struct {
m *store.Message
dataFile *os.File
rcptAcc rcptAccount
acc *store.Account
msgFrom smtp.Address
dnsBLs []dns.Domain
dmarcUse bool
dmarcResult dmarc.Result
dkimResults []dkim.Result
iprevStatus iprev.Status
}
type analysis struct {
accept bool
mailbox string
code int
secode string
userError bool
errmsg string
err error // For our own logging, not sent to remote.
dmarcReport *dmarcrpt.Feedback // Validated dmarc aggregate report, not yet stored.
tlsReport *tlsrpt.Report // Validated TLS report, not yet stored.
reason string // If non-empty, reason for this decision. Can be one of reputationMethod and a few other tokens.
}
const (
reasonListAllow = "list-allow"
reasonDMARCPolicy = "dmarc-policy"
reasonReputationError = "reputation-error"
reasonReporting = "reporting"
reasonSPFPolicy = "spf-policy"
reasonJunkClassifyError = "junk-classify-error"
reasonJunkFilterError = "junk-filter-error"
reasonGiveSubjectpass = "give-subjectpass"
reasonNoBadSignals = "no-bad-signals"
reasonJunkContent = "junk-content"
reasonJunkContentStrict = "junk-content-strict"
reasonDNSBlocklisted = "dns-blocklisted"
reasonSubjectpass = "subjectpass"
reasonSubjectpassError = "subjectpass-error"
reasonIPrev = "iprev" // No or mil junk reputation signals, and bad iprev.
)
func analyze(ctx context.Context, log *mlog.Log, resolver dns.Resolver, d delivery) analysis {
mailbox := d.rcptAcc.destination.Mailbox
if mailbox == "" {
mailbox = "Inbox"
}
// If destination mailbox has a mailing list domain (for SPF/DKIM) configured,
// check it for a pass.
rs := store.MessageRuleset(log, d.rcptAcc.destination, d.m, d.m.MsgPrefix, d.dataFile)
if rs != nil {
mailbox = rs.Mailbox
}
if rs != nil && !rs.ListAllowDNSDomain.IsZero() {
ld := rs.ListAllowDNSDomain
// todo: on temporary failures, reject temporarily?
if d.m.MailFromValidated && ld.Name() == d.m.MailFromDomain {
return analysis{accept: true, mailbox: mailbox, reason: reasonListAllow}
}
for _, r := range d.dkimResults {
if r.Status == dkim.StatusPass && r.Sig.Domain == ld {
return analysis{accept: true, mailbox: mailbox, reason: reasonListAllow}
}
}
}
// For forwarded messages, we have different junk analysis. We don't reject for
// failing DMARC, and we clear fields that could implicate the forwarding mail
// server during future classifications on incoming messages (the forwarding mail
// server isn't responsible for the message).
if rs != nil && rs.IsForward {
d.dmarcUse = false
d.m.IsForward = true
d.m.RemoteIPMasked1 = ""
d.m.RemoteIPMasked2 = ""
d.m.RemoteIPMasked3 = ""
d.m.OrigEHLODomain = d.m.EHLODomain
d.m.EHLODomain = ""
d.m.MailFromDomain = "" // Still available in MailFrom.
d.m.OrigDKIMDomains = d.m.DKIMDomains
dkimdoms := []string{}
for _, dom := range d.m.DKIMDomains {
if dom != rs.VerifiedDNSDomain.Name() {
dkimdoms = append(dkimdoms, dom)
}
}
d.m.DKIMDomains = dkimdoms
log.Info("forwarded message, clearing identifying signals of forwarding mail server")
}
assignMailbox := func(tx *bstore.Tx) error {
// Set message MailboxID to which mail will be delivered. Reputation is
// per-mailbox. If referenced mailbox is not found (e.g. does not yet exist), we
// can still determine a reputation because we also base it on outgoing
// messages and those are account-global.
mb, err := d.acc.MailboxFind(tx, mailbox)
if err != nil {
return fmt.Errorf("finding destination mailbox: %w", err)
}
if mb != nil {
// We want to deliver to mb.ID, but this message may be rejected and sent to the
// Rejects mailbox instead, with MailboxID overwritten. Record the ID in
// MailboxDestinedID too. If the message is later moved out of the Rejects mailbox,
// we'll adjust the MailboxOrigID so it gets taken into account during reputation
// calculating in future deliveries. If we end up delivering to the intended
// mailbox (i.e. not rejecting), MailboxDestinedID is cleared during delivery so we
// don't store it unnecessarily.
d.m.MailboxID = mb.ID
d.m.MailboxDestinedID = mb.ID
} else {
log.Debug("mailbox not found in database", mlog.Field("mailbox", mailbox))
}
return nil
}
reject := func(code int, secode string, errmsg string, err error, reason string) analysis {
// We may have set MailboxDestinedID below already while we had a transaction. If
// not, do it now. This makes it possible to use the per-mailbox reputation when a
// user moves the message out of the Rejects mailbox to the intended mailbox
// (typically Inbox).
if d.m.MailboxDestinedID == 0 {
var mberr error
d.acc.WithRLock(func() {
mberr = d.acc.DB.Read(ctx, func(tx *bstore.Tx) error {
return assignMailbox(tx)
})
})
if mberr != nil {
return analysis{false, mailbox, smtp.C451LocalErr, smtp.SeSys3Other0, false, "error processing", err, nil, nil, reasonReputationError}
}
d.m.MailboxID = 0 // We plan to reject, no need to set intended MailboxID.
}
accept := false
if rs != nil && rs.AcceptRejectsToMailbox != "" {
accept = true
mailbox = rs.AcceptRejectsToMailbox
d.m.IsReject = true
// Don't draw attention, but don't go so far as to mark as junk.
d.m.Seen = true
log.Info("accepting reject to configured mailbox due to ruleset")
}
return analysis{accept, mailbox, code, secode, err == nil, errmsg, err, nil, nil, reason}
}
if d.dmarcUse && d.dmarcResult.Reject {
return reject(smtp.C550MailboxUnavail, smtp.SePol7MultiAuthFails26, "rejecting per dmarc policy", nil, reasonDMARCPolicy)
}
// todo: should we also reject messages that have a dmarc pass but an spf record "v=spf1 -all"? suggested by m3aawg best practices.
// If destination is the DMARC reporting mailbox, do additional checks and keep
// track of the report. We'll check reputation, defaulting to accept.
var dmarcReport *dmarcrpt.Feedback
if d.rcptAcc.destination.DMARCReports {
// Messages with DMARC aggregate reports must have a dmarc pass. ../rfc/7489:1866
if d.dmarcResult.Status != dmarc.StatusPass {
log.Info("received dmarc report without dmarc pass, not processing as dmarc report")
} else if report, err := dmarcrpt.ParseMessageReport(log, store.FileMsgReader(d.m.MsgPrefix, d.dataFile)); err != nil {
log.Infox("parsing dmarc report", err)
} else if d, err := dns.ParseDomain(report.PolicyPublished.Domain); err != nil {
log.Infox("parsing domain in dmarc report", err)
} else if _, ok := mox.Conf.Domain(d); !ok {
log.Info("dmarc report for domain not configured, ignoring", mlog.Field("domain", d))
} else if report.ReportMetadata.DateRange.End > time.Now().Unix()+60 {
log.Info("dmarc report with end date in the future, ignoring", mlog.Field("domain", d), mlog.Field("end", time.Unix(report.ReportMetadata.DateRange.End, 0)))
} else {
dmarcReport = report
}
}
// Similar to DMARC reporting, we check for the required DKIM. We'll check
// reputation, defaulting to accept.
var tlsReport *tlsrpt.Report
if d.rcptAcc.destination.TLSReports {
// Valid DKIM signature for domain must be present. We take "valid" to assume
// "passing", not "syntactically valid". We also check for "tlsrpt" as service.
// This check is optional, but if anyone goes through the trouble to explicitly
// list allowed services, they would be surprised to see them ignored.
// ../rfc/8460:320
ok := false
for _, r := range d.dkimResults {
if r.Status == dkim.StatusPass && r.Sig.Domain == d.msgFrom.Domain && r.Sig.Length < 0 && r.Record.ServiceAllowed("tlsrpt") {
ok = true
break
}
}
if !ok {
log.Info("received mail to tlsrpt without acceptable DKIM signature, not processing as tls report")
} else if report, err := tlsrpt.ParseMessage(log, store.FileMsgReader(d.m.MsgPrefix, d.dataFile)); err != nil {
log.Infox("parsing tls report", err)
} else {
var known bool
for _, p := range report.Policies {
log.Info("tlsrpt policy domain", mlog.Field("domain", p.Policy.Domain))
if d, err := dns.ParseDomain(p.Policy.Domain); err != nil {
log.Infox("parsing domain in tls report", err)
} else if _, ok := mox.Conf.Domain(d); ok {
known = true
break
}
}
if !known {
log.Info("tls report without one of configured domains, ignoring")
} else {
tlsReport = report
}
}
}
// Determine if message is acceptable based on DMARC domain, DKIM identities, or
// host-based reputation.
var isjunk *bool
var conclusive bool
var method reputationMethod
var reason string
var err error
d.acc.WithRLock(func() {
err = d.acc.DB.Read(ctx, func(tx *bstore.Tx) error {
if err := assignMailbox(tx); err != nil {
return err
}
isjunk, conclusive, method, err = reputation(tx, log, d.m)
reason = string(method)
return err
})
})
if err != nil {
log.Infox("determining reputation", err, mlog.Field("message", d.m))
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonReputationError)
}
log.Info("reputation analyzed", mlog.Field("conclusive", conclusive), mlog.Field("isjunk", isjunk), mlog.Field("method", string(method)))
if conclusive {
if !*isjunk {
return analysis{accept: true, mailbox: mailbox, dmarcReport: dmarcReport, tlsReport: tlsReport, reason: reason}
}
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, string(method))
} else if dmarcReport != nil || tlsReport != nil {
log.Info("accepting dmarc reporting or tlsrpt message without reputation")
return analysis{accept: true, mailbox: mailbox, dmarcReport: dmarcReport, tlsReport: tlsReport, reason: reasonReporting}
}
// If there was no previous message from sender or its domain, and we have an SPF
// (soft)fail, reject the message.
switch method {
case methodDKIMSPF, methodIP1, methodIP2, methodIP3, methodNone:
switch d.m.MailFromValidation {
case store.ValidationFail, store.ValidationSoftfail:
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", nil, reasonSPFPolicy)
}
}
// Senders without reputation and without iprev pass, are likely spam.
var suspiciousIPrevFail bool
switch method {
case methodDKIMSPF, methodIP1, methodIP2, methodIP3, methodNone:
suspiciousIPrevFail = d.iprevStatus != iprev.StatusPass
}
// With already a mild junk signal, an iprev fail on top is enough to reject.
if suspiciousIPrevFail && isjunk != nil && *isjunk {
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", nil, reasonIPrev)
}
var subjectpassKey string
conf, _ := d.acc.Conf()
if conf.SubjectPass.Period > 0 {
subjectpassKey, err = d.acc.Subjectpass(d.rcptAcc.canonicalAddress)
if err != nil {
log.Errorx("get key for verifying subject token", err)
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonSubjectpassError)
}
err = subjectpass.Verify(log, d.dataFile, []byte(subjectpassKey), conf.SubjectPass.Period)
pass := err == nil
log.Infox("pass by subject token", err, mlog.Field("pass", pass))
if pass {
return analysis{accept: true, mailbox: mailbox, reason: reasonSubjectpass}
}
}
reason = reasonNoBadSignals
accept := true
var junkSubjectpass bool
f, jf, err := d.acc.OpenJunkFilter(ctx, log)
if err == nil {
defer func() {
err := f.Close()
log.Check(err, "closing junkfilter")
}()
contentProb, _, _, _, err := f.ClassifyMessageReader(ctx, store.FileMsgReader(d.m.MsgPrefix, d.dataFile), d.m.Size)
if err != nil {
log.Errorx("testing for spam", err)
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonJunkClassifyError)
}
// todo: if isjunk is not nil (i.e. there was inconclusive reputation), use it in the probability calculation. give reputation a score of 0.25 or .75 perhaps?
// todo: if there aren't enough historic messages, we should just let messages in.
// todo: we could require nham and nspam to be above a certain number when there were plenty of words in the message, and in the database. can indicate a spammer is misspelling words. however, it can also mean a message in a different language/script...
// If we don't accept, we may still respond with a "subjectpass" hint below.
// We add some jitter to the threshold we use. So we don't act as too easy an
// oracle for words that are a strong indicator of haminess.
// todo: we should rate-limit uses of the junkfilter.
jitter := (jitterRand.Float64() - 0.5) / 10
threshold := jf.Threshold + jitter
// With an iprev fail, we set a higher bar for content.
reason = reasonJunkContent
if suspiciousIPrevFail && threshold > 0.25 {
threshold = 0.25
log.Info("setting junk threshold due to iprev fail", mlog.Field("threshold", 0.25))
reason = reasonJunkContentStrict
}
accept = contentProb <= threshold
junkSubjectpass = contentProb < threshold-0.2
log.Info("content analyzed", mlog.Field("accept", accept), mlog.Field("contentprob", contentProb), mlog.Field("subjectpass", junkSubjectpass))
} else if err != store.ErrNoJunkFilter {
log.Errorx("open junkfilter", err)
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonJunkFilterError)
}
// If content looks good, we'll still look at DNS block lists for a reason to
// reject. We normally won't get here if we've communicated with this sender
// before.
var dnsblocklisted bool
if accept {
blocked := func(zone dns.Domain) bool {
dnsblctx, dnsblcancel := context.WithTimeout(ctx, 30*time.Second)
defer dnsblcancel()
if !checkDNSBLHealth(dnsblctx, resolver, zone) {
log.Info("dnsbl not healthy, skipping", mlog.Field("zone", zone))
return false
}
status, expl, err := dnsbl.Lookup(dnsblctx, resolver, zone, net.ParseIP(d.m.RemoteIP))
dnsblcancel()
if status == dnsbl.StatusFail {
log.Info("rejecting due to listing in dnsbl", mlog.Field("zone", zone), mlog.Field("explanation", expl))
return true
} else if err != nil {
log.Infox("dnsbl lookup", err, mlog.Field("zone", zone), mlog.Field("status", status))
}
return false
}
// Note: We don't check in parallel, we are in no hurry to accept possible spam.
for _, zone := range d.dnsBLs {
if blocked(zone) {
accept = false
dnsblocklisted = true
reason = reasonDNSBlocklisted
break
}
}
}
if accept {
return analysis{accept: true, mailbox: mailbox, reason: reasonNoBadSignals}
}
if subjectpassKey != "" && d.dmarcResult.Status == dmarc.StatusPass && method == methodNone && (dnsblocklisted || junkSubjectpass) {
log.Info("permanent reject with subjectpass hint of moderately spammy email without reputation")
pass := subjectpass.Generate(d.msgFrom, []byte(subjectpassKey), time.Now())
return reject(smtp.C550MailboxUnavail, smtp.SePol7DeliveryUnauth1, subjectpass.Explanation+pass, nil, reasonGiveSubjectpass)
}
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", nil, reason)
}