mox/smtpserver/analyze.go

407 lines
16 KiB
Go
Raw Normal View History

2023-01-30 16:27:06 +03:00
package smtpserver
import (
"context"
"fmt"
2023-01-30 16:27:06 +03:00
"net"
"os"
"time"
"github.com/mjl-/bstore"
"github.com/mjl-/mox/dkim"
"github.com/mjl-/mox/dmarc"
"github.com/mjl-/mox/dmarcrpt"
"github.com/mjl-/mox/dns"
"github.com/mjl-/mox/dnsbl"
"github.com/mjl-/mox/iprev"
"github.com/mjl-/mox/mlog"
"github.com/mjl-/mox/mox-"
"github.com/mjl-/mox/smtp"
"github.com/mjl-/mox/store"
"github.com/mjl-/mox/subjectpass"
"github.com/mjl-/mox/tlsrpt"
)
type delivery struct {
m *store.Message
dataFile *os.File
rcptAcc rcptAccount
acc *store.Account
msgFrom smtp.Address
dnsBLs []dns.Domain
dmarcUse bool
dmarcResult dmarc.Result
dkimResults []dkim.Result
iprevStatus iprev.Status
}
type analysis struct {
accept bool
mailbox string
code int
secode string
userError bool
errmsg string
err error // For our own logging, not sent to remote.
dmarcReport *dmarcrpt.Feedback // Validated DMARC aggregate report, not yet stored.
tlsReport *tlsrpt.Report // Validated TLS report, not yet stored.
reason string // If non-empty, reason for this decision. Can be one of reputationMethod and a few other tokens.
dmarcOverrideReason string // If set, one of dmarcrpt.PolicyOverride
2023-01-30 16:27:06 +03:00
}
const (
reasonListAllow = "list-allow"
reasonDMARCPolicy = "dmarc-policy"
reasonReputationError = "reputation-error"
reasonReporting = "reporting"
reasonSPFPolicy = "spf-policy"
reasonJunkClassifyError = "junk-classify-error"
reasonJunkFilterError = "junk-filter-error"
reasonGiveSubjectpass = "give-subjectpass"
reasonNoBadSignals = "no-bad-signals"
reasonJunkContent = "junk-content"
reasonJunkContentStrict = "junk-content-strict"
reasonDNSBlocklisted = "dns-blocklisted"
reasonSubjectpass = "subjectpass"
reasonSubjectpassError = "subjectpass-error"
reasonIPrev = "iprev" // No or mild junk reputation signals, and bad iprev.
2023-01-30 16:27:06 +03:00
)
func isListDomain(d delivery, ld dns.Domain) bool {
if d.m.MailFromValidated && ld.Name() == d.m.MailFromDomain {
return true
}
for _, r := range d.dkimResults {
if r.Status == dkim.StatusPass && r.Sig.Domain == ld {
return true
}
}
return false
}
2023-01-30 16:27:06 +03:00
func analyze(ctx context.Context, log *mlog.Log, resolver dns.Resolver, d delivery) analysis {
mailbox := d.rcptAcc.destination.Mailbox
if mailbox == "" {
mailbox = "Inbox"
2023-01-30 16:27:06 +03:00
}
// If destination mailbox has a mailing list domain (for SPF/DKIM) configured,
// check it for a pass.
rs := store.MessageRuleset(log, d.rcptAcc.destination, d.m, d.m.MsgPrefix, d.dataFile)
if rs != nil {
mailbox = rs.Mailbox
}
2023-01-30 16:27:06 +03:00
if rs != nil && !rs.ListAllowDNSDomain.IsZero() {
// todo: on temporary failures, reject temporarily?
if isListDomain(d, rs.ListAllowDNSDomain) {
d.m.IsMailingList = true
return analysis{accept: true, mailbox: mailbox, reason: reasonListAllow, dmarcOverrideReason: string(dmarcrpt.PolicyOverrideMailingList)}
2023-01-30 16:27:06 +03:00
}
}
var dmarcOverrideReason string
// For forwarded messages, we have different junk analysis. We don't reject for
// failing DMARC, and we clear fields that could implicate the forwarding mail
// server during future classifications on incoming messages (the forwarding mail
// server isn't responsible for the message).
if rs != nil && rs.IsForward {
d.dmarcUse = false
d.m.IsForward = true
d.m.RemoteIPMasked1 = ""
d.m.RemoteIPMasked2 = ""
d.m.RemoteIPMasked3 = ""
d.m.OrigEHLODomain = d.m.EHLODomain
d.m.EHLODomain = ""
d.m.MailFromDomain = "" // Still available in MailFrom.
d.m.OrigDKIMDomains = d.m.DKIMDomains
dkimdoms := []string{}
for _, dom := range d.m.DKIMDomains {
if dom != rs.VerifiedDNSDomain.Name() {
dkimdoms = append(dkimdoms, dom)
}
}
d.m.DKIMDomains = dkimdoms
dmarcOverrideReason = string(dmarcrpt.PolicyOverrideForwarded)
log.Info("forwarded message, clearing identifying signals of forwarding mail server")
}
assignMailbox := func(tx *bstore.Tx) error {
// Set message MailboxID to which mail will be delivered. Reputation is
// per-mailbox. If referenced mailbox is not found (e.g. does not yet exist), we
// can still determine a reputation because we also base it on outgoing
// messages and those are account-global.
mb, err := d.acc.MailboxFind(tx, mailbox)
if err != nil {
return fmt.Errorf("finding destination mailbox: %w", err)
}
if mb != nil {
// We want to deliver to mb.ID, but this message may be rejected and sent to the
// Rejects mailbox instead, with MailboxID overwritten. Record the ID in
// MailboxDestinedID too. If the message is later moved out of the Rejects mailbox,
// we'll adjust the MailboxOrigID so it gets taken into account during reputation
// calculating in future deliveries. If we end up delivering to the intended
// mailbox (i.e. not rejecting), MailboxDestinedID is cleared during delivery so we
// don't store it unnecessarily.
d.m.MailboxID = mb.ID
d.m.MailboxDestinedID = mb.ID
} else {
log.Debug("mailbox not found in database", mlog.Field("mailbox", mailbox))
}
return nil
}
reject := func(code int, secode string, errmsg string, err error, reason string) analysis {
// We may have set MailboxDestinedID below already while we had a transaction. If
// not, do it now. This makes it possible to use the per-mailbox reputation when a
// user moves the message out of the Rejects mailbox to the intended mailbox
// (typically Inbox).
if d.m.MailboxDestinedID == 0 {
var mberr error
d.acc.WithRLock(func() {
mberr = d.acc.DB.Read(ctx, func(tx *bstore.Tx) error {
return assignMailbox(tx)
})
})
if mberr != nil {
return analysis{false, mailbox, smtp.C451LocalErr, smtp.SeSys3Other0, false, "error processing", err, nil, nil, reasonReputationError, dmarcOverrideReason}
}
d.m.MailboxID = 0 // We plan to reject, no need to set intended MailboxID.
}
accept := false
if rs != nil && rs.AcceptRejectsToMailbox != "" {
accept = true
mailbox = rs.AcceptRejectsToMailbox
d.m.IsReject = true
// Don't draw attention, but don't go so far as to mark as junk.
d.m.Seen = true
log.Info("accepting reject to configured mailbox due to ruleset")
}
return analysis{accept, mailbox, code, secode, err == nil, errmsg, err, nil, nil, reason, dmarcOverrideReason}
}
2023-01-30 16:27:06 +03:00
if d.dmarcUse && d.dmarcResult.Reject {
return reject(smtp.C550MailboxUnavail, smtp.SePol7MultiAuthFails26, "rejecting per dmarc policy", nil, reasonDMARCPolicy)
}
// todo: should we also reject messages that have a dmarc pass but an spf record "v=spf1 -all"? suggested by m3aawg best practices.
// If destination is the DMARC reporting mailbox, do additional checks and keep
// track of the report. We'll check reputation, defaulting to accept.
var dmarcReport *dmarcrpt.Feedback
if d.rcptAcc.destination.DMARCReports {
// Messages with DMARC aggregate reports must have a DMARC pass. ../rfc/7489:1866
2023-01-30 16:27:06 +03:00
if d.dmarcResult.Status != dmarc.StatusPass {
log.Info("received dmarc aggregate report without dmarc pass, not processing as dmarc report")
} else if report, err := dmarcrpt.ParseMessageReport(log, store.FileMsgReader(d.m.MsgPrefix, d.dataFile)); err != nil {
log.Infox("parsing dmarc aggregate report", err)
2023-01-30 16:27:06 +03:00
} else if d, err := dns.ParseDomain(report.PolicyPublished.Domain); err != nil {
log.Infox("parsing domain in dmarc aggregate report", err)
2023-01-30 16:27:06 +03:00
} else if _, ok := mox.Conf.Domain(d); !ok {
log.Info("dmarc aggregate report for domain not configured, ignoring", mlog.Field("domain", d))
2023-01-30 16:27:06 +03:00
} else if report.ReportMetadata.DateRange.End > time.Now().Unix()+60 {
log.Info("dmarc aggregate report with end date in the future, ignoring", mlog.Field("domain", d), mlog.Field("end", time.Unix(report.ReportMetadata.DateRange.End, 0)))
2023-01-30 16:27:06 +03:00
} else {
dmarcReport = report
}
}
// Similar to DMARC reporting, we check for the required DKIM. We'll check
// reputation, defaulting to accept.
var tlsReport *tlsrpt.Report
if d.rcptAcc.destination.TLSReports {
// Valid DKIM signature for domain must be present. We take "valid" to assume
// "passing", not "syntactically valid". We also check for "tlsrpt" as service.
// This check is optional, but if anyone goes through the trouble to explicitly
// list allowed services, they would be surprised to see them ignored.
// ../rfc/8460:320
ok := false
for _, r := range d.dkimResults {
if r.Status == dkim.StatusPass && r.Sig.Domain == d.msgFrom.Domain && r.Sig.Length < 0 && r.Record.ServiceAllowed("tlsrpt") {
ok = true
break
}
}
if !ok {
log.Info("received mail to tlsrpt without acceptable DKIM signature, not processing as tls report")
} else if report, err := tlsrpt.ParseMessage(log, store.FileMsgReader(d.m.MsgPrefix, d.dataFile)); err != nil {
log.Infox("parsing tls report", err)
2023-01-30 16:27:06 +03:00
} else {
var known bool
for _, p := range report.Policies {
log.Info("tlsrpt policy domain", mlog.Field("domain", p.Policy.Domain))
if d, err := dns.ParseDomain(p.Policy.Domain); err != nil {
log.Infox("parsing domain in tls report", err)
2023-01-30 16:27:06 +03:00
} else if _, ok := mox.Conf.Domain(d); ok {
known = true
break
}
}
if !known {
log.Info("tls report without one of configured domains, ignoring")
2023-01-30 16:27:06 +03:00
} else {
tlsReport = report
}
}
}
// Determine if message is acceptable based on DMARC domain, DKIM identities, or
// host-based reputation.
var isjunk *bool
var conclusive bool
var method reputationMethod
var reason string
var err error
d.acc.WithRLock(func() {
err = d.acc.DB.Read(ctx, func(tx *bstore.Tx) error {
if err := assignMailbox(tx); err != nil {
return err
2023-01-30 16:27:06 +03:00
}
isjunk, conclusive, method, err = reputation(tx, log, d.m)
reason = string(method)
return err
})
})
if err != nil {
log.Infox("determining reputation", err, mlog.Field("message", d.m))
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonReputationError)
}
log.Info("reputation analyzed", mlog.Field("conclusive", conclusive), mlog.Field("isjunk", isjunk), mlog.Field("method", string(method)))
if conclusive {
if !*isjunk {
return analysis{accept: true, mailbox: mailbox, dmarcReport: dmarcReport, tlsReport: tlsReport, reason: reason, dmarcOverrideReason: dmarcOverrideReason}
2023-01-30 16:27:06 +03:00
}
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, string(method))
} else if dmarcReport != nil || tlsReport != nil {
log.Info("accepting message with dmarc aggregate report or tls report without reputation")
return analysis{accept: true, mailbox: mailbox, dmarcReport: dmarcReport, tlsReport: tlsReport, reason: reasonReporting, dmarcOverrideReason: dmarcOverrideReason}
2023-01-30 16:27:06 +03:00
}
// If there was no previous message from sender or its domain, and we have an SPF
// (soft)fail, reject the message.
switch method {
case methodDKIMSPF, methodIP1, methodIP2, methodIP3, methodNone:
switch d.m.MailFromValidation {
case store.ValidationFail, store.ValidationSoftfail:
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", nil, reasonSPFPolicy)
}
}
// Senders without reputation and without iprev pass, are likely spam.
var suspiciousIPrevFail bool
switch method {
case methodDKIMSPF, methodIP1, methodIP2, methodIP3, methodNone:
suspiciousIPrevFail = d.iprevStatus != iprev.StatusPass
}
// With already a mild junk signal, an iprev fail on top is enough to reject.
if suspiciousIPrevFail && isjunk != nil && *isjunk {
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", nil, reasonIPrev)
}
var subjectpassKey string
conf, _ := d.acc.Conf()
if conf.SubjectPass.Period > 0 {
subjectpassKey, err = d.acc.Subjectpass(d.rcptAcc.canonicalAddress)
if err != nil {
log.Errorx("get key for verifying subject token", err)
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonSubjectpassError)
}
err = subjectpass.Verify(log, d.dataFile, []byte(subjectpassKey), conf.SubjectPass.Period)
2023-01-30 16:27:06 +03:00
pass := err == nil
log.Infox("pass by subject token", err, mlog.Field("pass", pass))
if pass {
return analysis{accept: true, mailbox: mailbox, reason: reasonSubjectpass, dmarcOverrideReason: dmarcOverrideReason}
2023-01-30 16:27:06 +03:00
}
}
reason = reasonNoBadSignals
accept := true
var junkSubjectpass bool
f, jf, err := d.acc.OpenJunkFilter(ctx, log)
2023-01-30 16:27:06 +03:00
if err == nil {
defer func() {
err := f.Close()
log.Check(err, "closing junkfilter")
2023-01-30 16:27:06 +03:00
}()
contentProb, _, _, _, err := f.ClassifyMessageReader(ctx, store.FileMsgReader(d.m.MsgPrefix, d.dataFile), d.m.Size)
2023-01-30 16:27:06 +03:00
if err != nil {
log.Errorx("testing for spam", err)
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonJunkClassifyError)
}
// todo: if isjunk is not nil (i.e. there was inconclusive reputation), use it in the probability calculation. give reputation a score of 0.25 or .75 perhaps?
// todo: if there aren't enough historic messages, we should just let messages in.
// todo: we could require nham and nspam to be above a certain number when there were plenty of words in the message, and in the database. can indicate a spammer is misspelling words. however, it can also mean a message in a different language/script...
// If we don't accept, we may still respond with a "subjectpass" hint below.
// We add some jitter to the threshold we use. So we don't act as too easy an
// oracle for words that are a strong indicator of haminess.
// todo: we should rate-limit uses of the junkfilter.
jitter := (jitterRand.Float64() - 0.5) / 10
threshold := jf.Threshold + jitter
// With an iprev fail, we set a higher bar for content.
reason = reasonJunkContent
if suspiciousIPrevFail && threshold > 0.25 {
threshold = 0.25
log.Info("setting junk threshold due to iprev fail", mlog.Field("threshold", 0.25))
reason = reasonJunkContentStrict
}
accept = contentProb <= threshold
junkSubjectpass = contentProb < threshold-0.2
log.Info("content analyzed", mlog.Field("accept", accept), mlog.Field("contentprob", contentProb), mlog.Field("subjectpass", junkSubjectpass))
2023-01-30 16:27:06 +03:00
} else if err != store.ErrNoJunkFilter {
log.Errorx("open junkfilter", err)
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonJunkFilterError)
}
// If content looks good, we'll still look at DNS block lists for a reason to
// reject. We normally won't get here if we've communicated with this sender
// before.
var dnsblocklisted bool
if accept {
blocked := func(zone dns.Domain) bool {
dnsblctx, dnsblcancel := context.WithTimeout(ctx, 30*time.Second)
defer dnsblcancel()
if !checkDNSBLHealth(dnsblctx, resolver, zone) {
log.Info("dnsbl not healthy, skipping", mlog.Field("zone", zone))
return false
}
status, expl, err := dnsbl.Lookup(dnsblctx, resolver, zone, net.ParseIP(d.m.RemoteIP))
dnsblcancel()
if status == dnsbl.StatusFail {
log.Info("rejecting due to listing in dnsbl", mlog.Field("zone", zone), mlog.Field("explanation", expl))
return true
} else if err != nil {
log.Infox("dnsbl lookup", err, mlog.Field("zone", zone), mlog.Field("status", status))
}
return false
}
// Note: We don't check in parallel, we are in no hurry to accept possible spam.
for _, zone := range d.dnsBLs {
if blocked(zone) {
accept = false
dnsblocklisted = true
reason = reasonDNSBlocklisted
break
}
}
}
if accept {
return analysis{accept: true, mailbox: mailbox, reason: reasonNoBadSignals, dmarcOverrideReason: dmarcOverrideReason}
2023-01-30 16:27:06 +03:00
}
if subjectpassKey != "" && d.dmarcResult.Status == dmarc.StatusPass && method == methodNone && (dnsblocklisted || junkSubjectpass) {
log.Info("permanent reject with subjectpass hint of moderately spammy email without reputation")
pass := subjectpass.Generate(d.msgFrom, []byte(subjectpassKey), time.Now())
return reject(smtp.C550MailboxUnavail, smtp.SePol7DeliveryUnauth1, subjectpass.Explanation+pass, nil, reasonGiveSubjectpass)
}
return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", nil, reason)
}