mox/smtpserver/analyze.go

package smtpserver

import (
	"context"
	"net"
	"os"
	"time"

	"github.com/mjl-/bstore"

	"github.com/mjl-/mox/dkim"
	"github.com/mjl-/mox/dmarc"
	"github.com/mjl-/mox/dmarcrpt"
	"github.com/mjl-/mox/dns"
	"github.com/mjl-/mox/dnsbl"
	"github.com/mjl-/mox/iprev"
	"github.com/mjl-/mox/mlog"
	"github.com/mjl-/mox/mox-"
	"github.com/mjl-/mox/smtp"
	"github.com/mjl-/mox/store"
	"github.com/mjl-/mox/subjectpass"
	"github.com/mjl-/mox/tlsrpt"
)

type delivery struct {
	m           *store.Message
	dataFile    *os.File
	rcptAcc     rcptAccount
	acc         *store.Account
	msgFrom     smtp.Address
	dnsBLs      []dns.Domain
	dmarcUse    bool
	dmarcResult dmarc.Result
	dkimResults []dkim.Result
	iprevStatus iprev.Status
}

type analysis struct {
	accept      bool
	code        int
	secode      string
	userError   bool
	errmsg      string
	err         error              // For our own logging, not sent to remote.
	dmarcReport *dmarcrpt.Feedback // Validated dmarc aggregate report, not yet stored.
	tlsReport   *tlsrpt.Report     // Validated TLS report, not yet stored.
	reason      string             // If non-empty, reason for this decision. Can be one of reputationMethod and a few other tokens.
}

const (
	reasonListAllow         = "list-allow"
	reasonDMARCPolicy       = "dmarc-policy"
	reasonReputationError   = "reputation-error"
	reasonReporting         = "reporting"
	reasonSPFPolicy         = "spf-policy"
	reasonJunkClassifyError = "junk-classify-error"
	reasonJunkFilterError   = "junk-filter-error"
	reasonGiveSubjectpass   = "give-subjectpass"
	reasonNoBadSignals      = "no-bad-signals"
	reasonJunkContent       = "junk-content"
	reasonJunkContentStrict = "junk-content-strict"
	reasonDNSBlocklisted    = "dns-blocklisted"
	reasonSubjectpass       = "subjectpass"
	reasonSubjectpassError  = "subjectpass-error"
	reasonIPrev             = "iprev" // No or mil junk reputation signals, and bad iprev.
)

func analyze(ctx context.Context, log *mlog.Log, resolver dns.Resolver, d delivery) analysis {
	reject := func(code int, secode string, errmsg string, err error, reason string) analysis {
		return analysis{false, code, secode, err == nil, errmsg, err, nil, nil, reason}
	}

	// If destination mailbox has a mailing list domain (for SPF/DKIM) configured,
	// check it for a pass.
	// todo: should use this evaluation for final delivery as well
	rs := store.MessageRuleset(log, d.rcptAcc.destination, d.m, d.m.MsgPrefix, d.dataFile)
	if rs != nil && !rs.ListAllowDNSDomain.IsZero() {
		ld := rs.ListAllowDNSDomain
		// todo: on temporary failures, reject temporarily?
		if d.m.MailFromValidated && ld.Name() == d.m.MailFromDomain {
			return analysis{accept: true, reason: reasonListAllow}
		}
		for _, r := range d.dkimResults {
			if r.Status == dkim.StatusPass && r.Sig.Domain == ld {
				return analysis{accept: true, reason: reasonListAllow}
			}
		}
	}

	if d.dmarcUse && d.dmarcResult.Reject {
		return reject(smtp.C550MailboxUnavail, smtp.SePol7MultiAuthFails26, "rejecting per dmarc policy", nil, reasonDMARCPolicy)
	}
	// todo: should we also reject messages that have a dmarc pass but an spf record "v=spf1 -all"? suggested by m3aawg best practices.

	// If destination is the DMARC reporting mailbox, do additional checks and keep
	// track of the report. We'll check reputation, defaulting to accept.
	var dmarcReport *dmarcrpt.Feedback
	if d.rcptAcc.destination.DMARCReports {
		// Messages with DMARC aggregate reports must have a dmarc pass. ../rfc/7489:1866
		if d.dmarcResult.Status != dmarc.StatusPass {
			log.Info("received dmarc report without dmarc pass, not processing as dmarc report")
		} else if report, err := dmarcrpt.ParseMessageReport(store.FileMsgReader(d.m.MsgPrefix, d.dataFile)); err != nil {
			log.Infox("parsing dmarc report", err)
		} else if d, err := dns.ParseDomain(report.PolicyPublished.Domain); err != nil {
			log.Infox("parsing domain in dmarc report", err)
		} else if _, ok := mox.Conf.Domain(d); !ok {
			log.Info("dmarc report for domain not configured, ignoring", mlog.Field("domain", d))
		} else if report.ReportMetadata.DateRange.End > time.Now().Unix()+60 {
			log.Info("dmarc report with end date in the future, ignoring", mlog.Field("domain", d), mlog.Field("end", time.Unix(report.ReportMetadata.DateRange.End, 0)))
		} else {
			dmarcReport = report
		}
	}

	// Similar to DMARC reporting, we check for the required DKIM. We'll check
	// reputation, defaulting to accept.
	var tlsReport *tlsrpt.Report
	if d.rcptAcc.destination.TLSReports {
		// Valid DKIM signature for domain must be present. We take "valid" to assume
		// "passing", not "syntactically valid". We also check for "tlsrpt" as service.
		// This check is optional, but if anyone goes through the trouble to explicitly
		// list allowed services, they would be surprised to see them ignored.
		// ../rfc/8460:320
		ok := false
		for _, r := range d.dkimResults {
			if r.Status == dkim.StatusPass && r.Sig.Domain == d.msgFrom.Domain && r.Sig.Length < 0 && r.Record.ServiceAllowed("tlsrpt") {
				ok = true
				break
			}
		}

		if !ok {
			log.Info("received mail to tlsrpt without acceptable DKIM signature, not processing as tls report")
		} else if report, err := tlsrpt.ParseMessage(store.FileMsgReader(d.m.MsgPrefix, d.dataFile)); err != nil {
			log.Infox("parsing tls report", err)
		} else {
			var known bool
			for _, p := range report.Policies {
				log.Info("tlsrpt policy domain", mlog.Field("domain", p.Policy.Domain))
				if d, err := dns.ParseDomain(p.Policy.Domain); err != nil {
					log.Infox("parsing domain in tls report", err)
				} else if _, ok := mox.Conf.Domain(d); ok {
					known = true
					break
				}
			}
			if !known {
				log.Info("tls report without one of configured domains, ignoring")
			} else {
				tlsReport = report
			}
		}
	}

	// Determine if message is acceptable based on DMARC domain, DKIM identities, or
	// host-based reputation.
	var isjunk *bool
	var conclusive bool
	var method reputationMethod
	var reason string
	var err error
	d.acc.WithRLock(func() {
		err = d.acc.DB.Read(func(tx *bstore.Tx) error {
			// Set message MailboxID to which mail will be delivered. Reputation is
			// per-mailbox. If referenced mailbox is not found (e.g. does not yet exist), we
			// can still determine a reputation because we also base it on outgoing
			// messages and those are account-global.
			mailbox := d.rcptAcc.destination.Mailbox
			if mailbox == "" {
				mailbox = "Inbox"
			}
			if rs != nil {
				mailbox = rs.Mailbox
			}
			mb := d.acc.MailboxFindX(tx, mailbox)
			if mb != nil {
				// We want to deliver to mb.ID, but this message may be rejected and sent to the
				// Rejects mailbox instead, which MailboxID overwritten. Record the ID in
				// MailboxDestinedID too. If the message is later moved out of the Rejects mailbox,
				// we'll adjust the MailboxOrigID so it gets taken into account during reputation
				// calculating in future deliveries. If we end up delivering to the intended
				// mailbox (i.e. not rejecting), MailboxDestinedID is cleared during delivery so we
				// don't store it unnecessarily.
				d.m.MailboxID = mb.ID
				d.m.MailboxDestinedID = mb.ID
			} else {
				log.Debug("mailbox not found in database", mlog.Field("mailbox", mailbox))
			}

			var err error
			isjunk, conclusive, method, err = reputation(tx, log, d.m)
			reason = string(method)
			return err
		})
	})
	if err != nil {
		log.Infox("determining reputation", err, mlog.Field("message", d.m))
		return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonReputationError)
	}
	log.Info("reputation analyzed", mlog.Field("conclusive", conclusive), mlog.Field("isjunk", isjunk), mlog.Field("method", string(method)))
	if conclusive {
		if !*isjunk {
			return analysis{accept: true, dmarcReport: dmarcReport, tlsReport: tlsReport, reason: reason}
		}
		return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, string(method))
	} else if dmarcReport != nil || tlsReport != nil {
		log.Info("accepting dmarc reporting or tlsrpt message without reputation")
		return analysis{accept: true, dmarcReport: dmarcReport, tlsReport: tlsReport, reason: reasonReporting}
	}
	// If there was no previous message from sender or its domain, and we have an SPF
	// (soft)fail, reject the message.
	switch method {
	case methodDKIMSPF, methodIP1, methodIP2, methodIP3, methodNone:
		switch d.m.MailFromValidation {
		case store.ValidationFail, store.ValidationSoftfail:
			return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", nil, reasonSPFPolicy)
		}
	}

	// Senders without reputation and without iprev pass, are likely spam.
	var suspiciousIPrevFail bool
	switch method {
	case methodDKIMSPF, methodIP1, methodIP2, methodIP3, methodNone:
		suspiciousIPrevFail = d.iprevStatus != iprev.StatusPass
	}

	// With already a mild junk signal, an iprev fail on top is enough to reject.
	if suspiciousIPrevFail && isjunk != nil && *isjunk {
		return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", nil, reasonIPrev)
	}

	var subjectpassKey string
	conf, _ := d.acc.Conf()
	if conf.SubjectPass.Period > 0 {
		subjectpassKey, err = d.acc.Subjectpass(d.rcptAcc.canonicalAddress)
		if err != nil {
			log.Errorx("get key for verifying subject token", err)
			return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonSubjectpassError)
		}
		err = subjectpass.Verify(d.dataFile, []byte(subjectpassKey), conf.SubjectPass.Period)
		pass := err == nil
		log.Infox("pass by subject token", err, mlog.Field("pass", pass))
		if pass {
			return analysis{accept: true, reason: reasonSubjectpass}
		}
	}

	reason = reasonNoBadSignals
	accept := true
	var junkSubjectpass bool
	f, jf, err := d.acc.OpenJunkFilter(log)
	if err == nil {
		defer func() {
			err := f.Close()
			log.Check(err, "closing junkfilter")
		}()
		contentProb, _, _, _, err := f.ClassifyMessageReader(store.FileMsgReader(d.m.MsgPrefix, d.dataFile), d.m.Size)
		if err != nil {
			log.Errorx("testing for spam", err)
			return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonJunkClassifyError)
		}
		// todo: if isjunk is not nil (i.e. there was inconclusive reputation), use it in the probability calculation. give reputation a score of 0.25 or .75 perhaps?
		// todo: if there aren't enough historic messages, we should just let messages in.
		// todo: we could require nham and nspam to be above a certain number when there were plenty of words in the message, and in the database. can indicate a spammer is misspelling words. however, it can also mean a message in a different language/script...

		// If we don't accept, we may still respond with a "subjectpass" hint below.
		// We add some jitter to the threshold we use. So we don't act as too easy an
		// oracle for words that are a strong indicator of haminess.
		// todo: we should rate-limit uses of the junkfilter.
		jitter := (jitterRand.Float64() - 0.5) / 10
		threshold := jf.Threshold + jitter

		// With an iprev fail, we set a higher bar for content.
		reason = reasonJunkContent
		if suspiciousIPrevFail && threshold > 0.25 {
			threshold = 0.25
			log.Info("setting junk threshold due to iprev fail", mlog.Field("threshold", 0.25))
			reason = reasonJunkContentStrict
		}
		accept = contentProb <= threshold
		junkSubjectpass = contentProb < threshold-0.2
		log.Info("content analyzed", mlog.Field("accept", accept), mlog.Field("contentprob", contentProb), mlog.Field("subjectpass", junkSubjectpass))
	} else if err != store.ErrNoJunkFilter {
		log.Errorx("open junkfilter", err)
		return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonJunkFilterError)
	}

	// If content looks good, we'll still look at DNS block lists for a reason to
	// reject. We normally won't get here if we've communicated with this sender
	// before.
	var dnsblocklisted bool
	if accept {
		blocked := func(zone dns.Domain) bool {
			dnsblctx, dnsblcancel := context.WithTimeout(ctx, 30*time.Second)
			defer dnsblcancel()
			if !checkDNSBLHealth(dnsblctx, resolver, zone) {
				log.Info("dnsbl not healthy, skipping", mlog.Field("zone", zone))
				return false
			}

			status, expl, err := dnsbl.Lookup(dnsblctx, resolver, zone, net.ParseIP(d.m.RemoteIP))
			dnsblcancel()
			if status == dnsbl.StatusFail {
				log.Info("rejecting due to listing in dnsbl", mlog.Field("zone", zone), mlog.Field("explanation", expl))
				return true
			} else if err != nil {
				log.Infox("dnsbl lookup", err, mlog.Field("zone", zone), mlog.Field("status", status))
			}
			return false
		}

		// Note: We don't check in parallel, we are in no hurry to accept possible spam.
		for _, zone := range d.dnsBLs {
			if blocked(zone) {
				accept = false
				dnsblocklisted = true
				reason = reasonDNSBlocklisted
				break
			}
		}
	}

	if accept {
		return analysis{accept: true, reason: reasonNoBadSignals}
	}

	if subjectpassKey != "" && d.dmarcResult.Status == dmarc.StatusPass && method == methodNone && (dnsblocklisted || junkSubjectpass) {
		log.Info("permanent reject with subjectpass hint of moderately spammy email without reputation")
		pass := subjectpass.Generate(d.msgFrom, []byte(subjectpassKey), time.Now())
		return reject(smtp.C550MailboxUnavail, smtp.SePol7DeliveryUnauth1, subjectpass.Explanation+pass, nil, reasonGiveSubjectpass)
	}

	return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", nil, reason)
}
mox! 2023-01-30 16:27:06 +03:00			`package smtpserver`

			`import (`
			`"context"`
			`"net"`
			`"os"`
			`"time"`

			`"github.com/mjl-/bstore"`

			`"github.com/mjl-/mox/dkim"`
			`"github.com/mjl-/mox/dmarc"`
			`"github.com/mjl-/mox/dmarcrpt"`
			`"github.com/mjl-/mox/dns"`
			`"github.com/mjl-/mox/dnsbl"`
			`"github.com/mjl-/mox/iprev"`
			`"github.com/mjl-/mox/mlog"`
			`"github.com/mjl-/mox/mox-"`
			`"github.com/mjl-/mox/smtp"`
			`"github.com/mjl-/mox/store"`
			`"github.com/mjl-/mox/subjectpass"`
			`"github.com/mjl-/mox/tlsrpt"`
			`)`

			`type delivery struct {`
			`m *store.Message`
			`dataFile *os.File`
			`rcptAcc rcptAccount`
			`acc *store.Account`
			`msgFrom smtp.Address`
			`dnsBLs []dns.Domain`
			`dmarcUse bool`
			`dmarcResult dmarc.Result`
			`dkimResults []dkim.Result`
			`iprevStatus iprev.Status`
			`}`

			`type analysis struct {`
			`accept bool`
			`code int`
			`secode string`
			`userError bool`
			`errmsg string`
			`err error // For our own logging, not sent to remote.`
			`dmarcReport *dmarcrpt.Feedback // Validated dmarc aggregate report, not yet stored.`
			`tlsReport *tlsrpt.Report // Validated TLS report, not yet stored.`
			`reason string // If non-empty, reason for this decision. Can be one of reputationMethod and a few other tokens.`
			`}`

			`const (`
			`reasonListAllow = "list-allow"`
			`reasonDMARCPolicy = "dmarc-policy"`
			`reasonReputationError = "reputation-error"`
			`reasonReporting = "reporting"`
			`reasonSPFPolicy = "spf-policy"`
			`reasonJunkClassifyError = "junk-classify-error"`
			`reasonJunkFilterError = "junk-filter-error"`
			`reasonGiveSubjectpass = "give-subjectpass"`
			`reasonNoBadSignals = "no-bad-signals"`
			`reasonJunkContent = "junk-content"`
			`reasonJunkContentStrict = "junk-content-strict"`
			`reasonDNSBlocklisted = "dns-blocklisted"`
			`reasonSubjectpass = "subjectpass"`
			`reasonSubjectpassError = "subjectpass-error"`
			`reasonIPrev = "iprev" // No or mil junk reputation signals, and bad iprev.`
			`)`

			`func analyze(ctx context.Context, log *mlog.Log, resolver dns.Resolver, d delivery) analysis {`
			`reject := func(code int, secode string, errmsg string, err error, reason string) analysis {`
			`return analysis{false, code, secode, err == nil, errmsg, err, nil, nil, reason}`
			`}`

			`// If destination mailbox has a mailing list domain (for SPF/DKIM) configured,`
			`// check it for a pass.`
			`// todo: should use this evaluation for final delivery as well`
			`rs := store.MessageRuleset(log, d.rcptAcc.destination, d.m, d.m.MsgPrefix, d.dataFile)`
			`if rs != nil && !rs.ListAllowDNSDomain.IsZero() {`
			`ld := rs.ListAllowDNSDomain`
			`// todo: on temporary failures, reject temporarily?`
			`if d.m.MailFromValidated && ld.Name() == d.m.MailFromDomain {`
			`return analysis{accept: true, reason: reasonListAllow}`
			`}`
			`for _, r := range d.dkimResults {`
			`if r.Status == dkim.StatusPass && r.Sig.Domain == ld {`
			`return analysis{accept: true, reason: reasonListAllow}`
			`}`
			`}`
			`}`

			`if d.dmarcUse && d.dmarcResult.Reject {`
			`return reject(smtp.C550MailboxUnavail, smtp.SePol7MultiAuthFails26, "rejecting per dmarc policy", nil, reasonDMARCPolicy)`
			`}`
			`// todo: should we also reject messages that have a dmarc pass but an spf record "v=spf1 -all"? suggested by m3aawg best practices.`

			`// If destination is the DMARC reporting mailbox, do additional checks and keep`
			`// track of the report. We'll check reputation, defaulting to accept.`
			`var dmarcReport *dmarcrpt.Feedback`
			`if d.rcptAcc.destination.DMARCReports {`
			`// Messages with DMARC aggregate reports must have a dmarc pass. ../rfc/7489:1866`
			`if d.dmarcResult.Status != dmarc.StatusPass {`
change some log levels from info to debug, and use lower case log messages 2023-02-12 01:54:22 +03:00			`log.Info("received dmarc report without dmarc pass, not processing as dmarc report")`
mox! 2023-01-30 16:27:06 +03:00			`} else if report, err := dmarcrpt.ParseMessageReport(store.FileMsgReader(d.m.MsgPrefix, d.dataFile)); err != nil {`
			`log.Infox("parsing dmarc report", err)`
			`} else if d, err := dns.ParseDomain(report.PolicyPublished.Domain); err != nil {`
			`log.Infox("parsing domain in dmarc report", err)`
			`} else if _, ok := mox.Conf.Domain(d); !ok {`
			`log.Info("dmarc report for domain not configured, ignoring", mlog.Field("domain", d))`
			`} else if report.ReportMetadata.DateRange.End > time.Now().Unix()+60 {`
			`log.Info("dmarc report with end date in the future, ignoring", mlog.Field("domain", d), mlog.Field("end", time.Unix(report.ReportMetadata.DateRange.End, 0)))`
			`} else {`
			`dmarcReport = report`
			`}`
			`}`

			`// Similar to DMARC reporting, we check for the required DKIM. We'll check`
			`// reputation, defaulting to accept.`
			`var tlsReport *tlsrpt.Report`
			`if d.rcptAcc.destination.TLSReports {`
			`// Valid DKIM signature for domain must be present. We take "valid" to assume`
			`// "passing", not "syntactically valid". We also check for "tlsrpt" as service.`
			`// This check is optional, but if anyone goes through the trouble to explicitly`
			`// list allowed services, they would be surprised to see them ignored.`
			`// ../rfc/8460:320`
			`ok := false`
			`for _, r := range d.dkimResults {`
			`if r.Status == dkim.StatusPass && r.Sig.Domain == d.msgFrom.Domain && r.Sig.Length < 0 && r.Record.ServiceAllowed("tlsrpt") {`
			`ok = true`
			`break`
			`}`
			`}`

			`if !ok {`
change some log levels from info to debug, and use lower case log messages 2023-02-12 01:54:22 +03:00			`log.Info("received mail to tlsrpt without acceptable DKIM signature, not processing as tls report")`
mox! 2023-01-30 16:27:06 +03:00			`} else if report, err := tlsrpt.ParseMessage(store.FileMsgReader(d.m.MsgPrefix, d.dataFile)); err != nil {`
change some log levels from info to debug, and use lower case log messages 2023-02-12 01:54:22 +03:00			`log.Infox("parsing tls report", err)`
mox! 2023-01-30 16:27:06 +03:00			`} else {`
			`var known bool`
			`for _, p := range report.Policies {`
			`log.Info("tlsrpt policy domain", mlog.Field("domain", p.Policy.Domain))`
			`if d, err := dns.ParseDomain(p.Policy.Domain); err != nil {`
change some log levels from info to debug, and use lower case log messages 2023-02-12 01:54:22 +03:00			`log.Infox("parsing domain in tls report", err)`
mox! 2023-01-30 16:27:06 +03:00			`} else if _, ok := mox.Conf.Domain(d); ok {`
			`known = true`
			`break`
			`}`
			`}`
			`if !known {`
change some log levels from info to debug, and use lower case log messages 2023-02-12 01:54:22 +03:00			`log.Info("tls report without one of configured domains, ignoring")`
mox! 2023-01-30 16:27:06 +03:00			`} else {`
			`tlsReport = report`
			`}`
			`}`
			`}`

			`// Determine if message is acceptable based on DMARC domain, DKIM identities, or`
			`// host-based reputation.`
			`var isjunk *bool`
			`var conclusive bool`
			`var method reputationMethod`
			`var reason string`
			`var err error`
			`d.acc.WithRLock(func() {`
			`err = d.acc.DB.Read(func(tx *bstore.Tx) error {`
			`// Set message MailboxID to which mail will be delivered. Reputation is`
			`// per-mailbox. If referenced mailbox is not found (e.g. does not yet exist), we`
fix handling of reputation for messages that were moved out of the rejects mailbox the idea of the rejects mailbox is to show messages that were rejected. you can look there, and if you see a message that should have been delivered, you can move it to your inbox or archive. next time a deliver attempt by that user is attempted, they should be accepted, because you corrected the reject. but that wasn't happening, because the reputation-calculation is per-delivery mailbox (e.g. Inbox) and we look at MailboxOrigID when calculating the reputation. and that was set to the Rejects mailbox id, so the message wasn't considered. the same applies to moving messages from Rejects to Junk (to train your filter). we now keep track of a MailboxDestinedID, that is set to the mailbox that we would have delivered to if we would not have rejected the message. then, when a message is moved out of the Rejects mailbox, we change MailboxOrigID to MailboxDestinedID. this essentially makes the message look like it was delivered normally. 2023-03-03 15:19:27 +03:00			`// can still determine a reputation because we also base it on outgoing`
			`// messages and those are account-global.`
mox! 2023-01-30 16:27:06 +03:00			`mailbox := d.rcptAcc.destination.Mailbox`
			`if mailbox == "" {`
			`mailbox = "Inbox"`
			`}`
			`if rs != nil {`
			`mailbox = rs.Mailbox`
			`}`
			`mb := d.acc.MailboxFindX(tx, mailbox)`
			`if mb != nil {`
fix handling of reputation for messages that were moved out of the rejects mailbox the idea of the rejects mailbox is to show messages that were rejected. you can look there, and if you see a message that should have been delivered, you can move it to your inbox or archive. next time a deliver attempt by that user is attempted, they should be accepted, because you corrected the reject. but that wasn't happening, because the reputation-calculation is per-delivery mailbox (e.g. Inbox) and we look at MailboxOrigID when calculating the reputation. and that was set to the Rejects mailbox id, so the message wasn't considered. the same applies to moving messages from Rejects to Junk (to train your filter). we now keep track of a MailboxDestinedID, that is set to the mailbox that we would have delivered to if we would not have rejected the message. then, when a message is moved out of the Rejects mailbox, we change MailboxOrigID to MailboxDestinedID. this essentially makes the message look like it was delivered normally. 2023-03-03 15:19:27 +03:00			`// We want to deliver to mb.ID, but this message may be rejected and sent to the`
			`// Rejects mailbox instead, which MailboxID overwritten. Record the ID in`
			`// MailboxDestinedID too. If the message is later moved out of the Rejects mailbox,`
			`// we'll adjust the MailboxOrigID so it gets taken into account during reputation`
			`// calculating in future deliveries. If we end up delivering to the intended`
			`// mailbox (i.e. not rejecting), MailboxDestinedID is cleared during delivery so we`
			`// don't store it unnecessarily.`
mox! 2023-01-30 16:27:06 +03:00			`d.m.MailboxID = mb.ID`
fix handling of reputation for messages that were moved out of the rejects mailbox the idea of the rejects mailbox is to show messages that were rejected. you can look there, and if you see a message that should have been delivered, you can move it to your inbox or archive. next time a deliver attempt by that user is attempted, they should be accepted, because you corrected the reject. but that wasn't happening, because the reputation-calculation is per-delivery mailbox (e.g. Inbox) and we look at MailboxOrigID when calculating the reputation. and that was set to the Rejects mailbox id, so the message wasn't considered. the same applies to moving messages from Rejects to Junk (to train your filter). we now keep track of a MailboxDestinedID, that is set to the mailbox that we would have delivered to if we would not have rejected the message. then, when a message is moved out of the Rejects mailbox, we change MailboxOrigID to MailboxDestinedID. this essentially makes the message look like it was delivered normally. 2023-03-03 15:19:27 +03:00			`d.m.MailboxDestinedID = mb.ID`
mox! 2023-01-30 16:27:06 +03:00			`} else {`
			`log.Debug("mailbox not found in database", mlog.Field("mailbox", mailbox))`
			`}`

			`var err error`
			`isjunk, conclusive, method, err = reputation(tx, log, d.m)`
			`reason = string(method)`
			`return err`
			`})`
			`})`
			`if err != nil {`
			`log.Infox("determining reputation", err, mlog.Field("message", d.m))`
			`return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonReputationError)`
			`}`
			`log.Info("reputation analyzed", mlog.Field("conclusive", conclusive), mlog.Field("isjunk", isjunk), mlog.Field("method", string(method)))`
			`if conclusive {`
			`if !*isjunk {`
			`return analysis{accept: true, dmarcReport: dmarcReport, tlsReport: tlsReport, reason: reason}`
			`}`
			`return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, string(method))`
			`} else if dmarcReport != nil \|\| tlsReport != nil {`
			`log.Info("accepting dmarc reporting or tlsrpt message without reputation")`
			`return analysis{accept: true, dmarcReport: dmarcReport, tlsReport: tlsReport, reason: reasonReporting}`
			`}`
			`// If there was no previous message from sender or its domain, and we have an SPF`
			`// (soft)fail, reject the message.`
			`switch method {`
			`case methodDKIMSPF, methodIP1, methodIP2, methodIP3, methodNone:`
			`switch d.m.MailFromValidation {`
			`case store.ValidationFail, store.ValidationSoftfail:`
			`return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", nil, reasonSPFPolicy)`
			`}`
			`}`

			`// Senders without reputation and without iprev pass, are likely spam.`
			`var suspiciousIPrevFail bool`
			`switch method {`
			`case methodDKIMSPF, methodIP1, methodIP2, methodIP3, methodNone:`
			`suspiciousIPrevFail = d.iprevStatus != iprev.StatusPass`
			`}`

			`// With already a mild junk signal, an iprev fail on top is enough to reject.`
			`if suspiciousIPrevFail && isjunk != nil && *isjunk {`
			`return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", nil, reasonIPrev)`
			`}`

			`var subjectpassKey string`
			`conf, _ := d.acc.Conf()`
			`if conf.SubjectPass.Period > 0 {`
			`subjectpassKey, err = d.acc.Subjectpass(d.rcptAcc.canonicalAddress)`
			`if err != nil {`
			`log.Errorx("get key for verifying subject token", err)`
			`return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonSubjectpassError)`
			`}`
			`err = subjectpass.Verify(d.dataFile, []byte(subjectpassKey), conf.SubjectPass.Period)`
			`pass := err == nil`
			`log.Infox("pass by subject token", err, mlog.Field("pass", pass))`
			`if pass {`
			`return analysis{accept: true, reason: reasonSubjectpass}`
			`}`
			`}`

			`reason = reasonNoBadSignals`
			`accept := true`
			`var junkSubjectpass bool`
			`f, jf, err := d.acc.OpenJunkFilter(log)`
			`if err == nil {`
			`defer func() {`
consistently use log.Check for logging errors that "should not happen", don't influence application flow sooner or later, someone will notice one of these messages, which will lead us to a bug. 2023-02-16 15:22:00 +03:00			`err := f.Close()`
			`log.Check(err, "closing junkfilter")`
mox! 2023-01-30 16:27:06 +03:00			`}()`
			`contentProb, _, _, _, err := f.ClassifyMessageReader(store.FileMsgReader(d.m.MsgPrefix, d.dataFile), d.m.Size)`
			`if err != nil {`
			`log.Errorx("testing for spam", err)`
			`return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonJunkClassifyError)`
			`}`
			`// todo: if isjunk is not nil (i.e. there was inconclusive reputation), use it in the probability calculation. give reputation a score of 0.25 or .75 perhaps?`
			`// todo: if there aren't enough historic messages, we should just let messages in.`
			`// todo: we could require nham and nspam to be above a certain number when there were plenty of words in the message, and in the database. can indicate a spammer is misspelling words. however, it can also mean a message in a different language/script...`

			`// If we don't accept, we may still respond with a "subjectpass" hint below.`
			`// We add some jitter to the threshold we use. So we don't act as too easy an`
			`// oracle for words that are a strong indicator of haminess.`
			`// todo: we should rate-limit uses of the junkfilter.`
			`jitter := (jitterRand.Float64() - 0.5) / 10`
			`threshold := jf.Threshold + jitter`

			`// With an iprev fail, we set a higher bar for content.`
			`reason = reasonJunkContent`
			`if suspiciousIPrevFail && threshold > 0.25 {`
			`threshold = 0.25`
			`log.Info("setting junk threshold due to iprev fail", mlog.Field("threshold", 0.25))`
			`reason = reasonJunkContentStrict`
			`}`
			`accept = contentProb <= threshold`
			`junkSubjectpass = contentProb < threshold-0.2`
consistently use lower-cased field names for logging lines 2023-02-25 14:37:59 +03:00			`log.Info("content analyzed", mlog.Field("accept", accept), mlog.Field("contentprob", contentProb), mlog.Field("subjectpass", junkSubjectpass))`
mox! 2023-01-30 16:27:06 +03:00			`} else if err != store.ErrNoJunkFilter {`
			`log.Errorx("open junkfilter", err)`
			`return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", err, reasonJunkFilterError)`
			`}`

			`// If content looks good, we'll still look at DNS block lists for a reason to`
			`// reject. We normally won't get here if we've communicated with this sender`
			`// before.`
			`var dnsblocklisted bool`
			`if accept {`
			`blocked := func(zone dns.Domain) bool {`
			`dnsblctx, dnsblcancel := context.WithTimeout(ctx, 30*time.Second)`
			`defer dnsblcancel()`
			`if !checkDNSBLHealth(dnsblctx, resolver, zone) {`
			`log.Info("dnsbl not healthy, skipping", mlog.Field("zone", zone))`
			`return false`
			`}`

			`status, expl, err := dnsbl.Lookup(dnsblctx, resolver, zone, net.ParseIP(d.m.RemoteIP))`
			`dnsblcancel()`
			`if status == dnsbl.StatusFail {`
			`log.Info("rejecting due to listing in dnsbl", mlog.Field("zone", zone), mlog.Field("explanation", expl))`
			`return true`
			`} else if err != nil {`
			`log.Infox("dnsbl lookup", err, mlog.Field("zone", zone), mlog.Field("status", status))`
			`}`
			`return false`
			`}`

			`// Note: We don't check in parallel, we are in no hurry to accept possible spam.`
			`for _, zone := range d.dnsBLs {`
			`if blocked(zone) {`
			`accept = false`
			`dnsblocklisted = true`
			`reason = reasonDNSBlocklisted`
			`break`
			`}`
			`}`
			`}`

			`if accept {`
			`return analysis{accept: true, reason: reasonNoBadSignals}`
			`}`

			`if subjectpassKey != "" && d.dmarcResult.Status == dmarc.StatusPass && method == methodNone && (dnsblocklisted \|\| junkSubjectpass) {`
			`log.Info("permanent reject with subjectpass hint of moderately spammy email without reputation")`
			`pass := subjectpass.Generate(d.msgFrom, []byte(subjectpassKey), time.Now())`
			`return reject(smtp.C550MailboxUnavail, smtp.SePol7DeliveryUnauth1, subjectpass.Explanation+pass, nil, reasonGiveSubjectpass)`
			`}`

			`return reject(smtp.C451LocalErr, smtp.SeSys3Other0, "error processing", nil, reason)`
			`}`