mox/mtastsdb/db.go
Mechiel Lukkien 5b20cba50a
switch to slog.Logger for logging, for easier reuse of packages by external software
we don't want external software to include internal details like mlog.
slog.Logger is/will be the standard.

we still have mlog for its helper functions, and its handler that logs in
concise logfmt used by mox.

packages that are not meant for reuse still pass around mlog.Log for
convenience.

we use golang.org/x/exp/slog because we also support the previous Go toolchain
version. with the next Go release, we'll switch to the builtin slog.
2023-12-14 13:45:52 +01:00

399 lines
13 KiB
Go

// Package mtastsdb stores MTA-STS policies for later use.
//
// An MTA-STS policy can specify how long it may be cached. By storing a
// policy, it does not have to be fetched again during email delivery, which
// makes it harder for attackers to intervene.
package mtastsdb
import (
"context"
"crypto/tls"
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"sync"
"time"
"golang.org/x/exp/slog"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/mjl-/bstore"
"github.com/mjl-/mox/dns"
"github.com/mjl-/mox/mlog"
"github.com/mjl-/mox/mox-"
"github.com/mjl-/mox/mtasts"
"github.com/mjl-/mox/tlsrpt"
)
var (
metricGet = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "mox_mtastsdb_get_total",
Help: "Number of Get by result.",
},
[]string{"result"},
)
)
var timeNow = time.Now // Tests override this.
// PolicyRecord is a cached policy or absence of a policy.
type PolicyRecord struct {
Domain string // Domain name, with unicode characters.
Inserted time.Time `bstore:"default now"`
ValidEnd time.Time
LastUpdate time.Time // Policies are refreshed on use and periodically.
LastUse time.Time `bstore:"index"`
Backoff bool
RecordID string // As retrieved from DNS.
mtasts.Policy // As retrieved from the well-known HTTPS url.
// Text that make up the policy, as retrieved. We didn't store this in the past. If
// empty, policy can be reconstructed from Policy field. Needed by TLSRPT.
PolicyText string
}
var (
// No valid non-expired policy in database.
ErrNotFound = errors.New("mtastsdb: policy not found")
// Indicates an MTA-STS TXT record was fetched recently, but fetching the policy
// failed and should not yet be retried.
ErrBackoff = errors.New("mtastsdb: policy fetch failed recently")
)
var DBTypes = []any{PolicyRecord{}} // Types stored in DB.
var DB *bstore.DB // Exported for backups.
var mutex sync.Mutex
func database(ctx context.Context) (rdb *bstore.DB, rerr error) {
mutex.Lock()
defer mutex.Unlock()
if DB == nil {
p := mox.DataDirPath("mtasts.db")
os.MkdirAll(filepath.Dir(p), 0770)
db, err := bstore.Open(ctx, p, &bstore.Options{Timeout: 5 * time.Second, Perm: 0660}, DBTypes...)
if err != nil {
return nil, err
}
DB = db
}
return DB, nil
}
// Init opens the database and starts a goroutine that refreshes policies in
// the database, and keeps doing so periodically.
func Init(refresher bool) error {
_, err := database(mox.Shutdown)
if err != nil {
return err
}
if refresher {
// todo: allow us to shut down cleanly?
go refresh()
}
return nil
}
// Close closes the database.
func Close() {
mutex.Lock()
defer mutex.Unlock()
if DB != nil {
err := DB.Close()
mlog.New("mtastsdb", nil).Check(err, "closing database")
DB = nil
}
}
// lookup looks up a policy for the domain in the database.
//
// Only non-expired records are returned.
//
// Returns ErrNotFound if record is not present.
// Returns ErrBackoff if a recent attempt to fetch a record failed.
func lookup(ctx context.Context, log mlog.Log, domain dns.Domain) (*PolicyRecord, error) {
db, err := database(ctx)
if err != nil {
return nil, err
}
if domain.IsZero() {
return nil, fmt.Errorf("empty domain")
}
now := timeNow()
q := bstore.QueryDB[PolicyRecord](ctx, db)
q.FilterNonzero(PolicyRecord{Domain: domain.Name()})
q.FilterGreater("ValidEnd", now)
pr, err := q.Get()
if err == bstore.ErrAbsent {
return nil, ErrNotFound
} else if err != nil {
return nil, err
}
pr.LastUse = now
if err := db.Update(ctx, &pr); err != nil {
log.Errorx("marking cached mta-sts policy as used in database", err)
}
if pr.Backoff {
return nil, ErrBackoff
}
return &pr, nil
}
// Upsert adds the policy to the database, overwriting an existing policy for the domain.
// Policy can be nil, indicating a failure to fetch the policy.
func Upsert(ctx context.Context, domain dns.Domain, recordID string, policy *mtasts.Policy, policyText string) error {
db, err := database(ctx)
if err != nil {
return err
}
return db.Write(ctx, func(tx *bstore.Tx) error {
pr := PolicyRecord{Domain: domain.Name()}
err := tx.Get(&pr)
if err != nil && err != bstore.ErrAbsent {
return err
}
now := timeNow()
var p mtasts.Policy
if policy != nil {
p = *policy
} else {
// ../rfc/8461:552
p.Mode = mtasts.ModeNone
p.MaxAgeSeconds = 5 * 60
}
backoff := policy == nil
validEnd := now.Add(time.Duration(p.MaxAgeSeconds) * time.Second)
if err == bstore.ErrAbsent {
pr = PolicyRecord{domain.Name(), now, validEnd, now, now, backoff, recordID, p, policyText}
return tx.Insert(&pr)
}
pr.ValidEnd = validEnd
pr.LastUpdate = now
pr.LastUse = now
pr.Backoff = backoff
pr.RecordID = recordID
pr.Policy = p
pr.PolicyText = policyText
return tx.Update(&pr)
})
}
// PolicyRecords returns all policies in the database, sorted descending by last
// use, domain.
func PolicyRecords(ctx context.Context) ([]PolicyRecord, error) {
db, err := database(ctx)
if err != nil {
return nil, err
}
return bstore.QueryDB[PolicyRecord](ctx, db).SortDesc("LastUse", "Domain").List()
}
// Get retrieves an MTA-STS policy for domain and whether it is fresh.
//
// If an error is returned, it should be considered a transient error, e.g. a
// temporary DNS lookup failure.
//
// The returned policy can be nil also when there is no error. In this case, the
// domain does not implement MTA-STS.
//
// If a policy is present in the local database, it is refreshed if needed. If no
// policy is present for the domain, an attempt is made to fetch the policy and
// store it in the local database.
//
// Some errors are logged but not otherwise returned, e.g. if a new policy is
// supposedly published but could not be retrieved.
//
// Get returns an "sts" or "no-policy-found" in reportResult in most cases (when
// not a local/internal error). It may add an "sts" result without policy contents
// ("policy-string") in case of errors while fetching the policy.
func Get(ctx context.Context, elog *slog.Logger, resolver dns.Resolver, domain dns.Domain) (policy *mtasts.Policy, reportResult tlsrpt.Result, fresh bool, err error) {
log := mlog.New("mtastsdb", elog)
defer func() {
result := "ok"
if err != nil && errors.Is(err, ErrBackoff) {
result = "backoff"
} else if err != nil && errors.Is(err, ErrNotFound) {
result = "notfound"
} else if err != nil {
result = "error"
}
metricGet.WithLabelValues(result).Inc()
log.Debugx("mtastsdb get result", err, slog.Any("domain", domain), slog.Bool("fresh", fresh))
}()
cachedPolicy, err := lookup(ctx, log, domain)
if err != nil && errors.Is(err, ErrNotFound) {
// We don't have a policy for this domain, not even a record that we tried recently
// and should backoff. So attempt to fetch policy.
nctx, cancel := context.WithTimeout(ctx, time.Minute)
defer cancel()
record, p, ptext, err := mtasts.Get(nctx, log.Logger, resolver, domain)
if err != nil {
switch {
case errors.Is(err, mtasts.ErrNoRecord) || errors.Is(err, mtasts.ErrMultipleRecords) || errors.Is(err, mtasts.ErrRecordSyntax) || errors.Is(err, mtasts.ErrNoPolicy) || errors.Is(err, mtasts.ErrPolicyFetch) || errors.Is(err, mtasts.ErrPolicySyntax):
// Remote is not doing MTA-STS, continue below. ../rfc/8461:333 ../rfc/8461:574
log.Debugx("interpreting mtasts error to mean remote is not doing mta-sts", err)
if errors.Is(err, mtasts.ErrNoRecord) {
reportResult = tlsrpt.MakeResult(tlsrpt.NoPolicyFound, domain)
} else {
fd := policyFetchFailureDetails(err)
reportResult = tlsrpt.MakeResult(tlsrpt.STS, domain, fd)
}
default:
// Interpret as temporary error, e.g. mtasts.ErrDNS, try again later.
// Temporary DNS error could be an operational issue on our side, but we can still
// report it.
// Result: ../rfc/8460:594
fd := tlsrpt.Details(tlsrpt.ResultSTSPolicyFetch, mtasts.TLSReportFailureReason(err))
reportResult = tlsrpt.MakeResult(tlsrpt.STS, domain, fd)
return nil, reportResult, false, fmt.Errorf("lookup up mta-sts policy: %w", err)
}
} else if p.Mode == mtasts.ModeNone {
reportResult = tlsrpt.MakeResult(tlsrpt.NoPolicyFound, domain)
} else {
reportResult = tlsrpt.Result{Policy: tlsrptPolicy(p, ptext, domain)}
}
// Insert policy into database. If we could not fetch the policy itself, we back
// off for 5 minutes. ../rfc/8461:555
if err == nil || errors.Is(err, mtasts.ErrNoPolicy) || errors.Is(err, mtasts.ErrPolicyFetch) || errors.Is(err, mtasts.ErrPolicySyntax) {
var recordID string
if record != nil {
recordID = record.ID
}
if err := Upsert(ctx, domain, recordID, p, ptext); err != nil {
log.Errorx("inserting policy into cache, continuing", err)
}
}
return p, reportResult, true, nil
} else if err != nil && errors.Is(err, ErrBackoff) {
// ../rfc/8461:552
// We recently failed to fetch a policy, act as if MTA-STS is not implemented.
// Result: ../rfc/8460:594
fd := tlsrpt.Details(tlsrpt.ResultSTSPolicyFetch, "back-off-after-recent-fetch-error")
reportResult = tlsrpt.MakeResult(tlsrpt.STS, domain, fd)
return nil, reportResult, false, nil
} else if err != nil {
// We don't add the result to the report, this is an internal error.
return nil, reportResult, false, fmt.Errorf("looking up mta-sts policy in cache: %w", err)
}
// Policy was found in database. Check in DNS it is still fresh.
policy = &cachedPolicy.Policy
nctx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
record, _, err := mtasts.LookupRecord(nctx, log.Logger, resolver, domain)
if err != nil {
if errors.Is(err, mtasts.ErrNoRecord) {
if policy.Mode != mtasts.ModeNone {
log.Errorx("no mtasts dns record while checking non-none policy for freshness, either domain owner removed mta-sts without phasing out policy with a none-policy for period of previous max-age, or this could be an attempt to downgrade to connection without mtasts, continuing with previous policy", err)
}
// else, policy will be removed by periodic refresher in the near future.
} else {
// Could be a temporary DNS or configuration error.
log.Errorx("checking for freshness of cached mta-sts dns txt record for domain, continuing with previously cached policy", err)
}
// Result: ../rfc/8460:594
fd := tlsrpt.Details(tlsrpt.ResultSTSPolicyFetch, mtasts.TLSReportFailureReason(err))
if policy.Mode != mtasts.ModeNone {
fd.FailureReasonCode += "+fallback-to-cached-policy"
}
reportResult = tlsrpt.Result{
Policy: tlsrptPolicy(policy, cachedPolicy.PolicyText, domain),
FailureDetails: []tlsrpt.FailureDetails{fd},
}
return policy, reportResult, false, nil
} else if record.ID == cachedPolicy.RecordID && cachedPolicy.PolicyText != "" {
// In the past, we didn't store the raw policy lines in cachedPolicy.Lines. We only
// stop now if we do have policy lines in the cache.
reportResult = tlsrpt.Result{Policy: tlsrptPolicy(policy, cachedPolicy.PolicyText, domain)}
return policy, reportResult, true, nil
}
// New policy should be available, or we are fetching the policy again because we
// didn't store the raw policy lines in the past.
nctx, cancel = context.WithTimeout(ctx, 30*time.Second)
defer cancel()
p, ptext, err := mtasts.FetchPolicy(nctx, log.Logger, domain)
if err != nil {
log.Errorx("fetching updated policy for domain, continuing with previously cached policy", err)
fd := policyFetchFailureDetails(err)
fd.FailureReasonCode += "+fallback-to-cached-policy"
reportResult = tlsrpt.Result{
Policy: tlsrptPolicy(policy, cachedPolicy.PolicyText, domain),
FailureDetails: []tlsrpt.FailureDetails{fd},
}
return policy, reportResult, false, nil
}
if err := Upsert(ctx, domain, record.ID, p, ptext); err != nil {
log.Errorx("inserting refreshed policy into cache, continuing with fresh policy", err)
}
reportResult = tlsrpt.Result{Policy: tlsrptPolicy(p, ptext, domain)}
return p, reportResult, true, nil
}
func policyFetchFailureDetails(err error) tlsrpt.FailureDetails {
var verificationErr *tls.CertificateVerificationError
if errors.As(err, &verificationErr) {
resultType, reasonCode := tlsrpt.TLSFailureDetails(verificationErr)
// Result: ../rfc/8460:601
reason := string(resultType)
if reasonCode != "" {
reason += "+" + reasonCode
}
return tlsrpt.Details(tlsrpt.ResultSTSWebPKIInvalid, reason)
} else if errors.Is(err, mtasts.ErrPolicySyntax) {
// Result: ../rfc/8460:598
return tlsrpt.Details(tlsrpt.ResultSTSPolicyInvalid, mtasts.TLSReportFailureReason(err))
}
// Result: ../rfc/8460:594
return tlsrpt.Details(tlsrpt.ResultSTSPolicyFetch, mtasts.TLSReportFailureReason(err))
}
func tlsrptPolicy(p *mtasts.Policy, policyText string, domain dns.Domain) tlsrpt.ResultPolicy {
if policyText == "" {
// We didn't always store original policy lines. Reconstruct.
policyText = p.String()
}
lines := strings.Split(strings.TrimSuffix(policyText, "\n"), "\n")
for i, line := range lines {
lines[i] = strings.TrimSuffix(line, "\r")
}
rp := tlsrpt.ResultPolicy{
Type: tlsrpt.STS,
Domain: domain.ASCII,
String: lines,
}
rp.MXHost = make([]string, len(p.MX))
for i, mx := range p.MX {
s := mx.Domain.ASCII
if mx.Wildcard {
s = "*." + s
}
rp.MXHost[i] = s
}
return rp
}