mirror of
https://github.com/mjl-/mox.git
synced 2024-12-26 16:33:47 +03:00
893a6f8911
we were already accepting, processing and displaying incoming tls reports. now we start tracking TLS connection and security-policy-related errors for outgoing message deliveries as well. we send reports once a day, to the reporting addresses specified in TLSRPT records (rua) of a policy domain. these reports are about MTA-STS policies and/or DANE policies, and about STARTTLS-related failures. sending reports is enabled by default, but can be disabled through setting NoOutgoingTLSReports in mox.conf. only at the end of the implementation process came the realization that the TLSRPT policy domain for DANE (MX) hosts is separate from the TLSRPT policy for the recipient domain, and that MTA-STS and DANE TLS/policy results are typically delivered in separate reports. so MX hosts need their own TLSRPT policies. config for the per-host TLSRPT policy should be added to mox.conf for existing installs, in field HostTLSRPT. it is automatically configured by quickstart for new installs. with a HostTLSRPT config, the "dns records" and "dns check" admin pages now suggest the per-host TLSRPT record. by creating that record, you're requesting TLS reports about your MX host. gathering all the TLS/policy results is somewhat tricky. the tentacles go throughout the code. the positive result is that the TLS/policy-related code had to be cleaned up a bit. for example, the smtpclient TLS modes now reflect reality better, with independent settings about whether PKIX and/or DANE verification has to be done, and/or whether verification errors have to be ignored (e.g. for tls-required: no header). also, cached mtasts policies of mode "none" are now cleaned up once the MTA-STS DNS record goes away.
422 lines
17 KiB
Go
422 lines
17 KiB
Go
package smtpclient
|
|
|
|
import (
|
|
"context"
|
|
"crypto/sha256"
|
|
"crypto/sha512"
|
|
"crypto/x509"
|
|
"errors"
|
|
"fmt"
|
|
"net"
|
|
"sort"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/mjl-/adns"
|
|
|
|
"github.com/mjl-/mox/dns"
|
|
"github.com/mjl-/mox/mlog"
|
|
)
|
|
|
|
// Sentinel errors for gathering delivery destinations. They are returned either
// directly or wrapped (with %w) in an error carrying more context.
var (
	// errCNAMELoop is used when following CNAMEs leads back to a name that was
	// already visited.
	errCNAMELoop = errors.New("cname loop")

	// errCNAMELimit is used when more CNAME records are chained than we are
	// willing to follow.
	errCNAMELimit = errors.New("too many cname records")

	// errDNS is used for failed DNS lookups and for DNS data that cannot be parsed.
	errDNS = errors.New("dns lookup error")

	// errNoMail is used when the only MX record of a domain has target ".".
	errNoMail = errors.New("domain does not accept email as indicated with single dot for mx record")
)
|
|
|
|
// GatherDestinations looks up the hosts to deliver email to a domain ("next-hop").
|
|
// If it is an IP address, it is the only destination to try. Otherwise CNAMEs of
|
|
// the domain are followed. Then MX records for the expanded CNAME are looked up.
|
|
// If no MX record is present, the original domain is returned. If an MX record is
|
|
// present but indicates the domain does not accept email, ErrNoMail is returned.
|
|
// If valid MX records were found, the MX target hosts are returned.
|
|
//
|
|
// haveMX indicates if an MX record was found.
|
|
//
|
|
// origNextHopAuthentic indicates if the DNS record for the initial domain name was
|
|
// DNSSEC secure (CNAME, MX).
|
|
//
|
|
// expandedNextHopAuthentic indicates if the DNS records after following CNAMEs were
|
|
// DNSSEC secure.
|
|
//
|
|
// These authentic flags are used by DANE, to determine where to look up TLSA
|
|
// records, and which names to allow in the remote TLS certificate. If MX records
|
|
// were found, both the original and expanded next-hops must be authentic for DANE
|
|
// to apply. For a non-IP with no MX records found, the authentic result can be
|
|
// used to decide which of the names to use as TLSA base domain.
|
|
func GatherDestinations(ctx context.Context, log *mlog.Log, resolver dns.Resolver, origNextHop dns.IPDomain) (haveMX, origNextHopAuthentic, expandedNextHopAuthentic bool, expandedNextHop dns.Domain, hosts []dns.IPDomain, permanent bool, err error) {
|
|
// ../rfc/5321:3824
|
|
|
|
// IP addresses are dialed directly, and don't have TLSA records.
|
|
if len(origNextHop.IP) > 0 {
|
|
return false, false, false, expandedNextHop, []dns.IPDomain{origNextHop}, false, nil
|
|
}
|
|
|
|
// We start out assuming the result is authentic. Updated with each lookup.
|
|
origNextHopAuthentic = true
|
|
expandedNextHopAuthentic = true
|
|
|
|
// We start out delivering to the recipient domain. We follow CNAMEs.
|
|
rcptDomain := origNextHop.Domain
|
|
// Domain we are actually delivering to, after following CNAME record(s).
|
|
expandedNextHop = rcptDomain
|
|
// Keep track of CNAMEs we have followed, to detect loops.
|
|
domainsSeen := map[string]bool{}
|
|
for i := 0; ; i++ {
|
|
if domainsSeen[expandedNextHop.ASCII] {
|
|
// todo: only mark as permanent failure if TTLs for all records are beyond latest possibly delivery retry we would do.
|
|
err := fmt.Errorf("%w: recipient domain %s: already saw %s", errCNAMELoop, rcptDomain, expandedNextHop)
|
|
return false, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, false, err
|
|
}
|
|
domainsSeen[expandedNextHop.ASCII] = true
|
|
|
|
// note: The Go resolver returns the requested name if the domain has no CNAME
|
|
// record but has a host record.
|
|
if i == 16 {
|
|
// We have a maximum number of CNAME records we follow. There is no hard limit for
|
|
// DNS, and you might think folks wouldn't configure CNAME chains at all, but for
|
|
// (non-mail) domains, CNAME chains of 10 records have been encountered according
|
|
// to the internet.
|
|
// todo: only mark as permanent failure if TTLs for all records are beyond latest possibly delivery retry we would do.
|
|
err := fmt.Errorf("%w: recipient domain %s, last resolved domain %s", errCNAMELimit, rcptDomain, expandedNextHop)
|
|
return false, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, false, err
|
|
}
|
|
|
|
// Do explicit CNAME lookup. Go's LookupMX also resolves CNAMEs, but we want to
|
|
// know the final name, and we're interested in learning if the first vs later
|
|
// results were DNSSEC-(in)secure.
|
|
// ../rfc/5321:3838 ../rfc/3974:197
|
|
cctx, ccancel := context.WithTimeout(ctx, 30*time.Second)
|
|
defer ccancel()
|
|
cname, cnameResult, err := resolver.LookupCNAME(cctx, expandedNextHop.ASCII+".")
|
|
ccancel()
|
|
if i == 0 {
|
|
origNextHopAuthentic = origNextHopAuthentic && cnameResult.Authentic
|
|
}
|
|
expandedNextHopAuthentic = expandedNextHopAuthentic && cnameResult.Authentic
|
|
if err != nil && !dns.IsNotFound(err) {
|
|
err = fmt.Errorf("%w: cname lookup for %s: %v", errDNS, expandedNextHop, err)
|
|
return false, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, false, err
|
|
}
|
|
if err == nil && cname != expandedNextHop.ASCII+"." {
|
|
d, err := dns.ParseDomain(strings.TrimSuffix(cname, "."))
|
|
if err != nil {
|
|
// todo: only mark as permanent failure if TTLs for all records are beyond latest possibly delivery retry we would do.
|
|
err = fmt.Errorf("%w: parsing cname domain %s: %v", errDNS, expandedNextHop, err)
|
|
return false, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, false, err
|
|
}
|
|
expandedNextHop = d
|
|
// Start again with new domain.
|
|
continue
|
|
}
|
|
|
|
// Not a CNAME, so lookup MX record.
|
|
mctx, mcancel := context.WithTimeout(ctx, 30*time.Second)
|
|
defer mcancel()
|
|
// Note: LookupMX can return an error and still return records: Invalid records are
|
|
// filtered out and an error returned. We must process any records that are valid.
|
|
// Only if all are unusable will we return an error. ../rfc/5321:3851
|
|
mxl, mxResult, err := resolver.LookupMX(mctx, expandedNextHop.ASCII+".")
|
|
mcancel()
|
|
if i == 0 {
|
|
origNextHopAuthentic = origNextHopAuthentic && mxResult.Authentic
|
|
}
|
|
expandedNextHopAuthentic = expandedNextHopAuthentic && mxResult.Authentic
|
|
if err != nil && len(mxl) == 0 {
|
|
if !dns.IsNotFound(err) {
|
|
err = fmt.Errorf("%w: mx lookup for %s: %v", errDNS, expandedNextHop, err)
|
|
return false, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, false, err
|
|
}
|
|
|
|
// No MX record, attempt delivery directly to host. ../rfc/5321:3842
|
|
hosts = []dns.IPDomain{{Domain: expandedNextHop}}
|
|
return false, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, hosts, false, nil
|
|
} else if err != nil {
|
|
log.Infox("mx record has some invalid records, keeping only the valid mx records", err)
|
|
}
|
|
|
|
// ../rfc/7505:122
|
|
if err == nil && len(mxl) == 1 && mxl[0].Host == "." {
|
|
// Note: Depending on MX record TTL, this record may be replaced with a more
|
|
// receptive MX record before our final delivery attempt. But it's clearly the
|
|
// explicit desire not to be bothered with email delivery attempts, so mark failure
|
|
// as permanent.
|
|
return true, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, true, errNoMail
|
|
}
|
|
|
|
// The Go resolver already sorts by preference, randomizing records of same
|
|
// preference. ../rfc/5321:3885
|
|
for _, mx := range mxl {
|
|
// Parsing lax (unless pedantic mode) for MX targets with underscores as seen in the wild.
|
|
host, err := dns.ParseDomainLax(strings.TrimSuffix(mx.Host, "."))
|
|
if err != nil {
|
|
// note: should not happen because Go resolver already filters these out.
|
|
err = fmt.Errorf("%w: invalid host name in mx record %q: %v", errDNS, mx.Host, err)
|
|
return true, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, true, err
|
|
}
|
|
hosts = append(hosts, dns.IPDomain{Domain: host})
|
|
}
|
|
if len(hosts) > 0 {
|
|
err = nil
|
|
}
|
|
return true, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, hosts, false, err
|
|
}
|
|
}
|
|
|
|
// GatherIPs looks up the IPs to try for connecting to host, with the IPs ordered
|
|
// to take previous attempts into account. For use with DANE, the CNAME-expanded
|
|
// name is returned, and whether the DNS responses were authentic.
|
|
func GatherIPs(ctx context.Context, log *mlog.Log, resolver dns.Resolver, host dns.IPDomain, dialedIPs map[string][]net.IP) (authentic bool, expandedAuthentic bool, expandedHost dns.Domain, ips []net.IP, dualstack bool, rerr error) {
|
|
if len(host.IP) > 0 {
|
|
return false, false, dns.Domain{}, []net.IP{host.IP}, false, nil
|
|
}
|
|
|
|
authentic = true
|
|
expandedAuthentic = true
|
|
|
|
// The Go resolver automatically follows CNAMEs, which is not allowed for host
|
|
// names in MX records, but seems to be accepted and is documented for DANE SMTP
|
|
// behaviour. We resolve CNAMEs explicitly, so we can return the final name, which
|
|
// DANE needs. ../rfc/7671:246
|
|
// ../rfc/5321:3861 ../rfc/2181:661 ../rfc/7672:1382 ../rfc/7671:1030
|
|
name := host.Domain.ASCII + "."
|
|
|
|
for i := 0; ; i++ {
|
|
cname, result, err := resolver.LookupCNAME(ctx, name)
|
|
if i == 0 {
|
|
authentic = result.Authentic
|
|
}
|
|
expandedAuthentic = expandedAuthentic && result.Authentic
|
|
if dns.IsNotFound(err) {
|
|
break
|
|
} else if err != nil {
|
|
return authentic, expandedAuthentic, dns.Domain{}, nil, dualstack, err
|
|
} else if strings.TrimSuffix(cname, ".") == strings.TrimSuffix(name, ".") {
|
|
break
|
|
}
|
|
if i > 10 {
|
|
return authentic, expandedAuthentic, dns.Domain{}, nil, dualstack, fmt.Errorf("mx lookup: %w", errCNAMELimit)
|
|
}
|
|
name = strings.TrimSuffix(cname, ".") + "."
|
|
}
|
|
|
|
if name == host.Domain.ASCII+"." {
|
|
expandedHost = host.Domain
|
|
} else {
|
|
var err error
|
|
expandedHost, err = dns.ParseDomain(strings.TrimSuffix(name, "."))
|
|
if err != nil {
|
|
return authentic, expandedAuthentic, dns.Domain{}, nil, dualstack, fmt.Errorf("parsing cname-resolved domain: %w", err)
|
|
}
|
|
}
|
|
|
|
ipaddrs, result, err := resolver.LookupIPAddr(ctx, name)
|
|
authentic = authentic && result.Authentic
|
|
expandedAuthentic = expandedAuthentic && result.Authentic
|
|
if err != nil || len(ipaddrs) == 0 {
|
|
return authentic, expandedAuthentic, expandedHost, nil, false, fmt.Errorf("looking up %q: %w", name, err)
|
|
}
|
|
var have4, have6 bool
|
|
for _, ipaddr := range ipaddrs {
|
|
ips = append(ips, ipaddr.IP)
|
|
if ipaddr.IP.To4() == nil {
|
|
have6 = true
|
|
} else {
|
|
have4 = true
|
|
}
|
|
}
|
|
dualstack = have4 && have6
|
|
prevIPs := dialedIPs[host.String()]
|
|
if len(prevIPs) > 0 {
|
|
prevIP := prevIPs[len(prevIPs)-1]
|
|
prevIs4 := prevIP.To4() != nil
|
|
sameFamily := 0
|
|
for _, ip := range prevIPs {
|
|
is4 := ip.To4() != nil
|
|
if prevIs4 == is4 {
|
|
sameFamily++
|
|
}
|
|
}
|
|
preferPrev := sameFamily == 1
|
|
// We use stable sort so any preferred/randomized listing from DNS is kept intact.
|
|
sort.SliceStable(ips, func(i, j int) bool {
|
|
aIs4 := ips[i].To4() != nil
|
|
bIs4 := ips[j].To4() != nil
|
|
if aIs4 != bIs4 {
|
|
// Prefer "i" if it is not same address family.
|
|
return aIs4 != prevIs4
|
|
}
|
|
// Prefer "i" if it is the same as last and we should be preferring it.
|
|
return preferPrev && ips[i].Equal(prevIP)
|
|
})
|
|
log.Debug("ordered ips for dialing", mlog.Field("ips", ips))
|
|
}
|
|
return
|
|
}
|
|
|
|
// GatherTLSA looks up TLSA record for either expandedHost or host, and returns
|
|
// records usable for DANE with SMTP, and host names to allow in DANE-TA
|
|
// certificate name verification.
|
|
//
|
|
// If no records are found, this isn't necessarily an error. It can just indicate
|
|
// the domain/host does not opt-in to DANE, and nil records and a nil error are
|
|
// returned.
|
|
//
|
|
// Only usable records are returned. If any record was found, DANE is required and
|
|
// this is indicated with daneRequired. If no usable records remain, the caller
|
|
// must do TLS, but not verify the remote TLS certificate.
|
|
//
|
|
// Returned values are always meaningful, also when an error was returned.
|
|
func GatherTLSA(ctx context.Context, log *mlog.Log, resolver dns.Resolver, host dns.Domain, expandedAuthentic bool, expandedHost dns.Domain) (daneRequired bool, daneRecords []adns.TLSA, tlsaBaseDomain dns.Domain, err error) {
|
|
// ../rfc/7672:912
|
|
// This function is only called when the lookup of host was authentic.
|
|
|
|
var l []adns.TLSA
|
|
|
|
tlsaBaseDomain = host
|
|
if host == expandedHost || !expandedAuthentic {
|
|
l, err = lookupTLSACNAME(ctx, log, resolver, 25, "tcp", host)
|
|
} else if expandedAuthentic {
|
|
// ../rfc/7672:934
|
|
tlsaBaseDomain = expandedHost
|
|
l, err = lookupTLSACNAME(ctx, log, resolver, 25, "tcp", expandedHost)
|
|
if err == nil && len(l) == 0 {
|
|
tlsaBaseDomain = host
|
|
l, err = lookupTLSACNAME(ctx, log, resolver, 25, "tcp", host)
|
|
}
|
|
}
|
|
if len(l) == 0 || err != nil {
|
|
daneRequired = err != nil
|
|
log.Debugx("gathering tlsa records failed", err, mlog.Field("danerequired", daneRequired), mlog.Field("basedomain", tlsaBaseDomain))
|
|
return daneRequired, nil, tlsaBaseDomain, err
|
|
}
|
|
daneRequired = len(l) > 0
|
|
l = filterUsableTLSARecords(log, l)
|
|
log.Debug("tlsa records exist", mlog.Field("danerequired", daneRequired), mlog.Field("records", l), mlog.Field("basedomain", tlsaBaseDomain))
|
|
return daneRequired, l, tlsaBaseDomain, err
|
|
}
|
|
|
|
// lookupTLSACNAME composes a TLSA domain name to lookup, follows CNAMEs and looks
|
|
// up TLSA records. no TLSA records exist, a nil error is returned as it means
|
|
// the host does not opt-in to DANE.
|
|
func lookupTLSACNAME(ctx context.Context, log *mlog.Log, resolver dns.Resolver, port int, protocol string, host dns.Domain) (l []adns.TLSA, rerr error) {
|
|
name := fmt.Sprintf("_%d._%s.%s", port, protocol, host.ASCII+".")
|
|
for i := 0; ; i++ {
|
|
cname, result, err := resolver.LookupCNAME(ctx, name)
|
|
if dns.IsNotFound(err) {
|
|
if !result.Authentic {
|
|
log.Debugx("cname nxdomain result during tlsa lookup not authentic, not doing dane for host", err, mlog.Field("host", host), mlog.Field("name", name))
|
|
return nil, nil
|
|
}
|
|
break
|
|
} else if err != nil {
|
|
return nil, fmt.Errorf("looking up cname for tlsa candidate base domain: %w", err)
|
|
} else if !result.Authentic {
|
|
log.Debugx("cname result during tlsa lookup not authentic, not doing dane for host", err, mlog.Field("host", host), mlog.Field("name", name))
|
|
return nil, nil
|
|
}
|
|
if i == 10 {
|
|
return nil, fmt.Errorf("looking up cname for tlsa candidate base domain: %w", errCNAMELimit)
|
|
}
|
|
name = strings.TrimSuffix(cname, ".") + "."
|
|
}
|
|
var result adns.Result
|
|
var err error
|
|
l, result, err = resolver.LookupTLSA(ctx, 0, "", name)
|
|
if dns.IsNotFound(err) || err == nil && len(l) == 0 {
|
|
log.Debugx("no tlsa records for host, not doing dane", err, mlog.Field("host", host), mlog.Field("name", name), mlog.Field("authentic", result.Authentic))
|
|
return nil, nil
|
|
} else if err != nil {
|
|
return nil, fmt.Errorf("looking up tlsa records for tlsa candidate base domain: %w", err)
|
|
} else if !result.Authentic {
|
|
log.Debugx("tlsa lookup not authentic, not doing dane for host", err, mlog.Field("host", host), mlog.Field("name", name))
|
|
return nil, nil
|
|
}
|
|
return l, nil
|
|
}
|
|
|
|
func filterUsableTLSARecords(log *mlog.Log, l []adns.TLSA) []adns.TLSA {
|
|
// Gather "usable" records. ../rfc/7672:708
|
|
o := 0
|
|
for _, r := range l {
|
|
// A record is not usable when we don't recognize parameters. ../rfc/6698:649
|
|
|
|
switch r.Usage {
|
|
case adns.TLSAUsageDANETA, adns.TLSAUsageDANEEE:
|
|
default:
|
|
// We can regard PKIX-TA and PKIX-EE as "unusable" with SMTP DANE. ../rfc/7672:1304
|
|
continue
|
|
}
|
|
switch r.Selector {
|
|
case adns.TLSASelectorCert, adns.TLSASelectorSPKI:
|
|
default:
|
|
continue
|
|
}
|
|
switch r.MatchType {
|
|
case adns.TLSAMatchTypeFull:
|
|
if r.Selector == adns.TLSASelectorCert {
|
|
if _, err := x509.ParseCertificate(r.CertAssoc); err != nil {
|
|
log.Debugx("parsing certificate in dane tlsa record, ignoring", err)
|
|
continue
|
|
}
|
|
} else if r.Selector == adns.TLSASelectorSPKI {
|
|
if _, err := x509.ParsePKIXPublicKey(r.CertAssoc); err != nil {
|
|
log.Debugx("parsing certificate in dane tlsa record, ignoring", err)
|
|
continue
|
|
}
|
|
}
|
|
case adns.TLSAMatchTypeSHA256:
|
|
if len(r.CertAssoc) != sha256.Size {
|
|
log.Debug("dane tlsa record with wrong data size for sha2-256", mlog.Field("got", len(r.CertAssoc)), mlog.Field("expect", sha256.Size))
|
|
continue
|
|
}
|
|
case adns.TLSAMatchTypeSHA512:
|
|
if len(r.CertAssoc) != sha512.Size {
|
|
log.Debug("dane tlsa record with wrong data size for sha2-512", mlog.Field("got", len(r.CertAssoc)), mlog.Field("expect", sha512.Size))
|
|
continue
|
|
}
|
|
default:
|
|
continue
|
|
}
|
|
|
|
l[o] = r
|
|
o++
|
|
}
|
|
return l[:o]
|
|
}
|
|
|
|
// GatherTLSANames returns the allowed names in TLS certificates for verification
|
|
// with PKIX-* or DANE-TA. The first name should be used for SNI.
|
|
//
|
|
// If there was no MX record, the next-hop domain parameters (i.e. the original
|
|
// email destination host, and its CNAME-expanded host, that has MX records) are
|
|
// ignored and only the base domain parameters are taken into account.
|
|
func GatherTLSANames(haveMX, expandedNextHopAuthentic, expandedTLSABaseDomainAuthentic bool, origNextHop, expandedNextHop, origTLSABaseDomain, expandedTLSABaseDomain dns.Domain) []dns.Domain {
|
|
// Gather the names to check against TLS certificate. ../rfc/7672:1318
|
|
if !haveMX {
|
|
// ../rfc/7672:1336
|
|
if !expandedTLSABaseDomainAuthentic || origTLSABaseDomain == expandedTLSABaseDomain {
|
|
return []dns.Domain{origTLSABaseDomain}
|
|
}
|
|
return []dns.Domain{expandedTLSABaseDomain, origTLSABaseDomain}
|
|
} else if expandedNextHopAuthentic {
|
|
// ../rfc/7672:1326
|
|
var l []dns.Domain
|
|
if expandedTLSABaseDomainAuthentic {
|
|
l = []dns.Domain{expandedTLSABaseDomain}
|
|
}
|
|
if expandedTLSABaseDomain != origTLSABaseDomain {
|
|
l = append(l, origTLSABaseDomain)
|
|
}
|
|
l = append(l, origNextHop)
|
|
if origNextHop != expandedNextHop {
|
|
l = append(l, expandedNextHop)
|
|
}
|
|
return l
|
|
} else {
|
|
// We don't attempt DANE after insecure MX, but behaviour for it is specified.
|
|
// ../rfc/7672:1332
|
|
return []dns.Domain{origNextHop}
|
|
}
|
|
}
|