mox/dkim/sig.go
Mechiel Lukkien 32b549b260
add more details to x-mox-reason message header added during delivery, for understanding why a message is accepted/rejected
we add various information while analysing an incoming message. like
dkim/spf/ip reputation. and content-based junk filter threshold/result and
ham/spam words used.

for issue #179 by Fell and #157 by mattfbacon
2024-10-04 16:01:30 +02:00

353 lines
11 KiB
Go

package dkim
import (
"bytes"
"encoding/base64"
"errors"
"fmt"
"strings"
"github.com/mjl-/mox/dns"
"github.com/mjl-/mox/message"
"github.com/mjl-/mox/smtp"
)
// Sig is a DKIM-Signature header in parsed form.
//
// String values must be compared case insensitively.
type Sig struct {
// Required fields.
Version int // Version, 1. Field "v". Always the first field.
AlgorithmSign string // "rsa" or "ed25519". Field "a".
AlgorithmHash string // "sha256" or the deprecated "sha1". Field "a".
Signature []byte // Raw signature bytes, base64-encoded when written by Header. Field "b".
BodyHash []byte // Raw body hash bytes, base64-encoded when written by Header. Field "bh".
Domain dns.Domain // Field "d".
SignedHeaders []string // Duplicates are meaningful. Field "h".
Selector dns.Domain // Selector, for looking up the DNS TXT record at <s>._domainkey.<domain>. Field "s".
// Optional fields.
// Canonicalization is the transformation of header and/or body before hashing. The
// value is in original case, but must be compared case-insensitively. Normally two
// slash-separated values: header canonicalization and body canonicalization. But
// a single "simple" means "simple/simple" and a single "relaxed" means
// "relaxed/simple". Field "c".
Canonicalization string
Length int64 // Body length to verify, default -1 for whole body. Field "l".
Identity *Identity // AUID (agent/user id). If nil and an identity is needed, should be treated as an Identity without localpart and Domain from d= field. Field "i".
QueryMethods []string // For public key, currently known value is "dns/txt" (should be compared case-insensitively). If empty, dns/txt must be assumed. Field "q".
SignTime int64 // Unix epoch. -1 if unset. Field "t".
ExpireTime int64 // Unix epoch. -1 if unset. Field "x".
CopiedHeaders []string // Copied header fields, each in "name:value" form. Field "z".
}
// Identity is used for the optional i= field in a DKIM-Signature header. It uses
// the syntax of an email address, but does not necessarily represent one.
type Identity struct {
// Localpart is optional. A nil pointer means the localpart is absent, which is
// distinct from a present but empty localpart.
Localpart *smtp.Localpart // Optional.
Domain dns.Domain
}
// String formats the identity as "[localpart]@domain", suitable as value of the
// i= field of a DKIM-Signature header. The domain is written in its ASCII form.
func (i Identity) String() string {
	domain := "@" + i.Domain.ASCII

	// Localpart is a pointer so that a missing localpart can be told apart from a
	// present localpart that decodes to the empty string.
	if i.Localpart == nil {
		return domain
	}
	return i.Localpart.String() + domain
}
// newSigWithDefaults returns a new Sig with the optional fields set to their
// "unset" values: "simple/simple" canonicalization, and -1 for the body length,
// sign time and expire time.
func newSigWithDefaults() *Sig {
	sig := new(Sig)
	sig.Canonicalization = "simple/simple"
	sig.Length = -1
	sig.SignTime = -1
	sig.ExpireTime = -1
	return sig
}
// Algorithm returns the value for the "a" field: the signing algorithm and the
// hash algorithm joined by a dash, e.g. "ed25519-sha256".
func (s Sig) Algorithm() string {
	return strings.Join([]string{s.AlgorithmSign, s.AlgorithmHash}, "-")
}
// Header returns the DKIM-Signature header in string form, to be prepended to a
// message, including DKIM-Signature field name and trailing \r\n.
//
// Optional tags are only written when they carry information: c= is skipped
// when empty, "simple" or "simple/simple"; l=, t= and x= are skipped when
// negative; q= is skipped when empty or consisting only of "dns/txt"; i=, h= and
// z= are skipped when unset/empty.
//
// An error is returned if an entry in CopiedHeaders has no ":" separating the
// header name from its value.
func (s *Sig) Header() (string, error) {
	// ../rfc/6376:1021

	// todo: make a higher-level writer that accepts pairs, and only folds to next line when needed.
	w := &message.HeaderWriter{}
	w.Addf("", "DKIM-Signature: v=%d;", s.Version)
	// Domain names must always be in ASCII. ../rfc/6376:1115 ../rfc/6376:1187 ../rfc/6376:1303
	w.Addf(" ", "d=%s;", s.Domain.ASCII)
	w.Addf(" ", "s=%s;", s.Selector.ASCII)
	if s.Identity != nil {
		w.Addf(" ", "i=%s;", s.Identity.String()) // todo: Is utf-8 ok here?
	}
	w.Addf(" ", "a=%s;", s.Algorithm())

	if s.Canonicalization != "" && !strings.EqualFold(s.Canonicalization, "simple") && !strings.EqualFold(s.Canonicalization, "simple/simple") {
		w.Addf(" ", "c=%s;", s.Canonicalization)
	}
	if s.Length >= 0 {
		w.Addf(" ", "l=%d;", s.Length)
	}
	if len(s.QueryMethods) > 0 && !(len(s.QueryMethods) == 1 && strings.EqualFold(s.QueryMethods[0], "dns/txt")) {
		w.Addf(" ", "q=%s;", strings.Join(s.QueryMethods, ":"))
	}
	if s.SignTime >= 0 {
		w.Addf(" ", "t=%d;", s.SignTime)
	}
	if s.ExpireTime >= 0 {
		w.Addf(" ", "x=%d;", s.ExpireTime)
	}

	// Each signed header name is added as its own element, so the writer can fold
	// between names. Only the first element gets a separating space.
	for i, v := range s.SignedHeaders {
		sep := ""
		if i == 0 {
			v = "h=" + v
			sep = " "
		}
		if i < len(s.SignedHeaders)-1 {
			v += ":"
		} else {
			v += ";"
		}
		// The value is data, not a format string: pass it through "%s" so a "%" in a
		// header name is written literally.
		w.Addf(sep, "%s", v)
	}

	// todo: wrap long headers? we can at least add FWS before the :
	for i, v := range s.CopiedHeaders {
		name, value, ok := strings.Cut(v, ":")
		if !ok {
			return "", fmt.Errorf("invalid header in copied headers (z=): %q", v)
		}
		v = name + ":" + packQpHdrValue(value)

		sep := ""
		if i == 0 {
			v = "z=" + v
			sep = " "
		}
		if i < len(s.CopiedHeaders)-1 {
			v += "|"
		} else {
			v += ";"
		}
		// packQpHdrValue leaves "%" unencoded, so v must not be used as format string.
		w.Addf(sep, "%s", v)
	}

	w.Addf(" ", "bh=%s;", base64.StdEncoding.EncodeToString(s.BodyHash))

	// The signature goes last. With an empty Signature only "b=" is written.
	w.Addf(" ", "b=")
	if len(s.Signature) > 0 {
		w.AddWrap([]byte(base64.StdEncoding.EncodeToString(s.Signature)), false)
	}
	w.Add("\r\n")
	return w.String(), nil
}
// packQpHdrValue encodes s for use as a header value in the z= field.
//
// Like quoted printable, but with "|" encoded as well.
// We also encode ":" because it is used as separator in DKIM headers which can
// cause trouble for "q", even though it is listed in dkim-safe-char,
// ../rfc/6376:497.
//
// Bytes that are printable ASCII (above space, below 0x7f) and not one of
// ";", "=", "|" or ":" are copied as is. All other bytes, including space and
// non-ASCII bytes, are written as "=" followed by two uppercase hex digits.
func packQpHdrValue(s string) string {
	// ../rfc/6376:474
	const hex = "0123456789ABCDEF"

	// Use a builder instead of repeated string concatenation, which would copy the
	// result for every input byte.
	var r strings.Builder
	r.Grow(len(s))
	for i := 0; i < len(s); i++ {
		b := s[i]
		if b > ' ' && b < 0x7f && b != ';' && b != '=' && b != '|' && b != ':' {
			r.WriteByte(b)
			continue
		}
		r.WriteByte('=')
		r.WriteByte(hex[b>>4])
		r.WriteByte(hex[b&0xf])
	}
	return r.String()
}
// Errors that parseSignature returns, wrapped with additional detail where
// available.
var (
errSigHeader = errors.New("not DKIM-Signature header")
errSigDuplicateTag = errors.New("duplicate tag")
errSigMissingCRLF = errors.New("missing crlf at end")
// Note: despite its name, errSigExpired reports that t= is not before x= within
// the header; it is not about the current time.
errSigExpired = errors.New("signature timestamp (t=) must be before signature expiration (x=)")
errSigIdentityDomain = errors.New("identity domain (i=) not under domain (d=)")
errSigMissingTag = errors.New("missing required tag")
errSigUnknownVersion = errors.New("unknown version")
errSigBodyHash = errors.New("bad body hash size given algorithm")
)
// parseSignature returns the parsed form of a DKIM-Signature header.
//
// buf must end in crlf, as it should have occurred in the mail message.
//
// The dkim signature with signature left empty ("b=") and without trailing
// crlf is returned, for use in verification.
//
// Problems are signalled internally by panicking with an error, which is
// recovered here and returned as err, with sig and verifySig set to nil. Panics
// with a non-error value are passed on. Validation errors wrap one of the
// errSig* values.
func parseSignature(buf []byte, smtputf8 bool) (sig *Sig, verifySig []byte, err error) {
	defer func() {
		x := recover()
		if x == nil {
			return
		}
		xerr, ok := x.(error)
		if !ok {
			panic(x)
		}
		sig = nil
		verifySig = nil
		err = xerr
	}()

	// xerrorf aborts parsing; the deferred function above turns it into err.
	xerrorf := func(format string, args ...any) {
		panic(fmt.Errorf(format, args...))
	}

	if !bytes.HasSuffix(buf, []byte("\r\n")) {
		xerrorf("%w", errSigMissingCRLF)
	}
	buf = buf[:len(buf)-2]

	ds := newSigWithDefaults()
	seen := map[string]struct{}{}
	p := parser{s: string(buf), smtputf8: smtputf8}
	name := p.xhdrName(false)
	if !strings.EqualFold(name, "DKIM-Signature") {
		xerrorf("%w", errSigHeader)
	}
	p.wsp()
	p.xtake(":")
	p.wsp()
	// ../rfc/6376:655
	// ../rfc/6376:656 ../rfc/6376-eid5070
	// ../rfc/6376:658 ../rfc/6376-eid5070
	for {
		p.fws()
		k := p.xtagName()
		p.fws()
		p.xtake("=")
		// Special case for "b", see below.
		if k != "b" {
			p.fws()
		}
		// Keys are case-sensitive: ../rfc/6376:679
		if _, ok := seen[k]; ok {
			// Duplicates not allowed: ../rfc/6376:683
			xerrorf("%w: %q", errSigDuplicateTag, k)
		}
		seen[k] = struct{}{}

		// ../rfc/6376:1021
		switch k {
		case "v":
			// ../rfc/6376:1025
			ds.Version = int(p.xnumber(10))
			if ds.Version != 1 {
				xerrorf("%w: version %d", errSigUnknownVersion, ds.Version)
			}
		case "a":
			// ../rfc/6376:1038
			ds.AlgorithmSign, ds.AlgorithmHash = p.xalgorithm()
		case "b":
			// ../rfc/6376:1054
			// To calculate the hash, we have to feed the DKIM-Signature header to the hash
			// function, but with the value for "b=" (the signature) left out. The parser
			// tracks all data that is read, except when drop is true.
			// ../rfc/6376:997
			// Surrounding whitespace must be cleared as well. ../rfc/6376:1659
			// Note: The RFC says "surrounding" whitespace, but whitespace is only allowed
			// before the value as part of the ABNF production for "b". Presumably the
			// intention is to ignore the trailing "[FWS]" for the tag-spec production,
			// ../rfc/6376:656
			// Another indication is the term "value portion", ../rfc/6376:1667. It appears to
			// mean everything after the "b=" part, instead of the actual value (either encoded
			// or decoded).
			p.drop = true
			p.fws()
			ds.Signature = p.xbase64()
			p.fws()
			p.drop = false
		case "bh":
			// ../rfc/6376:1076
			ds.BodyHash = p.xbase64()
		case "c":
			// ../rfc/6376:1088
			ds.Canonicalization = p.xcanonical()
			// ../rfc/6376:810
		case "d":
			// ../rfc/6376:1105
			ds.Domain = p.xdomain()
		case "h":
			// ../rfc/6376:1134
			ds.SignedHeaders = p.xsignedHeaderFields()
		case "i":
			// ../rfc/6376:1171
			id := p.xauid()
			ds.Identity = &id
		case "l":
			// ../rfc/6376:1244
			ds.Length = p.xbodyLength()
		case "q":
			// ../rfc/6376:1268
			ds.QueryMethods = p.xqueryMethods()
		case "s":
			// ../rfc/6376:1300
			ds.Selector = p.xselector()
		case "t":
			// ../rfc/6376:1310
			ds.SignTime = p.xtimestamp()
		case "x":
			// ../rfc/6376:1327
			ds.ExpireTime = p.xtimestamp()
		case "z":
			// ../rfc/6376:1361
			ds.CopiedHeaders = p.xcopiedHeaderFields()
		default:
			// We must ignore unknown fields. ../rfc/6376:692 ../rfc/6376:1022
			// At least one character is consumed, then everything up to the next ";".
			p.xchar() // ../rfc/6376-eid5070
			for !p.empty() && !p.hasPrefix(";") {
				p.xchar()
			}
		}
		p.fws()

		// The tag list ends at the end of input, optionally after a final ";".
		if p.empty() {
			break
		}
		p.xtake(";")
		if p.empty() {
			break
		}
	}

	// ../rfc/6376:2532
	required := []string{"v", "a", "b", "bh", "d", "h", "s"}
	for _, req := range required {
		if _, ok := seen[req]; !ok {
			xerrorf("%w: %q", errSigMissingTag, req)
		}
	}

	// The body hash must have the digest size of the hash algorithm.
	if strings.EqualFold(ds.AlgorithmHash, "sha1") && len(ds.BodyHash) != 20 {
		xerrorf("%w: got %d bytes, must be 20 for sha1", errSigBodyHash, len(ds.BodyHash))
	} else if strings.EqualFold(ds.AlgorithmHash, "sha256") && len(ds.BodyHash) != 32 {
		xerrorf("%w: got %d bytes, must be 32 for sha256", errSigBodyHash, len(ds.BodyHash))
	}

	// ../rfc/6376:1337
	if ds.SignTime >= 0 && ds.ExpireTime >= 0 && ds.SignTime >= ds.ExpireTime {
		xerrorf("%w", errSigExpired)
	}

	// Default identity is "@" plus domain. We don't set this value because we want to
	// keep the distinction with an absent value.
	// ../rfc/6376:1172 ../rfc/6376:2537 ../rfc/6376:2541
	if ds.Identity != nil && ds.Identity.Domain.ASCII != ds.Domain.ASCII && !strings.HasSuffix(ds.Identity.Domain.ASCII, "."+ds.Domain.ASCII) {
		xerrorf("%w: identity domain %q not under domain %q", errSigIdentityDomain, ds.Identity.Domain.ASCII, ds.Domain.ASCII)
	}

	return ds, []byte(p.tracked), nil
}