mirror of
https://github.com/mjl-/mox.git
synced 2024-12-26 16:33:47 +03:00
32b549b260
we add various information while analysing an incoming message. like dkim/spf/ip reputation. and content-based junk filter threshold/result and ham/spam words used. for issue #179 by Fell and #157 by mattfbacon
353 lines
11 KiB
Go
353 lines
11 KiB
Go
package dkim
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/base64"
|
|
"errors"
|
|
"fmt"
|
|
"strings"
|
|
|
|
"github.com/mjl-/mox/dns"
|
|
"github.com/mjl-/mox/message"
|
|
"github.com/mjl-/mox/smtp"
|
|
)
|
|
|
|
// Sig is a DKIM-Signature header.
|
|
//
|
|
// String values must be compared case insensitively.
|
|
type Sig struct {
|
|
// Required fields.
|
|
Version int // Version, 1. Field "v". Always the first field.
|
|
AlgorithmSign string // "rsa" or "ed25519". Field "a".
|
|
AlgorithmHash string // "sha256" or the deprecated "sha1" (deprecated). Field "a".
|
|
Signature []byte // Field "b".
|
|
BodyHash []byte // Field "bh".
|
|
Domain dns.Domain // Field "d".
|
|
SignedHeaders []string // Duplicates are meaningful. Field "h".
|
|
Selector dns.Domain // Selector, for looking DNS TXT record at <s>._domainkey.<domain>. Field "s".
|
|
|
|
// Optional fields.
|
|
// Canonicalization is the transformation of header and/or body before hashing. The
|
|
// value is in original case, but must be compared case-insensitively. Normally two
|
|
// slash-separated values: header canonicalization and body canonicalization. But
|
|
// the "simple" means "simple/simple" and "relaxed" means "relaxed/simple". Field
|
|
// "c".
|
|
Canonicalization string
|
|
Length int64 // Body length to verify, default -1 for whole body. Field "l".
|
|
Identity *Identity // AUID (agent/user id). If nil and an identity is needed, should be treated as an Identity without localpart and Domain from d= field. Field "i".
|
|
QueryMethods []string // For public key, currently known value is "dns/txt" (should be compared case-insensitively). If empty, dns/txt must be assumed. Field "q".
|
|
SignTime int64 // Unix epoch. -1 if unset. Field "t".
|
|
ExpireTime int64 // Unix epoch. -1 if unset. Field "x".
|
|
CopiedHeaders []string // Copied header fields. Field "z".
|
|
}
|
|
|
|
// Identity is used for the optional i= field in a DKIM-Signature header. It uses
|
|
// the syntax of an email address, but does not necessarily represent one.
|
|
type Identity struct {
|
|
Localpart *smtp.Localpart // Optional.
|
|
Domain dns.Domain
|
|
}
|
|
|
|
// String returns a value for use in the i= DKIM-Signature field.
|
|
func (i Identity) String() string {
|
|
s := "@" + i.Domain.ASCII
|
|
// We need localpart as pointer to indicate it is missing because localparts can be
|
|
// "" which we store (decoded) as empty string and we need to differentiate.
|
|
if i.Localpart != nil {
|
|
s = i.Localpart.String() + s
|
|
}
|
|
return s
|
|
}
|
|
|
|
func newSigWithDefaults() *Sig {
|
|
return &Sig{
|
|
Canonicalization: "simple/simple",
|
|
Length: -1,
|
|
SignTime: -1,
|
|
ExpireTime: -1,
|
|
}
|
|
}
|
|
|
|
// Algorithm returns an algorithm string for use in the "a" field. E.g.
|
|
// "ed25519-sha256".
|
|
func (s Sig) Algorithm() string {
|
|
return s.AlgorithmSign + "-" + s.AlgorithmHash
|
|
}
|
|
|
|
// Header returns the DKIM-Signature header in string form, to be prepended to a
|
|
// message, including DKIM-Signature field name and trailing \r\n.
|
|
func (s *Sig) Header() (string, error) {
|
|
// ../rfc/6376:1021
|
|
// todo: make a higher-level writer that accepts pairs, and only folds to next line when needed.
|
|
w := &message.HeaderWriter{}
|
|
w.Addf("", "DKIM-Signature: v=%d;", s.Version)
|
|
// Domain names must always be in ASCII. ../rfc/6376:1115 ../rfc/6376:1187 ../rfc/6376:1303
|
|
w.Addf(" ", "d=%s;", s.Domain.ASCII)
|
|
w.Addf(" ", "s=%s;", s.Selector.ASCII)
|
|
if s.Identity != nil {
|
|
w.Addf(" ", "i=%s;", s.Identity.String()) // todo: Is utf-8 ok here?
|
|
}
|
|
w.Addf(" ", "a=%s;", s.Algorithm())
|
|
|
|
if s.Canonicalization != "" && !strings.EqualFold(s.Canonicalization, "simple") && !strings.EqualFold(s.Canonicalization, "simple/simple") {
|
|
w.Addf(" ", "c=%s;", s.Canonicalization)
|
|
}
|
|
if s.Length >= 0 {
|
|
w.Addf(" ", "l=%d;", s.Length)
|
|
}
|
|
if len(s.QueryMethods) > 0 && !(len(s.QueryMethods) == 1 && strings.EqualFold(s.QueryMethods[0], "dns/txt")) {
|
|
w.Addf(" ", "q=%s;", strings.Join(s.QueryMethods, ":"))
|
|
}
|
|
if s.SignTime >= 0 {
|
|
w.Addf(" ", "t=%d;", s.SignTime)
|
|
}
|
|
if s.ExpireTime >= 0 {
|
|
w.Addf(" ", "x=%d;", s.ExpireTime)
|
|
}
|
|
|
|
if len(s.SignedHeaders) > 0 {
|
|
for i, v := range s.SignedHeaders {
|
|
sep := ""
|
|
if i == 0 {
|
|
v = "h=" + v
|
|
sep = " "
|
|
}
|
|
if i < len(s.SignedHeaders)-1 {
|
|
v += ":"
|
|
} else if i == len(s.SignedHeaders)-1 {
|
|
v += ";"
|
|
}
|
|
w.Addf(sep, v)
|
|
}
|
|
}
|
|
if len(s.CopiedHeaders) > 0 {
|
|
// todo: wrap long headers? we can at least add FWS before the :
|
|
for i, v := range s.CopiedHeaders {
|
|
t := strings.SplitN(v, ":", 2)
|
|
if len(t) == 2 {
|
|
v = t[0] + ":" + packQpHdrValue(t[1])
|
|
} else {
|
|
return "", fmt.Errorf("invalid header in copied headers (z=): %q", v)
|
|
}
|
|
sep := ""
|
|
if i == 0 {
|
|
v = "z=" + v
|
|
sep = " "
|
|
}
|
|
if i < len(s.CopiedHeaders)-1 {
|
|
v += "|"
|
|
} else if i == len(s.CopiedHeaders)-1 {
|
|
v += ";"
|
|
}
|
|
w.Addf(sep, v)
|
|
}
|
|
}
|
|
|
|
w.Addf(" ", "bh=%s;", base64.StdEncoding.EncodeToString(s.BodyHash))
|
|
|
|
w.Addf(" ", "b=")
|
|
if len(s.Signature) > 0 {
|
|
w.AddWrap([]byte(base64.StdEncoding.EncodeToString(s.Signature)), false)
|
|
}
|
|
w.Add("\r\n")
|
|
return w.String(), nil
|
|
}
|
|
|
|
// packQpHdrValue encodes s for use as a header value in the "z=" (copied
// headers) field.
//
// Like quoted printable, but with "|" encoded as well.
// We also encode ":" because it is used as separator in DKIM headers which can
// cause trouble for "q", even though it is listed in dkim-safe-char,
// ../rfc/6376:497.
//
// Bytes that are printable ASCII (above space, below 0x7f) and not one of
// ";", "=", "|", ":" are copied unchanged. All other bytes, including space,
// control characters and non-ASCII bytes, are written as "=" followed by two
// uppercase hex digits.
func packQpHdrValue(s string) string {
	// ../rfc/6376:474
	const hex = "0123456789ABCDEF"
	var r strings.Builder
	// At least len(s) bytes are needed; the builder grows for encoded bytes.
	r.Grow(len(s))
	for i := 0; i < len(s); i++ {
		b := s[i]
		if b > ' ' && b < 0x7f && b != ';' && b != '=' && b != '|' && b != ':' {
			r.WriteByte(b)
		} else {
			r.WriteByte('=')
			r.WriteByte(hex[b>>4])
			r.WriteByte(hex[b&0xf])
		}
	}
	return r.String()
}
|
|
|
|
// Errors for parsing a DKIM-Signature header. They are wrapped with %w where
// raised, so callers can match them with errors.Is.
var (
	// Header field name is not DKIM-Signature.
	errSigHeader = errors.New("not DKIM-Signature header")
	// A tag occurred more than once.
	errSigDuplicateTag = errors.New("duplicate tag")
	// Input did not end in \r\n.
	errSigMissingCRLF = errors.New("missing crlf at end")
	// Signing time (t=) is not before expiration time (x=).
	errSigExpired = errors.New("signature timestamp (t=) must be before signature expiration (x=)")
	// Domain of identity (i=) is not the signing domain (d=) or a subdomain of it.
	errSigIdentityDomain = errors.New("identity domain (i=) not under domain (d=)")
	// One of the required tags is absent.
	errSigMissingTag = errors.New("missing required tag")
	// Version (v=) is not 1.
	errSigUnknownVersion = errors.New("unknown version")
	// Length of body hash (bh=) does not match the hash algorithm.
	errSigBodyHash = errors.New("bad body hash size given algorithm")
)
|
|
|
|
// parseSignatures returns the parsed form of a DKIM-Signature header.
|
|
//
|
|
// buf must end in crlf, as it should have occurred in the mail message.
|
|
//
|
|
// The dkim signature with signature left empty ("b=") and without trailing
|
|
// crlf is returned, for use in verification.
|
|
func parseSignature(buf []byte, smtputf8 bool) (sig *Sig, verifySig []byte, err error) {
|
|
defer func() {
|
|
if x := recover(); x == nil {
|
|
return
|
|
} else if xerr, ok := x.(error); ok {
|
|
sig = nil
|
|
verifySig = nil
|
|
err = xerr
|
|
} else {
|
|
panic(x)
|
|
}
|
|
}()
|
|
|
|
xerrorf := func(format string, args ...any) {
|
|
panic(fmt.Errorf(format, args...))
|
|
}
|
|
|
|
if !bytes.HasSuffix(buf, []byte("\r\n")) {
|
|
xerrorf("%w", errSigMissingCRLF)
|
|
}
|
|
buf = buf[:len(buf)-2]
|
|
|
|
ds := newSigWithDefaults()
|
|
seen := map[string]struct{}{}
|
|
p := parser{s: string(buf), smtputf8: smtputf8}
|
|
name := p.xhdrName(false)
|
|
if !strings.EqualFold(name, "DKIM-Signature") {
|
|
xerrorf("%w", errSigHeader)
|
|
}
|
|
p.wsp()
|
|
p.xtake(":")
|
|
p.wsp()
|
|
// ../rfc/6376:655
|
|
// ../rfc/6376:656 ../rfc/6376-eid5070
|
|
// ../rfc/6376:658 ../rfc/6376-eid5070
|
|
for {
|
|
p.fws()
|
|
k := p.xtagName()
|
|
p.fws()
|
|
p.xtake("=")
|
|
// Special case for "b", see below.
|
|
if k != "b" {
|
|
p.fws()
|
|
}
|
|
// Keys are case-sensitive: ../rfc/6376:679
|
|
if _, ok := seen[k]; ok {
|
|
// Duplicates not allowed: ../rfc/6376:683
|
|
xerrorf("%w: %q", errSigDuplicateTag, k)
|
|
break
|
|
}
|
|
seen[k] = struct{}{}
|
|
|
|
// ../rfc/6376:1021
|
|
switch k {
|
|
case "v":
|
|
// ../rfc/6376:1025
|
|
ds.Version = int(p.xnumber(10))
|
|
if ds.Version != 1 {
|
|
xerrorf("%w: version %d", errSigUnknownVersion, ds.Version)
|
|
}
|
|
case "a":
|
|
// ../rfc/6376:1038
|
|
ds.AlgorithmSign, ds.AlgorithmHash = p.xalgorithm()
|
|
case "b":
|
|
// ../rfc/6376:1054
|
|
// To calculate the hash, we have to feed the DKIM-Signature header to the hash
|
|
// function, but with the value for "b=" (the signature) left out. The parser
|
|
// tracks all data that is read, except when drop is true.
|
|
// ../rfc/6376:997
|
|
// Surrounding whitespace must be cleared as well. ../rfc/6376:1659
|
|
// Note: The RFC says "surrounding" whitespace, but whitespace is only allowed
|
|
// before the value as part of the ABNF production for "b". Presumably the
|
|
// intention is to ignore the trailing "[FWS]" for the tag-spec production,
|
|
// ../rfc/6376:656
|
|
// Another indication is the term "value portion", ../rfc/6376:1667. It appears to
|
|
// mean everything after the "b=" part, instead of the actual value (either encoded
|
|
// or decoded).
|
|
p.drop = true
|
|
p.fws()
|
|
ds.Signature = p.xbase64()
|
|
p.fws()
|
|
p.drop = false
|
|
case "bh":
|
|
// ../rfc/6376:1076
|
|
ds.BodyHash = p.xbase64()
|
|
case "c":
|
|
// ../rfc/6376:1088
|
|
ds.Canonicalization = p.xcanonical()
|
|
// ../rfc/6376:810
|
|
case "d":
|
|
// ../rfc/6376:1105
|
|
ds.Domain = p.xdomain()
|
|
case "h":
|
|
// ../rfc/6376:1134
|
|
ds.SignedHeaders = p.xsignedHeaderFields()
|
|
case "i":
|
|
// ../rfc/6376:1171
|
|
id := p.xauid()
|
|
ds.Identity = &id
|
|
case "l":
|
|
// ../rfc/6376:1244
|
|
ds.Length = p.xbodyLength()
|
|
case "q":
|
|
// ../rfc/6376:1268
|
|
ds.QueryMethods = p.xqueryMethods()
|
|
case "s":
|
|
// ../rfc/6376:1300
|
|
ds.Selector = p.xselector()
|
|
case "t":
|
|
// ../rfc/6376:1310
|
|
ds.SignTime = p.xtimestamp()
|
|
case "x":
|
|
// ../rfc/6376:1327
|
|
ds.ExpireTime = p.xtimestamp()
|
|
case "z":
|
|
// ../rfc/6376:1361
|
|
ds.CopiedHeaders = p.xcopiedHeaderFields()
|
|
default:
|
|
// We must ignore unknown fields. ../rfc/6376:692 ../rfc/6376:1022
|
|
p.xchar() // ../rfc/6376-eid5070
|
|
for !p.empty() && !p.hasPrefix(";") {
|
|
p.xchar()
|
|
}
|
|
}
|
|
p.fws()
|
|
|
|
if p.empty() {
|
|
break
|
|
}
|
|
p.xtake(";")
|
|
if p.empty() {
|
|
break
|
|
}
|
|
}
|
|
|
|
// ../rfc/6376:2532
|
|
required := []string{"v", "a", "b", "bh", "d", "h", "s"}
|
|
for _, req := range required {
|
|
if _, ok := seen[req]; !ok {
|
|
xerrorf("%w: %q", errSigMissingTag, req)
|
|
}
|
|
}
|
|
|
|
if strings.EqualFold(ds.AlgorithmHash, "sha1") && len(ds.BodyHash) != 20 {
|
|
xerrorf("%w: got %d bytes, must be 20 for sha1", errSigBodyHash, len(ds.BodyHash))
|
|
} else if strings.EqualFold(ds.AlgorithmHash, "sha256") && len(ds.BodyHash) != 32 {
|
|
xerrorf("%w: got %d bytes, must be 32 for sha256", errSigBodyHash, len(ds.BodyHash))
|
|
}
|
|
|
|
// ../rfc/6376:1337
|
|
if ds.SignTime >= 0 && ds.ExpireTime >= 0 && ds.SignTime >= ds.ExpireTime {
|
|
xerrorf("%w", errSigExpired)
|
|
}
|
|
|
|
// Default identity is "@" plus domain. We don't set this value because we want to
|
|
// keep the distinction between absent value.
|
|
// ../rfc/6376:1172 ../rfc/6376:2537 ../rfc/6376:2541
|
|
if ds.Identity != nil && ds.Identity.Domain.ASCII != ds.Domain.ASCII && !strings.HasSuffix(ds.Identity.Domain.ASCII, "."+ds.Domain.ASCII) {
|
|
xerrorf("%w: identity domain %q not under domain %q", errSigIdentityDomain, ds.Identity.Domain.ASCII, ds.Domain.ASCII)
|
|
}
|
|
|
|
return ds, []byte(p.tracked), nil
|
|
}
|