mirror of
https://github.com/mjl-/mox.git
synced 2025-01-15 09:56:27 +03:00
8e6fe7459b
both when parsing our configs, and for incoming on smtp or in messages. so we properly compare things like é and e+accent as equal, and accept the different encodings of that same address.
503 lines
11 KiB
Go
503 lines
11 KiB
Go
package dkim
|
|
|
|
import (
|
|
"encoding/base64"
|
|
"fmt"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"golang.org/x/text/unicode/norm"
|
|
|
|
"github.com/mjl-/mox/dns"
|
|
"github.com/mjl-/mox/smtp"
|
|
)
|
|
|
|
// Pedantic enables stricter parsing.
|
|
var Pedantic bool
|
|
|
|
type parseErr string
|
|
|
|
func (e parseErr) Error() string {
|
|
return string(e)
|
|
}
|
|
|
|
var _ error = parseErr("")
|
|
|
|
type parser struct {
|
|
s string
|
|
o int // Offset into s.
|
|
tracked string // All data consumed, except when "drop" is true. To be set by caller when parsing the value for "b=".
|
|
drop bool
|
|
smtputf8 bool // If set, allow characters > 0x7f.
|
|
}
|
|
|
|
func (p *parser) xerrorf(format string, args ...any) {
|
|
msg := fmt.Sprintf(format, args...)
|
|
if p.o < len(p.s) {
|
|
msg = fmt.Sprintf("%s (leftover %q)", msg, p.s[p.o:])
|
|
}
|
|
panic(parseErr(msg))
|
|
}
|
|
|
|
func (p *parser) track(s string) {
|
|
if !p.drop {
|
|
p.tracked += s
|
|
}
|
|
}
|
|
|
|
func (p *parser) hasPrefix(s string) bool {
|
|
return strings.HasPrefix(p.s[p.o:], s)
|
|
}
|
|
|
|
func (p *parser) xtaken(n int) string {
|
|
r := p.s[p.o : p.o+n]
|
|
p.o += n
|
|
p.track(r)
|
|
return r
|
|
}
|
|
|
|
func (p *parser) xtakefn(ignoreFWS bool, fn func(c rune, i int) bool) string {
|
|
var r string
|
|
for i, c := range p.s[p.o:] {
|
|
if !fn(c, i) {
|
|
switch c {
|
|
case ' ', '\t', '\r', '\n':
|
|
continue
|
|
}
|
|
p.xtaken(i)
|
|
return r
|
|
}
|
|
r += string(c)
|
|
}
|
|
p.xtaken(len(p.s) - p.o)
|
|
return r
|
|
}
|
|
|
|
func (p *parser) empty() bool {
|
|
return p.o >= len(p.s)
|
|
}
|
|
|
|
func (p *parser) xnonempty() {
|
|
if p.o >= len(p.s) {
|
|
p.xerrorf("expected at least 1 more char")
|
|
}
|
|
}
|
|
|
|
func (p *parser) xtakefn1(ignoreFWS bool, fn func(c rune, i int) bool) string {
|
|
var r string
|
|
p.xnonempty()
|
|
for i, c := range p.s[p.o:] {
|
|
if !fn(c, i) {
|
|
switch c {
|
|
case ' ', '\t', '\r', '\n':
|
|
continue
|
|
}
|
|
if i == 0 {
|
|
p.xerrorf("expected at least 1 char")
|
|
}
|
|
p.xtaken(i)
|
|
return r
|
|
}
|
|
r += string(c)
|
|
}
|
|
return p.xtaken(len(p.s) - p.o)
|
|
}
|
|
|
|
func (p *parser) wsp() {
|
|
p.xtakefn(false, func(c rune, i int) bool {
|
|
return c == ' ' || c == '\t'
|
|
})
|
|
}
|
|
|
|
func (p *parser) fws() {
|
|
p.wsp()
|
|
if p.hasPrefix("\r\n ") || p.hasPrefix("\r\n\t") {
|
|
p.xtaken(3)
|
|
p.wsp()
|
|
}
|
|
}
|
|
|
|
// peekfws returns whether remaining text starts with s, optionally prefix with fws.
|
|
func (p *parser) peekfws(s string) bool {
|
|
o := p.o
|
|
p.fws()
|
|
r := p.hasPrefix(s)
|
|
p.o = o
|
|
return r
|
|
}
|
|
|
|
func (p *parser) xtake(s string) string {
|
|
if !strings.HasPrefix(p.s[p.o:], s) {
|
|
p.xerrorf("expected %q", s)
|
|
}
|
|
return p.xtaken(len(s))
|
|
}
|
|
|
|
func (p *parser) take(s string) bool {
|
|
if strings.HasPrefix(p.s[p.o:], s) {
|
|
p.o += len(s)
|
|
p.track(s)
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// ../rfc/6376:657
|
|
func (p *parser) xtagName() string {
|
|
return p.xtakefn1(false, func(c rune, i int) bool {
|
|
return isalpha(c) || i > 0 && (isdigit(c) || c == '_')
|
|
})
|
|
}
|
|
|
|
func (p *parser) xalgorithm() (string, string) {
|
|
// ../rfc/6376:1046
|
|
xtagx := func(c rune, i int) bool {
|
|
return isalpha(c) || i > 0 && isdigit(c)
|
|
}
|
|
algk := p.xtakefn1(false, xtagx)
|
|
p.xtake("-")
|
|
algv := p.xtakefn1(false, xtagx)
|
|
return algk, algv
|
|
}
|
|
|
|
// fws in value is ignored. empty/no base64 characters is valid.
|
|
// ../rfc/6376:1021
|
|
// ../rfc/6376:1076
|
|
func (p *parser) xbase64() []byte {
|
|
s := ""
|
|
p.xtakefn(false, func(c rune, i int) bool {
|
|
if isalphadigit(c) || c == '+' || c == '/' || c == '=' {
|
|
s += string(c)
|
|
return true
|
|
}
|
|
if c == ' ' || c == '\t' {
|
|
return true
|
|
}
|
|
rem := p.s[p.o+i:]
|
|
if strings.HasPrefix(rem, "\r\n ") || strings.HasPrefix(rem, "\r\n\t") {
|
|
return true
|
|
}
|
|
if (strings.HasPrefix(rem, "\n ") || strings.HasPrefix(rem, "\n\t")) && p.o+i-1 > 0 && p.s[p.o+i-1] == '\r' {
|
|
return true
|
|
}
|
|
return false
|
|
})
|
|
buf, err := base64.StdEncoding.DecodeString(s)
|
|
if err != nil {
|
|
p.xerrorf("decoding base64: %v", err)
|
|
}
|
|
return buf
|
|
}
|
|
|
|
// parses canonicalization in original case.
|
|
func (p *parser) xcanonical() string {
|
|
// ../rfc/6376:1100
|
|
s := p.xhyphenatedWord()
|
|
if p.take("/") {
|
|
return s + "/" + p.xhyphenatedWord()
|
|
}
|
|
return s
|
|
}
|
|
|
|
func (p *parser) xdomainselector(isselector bool) dns.Domain {
|
|
subdomain := func(c rune, i int) bool {
|
|
// domain names must always be a-labels, ../rfc/6376:1115 ../rfc/6376:1187 ../rfc/6376:1303
|
|
// dkim selectors with underscores happen in the wild, accept them when not in
|
|
// pedantic mode. ../rfc/6376:581 ../rfc/5321:2303
|
|
return isalphadigit(c) || (i > 0 && (c == '-' || isselector && !Pedantic && c == '_') && p.o+1 < len(p.s))
|
|
}
|
|
s := p.xtakefn1(false, subdomain)
|
|
for p.hasPrefix(".") {
|
|
s += p.xtake(".") + p.xtakefn1(false, subdomain)
|
|
}
|
|
if isselector {
|
|
// Not to be interpreted as IDNA.
|
|
return dns.Domain{ASCII: strings.ToLower(s)}
|
|
}
|
|
d, err := dns.ParseDomain(s)
|
|
if err != nil {
|
|
p.xerrorf("parsing domain %q: %s", s, err)
|
|
}
|
|
return d
|
|
}
|
|
|
|
func (p *parser) xdomain() dns.Domain {
|
|
return p.xdomainselector(false)
|
|
}
|
|
|
|
func (p *parser) xselector() dns.Domain {
|
|
return p.xdomainselector(true)
|
|
}
|
|
|
|
func (p *parser) xhdrName(ignoreFWS bool) string {
|
|
// ../rfc/6376:473
|
|
// ../rfc/5322:1689
|
|
// BNF for hdr-name (field-name) allows ";", but DKIM disallows unencoded semicolons. ../rfc/6376:643
|
|
// ignoreFWS is needed for "z=", which can have FWS anywhere. ../rfc/6376:1372
|
|
return p.xtakefn1(ignoreFWS, func(c rune, i int) bool {
|
|
return c > ' ' && c < 0x7f && c != ':' && c != ';'
|
|
})
|
|
}
|
|
|
|
func (p *parser) xsignedHeaderFields() []string {
|
|
// ../rfc/6376:1157
|
|
l := []string{p.xhdrName(false)}
|
|
for p.peekfws(":") {
|
|
p.fws()
|
|
p.xtake(":")
|
|
p.fws()
|
|
l = append(l, p.xhdrName(false))
|
|
}
|
|
return l
|
|
}
|
|
|
|
func (p *parser) xauid() Identity {
|
|
// ../rfc/6376:1192
|
|
// Localpart is optional.
|
|
if p.take("@") {
|
|
return Identity{Domain: p.xdomain()}
|
|
}
|
|
lp := p.xlocalpart()
|
|
p.xtake("@")
|
|
dom := p.xdomain()
|
|
return Identity{&lp, dom}
|
|
}
|
|
|
|
// todo: reduce duplication between implementations: ../smtp/address.go:/xlocalpart ../dkim/parser.go:/xlocalpart ../smtpserver/parse.go:/xlocalpart
|
|
func (p *parser) xlocalpart() smtp.Localpart {
|
|
// ../rfc/6376:434
|
|
// ../rfc/5321:2316
|
|
var s string
|
|
if p.hasPrefix(`"`) {
|
|
s = p.xquotedString()
|
|
} else {
|
|
s = p.xatom()
|
|
for p.take(".") {
|
|
s += "." + p.xatom()
|
|
}
|
|
}
|
|
// In the wild, some services use large localparts for generated (bounce) addresses.
|
|
if Pedantic && len(s) > 64 || len(s) > 128 {
|
|
// ../rfc/5321:3486
|
|
p.xerrorf("localpart longer than 64 octets")
|
|
}
|
|
return smtp.Localpart(norm.NFC.String(s))
|
|
}
|
|
|
|
func (p *parser) xquotedString() string {
|
|
p.xtake(`"`)
|
|
var s string
|
|
var esc bool
|
|
for {
|
|
c := p.xchar()
|
|
if esc {
|
|
if c >= ' ' && c < 0x7f {
|
|
s += string(c)
|
|
esc = false
|
|
continue
|
|
}
|
|
p.xerrorf("invalid localpart, bad escaped char %c", c)
|
|
}
|
|
if c == '\\' {
|
|
esc = true
|
|
continue
|
|
}
|
|
if c == '"' {
|
|
return s
|
|
}
|
|
if c >= ' ' && c < 0x7f && c != '\\' && c != '"' || (c > 0x7f && p.smtputf8) {
|
|
s += string(c)
|
|
continue
|
|
}
|
|
p.xerrorf("invalid localpart, invalid character %c", c)
|
|
}
|
|
}
|
|
|
|
func (p *parser) xchar() rune {
|
|
// We are careful to track invalid utf-8 properly.
|
|
if p.empty() {
|
|
p.xerrorf("need another character")
|
|
}
|
|
var r rune
|
|
var o int
|
|
for i, c := range p.s[p.o:] {
|
|
if i > 0 {
|
|
o = i
|
|
break
|
|
}
|
|
r = c
|
|
}
|
|
if o == 0 {
|
|
p.track(p.s[p.o:])
|
|
p.o = len(p.s)
|
|
} else {
|
|
p.track(p.s[p.o : p.o+o])
|
|
p.o += o
|
|
}
|
|
return r
|
|
}
|
|
|
|
func (p *parser) xatom() string {
|
|
return p.xtakefn1(false, func(c rune, i int) bool {
|
|
switch c {
|
|
case '!', '#', '$', '%', '&', '\'', '*', '+', '-', '/', '=', '?', '^', '_', '`', '{', '|', '}', '~':
|
|
return true
|
|
}
|
|
return isalphadigit(c) || (c > 0x7f && p.smtputf8)
|
|
})
|
|
}
|
|
|
|
func (p *parser) xbodyLength() int64 {
|
|
// ../rfc/6376:1265
|
|
return p.xnumber(76)
|
|
}
|
|
|
|
func (p *parser) xnumber(maxdigits int) int64 {
|
|
o := -1
|
|
for i, c := range p.s[p.o:] {
|
|
if c >= '0' && c <= '9' {
|
|
o = i
|
|
} else {
|
|
break
|
|
}
|
|
}
|
|
if o == -1 {
|
|
p.xerrorf("expected digits")
|
|
}
|
|
if o+1 > maxdigits {
|
|
p.xerrorf("too many digits")
|
|
}
|
|
v, err := strconv.ParseInt(p.xtaken(o+1), 10, 64)
|
|
if err != nil {
|
|
p.xerrorf("parsing digits: %s", err)
|
|
}
|
|
return v
|
|
}
|
|
|
|
func (p *parser) xqueryMethods() []string {
|
|
// ../rfc/6376:1285
|
|
l := []string{p.xqtagmethod()}
|
|
for p.peekfws(":") {
|
|
p.fws()
|
|
p.xtake(":")
|
|
l = append(l, p.xqtagmethod())
|
|
}
|
|
return l
|
|
}
|
|
|
|
func (p *parser) xqtagmethod() string {
|
|
// ../rfc/6376:1295 ../rfc/6376-eid4810
|
|
s := p.xhyphenatedWord()
|
|
// ABNF production "x-sig-q-tag-args" should probably just have been
|
|
// "hyphenated-word". As qp-hdr-value, it will consume ":". A similar problem does
|
|
// not occur for "z" because it is also "|"-delimited. We work around the potential
|
|
// issue by parsing "dns/txt" explicitly.
|
|
rem := p.s[p.o:]
|
|
if strings.EqualFold(s, "dns") && len(rem) >= len("/txt") && strings.EqualFold(rem[:len("/txt")], "/txt") {
|
|
s += p.xtaken(4)
|
|
} else if p.take("/") {
|
|
s += "/" + p.xqp(true, true, false)
|
|
}
|
|
return s
|
|
}
|
|
|
|
func isalpha(c rune) bool {
|
|
return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z'
|
|
}
|
|
|
|
func isdigit(c rune) bool {
|
|
return c >= '0' && c <= '9'
|
|
}
|
|
|
|
func isalphadigit(c rune) bool {
|
|
return isalpha(c) || isdigit(c)
|
|
}
|
|
|
|
// ../rfc/6376:469
|
|
func (p *parser) xhyphenatedWord() string {
|
|
return p.xtakefn1(false, func(c rune, i int) bool {
|
|
return isalpha(c) || i > 0 && isdigit(c) || i > 0 && c == '-' && p.o+i+1 < len(p.s) && isalphadigit(rune(p.s[p.o+i+1]))
|
|
})
|
|
}
|
|
|
|
// ../rfc/6376:474
|
|
func (p *parser) xqphdrvalue(ignoreFWS bool) string {
|
|
return p.xqp(true, false, ignoreFWS)
|
|
}
|
|
|
|
func (p *parser) xqpSection() string {
|
|
return p.xqp(false, false, false)
|
|
}
|
|
|
|
// dkim-quoted-printable (pipeEncoded true) or qp-section.
|
|
//
|
|
// It is described in terms of (lots of) modifications to MIME quoted-printable,
|
|
// but it may be simpler to just ignore that reference.
|
|
//
|
|
// ignoreFWS is required for "z=", which can have FWS anywhere.
|
|
func (p *parser) xqp(pipeEncoded, colonEncoded, ignoreFWS bool) string {
|
|
// ../rfc/6376:494 ../rfc/2045:1260
|
|
|
|
hex := func(c byte) rune {
|
|
if c >= '0' && c <= '9' {
|
|
return rune(c - '0')
|
|
}
|
|
return rune(10 + c - 'A')
|
|
}
|
|
|
|
s := ""
|
|
for !p.empty() {
|
|
p.fws()
|
|
if pipeEncoded && p.hasPrefix("|") {
|
|
break
|
|
}
|
|
if colonEncoded && p.hasPrefix(":") {
|
|
break
|
|
}
|
|
if p.take("=") {
|
|
h := p.xtakefn(ignoreFWS, func(c rune, i int) bool {
|
|
return i < 2 && (c >= '0' && c <= '9' || c >= 'A' && c <= 'Z')
|
|
})
|
|
if len(h) != 2 {
|
|
p.xerrorf("expected qp-hdr-value")
|
|
}
|
|
c := (hex(h[0]) << 4) | hex(h[1])
|
|
s += string(c)
|
|
continue
|
|
}
|
|
x := p.xtakefn(ignoreFWS, func(c rune, i int) bool {
|
|
return c > ' ' && c < 0x7f && c != ';' && c != '=' && !(pipeEncoded && c == '|')
|
|
})
|
|
if x == "" {
|
|
break
|
|
}
|
|
s += x
|
|
}
|
|
return s
|
|
}
|
|
|
|
func (p *parser) xtimestamp() int64 {
|
|
// ../rfc/6376:1325 ../rfc/6376:1358
|
|
return p.xnumber(12)
|
|
}
|
|
|
|
func (p *parser) xcopiedHeaderFields() []string {
|
|
// ../rfc/6376:1384
|
|
l := []string{p.xztagcopy()}
|
|
for p.hasPrefix("|") {
|
|
p.xtake("|")
|
|
p.fws()
|
|
l = append(l, p.xztagcopy())
|
|
}
|
|
return l
|
|
}
|
|
|
|
func (p *parser) xztagcopy() string {
|
|
// ABNF does not mention FWS (unlike for other fields), but FWS is allowed everywhere in the value...
|
|
// ../rfc/6376:1386 ../rfc/6376:1372
|
|
f := p.xhdrName(true)
|
|
p.fws()
|
|
p.xtake(":")
|
|
v := p.xqphdrvalue(true)
|
|
return f + ":" + v
|
|
}
|