2023-01-30 16:27:06 +03:00
package dkim
import (
"encoding/base64"
"fmt"
"strconv"
"strings"
"github.com/mjl-/mox/dns"
"github.com/mjl-/mox/smtp"
)
// parseErr is the error value that xerrorf panics with when parsing fails.
type parseErr string

// Error returns the parse error message.
func (e parseErr) Error() string {
	msg := string(e)
	return msg
}

// Compile-time check that parseErr implements the error interface.
var _ error = parseErr("")
// parser holds the state for parsing one string: the input, the current
// position in it, and a record of the text consumed so far.
type parser struct {
	// s is the input; o is the byte offset into s of the next unconsumed byte.
	s string
	o int

	// tracked accumulates all data consumed while drop is false. drop is to be
	// set by the caller when parsing the value for "b=".
	tracked string
	drop    bool

	// smtputf8, if set, allows characters > 0x7f.
	smtputf8 bool
}
// xerrorf aborts parsing by panicking with a parseErr. The message is built
// from format and args. When unparsed input remains, that remainder is
// appended to the message as "(leftover ...)".
func (p *parser) xerrorf(format string, args ...any) {
	text := fmt.Sprintf(format, args...)
	if p.o >= len(p.s) {
		panic(parseErr(text))
	}
	panic(parseErr(fmt.Sprintf("%s (leftover %q)", text, p.s[p.o:])))
}
// track appends s to the record of consumed data, unless drop is set.
func (p *parser) track(s string) {
	if p.drop {
		return
	}
	p.tracked += s
}
// hasPrefix reports whether the unconsumed input starts with s. Nothing is
// consumed.
func (p *parser) hasPrefix(s string) bool {
	rest := p.s[p.o:]
	return strings.HasPrefix(rest, s)
}
// xtaken consumes the next n bytes of input, records them through track, and
// returns them. The slice expression panics if fewer than n bytes remain.
func (p *parser) xtaken(n int) string {
	end := p.o + n
	taken := p.s[p.o:end]
	p.o = end
	p.track(taken)
	return taken
}
2023-02-03 15:29:47 +03:00
func ( p * parser ) xtakefn ( ignoreFWS bool , fn func ( c rune , i int ) bool ) string {
var r string
2023-01-30 16:27:06 +03:00
for i , c := range p . s [ p . o : ] {
if ! fn ( c , i ) {
2023-02-03 15:29:47 +03:00
switch c {
case ' ' , '\t' , '\r' , '\n' :
continue
}
p . xtaken ( i )
return r
2023-01-30 16:27:06 +03:00
}
2023-02-03 15:29:47 +03:00
r += string ( c )
2023-01-30 16:27:06 +03:00
}
2023-02-03 15:29:47 +03:00
p . xtaken ( len ( p . s ) - p . o )
return r
2023-01-30 16:27:06 +03:00
}
// empty reports whether all input has been consumed.
func (p *parser) empty() bool {
	remaining := len(p.s) - p.o
	return remaining <= 0
}
// xnonempty raises a parse error (through xerrorf) if no input remains.
func (p *parser) xnonempty() {
	if !p.empty() {
		return
	}
	p.xerrorf("expected at least 1 more char")
}
2023-02-03 15:29:47 +03:00
func ( p * parser ) xtakefn1 ( ignoreFWS bool , fn func ( c rune , i int ) bool ) string {
var r string
2023-01-30 16:27:06 +03:00
p . xnonempty ( )
for i , c := range p . s [ p . o : ] {
if ! fn ( c , i ) {
2023-02-03 15:29:47 +03:00
switch c {
case ' ' , '\t' , '\r' , '\n' :
continue
}
2023-01-30 16:27:06 +03:00
if i == 0 {
p . xerrorf ( "expected at least 1 char" )
}
2023-02-03 15:29:47 +03:00
p . xtaken ( i )
return r
2023-01-30 16:27:06 +03:00
}
2023-02-03 15:29:47 +03:00
r += string ( c )
2023-01-30 16:27:06 +03:00
}
return p . xtaken ( len ( p . s ) - p . o )
}
func ( p * parser ) wsp ( ) {
2023-02-03 15:29:47 +03:00
p . xtakefn ( false , func ( c rune , i int ) bool {
2023-01-30 16:27:06 +03:00
return c == ' ' || c == '\t'
} )
}
// fws consumes optional folding whitespace: spaces/tabs, then at most one CRLF
// that is directly followed by a space or tab, then more spaces/tabs. Consuming
// nothing is fine.
func (p *parser) fws() {
	p.wsp()
	if !p.hasPrefix("\r\n ") && !p.hasPrefix("\r\n\t") {
		return
	}
	// Take CRLF plus the single whitespace character that follows it.
	p.xtaken(3)
	p.wsp()
}
// peekfws returns whether remaining text starts with s, optionally prefixed
// with fws. Nothing is consumed: both the offset and the tracked data are
// restored before returning.
func (p *parser) peekfws(s string) bool {
	// fws consumes through xtaken, which also appends to p.tracked. Both must be
	// restored, otherwise the peeked whitespace would stay in the tracked data
	// and be recorded again when the caller consumes it for real.
	o, tracked := p.o, p.tracked
	p.fws()
	r := p.hasPrefix(s)
	p.o, p.tracked = o, tracked
	return r
}
// xtake consumes the literal s and returns it. A parse error is raised
// (through xerrorf) if the remaining input does not start with s.
func (p *parser) xtake(s string) string {
	if !p.hasPrefix(s) {
		p.xerrorf("expected %q", s)
	}
	return p.xtaken(len(s))
}
// take consumes the literal s if the remaining input starts with it, and
// reports whether it did. On a mismatch nothing is consumed.
func (p *parser) take(s string) bool {
	if !p.hasPrefix(s) {
		return false
	}
	p.xtaken(len(s))
	return true
}
// ../rfc/6376:657
func ( p * parser ) xtagName ( ) string {
2023-02-03 15:29:47 +03:00
return p . xtakefn1 ( false , func ( c rune , i int ) bool {
2023-01-30 16:27:06 +03:00
return isalpha ( c ) || i > 0 && ( isdigit ( c ) || c == '_' )
} )
}
func ( p * parser ) xalgorithm ( ) ( string , string ) {
// ../rfc/6376:1046
xtagx := func ( c rune , i int ) bool {
return isalpha ( c ) || i > 0 && isdigit ( c )
}
2023-02-03 15:29:47 +03:00
algk := p . xtakefn1 ( false , xtagx )
2023-01-30 16:27:06 +03:00
p . xtake ( "-" )
2023-02-03 15:29:47 +03:00
algv := p . xtakefn1 ( false , xtagx )
2023-01-30 16:27:06 +03:00
return algk , algv
}
// fws in value is ignored. empty/no base64 characters is valid.
// ../rfc/6376:1021
// ../rfc/6376:1076
func ( p * parser ) xbase64 ( ) [ ] byte {
s := ""
2023-02-03 15:29:47 +03:00
p . xtakefn ( false , func ( c rune , i int ) bool {
2023-01-30 16:27:06 +03:00
if isalphadigit ( c ) || c == '+' || c == '/' || c == '=' {
s += string ( c )
return true
}
if c == ' ' || c == '\t' {
return true
}
rem := p . s [ p . o + i : ]
if strings . HasPrefix ( rem , "\r\n " ) || strings . HasPrefix ( rem , "\r\n\t" ) {
return true
}
if ( strings . HasPrefix ( rem , "\n " ) || strings . HasPrefix ( rem , "\n\t" ) ) && p . o + i - 1 > 0 && p . s [ p . o + i - 1 ] == '\r' {
return true
}
return false
} )
buf , err := base64 . StdEncoding . DecodeString ( s )
if err != nil {
p . xerrorf ( "decoding base64: %v" , err )
}
return buf
}
// xcanonical parses a canonicalization, in original case: a hyphenated word,
// optionally followed by "/" and a second hyphenated word. The returned string
// has the same form as the input.
func (p *parser) xcanonical() string {
	// ../rfc/6376:1100
	canon := p.xhyphenatedWord()
	if !p.take("/") {
		return canon
	}
	return canon + "/" + p.xhyphenatedWord()
}
func ( p * parser ) xdomain ( ) dns . Domain {
subdomain := func ( c rune , i int ) bool {
// domain names must always be a-labels, ../rfc/6376:1115 ../rfc/6376:1187 ../rfc/6376:1303
// todo: add a "lax" mode where underscore is allowed if this is a selector? seen in the wild, but invalid: ../rfc/6376:581 ../rfc/5321:2303
return isalphadigit ( c ) || ( i > 0 && c == '-' && p . o + 1 < len ( p . s ) )
}
2023-02-03 15:29:47 +03:00
s := p . xtakefn1 ( false , subdomain )
2023-01-30 16:27:06 +03:00
for p . hasPrefix ( "." ) {
2023-02-03 15:29:47 +03:00
s += p . xtake ( "." ) + p . xtakefn1 ( false , subdomain )
2023-01-30 16:27:06 +03:00
}
d , err := dns . ParseDomain ( s )
if err != nil {
p . xerrorf ( "parsing domain %q: %s" , s , err )
}
return d
}
2023-02-03 15:29:47 +03:00
func ( p * parser ) xhdrName ( ignoreFWS bool ) string {
2023-01-30 16:27:06 +03:00
// ../rfc/6376:473
// ../rfc/5322:1689
// BNF for hdr-name (field-name) allows ";", but DKIM disallows unencoded semicolons. ../rfc/6376:643
2023-02-03 15:29:47 +03:00
// ignoreFWS is needed for "z=", which can have FWS anywhere. ../rfc/6376:1372
return p . xtakefn1 ( ignoreFWS , func ( c rune , i int ) bool {
2023-01-30 16:27:06 +03:00
return c > ' ' && c < 0x7f && c != ':' && c != ';'
} )
}
func ( p * parser ) xsignedHeaderFields ( ) [ ] string {
// ../rfc/6376:1157
2023-02-03 15:29:47 +03:00
l := [ ] string { p . xhdrName ( false ) }
2023-01-30 16:27:06 +03:00
for p . peekfws ( ":" ) {
p . fws ( )
p . xtake ( ":" )
p . fws ( )
2023-02-03 15:29:47 +03:00
l = append ( l , p . xhdrName ( false ) )
2023-01-30 16:27:06 +03:00
}
return l
}
// xauid parses an identity of the form [localpart] "@" domain. When the input
// starts with "@", the returned Identity only has its Domain set. Otherwise it
// holds a pointer to the parsed localpart and the domain.
func (p *parser) xauid() Identity {
	// ../rfc/6376:1192
	// Localpart is optional.
	if !p.take("@") {
		lp := p.xlocalpart()
		p.xtake("@")
		return Identity{&lp, p.xdomain()}
	}
	return Identity{Domain: p.xdomain()}
}
// todo: reduce duplication between implementations: ../smtp/address.go:/xlocalpart ../dkim/parser.go:/xlocalpart ../smtpserver/parse.go:/xlocalpart

// xlocalpart parses a localpart: either a quoted string, or one or more atoms
// separated by dots. A parse error is raised (through xerrorf) if the result is
// longer than 128 bytes.
func (p *parser) xlocalpart() smtp.Localpart {
	// ../rfc/6376:434
	// ../rfc/5321:2316
	var s string
	// A quoted string starts with a double quote, without surrounding spaces.
	if p.hasPrefix(`"`) {
		s = p.xquotedString()
	} else {
		s = p.xatom()
		for p.take(".") {
			s += "." + p.xatom()
		}
	}
	// todo: have a strict parser that only allows the actual max of 64 bytes. some services have large localparts because of generated (bounce) addresses.
	if len(s) > 128 {
		// ../rfc/5321:3486
		p.xerrorf("localpart longer than 64 octets")
	}
	return smtp.Localpart(s)
}

// xquotedString parses a double-quoted string and returns its contents, with
// the surrounding quotes and the backslashes of escapes removed.
//
// After a backslash, any character from space up to but not including 0x7f is
// taken literally. Unescaped, characters from space up to but not including
// 0x7f are accepted, and characters above 0x7f only if smtputf8 is set. A parse
// error is raised (through xerrorf or xchar) for any other character, and when
// the input ends before the closing quote.
func (p *parser) xquotedString() string {
	p.xtake(`"`)
	var b strings.Builder
	var esc bool
	for {
		c := p.xchar()
		switch {
		case esc:
			if c < ' ' || c >= 0x7f {
				p.xerrorf("invalid localpart, bad escaped char %c", c)
			}
			b.WriteRune(c)
			esc = false
		case c == '\\':
			esc = true
		case c == '"':
			return b.String()
		case c >= ' ' && c < 0x7f || (c > 0x7f && p.smtputf8):
			b.WriteRune(c)
		default:
			p.xerrorf("invalid localpart, invalid character %c", c)
		}
	}
}
// xchar consumes and returns the next character. A parse error is raised
// (through xerrorf) when no input remains.
func (p *parser) xchar() rune {
	// We are careful to track invalid utf-8 properly.
	if p.empty() {
		p.xerrorf("need another character")
	}
	rest := p.s[p.o:]
	var first rune
	// The size of the first character is the offset at which the second one
	// starts, or all of the remaining input if there is no second character.
	size := len(rest)
	for i, c := range rest {
		if i > 0 {
			size = i
			break
		}
		first = c
	}
	p.track(rest[:size])
	p.o += size
	return first
}
func ( p * parser ) xatom ( ) string {
2023-02-03 15:29:47 +03:00
return p . xtakefn1 ( false , func ( c rune , i int ) bool {
2023-01-30 16:27:06 +03:00
switch c {
case '!' , '#' , '$' , '%' , '&' , '\'' , '*' , '+' , '-' , '/' , '=' , '?' , '^' , '_' , '`' , '{' , '|' , '}' , '~' :
return true
}
return isalphadigit ( c ) || ( c > 0x7f && p . smtputf8 )
} )
}
// xbodyLength parses a body length: a decimal number of at most 76 digits.
func (p *parser) xbodyLength() int64 {
	// ../rfc/6376:1265
	const maxDigits = 76
	return p.xnumber(maxDigits)
}
// xnumber parses a decimal number of at least one and at most maxdigits
// digits. A parse error is raised (through xerrorf) if there are no digits, if
// there are too many, or if the value cannot be parsed as an int64.
func (p *parser) xnumber(maxdigits int) int64 {
	rest := p.s[p.o:]
	// Count the leading ASCII digits.
	n := 0
	for n < len(rest) && rest[n] >= '0' && rest[n] <= '9' {
		n++
	}
	switch {
	case n == 0:
		p.xerrorf("expected digits")
	case n > maxdigits:
		p.xerrorf("too many digits")
	}
	v, err := strconv.ParseInt(p.xtaken(n), 10, 64)
	if err != nil {
		p.xerrorf("parsing digits: %s", err)
	}
	return v
}
// xqueryMethods parses one or more query methods separated by ":", with
// optional FWS on both sides of each colon, and returns them in input order.
func (p *parser) xqueryMethods() []string {
	// ../rfc/6376:1285
	l := []string{p.xqtagmethod()}
	for p.peekfws(":") {
		p.fws()
		p.xtake(":")
		// FWS is also allowed after the colon, as for the signed header fields.
		p.fws()
		l = append(l, p.xqtagmethod())
	}
	return l
}
func ( p * parser ) xqtagmethod ( ) string {
// ../rfc/6376:1295 ../rfc/6376-eid4810
s := p . xhyphenatedWord ( )
// ABNF production "x-sig-q-tag-args" should probably just have been
// "hyphenated-word". As qp-hdr-value, it will consume ":". A similar problem does
// not occur for "z" because it is also "|"-delimited. We work around the potential
// issue by parsing "dns/txt" explicitly.
rem := p . s [ p . o : ]
if strings . EqualFold ( s , "dns" ) && len ( rem ) >= len ( "/txt" ) && strings . EqualFold ( rem [ : len ( "/txt" ) ] , "/txt" ) {
s += p . xtaken ( 4 )
} else if p . take ( "/" ) {
2023-02-03 15:29:47 +03:00
s += "/" + p . xqp ( true , true , false )
2023-01-30 16:27:06 +03:00
}
return s
}
// isalpha reports whether c is an ASCII letter.
func isalpha(c rune) bool {
	switch {
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
// isdigit reports whether c is an ASCII digit.
func isdigit(c rune) bool {
	if c < '0' || c > '9' {
		return false
	}
	return true
}
func isalphadigit ( c rune ) bool {
return isalpha ( c ) || isdigit ( c )
}
// ../rfc/6376:469
func ( p * parser ) xhyphenatedWord ( ) string {
2023-02-03 15:29:47 +03:00
return p . xtakefn1 ( false , func ( c rune , i int ) bool {
2023-01-30 16:27:06 +03:00
return isalpha ( c ) || i > 0 && isdigit ( c ) || i > 0 && c == '-' && p . o + i + 1 < len ( p . s ) && isalphadigit ( rune ( p . s [ p . o + i + 1 ] ) )
} )
}
// ../rfc/6376:474
2023-02-03 15:29:47 +03:00
func ( p * parser ) xqphdrvalue ( ignoreFWS bool ) string {
return p . xqp ( true , false , ignoreFWS )
2023-01-30 16:27:06 +03:00
}
func ( p * parser ) xqpSection ( ) string {
2023-02-03 15:29:47 +03:00
return p . xqp ( false , false , false )
2023-01-30 16:27:06 +03:00
}
// dkim-quoted-printable (pipeEncoded true) or qp-section.
//
// It is described in terms of (lots of) modifications to MIME quoted-printable,
// but it may be simpler to just ignore that reference.
2023-02-03 15:29:47 +03:00
//
// ignoreFWS is required for "z=", which can have FWS anywhere.
func ( p * parser ) xqp ( pipeEncoded , colonEncoded , ignoreFWS bool ) string {
2023-01-30 16:27:06 +03:00
// ../rfc/6376:494 ../rfc/2045:1260
hex := func ( c byte ) rune {
if c >= '0' && c <= '9' {
return rune ( c - '0' )
}
return rune ( 10 + c - 'A' )
}
s := ""
for ! p . empty ( ) {
p . fws ( )
if pipeEncoded && p . hasPrefix ( "|" ) {
break
}
if colonEncoded && p . hasPrefix ( ":" ) {
break
}
2023-02-03 15:29:47 +03:00
if p . take ( "=" ) {
h := p . xtakefn ( ignoreFWS , func ( c rune , i int ) bool {
2023-01-30 16:27:06 +03:00
return i < 2 && ( c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' )
} )
if len ( h ) != 2 {
p . xerrorf ( "expected qp-hdr-value" )
}
c := ( hex ( h [ 0 ] ) << 4 ) | hex ( h [ 1 ] )
s += string ( c )
continue
}
2023-02-03 15:29:47 +03:00
x := p . xtakefn ( ignoreFWS , func ( c rune , i int ) bool {
2023-01-30 16:27:06 +03:00
return c > ' ' && c < 0x7f && c != ';' && c != '=' && ! ( pipeEncoded && c == '|' )
} )
if x == "" {
break
}
s += x
}
return s
}
// xselector parses a selector. It is parsed exactly like a domain, by xdomain.
func (p *parser) xselector() dns.Domain {
	selector := p.xdomain()
	return selector
}
// xtimestamp parses a timestamp: a decimal number of at most 12 digits.
func (p *parser) xtimestamp() int64 {
	// ../rfc/6376:1325 ../rfc/6376:1358
	const maxDigits = 12
	return p.xnumber(maxDigits)
}
// xcopiedHeaderFields parses one or more copied header fields separated by
// "|", with optional FWS after each "|", and returns them in input order.
func (p *parser) xcopiedHeaderFields() []string {
	// ../rfc/6376:1384
	fields := []string{p.xztagcopy()}
	for p.take("|") {
		p.fws()
		fields = append(fields, p.xztagcopy())
	}
	return fields
}
func ( p * parser ) xztagcopy ( ) string {
2023-02-03 15:29:47 +03:00
// ABNF does not mention FWS (unlike for other fields), but FWS is allowed everywhere in the value...
// ../rfc/6376:1386 ../rfc/6376:1372
f := p . xhdrName ( true )
2023-01-30 16:27:06 +03:00
p . fws ( )
p . xtake ( ":" )
2023-02-03 15:29:47 +03:00
v := p . xqphdrvalue ( true )
2023-01-30 16:27:06 +03:00
return f + ":" + v
}