mirror of
https://github.com/mjl-/mox.git
synced 2024-12-25 16:03:48 +03:00
016fde8d78
we are using Go's net/mail to parse message headers. it can parse addresses, and properly decodes email addresses with double quotes (e.g. " "@example.com). however, it gives us an address without the double quotes in the localpart, effectively an invalid address. we now have a workaround to parse such not-quite-addresses. for issue #199 reported by gene-hightower, thanks for reporting!
379 lines
8.6 KiB
Go
379 lines
8.6 KiB
Go
package smtp
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"golang.org/x/text/unicode/norm"
|
|
|
|
"github.com/mjl-/mox/dns"
|
|
)
|
|
|
|
// Pedantic enables stricter parsing. When set, localparts longer than 64 octets
// are rejected during parsing.
var Pedantic bool
|
|
|
|
// ErrBadAddress is the error wrapped by ParseAddress and ParseNetMailAddress
// when an address cannot be parsed.
var ErrBadAddress = errors.New("invalid email address")
|
|
|
|
// Localpart is the decoded local part of an email address: the part before the
// "@".
//
// A value that came from a quoted-string holds neither the surrounding double
// quotes nor the escaping backslashes. The empty string can be a valid
// localpart. Localparts are in Unicode NFC.
type Localpart string
|
|
|
|
// String returns a packed representation of an address, with proper escaping/quoting, for use in SMTP.
|
|
func (lp Localpart) String() string {
|
|
// See ../rfc/5321:2322 ../rfc/6531:414
|
|
// First we try as dot-string. If not possible we make a quoted-string.
|
|
dotstr := true
|
|
t := strings.Split(string(lp), ".")
|
|
for _, e := range t {
|
|
for _, c := range e {
|
|
if c >= '0' && c <= '9' || c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c > 0x7f {
|
|
continue
|
|
}
|
|
switch c {
|
|
case '!', '#', '$', '%', '&', '\'', '*', '+', '-', '/', '=', '?', '^', '_', '`', '{', '|', '}', '~':
|
|
continue
|
|
}
|
|
dotstr = false
|
|
break
|
|
}
|
|
dotstr = dotstr && len(e) > 0
|
|
}
|
|
dotstr = dotstr && len(t) > 0
|
|
if dotstr {
|
|
return string(lp)
|
|
}
|
|
|
|
// Make quoted-string.
|
|
r := `"`
|
|
for _, b := range lp {
|
|
if b == '"' || b == '\\' {
|
|
r += "\\" + string(b)
|
|
} else {
|
|
r += string(b)
|
|
}
|
|
}
|
|
r += `"`
|
|
return r
|
|
}
|
|
|
|
// LogString returns the localpart as string for use in smtp, and an escaped
|
|
// representation if it has non-ascii characters.
|
|
func (lp Localpart) LogString() string {
|
|
s := lp.String()
|
|
qs := strconv.QuoteToASCII(s)
|
|
if qs != `"`+s+`"` {
|
|
s = "/" + qs
|
|
}
|
|
return s
|
|
}
|
|
|
|
// DSNString returns the localpart as string for use in a DSN.
|
|
// utf8 indicates if the remote MTA supports utf8 messaging. If not, the 7bit DSN
|
|
// encoding for "utf-8-addr-xtext" from RFC 6533 is used.
|
|
func (lp Localpart) DSNString(utf8 bool) string {
|
|
if utf8 {
|
|
return lp.String()
|
|
}
|
|
// ../rfc/6533:259
|
|
r := ""
|
|
for _, c := range lp {
|
|
if c > 0x20 && c < 0x7f && c != '\\' && c != '+' && c != '=' {
|
|
r += string(c)
|
|
} else {
|
|
r += fmt.Sprintf(`\x{%x}`, c)
|
|
}
|
|
}
|
|
return r
|
|
}
|
|
|
|
// IsInternational returns if this is an internationalized local part, i.e. has
|
|
// non-ASCII characters.
|
|
func (lp Localpart) IsInternational() bool {
|
|
for _, c := range lp {
|
|
if c > 0x7f {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// Address is a parsed email address.
|
|
type Address struct {
|
|
Localpart Localpart
|
|
Domain dns.Domain // todo: shouldn't we accept an ip address here too? and merge this type into smtp.Path.
|
|
}
|
|
|
|
// NewAddress returns an address.
|
|
func NewAddress(localpart Localpart, domain dns.Domain) Address {
|
|
return Address{localpart, domain}
|
|
}
|
|
|
|
func (a Address) Path() Path {
|
|
return Path{Localpart: a.Localpart, IPDomain: dns.IPDomain{Domain: a.Domain}}
|
|
}
|
|
|
|
func (a Address) IsZero() bool {
|
|
return a == Address{}
|
|
}
|
|
|
|
// Pack returns the address in string form. If smtputf8 is true, the domain is
|
|
// formatted with non-ASCII characters. If localpart has non-ASCII characters,
|
|
// they are returned regardless of smtputf8.
|
|
func (a Address) Pack(smtputf8 bool) string {
|
|
if a.IsZero() {
|
|
return ""
|
|
}
|
|
return a.Localpart.String() + "@" + a.Domain.XName(smtputf8)
|
|
}
|
|
|
|
// String returns the address in string form with non-ASCII characters.
|
|
func (a Address) String() string {
|
|
if a.IsZero() {
|
|
return ""
|
|
}
|
|
return a.Localpart.String() + "@" + a.Domain.Name()
|
|
}
|
|
|
|
// LogString returns the address with with utf-8 in localpart and/or domain. In
|
|
// case of an IDNA domain and/or quotable characters in the localpart, an address
|
|
// with quoted/escaped localpart and ASCII domain is also returned.
|
|
func (a Address) LogString() string {
|
|
if a.IsZero() {
|
|
return ""
|
|
}
|
|
s := a.Pack(true)
|
|
lp := a.Localpart.String()
|
|
qlp := strconv.QuoteToASCII(lp)
|
|
escaped := qlp != `"`+lp+`"`
|
|
if a.Domain.Unicode != "" || escaped {
|
|
if escaped {
|
|
lp = qlp
|
|
}
|
|
s += "/" + lp + "@" + a.Domain.ASCII
|
|
}
|
|
return s
|
|
}
|
|
|
|
// ParseAddress parses an email address. UTF-8 is allowed.
|
|
// Returns ErrBadAddress for invalid addresses.
|
|
func ParseAddress(s string) (address Address, err error) {
|
|
lp, rem, err := parseLocalPart(s)
|
|
if err != nil {
|
|
return Address{}, fmt.Errorf("%w: %s", ErrBadAddress, err)
|
|
}
|
|
if !strings.HasPrefix(rem, "@") {
|
|
return Address{}, fmt.Errorf("%w: expected @", ErrBadAddress)
|
|
}
|
|
rem = rem[1:]
|
|
d, err := dns.ParseDomain(rem)
|
|
if err != nil {
|
|
return Address{}, fmt.Errorf("%w: %s", ErrBadAddress, err)
|
|
}
|
|
return Address{lp, d}, err
|
|
}
|
|
|
|
// ParseNetMailAddress parses a not-quite-valid address as found in
|
|
// net/mail.Address.Address.
|
|
//
|
|
// net/mail does parse quoted addresses properly, but stores the localpart
|
|
// unquoted. So an address `" "@example.com` would be stored as ` @example.com`,
|
|
// which we would fail to parse without special attention.
|
|
func ParseNetMailAddress(a string) (address Address, err error) {
|
|
i := strings.LastIndex(a, "@")
|
|
if i < 0 {
|
|
return Address{}, fmt.Errorf("%w: missing @", ErrBadAddress)
|
|
}
|
|
addrStr := Localpart(a[:i]).String() + "@" + a[i+1:]
|
|
return ParseAddress(addrStr)
|
|
}
|
|
|
|
// ErrBadLocalpart is the error wrapped by ParseLocalpart when a localpart
// cannot be parsed.
var ErrBadLocalpart = errors.New("invalid localpart")
|
|
|
|
// ParseLocalpart parses the local part.
|
|
// UTF-8 is allowed.
|
|
// Returns ErrBadAddress for invalid addresses.
|
|
func ParseLocalpart(s string) (localpart Localpart, err error) {
|
|
lp, rem, err := parseLocalPart(s)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
if rem != "" {
|
|
return "", fmt.Errorf("%w: remaining after localpart: %q", ErrBadLocalpart, rem)
|
|
}
|
|
return lp, nil
|
|
}
|
|
|
|
func parseLocalPart(s string) (localpart Localpart, remain string, err error) {
|
|
p := &parser{s, 0}
|
|
|
|
defer func() {
|
|
x := recover()
|
|
if x == nil {
|
|
return
|
|
}
|
|
e, ok := x.(error)
|
|
if !ok {
|
|
panic(x)
|
|
}
|
|
err = fmt.Errorf("%w: %s", ErrBadLocalpart, e)
|
|
}()
|
|
|
|
lp := p.xlocalpart()
|
|
return lp, p.remainder(), nil
|
|
}
|
|
|
|
// parser holds the string being parsed and the position of the next unparsed
// byte. Its methods with an "x" prefix can panic, see xerrorf.
type parser struct {
	s string // Input being parsed.
	o int    // Offset into s of the next byte to consume.
}
|
|
|
|
func (p *parser) xerrorf(format string, args ...any) {
|
|
panic(fmt.Errorf(format, args...))
|
|
}
|
|
|
|
func (p *parser) hasPrefix(s string) bool {
|
|
return strings.HasPrefix(p.s[p.o:], s)
|
|
}
|
|
|
|
func (p *parser) take(s string) bool {
|
|
if p.hasPrefix(s) {
|
|
p.o += len(s)
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
func (p *parser) xtake(s string) {
|
|
if !p.take(s) {
|
|
p.xerrorf("expected %q", s)
|
|
}
|
|
}
|
|
|
|
func (p *parser) empty() bool {
|
|
return p.o == len(p.s)
|
|
}
|
|
|
|
func (p *parser) xtaken(n int) string {
|
|
r := p.s[p.o : p.o+n]
|
|
p.o += n
|
|
return r
|
|
}
|
|
|
|
func (p *parser) remainder() string {
|
|
r := p.s[p.o:]
|
|
p.o = len(p.s)
|
|
return r
|
|
}
|
|
|
|
// todo: reduce duplication between implementations: ../smtp/address.go:/xlocalpart ../dkim/parser.go:/xlocalpart ../smtpserver/parse.go:/xlocalpart
|
|
func (p *parser) xlocalpart() Localpart {
|
|
// ../rfc/5321:2316
|
|
var s string
|
|
if p.hasPrefix(`"`) {
|
|
s = p.xquotedString()
|
|
} else {
|
|
s = p.xatom()
|
|
for p.take(".") {
|
|
s += "." + p.xatom()
|
|
}
|
|
}
|
|
// In the wild, some services use large localparts for generated (bounce) addresses.
|
|
if Pedantic && len(s) > 64 || len(s) > 128 {
|
|
// ../rfc/5321:3486
|
|
p.xerrorf("localpart longer than 64 octets")
|
|
}
|
|
return Localpart(norm.NFC.String(s))
|
|
}
|
|
|
|
func (p *parser) xquotedString() string {
|
|
p.xtake(`"`)
|
|
var s string
|
|
var esc bool
|
|
for {
|
|
c := p.xchar()
|
|
if esc {
|
|
if c >= ' ' && c < 0x7f {
|
|
s += string(c)
|
|
esc = false
|
|
continue
|
|
}
|
|
p.xerrorf("invalid localpart, bad escaped char %c", c)
|
|
}
|
|
if c == '\\' {
|
|
esc = true
|
|
continue
|
|
}
|
|
if c == '"' {
|
|
return s
|
|
}
|
|
// todo: should we be accepting utf8 for quoted strings?
|
|
if c >= ' ' && c < 0x7f && c != '\\' && c != '"' || c > 0x7f {
|
|
s += string(c)
|
|
continue
|
|
}
|
|
p.xerrorf("invalid localpart, invalid character %c", c)
|
|
}
|
|
}
|
|
|
|
func (p *parser) xchar() rune {
|
|
// We are careful to track invalid utf-8 properly.
|
|
if p.empty() {
|
|
p.xerrorf("need another character")
|
|
}
|
|
var r rune
|
|
var o int
|
|
for i, c := range p.s[p.o:] {
|
|
if i > 0 {
|
|
o = i
|
|
break
|
|
}
|
|
r = c
|
|
}
|
|
if o == 0 {
|
|
p.o = len(p.s)
|
|
} else {
|
|
p.o += o
|
|
}
|
|
return r
|
|
}
|
|
|
|
func (p *parser) takefn1(what string, fn func(c rune, i int) bool) string {
|
|
if p.empty() {
|
|
p.xerrorf("need at least one char for %s", what)
|
|
}
|
|
for i, c := range p.s[p.o:] {
|
|
if !fn(c, i) {
|
|
if i == 0 {
|
|
p.xerrorf("expected at least one char for %s, got char %c", what, c)
|
|
}
|
|
return p.xtaken(i)
|
|
}
|
|
}
|
|
return p.remainder()
|
|
}
|
|
|
|
func (p *parser) xatom() string {
|
|
return p.takefn1("atom", func(c rune, i int) bool {
|
|
switch c {
|
|
case '!', '#', '$', '%', '&', '\'', '*', '+', '-', '/', '=', '?', '^', '_', '`', '{', '|', '}', '~':
|
|
return true
|
|
}
|
|
return isalphadigit(c) || c > 0x7f
|
|
})
|
|
}
|
|
|
|
// isalpha reports whether c is an ASCII letter.
func isalpha(c rune) bool {
	switch {
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
|
|
|
|
// isdigit reports whether c is an ASCII digit.
func isdigit(c rune) bool {
	if c < '0' {
		return false
	}
	return c <= '9'
}
|
|
|
|
func isalphadigit(c rune) bool {
|
|
return isalpha(c) || isdigit(c)
|
|
}
|