normalize localparts with unicode nfc when parsing

both when parsing our configs, and for incoming on smtp or in messages.
so we properly compare things like é and e+accent as equal, and accept the
different encodings of that same address.
This commit is contained in:
Mechiel Lukkien 2024-03-08 21:08:40 +01:00
parent 4fbd7abb57
commit 8e6fe7459b
No known key found for this signature in database
23 changed files with 134 additions and 59 deletions

View file

@ -6,6 +6,8 @@ import (
"strconv"
"strings"
"golang.org/x/text/unicode/norm"
"github.com/mjl-/mox/dns"
"github.com/mjl-/mox/smtp"
)
@ -279,7 +281,7 @@ func (p *parser) xlocalpart() smtp.Localpart {
// ../rfc/5321:3486
p.xerrorf("localpart longer than 64 octets")
}
return smtp.Localpart(s)
return smtp.Localpart(norm.NFC.String(s))
}
func (p *parser) xquotedString() string {

View file

@ -30,7 +30,8 @@ type Domain struct {
// letters/digits/hyphens) labels. Always in lower case. No trailing dot.
ASCII string
// Name as U-labels. Empty if this is an ASCII-only domain. No trailing dot.
// Name as U-labels, in Unicode NFC. Empty if this is an ASCII-only domain. No
// trailing dot.
Unicode string
}
@ -87,7 +88,8 @@ func (d Domain) IsZero() bool {
// labels (unicode).
// Names are IDN-canonicalized and lower-cased.
// Characters in unicode can be replaced by equivalents. E.g. "Ⓡ" to "r". This
// means you should only compare parsed domain names, never strings directly.
// means you should only compare parsed domain names, never unparsed strings
// directly.
func ParseDomain(s string) (Domain, error) {
if strings.HasSuffix(s, ".") {
return Domain{}, errTrailingDot

View file

@ -65,7 +65,11 @@ func Parse(elog *slog.Logger, r io.ReaderAt) (*Message, *message.Part, error) {
if err != nil {
return smtp.Path{}, fmt.Errorf("parsing domain: %v", err)
}
return smtp.Path{Localpart: smtp.Localpart(a.User), IPDomain: dns.IPDomain{Domain: d}}, nil
lp, err := smtp.ParseLocalpart(a.User)
if err != nil {
return smtp.Path{}, fmt.Errorf("parsing localpart: %v", err)
}
return smtp.Path{Localpart: lp, IPDomain: dns.IPDomain{Domain: d}}, nil
}
if len(part.Envelope.From) == 1 {
m.From, err = addressPath(part.Envelope.From[0])
@ -318,17 +322,18 @@ func parseAddress(s string, utf8 bool) (smtp.Path, error) {
}
}
// todo: more proper parser
t = strings.SplitN(s, "@", 2)
if len(t) != 2 || t[0] == "" || t[1] == "" {
t = strings.Split(s, "@")
if len(t) == 1 {
return smtp.Path{}, fmt.Errorf("invalid email address")
}
d, err := dns.ParseDomain(t[1])
d, err := dns.ParseDomain(t[len(t)-1])
if err != nil {
return smtp.Path{}, fmt.Errorf("parsing domain: %v", err)
}
var lp string
var esc string
for _, c := range t[0] {
lead := strings.Join(t[:len(t)-1], "@")
for _, c := range lead {
if esc == "" && c == '\\' || esc == `\` && (c == 'x' || c == 'X') || esc == `\x` && c == '{' {
if c == 'X' {
c = 'x'
@ -352,7 +357,11 @@ func parseAddress(s string, utf8 bool) (smtp.Path, error) {
if esc != "" {
return smtp.Path{}, fmt.Errorf("parsing localpart: unfinished embedded unicode char")
}
p := smtp.Path{Localpart: smtp.Localpart(lp), IPDomain: dns.IPDomain{Domain: d}}
localpart, err := smtp.ParseLocalpart(lp)
if err != nil {
return smtp.Path{}, fmt.Errorf("parsing localpart: %v", err)
}
p := smtp.Path{Localpart: localpart, IPDomain: dns.IPDomain{Domain: d}}
return p, nil
}

15
main.go
View file

@ -640,18 +640,20 @@ must be set if and only if account does not yet exist.
d := xparseDomain(args[0], "domain")
mustLoadConfig()
var localpart string
var localpart smtp.Localpart
if len(args) == 3 {
localpart = args[2]
var err error
localpart, err = smtp.ParseLocalpart(args[2])
xcheckf(err, "parsing localpart")
}
ctlcmdConfigDomainAdd(xctl(), d, args[1], localpart)
}
func ctlcmdConfigDomainAdd(ctl *ctl, domain dns.Domain, account, localpart string) {
func ctlcmdConfigDomainAdd(ctl *ctl, domain dns.Domain, account string, localpart smtp.Localpart) {
ctl.xwrite("domainadd")
ctl.xwrite(domain.Name())
ctl.xwrite(account)
ctl.xwrite(localpart)
ctl.xwrite(string(localpart))
ctl.xreadok()
fmt.Printf("domain added, remember to add dns records, see:\n\nmox config dnsrecords %s\nmox config dnscheck %s\n", domain.Name(), domain.Name())
}
@ -2128,9 +2130,10 @@ headers prepended.
if len(p.Envelope.From) != 1 {
log.Fatalf("found %d from headers, need exactly 1", len(p.Envelope.From))
}
localpart := smtp.Localpart(p.Envelope.From[0].User)
localpart, err := smtp.ParseLocalpart(p.Envelope.From[0].User)
xcheckf(err, "parsing localpart of address in from-header")
dom, err := dns.ParseDomain(p.Envelope.From[0].Host)
xcheckf(err, "parsing domain in from header")
xcheckf(err, "parsing domain of address in from-header")
mustLoadConfig()

View file

@ -42,6 +42,10 @@ func From(elog *slog.Logger, strict bool, r io.ReaderAt) (raddr smtp.Address, en
if err != nil {
return raddr, nil, nil, fmt.Errorf("bad domain in from address: %v", err)
}
addr := smtp.Address{Localpart: smtp.Localpart(from[0].User), Domain: d}
lp, err := smtp.ParseLocalpart(from[0].User)
if err != nil {
return raddr, nil, nil, fmt.Errorf("parsing localpart in from address: %v", err)
}
addr := smtp.Address{Localpart: lp, Domain: d}
return addr, p.Envelope, textproto.MIMEHeader(header), nil
}

View file

@ -104,7 +104,7 @@ type Envelope struct {
// Address as used in From and To headers.
type Address struct {
Name string // Free-form name for display in mail applications.
User string // Localpart.
User string // Localpart, encoded as string. Must be parsed before using as Localpart.
Host string // Domain in ASCII.
}

View file

@ -85,9 +85,6 @@ func CanonicalLocalpart(localpart smtp.Localpart, d config.Domain) (smtp.Localpa
if d.LocalpartCatchallSeparator != "" {
t := strings.SplitN(string(localpart), d.LocalpartCatchallSeparator, 2)
localpart = smtp.Localpart(t[0])
if localpart == "" {
return "", fmt.Errorf("empty localpart")
}
}
if !d.LocalpartCaseSensitive {

View file

@ -6,6 +6,8 @@ import (
"strconv"
"strings"
"golang.org/x/text/unicode/norm"
"github.com/mjl-/mox/dns"
)
@ -17,6 +19,7 @@ var ErrBadAddress = errors.New("invalid email address")
// Localpart is a decoded local part of an email address, before the "@".
// For quoted strings, values do not hold the double quote or escaping backslashes.
// An empty string can be a valid localpart.
// Localparts are in Unicode NFC.
type Localpart string
// String returns a packed representation of an address, with proper escaping/quoting, for use in SMTP.
@ -268,7 +271,7 @@ func (p *parser) xlocalpart() Localpart {
// ../rfc/5321:3486
p.xerrorf("localpart longer than 64 octets")
}
return Localpart(s)
return Localpart(norm.NFC.String(s))
}
func (p *parser) xquotedString() string {

View file

@ -388,7 +388,8 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
if err != nil {
continue
}
if dom == d.rcptAcc.rcptTo.IPDomain.Domain && smtp.Localpart(a.User) == d.rcptAcc.rcptTo.Localpart {
lp, err := smtp.ParseLocalpart(a.User)
if err == nil && dom == d.rcptAcc.rcptTo.IPDomain.Domain && lp == d.rcptAcc.rcptTo.Localpart {
return true
}
}

View file

@ -8,6 +8,8 @@ import (
"strings"
"time"
"golang.org/x/text/unicode/norm"
"github.com/mjl-/mox/dns"
"github.com/mjl-/mox/mox-"
"github.com/mjl-/mox/smtp"
@ -342,7 +344,7 @@ func (p *parser) xlocalpart() smtp.Localpart {
// ../rfc/5321:3486
p.xerrorf("localpart longer than 64 octets")
}
return smtp.Localpart(s)
return smtp.Localpart(norm.NFC.String(s))
}
// ../rfc/5321:2324

View file

@ -125,7 +125,13 @@ func TestReputation(t *testing.T) {
rcptToDomain, err := dns.ParseDomain(hm.RcptToDomain)
tcheck(t, err, "parse rcptToDomain")
rcptToOrgDomain := publicsuffix.Lookup(ctxbg, log.Logger, rcptToDomain)
r := store.Recipient{MessageID: hm.ID, Localpart: hm.RcptToLocalpart, Domain: hm.RcptToDomain, OrgDomain: rcptToOrgDomain.Name(), Sent: hm.Received}
r := store.Recipient{
MessageID: hm.ID,
Localpart: hm.RcptToLocalpart.String(),
Domain: hm.RcptToDomain,
OrgDomain: rcptToOrgDomain.Name(),
Sent: hm.Received,
}
err = tx.Insert(&r)
tcheck(t, err, "insert recipient")
}

View file

@ -352,11 +352,40 @@ func TestDelivery(t *testing.T) {
// Set up iprev to get delivery from unknown user to be accepted.
resolver.PTR["127.0.0.10"] = []string{"example.org."}
// Only ascii o@ is configured, not the greek and cyrillic lookalikes.
ts.run(func(err error, client *smtpclient.Client) {
mailFrom := "remote@example.org"
rcptTo := "mjl@mox.example"
rcptTo := "ο@mox.example" // omicron \u03bf, looks like the configured o@
msg := strings.ReplaceAll(deliverMessage, "mjl@mox.example", rcptTo)
if err == nil {
err = client.Deliver(ctxbg, mailFrom, rcptTo, int64(len(deliverMessage)), strings.NewReader(deliverMessage), false, false, false)
err = client.Deliver(ctxbg, mailFrom, rcptTo, int64(len(msg)), strings.NewReader(msg), false, true, false)
}
var cerr smtpclient.Error
if err == nil || !errors.As(err, &cerr) || cerr.Code != smtp.C550MailboxUnavail {
t.Fatalf("deliver to omicron @ instead of ascii o @, got err %v, expected smtpclient.Error with code %d", err, smtp.C550MailboxUnavail)
}
})
ts.run(func(err error, client *smtpclient.Client) {
recipients := []string{
"mjl@mox.example",
"o@mox.example", // ascii o, as configured
"\u2126@mox.example", // ohm sign, as configured
"ω@mox.example", // lower-case omega, we match case-insensitively and this is the lowercase of ohm (!)
"\u03a9@mox.example", // capital omega, also lowercased to omega.
"tést@mox.example", // NFC
"te\u0301st@mox.example", // not NFC, but normalized as tést@, see https://go.dev/blog/normalization
}
for _, rcptTo := range recipients {
// Ensure SMTP RCPT TO and message address headers are the same, otherwise the junk
// filter treats us more strictly.
msg := strings.ReplaceAll(deliverMessage, "mjl@mox.example", rcptTo)
mailFrom := "remote@example.org"
if err == nil {
err = client.Deliver(ctxbg, mailFrom, rcptTo, int64(len(msg)), strings.NewReader(msg), false, true, false)
}
tcheck(t, err, "deliver to remote")
@ -372,6 +401,7 @@ func TestDelivery(t *testing.T) {
case <-timer.C:
t.Fatalf("no delivery in 1s")
}
}
})
checkEvaluationCount(t, 0)
@ -1005,7 +1035,6 @@ func TestTLSReport(t *testing.T) {
},
TXT: map[string][]string{
"testsel._domainkey.example.org.": {dkimTxt},
"example.org.": {"v=spf1 ip4:127.0.0.10 -all"},
"_dmarc.example.org.": {"v=DMARC1;p=reject;rua=mailto:dmarcrpt@example.org"},
},
PTR: map[string][]string{

View file

@ -668,7 +668,7 @@ func (m *Message) JunkFlagsForMailbox(mb Mailbox, conf config.Account) {
type Recipient struct {
ID int64
MessageID int64 `bstore:"nonzero,ref Message"` // Ref gives it its own index, useful for fast removal as well.
Localpart smtp.Localpart `bstore:"nonzero"`
Localpart string `bstore:"nonzero"` // Encoded localpart.
Domain string `bstore:"nonzero,index Domain+Localpart"` // Unicode string.
OrgDomain string `bstore:"nonzero,index"` // Unicode string.
Sent time.Time `bstore:"nonzero"`
@ -1416,9 +1416,14 @@ func (a *Account) DeliverMessage(log mlog.Log, tx *bstore.Tx, m *Message, msgFil
log.Debugx("parsing domain in to/cc/bcc address", err, slog.Any("address", addr))
continue
}
lp, err := smtp.ParseLocalpart(addr.User)
if err != nil {
log.Debugx("parsing localpart in to/cc/bcc address", err, slog.Any("address", addr))
continue
}
mr := Recipient{
MessageID: m.ID,
Localpart: smtp.Localpart(addr.User),
Localpart: lp.String(),
Domain: d.Name(),
OrgDomain: publicsuffix.Lookup(context.TODO(), log.Logger, d).Name(),
Sent: sent,

View file

@ -116,7 +116,11 @@ func Verify(elog *slog.Logger, r io.ReaderAt, key []byte, period time.Duration)
if err != nil {
return fmt.Errorf("%w: from address with bad domain: %v", ErrFrom, err)
}
addr := smtp.Address{Localpart: smtp.Localpart(from.User), Domain: d}.Pack(true)
lp, err := smtp.ParseLocalpart(from.User)
if err != nil {
return fmt.Errorf("%w: from address with bad localpart: %v", ErrFrom, err)
}
addr := smtp.Address{Localpart: lp, Domain: d}.Pack(true)
buf, err := base64.RawURLEncoding.DecodeString(token)
if err != nil {

View file

@ -8,6 +8,11 @@ Accounts:
mjl@mox.example: nil
mjl@mox2.example: nil
""@mox.example: nil
# ascii o, we'll check that greek & cyrillic lookalike isn't accepted
o@mox.example: nil
# ohm sign, \u2126
Ω@mox.example: nil
tést@mox.example: nil
JunkFilter:
Threshold: 0.9
Params:

View file

@ -172,7 +172,7 @@
},
{
"Name": "Unicode",
"Docs": "Name as U-labels. Empty if this is an ASCII-only domain. No trailing dot.",
"Docs": "Name as U-labels, in Unicode NFC. Empty if this is an ASCII-only domain. No trailing dot.",
"Typewords": [
"string"
]

View file

@ -8,7 +8,7 @@ namespace api {
// trailing dot. When using with StrictResolver, add the trailing dot.
export interface Domain {
ASCII: string // A non-unicode domain, e.g. with A-labels (xn--...) or NR-LDH (non-reserved letters/digits/hyphens) labels. Always in lower case. No trailing dot.
Unicode: string // Name as U-labels. Empty if this is an ASCII-only domain. No trailing dot.
Unicode: string // Name as U-labels, in Unicode NFC. Empty if this is an ASCII-only domain. No trailing dot.
}
export interface Destination {

View file

@ -36,6 +36,7 @@ import (
_ "embed"
"golang.org/x/exp/maps"
"golang.org/x/text/unicode/norm"
"github.com/mjl-/adns"
@ -1880,7 +1881,7 @@ func (Admin) DomainAdd(ctx context.Context, domain, accountName, localpart strin
d, err := dns.ParseDomain(domain)
xcheckuserf(ctx, err, "parsing domain")
err = mox.DomainAdd(ctx, d, accountName, smtp.Localpart(localpart))
err = mox.DomainAdd(ctx, d, accountName, smtp.Localpart(norm.NFC.String(localpart)))
xcheckf(ctx, err, "adding domain")
}

View file

@ -1376,7 +1376,7 @@
},
{
"Name": "Unicode",
"Docs": "Name as U-labels. Empty if this is an ASCII-only domain. No trailing dot.",
"Docs": "Name as U-labels, in Unicode NFC. Empty if this is an ASCII-only domain. No trailing dot.",
"Typewords": [
"string"
]
@ -4639,7 +4639,7 @@
},
{
"Name": "Localpart",
"Docs": "Localpart is a decoded local part of an email address, before the \"@\".\nFor quoted strings, values do not hold the double quote or escaping backslashes.\nAn empty string can be a valid localpart.",
"Docs": "Localpart is a decoded local part of an email address, before the \"@\".\nFor quoted strings, values do not hold the double quote or escaping backslashes.\nAn empty string can be a valid localpart.\nLocalparts are in Unicode NFC.",
"Values": null
},
{

View file

@ -43,7 +43,7 @@ export interface IPRevCheckResult {
// trailing dot. When using with StrictResolver, add the trailing dot.
export interface Domain {
ASCII: string // A non-unicode domain, e.g. with A-labels (xn--...) or NR-LDH (non-reserved letters/digits/hyphens) labels. Always in lower case. No trailing dot.
Unicode: string // Name as U-labels. Empty if this is an ASCII-only domain. No trailing dot.
Unicode: string // Name as U-labels, in Unicode NFC. Empty if this is an ASCII-only domain. No trailing dot.
}
export interface MXCheckResult {
@ -765,6 +765,7 @@ export enum SPFResult {
// Localpart is a decoded local part of an email address, before the "@".
// For quoted strings, values do not hold the double quote or escaping backslashes.
// An empty string can be a valid localpart.
// Localparts are in Unicode NFC.
export type Localpart = string
// An IP is a single IP address, a slice of bytes.

View file

@ -1347,7 +1347,7 @@ func (Webmail) CompleteRecipient(ctx context.Context, search string) ([]string,
acc.WithRLock(func() {
xdbread(ctx, acc, func(tx *bstore.Tx) {
type key struct {
localpart smtp.Localpart
localpart string
domain string
}
seen := map[key]bool{}
@ -1360,7 +1360,7 @@ func (Webmail) CompleteRecipient(ctx context.Context, search string) ([]string,
return nil
}
// todo: we should have the address including name available in the database for searching. Will result in better matching, and also for the name.
address := fmt.Sprintf("<%s@%s>", r.Localpart.String(), r.Domain)
address := fmt.Sprintf("<%s@%s>", r.Localpart, r.Domain)
if !strings.Contains(strings.ToLower(address), search) {
return nil
}
@ -1382,7 +1382,7 @@ func (Webmail) CompleteRecipient(ctx context.Context, search string) ([]string,
xcheckf(ctx, err, "parsing domain of recipient")
var found bool
lp := r.Localpart.String()
lp := r.Localpart
checkAddrs := func(l []message.Address) {
if found {
return

View file

@ -1009,7 +1009,7 @@
},
{
"Name": "User",
"Docs": "Localpart.",
"Docs": "Localpart, encoded as string. Must be parsed before using as Localpart.",
"Typewords": [
"string"
]
@ -1063,7 +1063,7 @@
},
{
"Name": "Unicode",
"Docs": "Name as U-labels. Empty if this is an ASCII-only domain. No trailing dot.",
"Docs": "Name as U-labels, in Unicode NFC. Empty if this is an ASCII-only domain. No trailing dot.",
"Typewords": [
"string"
]
@ -2817,7 +2817,7 @@
},
{
"Name": "Localpart",
"Docs": "Localpart is a decoded local part of an email address, before the \"@\".\nFor quoted strings, values do not hold the double quote or escaping backslashes.\nAn empty string can be a valid localpart.",
"Docs": "Localpart is a decoded local part of an email address, before the \"@\".\nFor quoted strings, values do not hold the double quote or escaping backslashes.\nAn empty string can be a valid localpart.\nLocalparts are in Unicode NFC.",
"Values": null
}
],

View file

@ -106,7 +106,7 @@ export interface Envelope {
// Address as used in From and To headers.
export interface Address {
Name: string // Free-form name for display in mail applications.
User: string // Localpart.
User: string // Localpart, encoded as string. Must be parsed before using as Localpart.
Host: string // Domain in ASCII.
}
@ -124,7 +124,7 @@ export interface MessageAddress {
// trailing dot. When using with StrictResolver, add the trailing dot.
export interface Domain {
ASCII: string // A non-unicode domain, e.g. with A-labels (xn--...) or NR-LDH (non-reserved letters/digits/hyphens) labels. Always in lower case. No trailing dot.
Unicode: string // Name as U-labels. Empty if this is an ASCII-only domain. No trailing dot.
Unicode: string // Name as U-labels, in Unicode NFC. Empty if this is an ASCII-only domain. No trailing dot.
}
// SubmitMessage is an email message to be sent to one or more recipients.
@ -516,6 +516,7 @@ export enum SecurityResult {
// Localpart is a decoded local part of an email address, before the "@".
// For quoted strings, values do not hold the double quote or escaping backslashes.
// An empty string can be a valid localpart.
// Localparts are in Unicode NFC.
export type Localpart = string
export const structTypes: {[typename: string]: boolean} = {"Address":true,"Attachment":true,"ChangeMailboxAdd":true,"ChangeMailboxCounts":true,"ChangeMailboxKeywords":true,"ChangeMailboxRemove":true,"ChangeMailboxRename":true,"ChangeMailboxSpecialUse":true,"ChangeMsgAdd":true,"ChangeMsgFlags":true,"ChangeMsgRemove":true,"ChangeMsgThread":true,"Domain":true,"DomainAddressConfig":true,"Envelope":true,"EventStart":true,"EventViewChanges":true,"EventViewErr":true,"EventViewMsgs":true,"EventViewReset":true,"File":true,"Filter":true,"Flags":true,"ForwardAttachments":true,"Mailbox":true,"Message":true,"MessageAddress":true,"MessageEnvelope":true,"MessageItem":true,"NotFilter":true,"Page":true,"ParsedMessage":true,"Part":true,"Query":true,"RecipientSecurity":true,"Request":true,"SpecialUse":true,"SubmitMessage":true}