mox/dsn/parse.go

382 lines
11 KiB
Go
Raw Normal View History

2023-01-30 16:27:06 +03:00
package dsn
import (
"bufio"
"fmt"
"io"
"log/slog"
2023-01-30 16:27:06 +03:00
"net/textproto"
"strconv"
"strings"
"time"
"github.com/mjl-/mox/dns"
"github.com/mjl-/mox/message"
"github.com/mjl-/mox/mlog"
2023-01-30 16:27:06 +03:00
"github.com/mjl-/mox/smtp"
)
// Parse reads a DSN message.
//
// A DSN is a multipart internet mail message with 2 or 3 parts: human-readable
// text, machine-parsable text, and optional original message or headers.
//
// The first return value is the machine-parsed DSN message. The second value is
// the entire MIME multipart message. Use its Parts field to access the
// human-readable text and optional original message/headers.
func Parse(elog *slog.Logger, r io.ReaderAt) (*Message, *message.Part, error) {
log := mlog.New("dsn", elog)
2023-01-30 16:27:06 +03:00
// DSNs can mix and match subtypes with and without utf-8. ../rfc/6533:441
part, err := message.Parse(log.Logger, false, r)
2023-01-30 16:27:06 +03:00
if err != nil {
return nil, nil, fmt.Errorf("parsing message: %v", err)
}
if part.MediaType != "MULTIPART" || part.MediaSubType != "REPORT" {
return nil, nil, fmt.Errorf(`message has content-type %q, must have "message/report"`, strings.ToLower(part.MediaType+"/"+part.MediaSubType))
}
err = part.Walk(log.Logger, nil)
2023-01-30 16:27:06 +03:00
if err != nil {
return nil, nil, fmt.Errorf("parsing message parts: %v", err)
}
nparts := len(part.Parts)
if nparts != 2 && nparts != 3 {
return nil, nil, fmt.Errorf("invalid dsn, got %d multipart parts, 2 or 3 required", nparts)
}
p0 := part.Parts[0]
if !(p0.MediaType == "" && p0.MediaSubType == "") && !(p0.MediaType == "TEXT" && p0.MediaSubType == "PLAIN") {
return nil, nil, fmt.Errorf(`invalid dsn, first part has content-type %q, must have "text/plain"`, strings.ToLower(p0.MediaType+"/"+p0.MediaSubType))
}
p1 := part.Parts[1]
var m *Message
if !(p1.MediaType == "MESSAGE" && (p1.MediaSubType == "DELIVERY-STATUS" || p1.MediaSubType == "GLOBAL-DELIVERY-STATUS")) {
return nil, nil, fmt.Errorf(`invalid dsn, second part has content-type %q, must have "message/delivery-status" or "message/global-delivery-status"`, strings.ToLower(p1.MediaType+"/"+p1.MediaSubType))
}
utf8 := p1.MediaSubType == "GLOBAL-DELIVERY-STATUS"
m, err = Decode(p1.Reader(), utf8)
if err != nil {
return nil, nil, fmt.Errorf("parsing dsn delivery-status part: %v", err)
}
addressPath := func(a message.Address) (smtp.Path, error) {
d, err := dns.ParseDomain(a.Host)
if err != nil {
return smtp.Path{}, fmt.Errorf("parsing domain: %v", err)
}
lp, err := smtp.ParseLocalpart(a.User)
if err != nil {
return smtp.Path{}, fmt.Errorf("parsing localpart: %v", err)
}
return smtp.Path{Localpart: lp, IPDomain: dns.IPDomain{Domain: d}}, nil
2023-01-30 16:27:06 +03:00
}
if len(part.Envelope.From) == 1 {
m.From, err = addressPath(part.Envelope.From[0])
if err != nil {
return nil, nil, fmt.Errorf("parsing From-header: %v", err)
}
}
if len(part.Envelope.To) == 1 {
m.To, err = addressPath(part.Envelope.To[0])
if err != nil {
return nil, nil, fmt.Errorf("parsing To-header: %v", err)
}
}
m.Subject = part.Envelope.Subject
buf, err := io.ReadAll(p0.ReaderUTF8OrBinary())
2023-01-30 16:27:06 +03:00
if err != nil {
return nil, nil, fmt.Errorf("reading human-readable text part: %v", err)
}
m.TextBody = strings.ReplaceAll(string(buf), "\r\n", "\n")
if nparts == 2 {
return m, &part, nil
}
p2 := part.Parts[2]
ct := strings.ToLower(p2.MediaType + "/" + p2.MediaSubType)
switch ct {
case "text/rfc822-headers":
case "message/global-headers":
case "message/rfc822":
case "message/global":
default:
return nil, nil, fmt.Errorf("invalid content-type %q for optional third part with original message/headers", ct)
}
return m, &part, nil
}
// Decode parses the (global) delivery-status part of a DSN.
//
// utf8 indicates if UTF-8 is allowed for this message, if used by the media
// subtype of the message parts.
//
// The part consists of one group of per-message fields, followed by one or more
// groups of per-recipient fields. Each recognized per-message field may occur
// only once, and Reporting-MTA is required. Unrecognized fields are ignored, but
// remain available through the MessageHeader of the returned message.
func Decode(r io.Reader, utf8 bool) (*Message, error) {
	m := Message{SMTPUTF8: utf8}

	// We are using textproto.Reader to read mime headers. It requires a header section ending in \r\n.
	// ../rfc/3464:486
	b := bufio.NewReader(io.MultiReader(r, strings.NewReader("\r\n")))
	mr := textproto.NewReader(b)

	// Read per-message lines.
	// ../rfc/3464:1522 ../rfc/6533:366
	msgh, err := mr.ReadMIMEHeader()
	if err != nil {
		return nil, fmt.Errorf("reading per-message lines: %v", err)
	}
	for k, l := range msgh {
		if len(l) != 1 {
			return nil, fmt.Errorf("multiple values for %q: %v", k, l)
		}
		v := l[0]
		// note: headers are in canonical form, as parsed by textproto.
		switch k {
		case "Original-Envelope-Id":
			m.OriginalEnvelopeID = v
		case "Reporting-Mta":
			mta, err := parseMTA(v, utf8)
			if err != nil {
				return nil, fmt.Errorf("parsing reporting-mta: %v", err)
			}
			m.ReportingMTA = mta
		case "Dsn-Gateway":
			mta, err := parseMTA(v, utf8)
			if err != nil {
				return nil, fmt.Errorf("parsing dsn-gateway: %v", err)
			}
			m.DSNGateway = mta
		case "Received-From-Mta":
			mta, err := parseMTA(v, utf8)
			if err != nil {
				return nil, fmt.Errorf("parsing received-from-mta: %v", err)
			}
			d, err := dns.ParseDomain(mta)
			if err != nil {
				return nil, fmt.Errorf("parsing received-from-mta domain %q: %v", mta, err)
			}
			m.ReceivedFromMTA = smtp.Ehlo{Name: dns.IPDomain{Domain: d}}
		case "Arrival-Date":
			tm, err := parseDateTime(v)
			if err != nil {
				return nil, fmt.Errorf("parsing arrival-date: %v", err)
			}
			m.ArrivalDate = tm
		default:
			// We'll assume it is an extension field, we'll ignore it for now.
		}
	}
	m.MessageHeader = msgh

	required := []string{"Reporting-Mta"}
	for _, req := range required {
		if _, ok := msgh[req]; !ok {
			// These are the per-message fields, not the per-recipient fields.
			return nil, fmt.Errorf("missing required message field %q", req)
		}
	}

	// At least one group of per-recipient fields must be present.
	rh, err := parseRecipientHeader(mr, utf8)
	if err != nil {
		return nil, fmt.Errorf("reading per-recipient header: %v", err)
	}
	m.Recipients = []Recipient{rh}

	// Keep reading per-recipient groups until the input is exhausted.
	for {
		if _, err := b.Peek(1); err == io.EOF {
			break
		}
		rh, err := parseRecipientHeader(mr, utf8)
		if err != nil {
			return nil, fmt.Errorf("reading another per-recipient header: %v", err)
		}
		m.Recipients = append(m.Recipients, rh)
	}
	return &m, nil
}
// ../rfc/3464:1530 ../rfc/6533:370
func parseRecipientHeader(mr *textproto.Reader, utf8 bool) (Recipient, error) {
var r Recipient
h, err := mr.ReadMIMEHeader()
if err != nil {
return Recipient{}, err
}
for k, l := range h {
if len(l) != 1 {
return Recipient{}, fmt.Errorf("multiple values for %q: %v", k, l)
}
v := l[0]
// note: headers are in canonical form, as parsed by textproto.
var err error
switch k {
case "Original-Recipient":
r.OriginalRecipient, err = parseAddress(v, utf8)
case "Final-Recipient":
r.FinalRecipient, err = parseAddress(v, utf8)
case "Action":
a := Action(strings.ToLower(v))
actions := []Action{Failed, Delayed, Delivered, Relayed, Expanded}
var ok bool
for _, x := range actions {
if a == x {
ok = true
r.Action = a
2023-01-30 16:27:06 +03:00
break
}
}
if !ok {
err = fmt.Errorf("unrecognized action %q", v)
}
case "Status":
// todo: parse the enhanced status code?
r.Status = v
t := strings.SplitN(v, "(", 2)
v = strings.TrimSpace(v)
if len(t) == 2 && strings.HasSuffix(v, ")") {
r.Status = strings.TrimSpace(t[0])
r.StatusComment = strings.TrimSpace(strings.TrimSuffix(t[1], ")"))
}
2023-01-30 16:27:06 +03:00
case "Remote-Mta":
r.RemoteMTA = NameIP{Name: v}
case "Diagnostic-Code":
// ../rfc/3464:518
t := strings.SplitN(v, ";", 2)
dt := strings.TrimSpace(t[0])
if strings.ToLower(dt) != "smtp" {
err = fmt.Errorf("unknown diagnostic-type %q, expected smtp", dt)
} else if len(t) != 2 {
err = fmt.Errorf("missing semicolon to separate diagnostic-type from code")
} else {
add a webapi and webhooks for a simple http/json-based api for applications to compose/send messages, receive delivery feedback, and maintain suppression lists. this is an alternative to applications using a library to compose messages, submitting those messages using smtp, and monitoring a mailbox with imap for DSNs, which can be processed into the equivalent of suppression lists. but you need to know about all these standards/protocols and find libraries. by using the webapi & webhooks, you just need a http & json library. unfortunately, there is no standard for these kinds of api, so mox has made up yet another one... matching incoming DSNs about deliveries to original outgoing messages requires keeping history of "retired" messages (delivered from the queue, either successfully or failed). this can be enabled per account. history is also useful for debugging deliveries. we now also keep history of each delivery attempt, accessible while still in the queue, and kept when a message is retired. the queue webadmin pages now also have pagination, to show potentially large history. a queue of webhook calls is now managed too. failures are retried similar to message deliveries. webhooks can also be saved to the retired list after completing. also configurable per account. messages can be sent with a "unique smtp mail from" address. this can only be used if the domain is configured with a localpart catchall separator such as "+". when enabled, a queued message gets assigned a random "fromid", which is added after the separator when sending. when DSNs are returned, they can be related to previously sent messages based on this fromid. in the future, we can implement matching on the "envid" used in the smtp dsn extension, or on the "message-id" of the message. using a fromid can be triggered by authenticating with a login email address that is configured as enabling fromid. suppression lists are automatically managed per account. 
if a delivery attempt results in certain smtp errors, the destination address is added to the suppression list. future messages queued for that recipient will immediately fail without a delivery attempt. suppression lists protect your mail server reputation. submitted messages can carry "extra" data through the queue and webhooks for outgoing deliveries. through webapi as a json object, through smtp submission as message headers of the form "x-mox-extra-<key>: value". to make it easy to test webapi/webhooks locally, the "localserve" mode actually puts messages in the queue. when it's time to deliver, it still won't do a full delivery attempt, but just delivers to the sender account. unless the recipient address has a special form, simulating a failure to deliver. admins now have more control over the queue. "hold rules" can be added to mark newly queued messages as "on hold", pausing delivery. rules can be about certain sender or recipient domains/addresses, or apply to all messages pausing the entire queue. also useful for (local) testing. new config options have been introduced. they are editable through the admin and/or account web interfaces. the webapi http endpoints are enabled for newly generated configs with the quickstart, and in localserve. existing configurations must explicitly enable the webapi in mox.conf. gopherwatch.org was created to dogfood this code. it initially used just the compose/smtpclient/imapclient mox packages to send messages and process delivery feedback. it will get a config option to use the mox webapi/webhooks instead. the gopherwatch code to use webapi/webhook is smaller and simpler, and developing that shaped development of the mox webapi/webhooks. for issue #31 by cuu508
2024-04-15 22:49:02 +03:00
r.DiagnosticCodeSMTP = strings.TrimSpace(t[1])
2023-01-30 16:27:06 +03:00
}
case "Last-Attempt-Date":
r.LastAttemptDate, err = parseDateTime(v)
case "Final-Log-Id":
r.FinalLogID = v
case "Will-Retry-Until":
tm, err := parseDateTime(v)
if err == nil {
r.WillRetryUntil = &tm
}
default:
// todo future: parse localized diagnostic text field?
// We'll assume it is an extension field, we'll ignore it for now.
}
if err != nil {
return Recipient{}, fmt.Errorf("parsing field %q %q: %v", k, v, err)
}
}
required := []string{"Final-Recipient", "Action", "Status"}
for _, req := range required {
if _, ok := h[req]; !ok {
return Recipient{}, fmt.Errorf("missing required recipient field %q", req)
}
}
r.Header = h
return r, nil
}
// parseMTA parses an MTA name field value of the form "type; name", after
// removing comments, and returns the name with surrounding whitespace removed.
// Only the type "dns" (compared case-insensitively) is accepted.
//
// The utf8 parameter is currently not used.
//
// ../rfc/3464:525
func parseMTA(s string, utf8 bool) (string, error) {
	nameType, name, found := strings.Cut(removeComments(s), ";")
	if !found {
		return "", fmt.Errorf("missing semicolon that splits type and name")
	}
	nameType = strings.TrimSpace(nameType)
	if strings.EqualFold(nameType, "dns") {
		return strings.TrimSpace(name), nil
	}
	return "", fmt.Errorf("unknown type %q, expected dns", nameType)
}
// parseDateTime parses a date-time field value, after removing comments, using
// the message.RFC5322Z layout.
func parseDateTime(s string) (time.Time, error) {
	// Removing a comment, e.g. a trailing "(UTC)", leaves the whitespace that
	// surrounded it. time.Parse rejects such leftover text, so trim it first.
	s = strings.TrimSpace(removeComments(s))
	return time.Parse(message.RFC5322Z, s)
}
func parseAddress(s string, utf8 bool) (smtp.Path, error) {
s = removeComments(s)
t := strings.SplitN(s, ";", 2)
// ../rfc/3464:513 ../rfc/6533:250
addrType := strings.ToLower(strings.TrimSpace(t[0]))
if len(t) != 2 {
return smtp.Path{}, fmt.Errorf("missing semicolon that splits address type and address")
} else if addrType == "utf-8" {
if !utf8 {
return smtp.Path{}, fmt.Errorf("utf-8 address type for non-utf-8 dsn")
}
} else if addrType != "rfc822" {
return smtp.Path{}, fmt.Errorf("unrecognized address type %q, expected rfc822", addrType)
}
s = strings.TrimSpace(t[1])
if !utf8 {
for _, c := range s {
if c > 0x7f {
return smtp.Path{}, fmt.Errorf("non-ascii without utf-8 enabled")
}
}
}
// todo: more proper parser
t = strings.Split(s, "@")
if len(t) == 1 {
2023-01-30 16:27:06 +03:00
return smtp.Path{}, fmt.Errorf("invalid email address")
}
d, err := dns.ParseDomain(t[len(t)-1])
2023-01-30 16:27:06 +03:00
if err != nil {
return smtp.Path{}, fmt.Errorf("parsing domain: %v", err)
}
var lp string
var esc string
lead := strings.Join(t[:len(t)-1], "@")
for _, c := range lead {
2023-01-30 16:27:06 +03:00
if esc == "" && c == '\\' || esc == `\` && (c == 'x' || c == 'X') || esc == `\x` && c == '{' {
if c == 'X' {
c = 'x'
}
esc += string(c)
} else if strings.HasPrefix(esc, `\x{`) {
if c == '}' {
c, err := strconv.ParseInt(esc[3:], 16, 32)
if err != nil {
return smtp.Path{}, fmt.Errorf("parsing localpart with hexpoint: %v", err)
}
lp += string(rune(c))
esc = ""
} else {
esc += string(c)
}
} else {
lp += string(c)
}
}
if esc != "" {
return smtp.Path{}, fmt.Errorf("parsing localpart: unfinished embedded unicode char")
}
localpart, err := smtp.ParseLocalpart(lp)
if err != nil {
return smtp.Path{}, fmt.Errorf("parsing localpart: %v", err)
}
p := smtp.Path{Localpart: localpart, IPDomain: dns.IPDomain{Domain: d}}
2023-01-30 16:27:06 +03:00
return p, nil
}
// removeComments returns s without its comments: text enclosed in parentheses,
// including the parentheses. Comments can be nested. A closing parenthesis
// without a matching opening parenthesis is kept, and an opening parenthesis that
// is never closed removes the remainder of s.
func removeComments(s string) string {
	depth := 0 // Number of comments currently open.
	kept := make([]rune, 0, len(s))
	for _, ch := range s {
		switch {
		case ch == '(':
			depth++
		case ch == ')' && depth > 0:
			depth--
		case depth == 0:
			kept = append(kept, ch)
		}
	}
	return string(kept)
}