package message

import (
	"errors"
	"fmt"
	"strings"

	"github.com/mjl-/mox/smtp"
)

// errBadMessageID is wrapped by every error MessageIDCanonical returns, so
// callers can test for it with errors.Is.
var errBadMessageID = errors.New("not a message-id")

// MessageIDCanonical turns a Message-ID header value into the canonical form
// used for thread matching against References/In-Reply-To: surrounding
// whitespace and the enclosing <> are removed, the value is lower-cased, and
// the localpart is re-rendered so it carries no unneeded quoting.
//
// Return values:
//   - the canonical (or raw, see below) message-id;
//   - a bool that is true when the value between <> could not be parsed as an
//     address (localpart "@" domain). In that case the lower-cased raw value is
//     returned as is. This is common in practice, message-ids often do not
//     follow the localpart @ domain syntax;
//   - an error wrapping errBadMessageID when the value is not a message-id at
//     all: missing "<", missing ">", unacceptable data after ">", or nothing
//     between the brackets.
func MessageIDCanonical(s string) (string, bool, error) {
	// ../rfc/5322:1383

	trimmed := strings.TrimSpace(s)
	if len(trimmed) == 0 || trimmed[0] != '<' {
		return "", false, fmt.Errorf("%w: missing <", errBadMessageID)
	}

	// Seen in practice: Message-ID: <valid@valid.example> (added by postmaster@some.example)
	// That does not look valid, but trailing data is tolerated when it starts
	// with a space and Pedantic mode is off.
	id, trailing, closed := strings.Cut(trimmed[1:], ">")
	switch {
	case !closed:
		return "", false, fmt.Errorf("%w: missing >", errBadMessageID)
	case trailing != "" && (Pedantic || trailing[0] != ' '):
		return "", false, fmt.Errorf("%w: missing >", errBadMessageID)
	}

	// Canonical form is lower-case.
	id = strings.ToLower(id)
	if len(id) == 0 {
		return "", false, fmt.Errorf("%w: empty message-id", errBadMessageID)
	}

	parsed, err := smtp.ParseAddress(id)
	if err != nil {
		// Not an address. Common causes:
		// 1. underscore in hostname.
		// 2. ip literal instead of domain.
		// 3. two @'s, perhaps intended as time-separator
		// 4. no @'s, so no domain/host
		// Hand back the raw value and flag it as such.
		return id, true, nil
	}

	// Take the domain from the input text (everything after the last "@")
	// rather than from the parsed address, to preserve the unicode-ness of the
	// domain as it was written.
	domain := id[strings.LastIndex(id, "@")+1:]
	return parsed.Localpart.String() + "@" + domain, false, nil
}