webmail: recognize q/b-word-encoded filenames in attachments in messages

according to the rfcs (2231 and 2047), non-ascii filenames in content-type
and content-disposition headers should be encoded like this:

	Content-Type: text/plain; name*=utf-8''hi%E2%98%BA.txt
	Content-Disposition: attachment; filename*=utf-8''hi%E2%98%BA.txt

and that is what the Go standard library mime.ParseMediaType and
mime.FormatMediaType parse and generate.

this is what thunderbird sends:

	Content-Type: text/plain; charset=UTF-8; name="=?UTF-8?B?aGnimLoudHh0?="
	Content-Disposition: attachment; filename*=UTF-8''%68%69%E2%98%BA%2E%74%78%74

(thunderbird will also correctly split long filenames over multiple parameters,
named "filename*0*", "filename*1*", etc.)

this is what gmail sends:

	Content-Type: text/plain; charset="US-ASCII"; name="=?UTF-8?B?aGnimLoudHh0?="
	Content-Disposition: attachment; filename="=?UTF-8?B?aGnimLoudHh0?="

i cannot find a specification that allows q/b-word-encoded values in the "name"
and "filename" parameters. until i do, we try decoding them unless in pedantic mode.

we didn't generate correctly encoded filenames yet, this commit also fixes that.

for issue #82 by mattfbacon, thanks for reporting!
This commit is contained in:
Mechiel Lukkien 2023-10-14 14:14:13 +02:00
parent 3e53343d21
commit a40f5a5eb3
No known key found for this signature in database
4 changed files with 74 additions and 45 deletions

View file

@ -524,9 +524,11 @@ func (w Webmail) MessageSubmit(ctx context.Context, m SubmitMessage) {
header("Content-Type", fmt.Sprintf(`multipart/mixed; boundary="%s"`, mp.Boundary()))
line(xmsgw)
ct := mime.FormatMediaType("text/plain", map[string]string{"charset": charset})
textHdr := textproto.MIMEHeader{}
textHdr.Set("Content-Type", "text/plain; charset="+escapeParam(charset))
textHdr.Set("Content-Type", ct)
textHdr.Set("Content-Transfer-Encoding", cte)
textp, err := mp.CreatePart(textHdr)
xcheckf(ctx, err, "adding text part to message")
_, err = textp.Write([]byte(text))
@ -534,13 +536,11 @@ func (w Webmail) MessageSubmit(ctx context.Context, m SubmitMessage) {
xaddPart := func(ct, filename string) io.Writer {
ahdr := textproto.MIMEHeader{}
if ct == "" {
ct = "application/octet-stream"
}
ct += fmt.Sprintf(`; name="%s"`, filename)
cd := mime.FormatMediaType("attachment", map[string]string{"filename": filename})
ahdr.Set("Content-Type", ct)
ahdr.Set("Content-Transfer-Encoding", "base64")
ahdr.Set("Content-Disposition", fmt.Sprintf(`attachment; filename=%s`, escapeParam(filename)))
ahdr.Set("Content-Disposition", cd)
ap, err := mp.CreatePart(ahdr)
xcheckf(ctx, err, "adding attachment part to message")
return ap
@ -587,12 +587,21 @@ func (w Webmail) MessageSubmit(ctx context.Context, m SubmitMessage) {
}
ct := strings.TrimSuffix(t[0], "base64")
ct = strings.TrimSuffix(ct, ";")
if ct == "" {
ct = "application/octet-stream"
}
filename := a.Filename
if filename == "" {
filename = "unnamed.bin"
}
params := map[string]string{"name": filename}
ct = mime.FormatMediaType(ct, params)
// Ensure base64 is valid, then we'll write the original string.
_, err := io.Copy(io.Discard, base64.NewDecoder(base64.StdEncoding, strings.NewReader(t[1])))
xcheckuserf(ctx, err, "parsing attachment as base64")
xaddAttachmentBase64(ct, a.Filename, []byte(t[1]))
xaddAttachmentBase64(ct, filename, []byte(t[1]))
}
if len(m.ForwardAttachments.Paths) > 0 {
@ -617,14 +626,16 @@ func (w Webmail) MessageSubmit(ctx context.Context, m SubmitMessage) {
ap = ap.Parts[xp]
}
filename := ap.ContentTypeParams["name"]
filename := tryDecodeParam(log, ap.ContentTypeParams["name"])
if filename == "" {
filename = "unnamed.bin"
}
ct := strings.ToLower(ap.MediaType + "/" + ap.MediaSubType)
params := map[string]string{"name": filename}
if pcharset := ap.ContentTypeParams["charset"]; pcharset != "" {
ct += "; charset=" + escapeParam(pcharset)
params["charset"] = pcharset
}
ct := strings.ToLower(ap.MediaType + "/" + ap.MediaSubType)
ct = mime.FormatMediaType(ct, params)
xaddAttachment(ct, filename, ap.Reader())
}
})
@ -634,7 +645,8 @@ func (w Webmail) MessageSubmit(ctx context.Context, m SubmitMessage) {
err = mp.Close()
xcheckf(ctx, err, "writing mime multipart")
} else {
header("Content-Type", "text/plain; charset="+escapeParam(charset))
ct := mime.FormatMediaType("text/plain", map[string]string{"charset": charset})
header("Content-Type", ct)
header("Content-Transfer-Encoding", cte)
line(xmsgw)
xmsgw.Write([]byte(text))

View file

@ -12,12 +12,46 @@ import (
"github.com/mjl-/mox/message"
"github.com/mjl-/mox/mlog"
"github.com/mjl-/mox/moxio"
"github.com/mjl-/mox/moxvar"
"github.com/mjl-/mox/smtp"
"github.com/mjl-/mox/store"
)
// todo: we should have all needed information for messageItem in store.Message (perhaps some data in message.Part) for fast access, not having to parse the on-disk message file.
// tryDecodeParam attempts to q/b-word-decode name, a value taken from the
// Content-Type "name" parameter or the Content-Disposition "filename" parameter.
//
// RFC 2231 specifies an encoding for non-ascii values in mime header parameters,
// and that is the only mechanism Go's mime.ParseMediaType parses. For
// standards-compliant headers, such as
// "filename*0*=UTF-8''%...; filename*1*=%...", the value we get here is already
// decoded properly. In practice, senders often just q/b-word encode the value
// instead: Thunderbird and gmail.com do so for the Content-Type "name" parameter,
// and gmail.com also for the Content-Disposition "filename" parameter (where
// Thunderbird uses the RFC 2231 encoding).
//
// A value is only considered for decoding if it carries a q/b-word marker: a "=?"
// prefix or a "?=" suffix. This only causes trouble for filenames that literally
// have such a prefix or suffix. In pedantic mode, and when decoding fails, name
// is returned unchanged.
func tryDecodeParam(log *mlog.Log, name string) string {
	// Without either marker (which includes the empty string) there is nothing to decode.
	marked := strings.HasPrefix(name, "=?") || strings.HasSuffix(name, "?=")
	if !marked {
		return name
	}

	// todo: find where this is allowed. it seems quite common. perhaps we should remove the pedantic check?
	if moxvar.Pedantic {
		log.Debug("attachment contains rfc2047 q/b-word-encoded mime parameter instead of rfc2231-encoded", mlog.Field("name", name))
		return name
	}

	var decoder mime.WordDecoder
	decoded, err := decoder.DecodeHeader(name)
	if err == nil {
		return decoded
	}
	// Best effort: keep the raw value when it cannot be decoded.
	log.Debugx("q/b-word decoding mime parameter", err, mlog.Field("name", name))
	return name
}
// todo: mime.FormatMediaType does not wrap long lines. should do it ourselves, and split header into several parts (if commonly supported).
func messageItem(log *mlog.Log, m store.Message, state *msgState) (MessageItem, error) {
pm, err := parsedMessage(log, m, state, false, true)
if err != nil {
@ -212,10 +246,9 @@ func parsedMessage(log *mlog.Log, m store.Message, state *msgState, full, msgite
disp, params, err := mime.ParseMediaType(cp)
log.Check(err, "parsing content-disposition", mlog.Field("cp", cp))
if strings.EqualFold(disp, "attachment") {
// todo: should we be decoding these names? i've seen messages with regular q-word style mime-encoding, not the one specified in ../rfc/2231:210
name := p.ContentTypeParams["name"]
name := tryDecodeParam(log, p.ContentTypeParams["name"])
if name == "" {
name = params["filename"]
name = tryDecodeParam(log, params["filename"])
}
pm.attachments = append(pm.attachments, Attachment{path, name, p})
return
@ -285,8 +318,8 @@ func parsedMessage(log *mlog.Log, m store.Message, state *msgState, full, msgite
return
}
name, ok := p.ContentTypeParams["name"]
if !ok && (full || msgitem) {
name := tryDecodeParam(log, p.ContentTypeParams["name"])
if name == "" && (full || msgitem) {
// todo: should have this, and perhaps all content-* headers, preparsed in message.Part?
h, err := p.Header()
log.Check(err, "parsing attachment headers", mlog.Field("msgid", m.ID))
@ -294,7 +327,7 @@ func parsedMessage(log *mlog.Log, m store.Message, state *msgState, full, msgite
if cp != "" {
_, params, err := mime.ParseMediaType(cp)
log.Check(err, "parsing content-disposition", mlog.Field("cp", cp))
name = params["filename"]
name = tryDecodeParam(log, params["filename"])
}
}
pm.attachments = append(pm.attachments, Attachment{path, name, p})

View file

@ -1786,7 +1786,7 @@ func attachmentTypes(log *mlog.Log, m store.Message, state *msgState) (map[Attac
mt := strings.ToLower(a.Part.MediaType + "/" + a.Part.MediaSubType)
if t, ok := attachmentMimetypes[mt]; ok {
types[t] = true
} else if ext := filepath.Ext(a.Part.ContentTypeParams["name"]); ext != "" {
} else if ext := filepath.Ext(tryDecodeParam(log, a.Part.ContentTypeParams["name"])); ext != "" {
if t, ok := attachmentExtensions[strings.ToLower(ext)]; ok {
types[t] = true
} else {

View file

@ -317,25 +317,6 @@ func serveContentFallback(log *mlog.Log, w http.ResponseWriter, r *http.Request,
http.ServeContent(w, r, "", fallbackMtime(log), bytes.NewReader(fallback))
}
// escapeParam returns s in a form usable as the value of a mime content header
// parameter, such as a content-type charset or content-disposition filename.
//
// A non-empty s consisting only of ASCII letters, digits, '-', '_' and '.' is
// returned as is. Any other value, including the empty string, is returned as a
// double-quoted string in which backslashes and double quotes are
// backslash-escaped.
func escapeParam(s string) string {
	// todo: follow ../rfc/2183?

	// special reports whether r forces the value to be quoted.
	special := func(r rune) bool {
		switch {
		case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', '0' <= r && r <= '9':
			return false
		case r == '-', r == '_', r == '.':
			return false
		}
		return true
	}

	if s != "" && strings.IndexFunc(s, special) < 0 {
		return s
	}

	escaped := strings.NewReplacer(`\`, `\\`, `"`, `\"`).Replace(s)
	return `"` + escaped + `"`
}
// Handler returns a handler for the webmail endpoints, customized for the max
// message size coming from the listener.
func Handler(maxMessageSize int64) func(w http.ResponseWriter, r *http.Request) {
@ -593,19 +574,20 @@ func handle(apiHandler http.Handler, w http.ResponseWriter, r *http.Request) {
subjectSlug = s
}
filename := fmt.Sprintf("email-%d-attachments-%s%s.zip", m.ID, m.Received.Format("20060102-150405"), subjectSlug)
h.Set("Content-Disposition", fmt.Sprintf(`attachment; filename=%s`, escapeParam(filename)))
cd := mime.FormatMediaType("attachment", map[string]string{"filename": filename})
h.Set("Content-Disposition", cd)
zw := zip.NewWriter(w)
names := map[string]bool{}
for _, a := range mi.Attachments {
ap := a.Part
name := ap.ContentTypeParams["name"]
name := tryDecodeParam(log, ap.ContentTypeParams["name"])
if name == "" {
// We don't check errors, this is all best-effort.
h, _ := ap.Header()
disposition := h.Get("Content-Disposition")
_, params, _ := mime.ParseMediaType(disposition)
name = params["filename"]
name = tryDecodeParam(log, params["filename"])
}
if name != "" {
name = filepath.Base(name)
@ -697,10 +679,11 @@ func handle(apiHandler http.Handler, w http.ResponseWriter, r *http.Request) {
// not, there is not much we could do better...
headers(false, false, false)
ct := "text/plain"
params := map[string]string{}
if charset := p.ContentTypeParams["charset"]; charset != "" {
ct += fmt.Sprintf("; charset=%s", escapeParam(charset))
params["charset"] = charset
}
h.Set("Content-Type", ct)
h.Set("Content-Type", mime.FormatMediaType(ct, params))
h.Set("Cache-Control", "no-cache, max-age=0")
_, err := io.Copy(w, &moxio.AtReader{R: msgr})
@ -892,18 +875,19 @@ func handle(apiHandler http.Handler, w http.ResponseWriter, r *http.Request) {
h.Set("Content-Type", ct)
h.Set("Cache-Control", "no-cache, max-age=0")
if t[1] == "download" {
name := ap.ContentTypeParams["name"]
name := tryDecodeParam(log, ap.ContentTypeParams["name"])
if name == "" {
// We don't check errors, this is all best-effort.
h, _ := ap.Header()
disposition := h.Get("Content-Disposition")
_, params, _ := mime.ParseMediaType(disposition)
name = params["filename"]
name = tryDecodeParam(log, params["filename"])
}
if name == "" {
name = "attachment.bin"
}
h.Set("Content-Disposition", fmt.Sprintf(`attachment; filename=%s`, escapeParam(name)))
cd := mime.FormatMediaType("attachment", map[string]string{"filename": name})
h.Set("Content-Disposition", cd)
}
_, err := io.Copy(w, ap.Reader())