mirror of
https://github.com/mjl-/mox.git
synced 2025-01-27 06:55:54 +03:00
Use Go's mail.ReadMessage instead of textproto's Reader.ReadMIMEHeader, and decode RFC 2047 encoded words (charsets) in the Subject header when parsing a message
As the recent Go patch release showed, textproto's Reader.ReadMIMEHeader parses HTTP headers, and does so strictly: too strictly for email message headers. Valid email headers, e.g. with a slash in the name, were rejected by it. The functions in Go's mail package handle RFC 2047 charset-encoded words in address headers. They can do that because we tell them those headers are addresses, where such encodings are valid. But that encoding isn't valid in all places in all headers, so for other cases, such as the Subject header, we must decode explicitly. With this change, some messages that could not be parsed before (because their headers were rejected as invalid) can now be parsed, and the subject of parsed messages can now be properly decoded. You can run "mox ensureparsed -all <account>" (while mox isn't running) to force reparsing of all messages. Mox needs a subcommand to reparse while running... This wasn't much of a problem before, because IMAP email clients typically do their own parsing (of headers, including subject decoding) again. But with the upcoming webmail client, any wrong parsing quickly reveals itself.
This commit is contained in:
parent
3ef1f31359
commit
19550cc041
1 changed files with 34 additions and 2 deletions
|
@ -25,6 +25,8 @@ import (
|
|||
"strings"
|
||||
"time"
|
||||
|
||||
"golang.org/x/text/encoding/ianaindex"
|
||||
|
||||
"github.com/mjl-/mox/mlog"
|
||||
"github.com/mjl-/mox/moxio"
|
||||
"github.com/mjl-/mox/moxvar"
|
||||
|
@ -352,7 +354,32 @@ func (p *Part) HeaderReader() io.Reader {
|
|||
}
|
||||
|
||||
// parseHeader reads an email header section from r and returns it as a
// textproto.MIMEHeader. On a read/parse error it returns the zero (nil) header
// together with that error.
func parseHeader(r io.Reader) (textproto.MIMEHeader, error) {
|
||||
return textproto.NewReader(bufio.NewReader(r)).ReadMIMEHeader()
|
||||
// We read using mail.ReadMessage instead of textproto.Reader.ReadMIMEHeader because the
|
||||
// first handles email messages properly, while the second only works for HTTP
|
||||
// headers and rejects valid email headers (e.g. names containing a slash).
|
||||
var zero textproto.MIMEHeader
|
||||
msg, err := mail.ReadMessage(bufio.NewReader(r))
|
||||
if err != nil {
|
||||
return zero, err
|
||||
|
|
||||
}
|
||||
// Only the parsed header is used; it is converted from mail.Header to the
// textproto.MIMEHeader type this function returns.
return textproto.MIMEHeader(msg.Header), nil
|
||||
}
|
||||
|
||||
// wordDecoder decodes RFC 2047 encoded words in header values (parseEnvelope
// uses it for the Subject header). Its CharsetReader adds support for charsets
// beyond plain ASCII/UTF-8 by looking them up through the ianaindex package.
var wordDecoder = mime.WordDecoder{
|
||||
CharsetReader: func(charset string, r io.Reader) (io.Reader, error) {
|
||||
// Charset names are compared case-insensitively. An empty charset, us-ascii and
// utf-8 need no conversion, so the input reader is returned unchanged.
switch strings.ToLower(charset) {
|
||||
case "", "us-ascii", "utf-8":
|
||||
return r, nil
|
||||
}
|
||||
// Try the MIME index first and fall back to the IANA index. Lookup errors are
// deliberately ignored: only a nil encoding is treated as "not found".
enc, _ := ianaindex.MIME.Encoding(charset)
|
||||
if enc == nil {
|
||||
enc, _ = ianaindex.IANA.Encoding(charset)
|
||||
}
|
||||
if enc == nil {
|
||||
return r, fmt.Errorf("unknown charset %q", charset)
|
||||
}
|
||||
// Wrap r so that bytes read from it are decoded by the encoding that was found.
return enc.NewDecoder().Reader(r), nil
|
||||
},
|
||||
}
|
||||
|
||||
func parseEnvelope(h mail.Header) (*Envelope, error) {
|
||||
|
@ -369,9 +396,14 @@ func parseEnvelope(h mail.Header) (*Envelope, error) {
|
|||
date = time.Unix(date.Unix(), 0).UTC()
|
||||
}
|
||||
|
||||
subject := h.Get("Subject")
|
||||
if s, err := wordDecoder.DecodeHeader(subject); err == nil {
|
||||
subject = s
|
||||
}
|
||||
|
||||
env := &Envelope{
|
||||
date,
|
||||
h.Get("Subject"),
|
||||
subject,
|
||||
parseAddressList(h, "from"),
|
||||
parseAddressList(h, "sender"),
|
||||
parseAddressList(h, "reply-to"),
|
||||
|
|
Loading…
Reference in a new issue