decode TransferEncoding, Charset, Multipart functions

This commit is contained in:
mizho 2024-10-19 19:07:58 +09:00
parent 45cca10d6a
commit 9768cb32ed

View file

@ -215,40 +215,14 @@ func decodeRFC2047(encoded string) (string, error) {
var decodedStrings []string
for _, match := range matches {
charset := match[1]
encodingName := strings.ToUpper(match[2])
encodingName := match[2]
encodedText := match[3]
// Decode Base64 or Quoted-Printable
var decodedBytes []byte
var err error
switch encodingName {
case "B": // Base64
decodedBytes, err = base64.StdEncoding.DecodeString(encodedText)
if err != nil {
return encoded, fmt.Errorf("Base64 decode error: %w", err)
}
case "Q": // Quoted-Printable
decodedBytes, err = io.ReadAll(quotedprintable.NewReader(strings.NewReader(encodedText)))
if err != nil {
return "", fmt.Errorf("Quoted-Printable decode error: %w", err)
}
default:
return encoded, fmt.Errorf("not supported encoding: %s", encodingName)
reader, err := decodeTransferEncodeAndCharset(encodingName, charset, encodedText)
if err != nil {
return encoded, err
}
// Select charset
var enc encoding.Encoding
switch strings.ToLower(charset) {
case "iso-2022-jp":
enc = japanese.ISO2022JP
case "utf-8":
enc = encUnicode.UTF8
default:
return encoded, fmt.Errorf("not supported charset: %s", charset)
}
// Decode with charset
reader := transform.NewReader(strings.NewReader(string(decodedBytes)), enc.NewDecoder())
decodedText, err := io.ReadAll(reader)
if err != nil {
return encoded, err
@ -260,3 +234,116 @@ func decodeRFC2047(encoded string) (string, error) {
// Concat multiple strings
return strings.Join(decodedStrings, ""), nil
}
func decodeTransferEncodeAndCharset(encodingName string, charset string, encodedText string) (io.Reader, error) {
decodedString, err := decodeTransferEncode(encodingName, encodedText)
if len(decodedString) == 0 && err != nil {
return nil, err
}
// try to decode even if unknown encoding
reader, err := decodeCharset(charset, decodedString)
if err != nil {
return nil, err
}
return reader, nil
}
// Decode Base64 or Quoted Printable
func decodeTransferEncode(encodingName string, encodedText string) (string, error) {
// Decode Base64 or Quoted-Printable
var decodedBytes []byte
var err error
switch strings.ToUpper(encodingName) {
case "B": // Base64
decodedBytes, err = base64.StdEncoding.DecodeString(encodedText)
if err != nil {
return string(decodedBytes), fmt.Errorf("Base64 decode error: %w", err)
}
case "Q": // Quoted-Printable
decodedBytes, err = io.ReadAll(quotedprintable.NewReader(strings.NewReader(encodedText)))
if err != nil {
return string(decodedBytes), fmt.Errorf("Quoted-Printable decode error: %w", err)
}
default:
return encodedText, fmt.Errorf("not supported encoding: %s", encodingName)
}
return string(decodedBytes), nil
}
func decodeCharset(charset string, decodedString string) (io.Reader, error) {
// Select charset
var enc encoding.Encoding
switch strings.ToLower(charset) {
case "iso-2022-jp":
enc = japanese.ISO2022JP
case "utf-8":
enc = encUnicode.UTF8
case "us-ascii":
return strings.NewReader(decodedString), nil
default:
return nil, fmt.Errorf("not supported charset: %s", charset)
}
// Decode with charset
reader := transform.NewReader(strings.NewReader(decodedString), enc.NewDecoder())
return reader, nil
}
func decodeMultiPart(body string, boundary string) (io.Reader, error) {
encPattern := `Content-Transfer-Encoding:\s+(\w+)`
charsetPattern := `charset="((?:\w|-)+)"`
// Regexp for MIME encode type & Charset match
encRe, err := regexp.Compile(encPattern)
if err != nil {
return nil, fmt.Errorf("error compiling regex:%v", err)
}
charsetRe, err := regexp.Compile(charsetPattern)
if err != nil {
return nil, fmt.Errorf("error compiling regex:%v", err)
}
// Split by boundary
parts := strings.Split(body, boundary)
var readers []io.Reader
// Make decoded io.Readers for each part
for _, part := range parts {
part = strings.TrimSpace(part)
if len(part) == 0 {
continue
}
// Extract MIME header and body
headerBody := strings.SplitN(part, "\r\n\r\n", 2)
if len(headerBody) < 2 {
// retry
headerBody = strings.SplitN(part, "\n\n", 2)
if len(headerBody) < 2 {
continue
}
}
mimeHeader := headerBody[0]
encodedBody := headerBody[1]
// Find encode types
encMatches := encRe.FindStringSubmatch(mimeHeader)
charsetMatches := charsetRe.FindStringSubmatch(mimeHeader)
// Decode
if len(encMatches) > 1 && len(charsetMatches) > 1 {
reader, err := decodeTransferEncodeAndCharset(encMatches[1][0:1], charsetMatches[1], encodedBody)
if err != nil {
return nil, err
}
readers = append(readers, reader)
} else {
return nil, fmt.Errorf("failed to match encoding and charset in:\n%s", mimeHeader)
}
}
return io.MultiReader(readers...), nil
}