1
1
Fork 0
mirror of https://github.com/mjl-/mox.git synced 2025-04-02 17:33:28 +03:00

Subject mime decode functions and test

This commit is contained in:
mizho 2024-10-16 01:05:22 +09:00
parent e1eb8d47e9
commit e7958936e4
2 changed files with 98 additions and 0 deletions

View file

@ -2,13 +2,21 @@ package store
import (
"bytes"
"encoding/base64"
"fmt"
"io"
"regexp"
"strings"
"unicode"
"unicode/utf8"
"github.com/mjl-/mox/message"
"github.com/mjl-/mox/mlog"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/japanese"
encUnicode "golang.org/x/text/encoding/unicode"
"golang.org/x/text/transform"
)
// WordSearch holds context for a search, with scratch buffers to prevent
@ -193,3 +201,55 @@ func toLower(buf []byte) []byte {
}
return r
}
// rfc2047Word matches one RFC 2047 encoded-word, e.g.
// =?(iso-2022-jp)?(B)?(Rnc6...)?=, capturing charset, encoding letter and
// encoded text. The encoding letter is accepted in either case. It is compiled
// once here so decodeRFC2047 does not recompile it on every call.
var rfc2047Word = regexp.MustCompile(`=\?([^?]+)\?([BbQq])\?([^?]+)\?=`)

// decodeRFC2047 decodes the RFC 2047 encoded-words found in encoded and
// returns the resulting UTF-8 text.
//
// Text outside encoded-words is copied to the result unchanged, except that
// whitespace-only text between two adjacent encoded-words is dropped, as
// described in RFC 2047 section 6.2. If encoded contains no encoded-word it is
// returned as is.
//
// Both the "B" (base64) and "Q" encodings are handled. Supported charsets are
// iso-2022-jp and utf-8, compared case-insensitively. An error is returned for
// any other charset and for encoded text that cannot be decoded; the returned
// string is empty in that case.
func decodeRFC2047(encoded string) (string, error) {
	locs := rfc2047Word.FindAllStringSubmatchIndex(encoded, -1)
	if len(locs) == 0 { // no match. Looks ASCII.
		return encoded, nil
	}

	var out strings.Builder
	prev := 0 // Offset just past the previous encoded-word.
	for i, loc := range locs {
		// Keep literal text such as a "Re: " prefix. Only whitespace that
		// merely separates two encoded-words is discarded.
		gap := encoded[prev:loc[0]]
		if i == 0 || strings.Trim(gap, " \t\r\n") != "" {
			out.WriteString(gap)
		}
		prev = loc[1]

		charset := encoded[loc[2]:loc[3]]
		encodingName := encoded[loc[4]:loc[5]]
		encodedText := encoded[loc[6]:loc[7]]

		// Undo the transfer encoding (base64 or Q).
		var decodedBytes []byte
		var err error
		switch encodingName {
		case "B", "b":
			decodedBytes, err = base64.StdEncoding.DecodeString(encodedText)
			if err != nil {
				return "", fmt.Errorf("Base64 decode error: %w", err)
			}
		case "Q", "q":
			decodedBytes, err = decodeRFC2047Q(encodedText)
			if err != nil {
				return "", fmt.Errorf("Q decode error: %w", err)
			}
		default:
			// Not reachable with the current pattern; kept as a guard in case
			// the pattern is widened.
			return "", fmt.Errorf("not supported encoding: %s", encodingName)
		}

		// Select charset
		var enc encoding.Encoding
		switch strings.ToLower(charset) {
		case "iso-2022-jp":
			enc = japanese.ISO2022JP
		case "utf-8":
			enc = encUnicode.UTF8
		default:
			return "", fmt.Errorf("not supported charset: %s", charset)
		}

		// Convert the charset bytes to UTF-8.
		reader := transform.NewReader(bytes.NewReader(decodedBytes), enc.NewDecoder())
		decodedText, err := io.ReadAll(reader)
		if err != nil {
			return "", err
		}
		out.Write(decodedText)
	}
	// Literal text following the last encoded-word.
	out.WriteString(encoded[prev:])

	return out.String(), nil
}

// decodeRFC2047Q decodes the "Q" encoding of RFC 2047 section 4.2: "_" stands
// for a space, "=XX" for the byte with hexadecimal value XX, and any other byte
// for itself. It returns an error for a truncated or non-hexadecimal escape.
func decodeRFC2047Q(text string) ([]byte, error) {
	buf := make([]byte, 0, len(text))
	for i := 0; i < len(text); i++ {
		switch c := text[i]; c {
		case '_':
			buf = append(buf, ' ')
		case '=':
			if i+2 >= len(text) {
				return nil, fmt.Errorf("truncated escape at offset %d", i)
			}
			hi, okHi := rfc2047Unhex(text[i+1])
			lo, okLo := rfc2047Unhex(text[i+2])
			if !okHi || !okLo {
				return nil, fmt.Errorf("invalid escape %q at offset %d", text[i:i+3], i)
			}
			buf = append(buf, hi<<4|lo)
			i += 2 // Skip the two hex digits just consumed.
		default:
			buf = append(buf, c)
		}
	}
	return buf, nil
}

// rfc2047Unhex returns the value of the hexadecimal digit c, accepting both
// upper and lower case, and reports whether c was a valid digit.
func rfc2047Unhex(c byte) (byte, bool) {
	switch {
	case '0' <= c && c <= '9':
		return c - '0', true
	case 'A' <= c && c <= 'F':
		return c - 'A' + 10, true
	case 'a' <= c && c <= 'f':
		return c - 'a' + 10, true
	}
	return 0, false
}

38
store/search_test.go Normal file
View file

@ -0,0 +1,38 @@
package store
import (
"fmt"
"testing"
)
// TestSubjectMatch checks that decodeRFC2047 decodes base64 encoded-words in
// the UTF-8 and ISO-2022-JP charsets to the expected text, and that a plain
// ASCII subject is returned unchanged.
func TestSubjectMatch(t *testing.T) {
	const (
		plain       = `テストテキスト Abc 123...`
		ascii       = "test text Abc 123..."
		wordUTF8    = `=?UTF-8?B?44OG44K544OI44OG44Kt44K544OIIEFiYyAxMjMuLi4=?=`
		wordISO2022 = `=?iso-2022-jp?B?GyRCJUYlOSVIJUYlLSU5JUgbKEIgQWJjIDEyMy4uLg==?=`
	)

	// folded returns two copies of word separated by whitespace containing a
	// newline. The expected decoding is the two decoded copies with nothing
	// in between.
	folded := func(word string) string {
		return fmt.Sprintf("%s \n %s", word, word)
	}

	tests := []struct {
		name    string
		subject string
		want    string
	}{
		{"utf-8 base64", folded(wordUTF8), plain + plain},
		{"iso-2022-jp base64", folded(wordISO2022), plain + plain},
		{"plain ascii", ascii, ascii},
	}

	// A slice gives a fixed case order, and subtests let every case run and
	// report on its own instead of stopping at the first failure.
	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			got, err := decodeRFC2047(tc.subject)
			if err != nil {
				t.Fatalf("Decode error: %v", err)
			}
			// Only shown with -v or on failure, unlike printing to stdout.
			t.Logf("decoded text:%s", got)
			if got != tc.want {
				t.Errorf("Decode mismatch %s != %s", tc.want, got)
			}
		})
	}
}