mirror of
https://github.com/mjl-/mox.git
synced 2025-04-02 17:33:28 +03:00
Subject mime decode functions and test
This commit is contained in:
parent
e1eb8d47e9
commit
e7958936e4
2 changed files with 98 additions and 0 deletions
|
@ -2,13 +2,21 @@ package store
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"io"
|
||||
"regexp"
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/mjl-/mox/message"
|
||||
"github.com/mjl-/mox/mlog"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/japanese"
|
||||
encUnicode "golang.org/x/text/encoding/unicode"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// WordSearch holds context for a search, with scratch buffers to prevent
|
||||
|
@ -193,3 +201,55 @@ func toLower(buf []byte) []byte {
|
|||
}
|
||||
return r
|
||||
}
|
||||
|
||||
func decodeRFC2047(encoded string) (string, error) {
|
||||
// match e.g. =?(iso-2022-jp)?(B)?(Rnc6...)?=
|
||||
r := regexp.MustCompile(`=\?([^?]+)\?([BQ])\?([^?]+)\?=`)
|
||||
matches := r.FindAllStringSubmatch(encoded, -1)
|
||||
|
||||
if len(matches) == 0 { // no match. Looks ASCII.
|
||||
return encoded, nil
|
||||
}
|
||||
|
||||
var decodedStrings []string
|
||||
for _, match := range matches {
|
||||
charset := match[1]
|
||||
encodingName := match[2]
|
||||
encodedText := match[3]
|
||||
|
||||
// Decode Base64 or Quoted-Printable
|
||||
var decodedBytes []byte
|
||||
var err error
|
||||
if encodingName == "B" {
|
||||
decodedBytes, err = base64.StdEncoding.DecodeString(encodedText)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("Base64 decode error: %w", err)
|
||||
}
|
||||
} else {
|
||||
return "", fmt.Errorf("not supported encoding: %s", encodingName)
|
||||
}
|
||||
|
||||
// Select charset
|
||||
var enc encoding.Encoding
|
||||
switch strings.ToLower(charset) {
|
||||
case "iso-2022-jp":
|
||||
enc = japanese.ISO2022JP
|
||||
case "utf-8":
|
||||
enc = encUnicode.UTF8
|
||||
default:
|
||||
return "", fmt.Errorf("not supported charset: %s", charset)
|
||||
}
|
||||
|
||||
// Decode with charset
|
||||
reader := transform.NewReader(strings.NewReader(string(decodedBytes)), enc.NewDecoder())
|
||||
decodedText, err := io.ReadAll(reader)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
decodedStrings = append(decodedStrings, string(decodedText))
|
||||
}
|
||||
|
||||
// Concat multiple strings
|
||||
return strings.Join(decodedStrings, ""), nil
|
||||
}
|
||||
|
|
38
store/search_test.go
Normal file
38
store/search_test.go
Normal file
|
@ -0,0 +1,38 @@
|
|||
package store
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestSubjectMatch(t *testing.T) {
|
||||
// Auto detect subject text encoding and decode
|
||||
|
||||
//log := mlog.New("search", nil)
|
||||
|
||||
originalSubject := `テストテキスト Abc 123...`
|
||||
asciiSubject := "test text Abc 123..."
|
||||
|
||||
encodedSubjectUTF8 := `=?UTF-8?B?44OG44K544OI44OG44Kt44K544OIIEFiYyAxMjMuLi4=?=`
|
||||
encodedSubjectISO2022 := `=?iso-2022-jp?B?GyRCJUYlOSVIJUYlLSU5JUgbKEIgQWJjIDEyMy4uLg==?=`
|
||||
encodedSubjectUTF8 = encodedSubjectUTF8 + " \n " + encodedSubjectUTF8
|
||||
encodedSubjectISO2022 = encodedSubjectISO2022 + " \n " + encodedSubjectISO2022
|
||||
originalSubject = originalSubject + originalSubject
|
||||
|
||||
encodedTexts := map[string]string{encodedSubjectUTF8: originalSubject, encodedSubjectISO2022: originalSubject, asciiSubject: asciiSubject}
|
||||
|
||||
for encodedSubject, originalSubject := range encodedTexts {
|
||||
|
||||
// Autodetect & decode
|
||||
decodedSubject, err := decodeRFC2047(encodedSubject)
|
||||
|
||||
fmt.Printf("decoded text:%s\n", decodedSubject)
|
||||
if err != nil {
|
||||
t.Fatalf("Decode error: %v", err)
|
||||
}
|
||||
|
||||
if originalSubject != decodedSubject {
|
||||
t.Fatalf("Decode mismatch %s != %s", originalSubject, decodedSubject)
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue