Match Multipart mail

This commit is contained in:
mizho 2024-10-19 19:09:09 +09:00
parent 9768cb32ed
commit 21ed331c7d
3 changed files with 70 additions and 3 deletions

View file

@ -245,6 +245,10 @@ func (p *Part) String() string {
return fmt.Sprintf("&Part{%s/%s offsets %d/%d/%d/%d lines %d decodedsize %d next %d last %d bound %q parts %v}", p.MediaType, p.MediaSubType, p.BoundaryOffset, p.HeaderOffset, p.BodyOffset, p.EndOffset, p.RawLineCount, p.DecodedSize, p.nextBoundOffset, p.lastBoundOffset, p.bound, p.Parts)
}
// GetBound returns the part's bound field converted to a string.
func (p *Part) GetBound() string {
	bound := string(p.bound)
	return bound
}
// newPart parses a new part, which can be the top-level message.
// offset is the bound offset for parts, and the start of message for top-level messages. parent indicates if this is a top-level message or sub-part.
// If an error occurs, p's exported values can still be relevant. EnsurePart uses these values.

View file

@ -91,11 +91,26 @@ func (ws WordSearch) matchPart(log mlog.Log, p *message.Part, headerToo bool, se
}
if len(p.Parts) == 0 {
var tp io.Reader
if p.MediaType != "TEXT" {
// todo: for other types we could try to find a library for parsing and search in there too.
return false, nil
if p.MediaType == "MULTIPART" {
// Decode and make io.Reader
// todo: avoid to load all content
content, err := io.ReadAll(p.RawReader())
if err != nil {
return false, err
}
tp, err = decodeMultiPart(string(content), p.GetBound())
if err != nil {
return false, err
}
} else {
// todo: for other types we could try to find a library for parsing and search in there too.
return false, nil
}
} else {
tp = p.ReaderUTF8OrBinary()
}
tp := p.ReaderUTF8OrBinary()
// todo: for html and perhaps other types, we could try to parse as text and filter on the text.
miss, err := ws.searchReader(log, tp, seen)
if miss || err != nil || ws.isQuickHit(seen) {

View file

@ -2,7 +2,14 @@ package store
import (
"fmt"
"io"
"log/slog"
"os"
"strings"
"testing"
"github.com/mjl-/mox/message"
"github.com/mjl-/mox/mlog"
)
func TestSubjectMatch(t *testing.T) {
@ -36,3 +43,44 @@ func TestSubjectMatch(t *testing.T) {
}
}
}
// TestMultipartMailDecode parses a raw multipart message from a fixture file
// and checks that a word search for the word stored in a second fixture file
// matches the parsed message.
func TestMultipartMailDecode(t *testing.T) {
	log := mlog.New("search", nil)

	// Fixture files, resolved relative to the directory the test runs in.
	filePath := "../../data/mail_raw.txt" // multipart mail raw data
	wordFilePath := "../../data/word.txt" // word to search for in the mail

	msgFile, err := os.Open(filePath)
	if err != nil {
		t.Fatalf("opening message file %q: %v", filePath, err)
	}
	defer msgFile.Close()

	// Load the search word; surrounding whitespace (e.g. a trailing newline) is
	// not part of the word.
	wordFile, err := os.Open(wordFilePath)
	if err != nil {
		t.Fatalf("opening search word file %q: %v", wordFilePath, err)
	}
	defer wordFile.Close()
	tmp, err := io.ReadAll(wordFile)
	if err != nil {
		t.Fatalf("reading search word from %q: %v", wordFilePath, err)
	}
	searchWord := strings.TrimSpace(string(tmp))
	if searchWord == "" {
		// An empty word would not exercise the multipart matching at all.
		t.Fatalf("search word file %q contains no word", wordFilePath)
	}

	// Parse the message.
	mr := FileMsgReader([]byte{}, msgFile)
	p, err := message.Parse(log.Logger, false, mr)
	if err != nil {
		t.Fatalf("parsing message %q: %v", filePath, err)
	}

	// Match. A search error is reported separately from a plain miss, so a
	// failure in reading/decoding is not mistaken for "word not found".
	ws := PrepareWordSearch([]string{searchWord}, []string{})
	ok, err := ws.MatchPart(log, &p, true)
	if err != nil {
		t.Fatalf("matching message against word %q: %v", searchWord, err)
	}
	if !ok {
		t.Fatalf("no match for search word %q", searchWord)
	}
	log.Debug("Check match", slog.String("word", searchWord), slog.Bool("ok", ok))
}