Match Multipart mail

This commit is contained in:
mizho 2024-10-19 19:09:09 +09:00
parent 9768cb32ed
commit 21ed331c7d
3 changed files with 70 additions and 3 deletions

View file

@ -245,6 +245,10 @@ func (p *Part) String() string {
return fmt.Sprintf("&Part{%s/%s offsets %d/%d/%d/%d lines %d decodedsize %d next %d last %d bound %q parts %v}", p.MediaType, p.MediaSubType, p.BoundaryOffset, p.HeaderOffset, p.BodyOffset, p.EndOffset, p.RawLineCount, p.DecodedSize, p.nextBoundOffset, p.lastBoundOffset, p.bound, p.Parts) return fmt.Sprintf("&Part{%s/%s offsets %d/%d/%d/%d lines %d decodedsize %d next %d last %d bound %q parts %v}", p.MediaType, p.MediaSubType, p.BoundaryOffset, p.HeaderOffset, p.BodyOffset, p.EndOffset, p.RawLineCount, p.DecodedSize, p.nextBoundOffset, p.lastBoundOffset, p.bound, p.Parts)
} }
// GetBound returns p's unexported bound field converted to a string.
// NOTE(review): presumably this is the multipart boundary delimiter recorded
// while parsing the part — confirm against the parser.
func (p *Part) GetBound() string {
	bound := string(p.bound)
	return bound
}
// newPart parses a new part, which can be the top-level message. // newPart parses a new part, which can be the top-level message.
// offset is the bound offset for parts, and the start of message for top-level messages. parent indicates if this is a top-level message or sub-part. // offset is the bound offset for parts, and the start of message for top-level messages. parent indicates if this is a top-level message or sub-part.
// If an error occurs, p's exported values can still be relevant. EnsurePart uses these values. // If an error occurs, p's exported values can still be relevant. EnsurePart uses these values.

View file

@ -91,11 +91,26 @@ func (ws WordSearch) matchPart(log mlog.Log, p *message.Part, headerToo bool, se
} }
if len(p.Parts) == 0 { if len(p.Parts) == 0 {
var tp io.Reader
if p.MediaType != "TEXT" { if p.MediaType != "TEXT" {
if p.MediaType == "MULTIPART" {
// Decode and make io.Reader
// todo: avoid to load all content
content, err := io.ReadAll(p.RawReader())
if err != nil {
return false, err
}
tp, err = decodeMultiPart(string(content), p.GetBound())
if err != nil {
return false, err
}
} else {
// todo: for other types we could try to find a library for parsing and search in there too. // todo: for other types we could try to find a library for parsing and search in there too.
return false, nil return false, nil
} }
tp := p.ReaderUTF8OrBinary() } else {
tp = p.ReaderUTF8OrBinary()
}
// todo: for html and perhaps other types, we could try to parse as text and filter on the text. // todo: for html and perhaps other types, we could try to parse as text and filter on the text.
miss, err := ws.searchReader(log, tp, seen) miss, err := ws.searchReader(log, tp, seen)
if miss || err != nil || ws.isQuickHit(seen) { if miss || err != nil || ws.isQuickHit(seen) {

View file

@ -2,7 +2,14 @@ package store
import ( import (
"fmt" "fmt"
"io"
"log/slog"
"os"
"strings"
"testing" "testing"
"github.com/mjl-/mox/message"
"github.com/mjl-/mox/mlog"
) )
func TestSubjectMatch(t *testing.T) { func TestSubjectMatch(t *testing.T) {
@ -36,3 +43,44 @@ func TestSubjectMatch(t *testing.T) {
} }
} }
} }
// TestMultipartMailDecode loads a raw message and a search word from fixture
// files, parses the message, and fails unless MatchPart reports a match for
// that word without returning an error.
func TestMultipartMailDecode(t *testing.T) {
	log := mlog.New("search", nil)

	// Fixture paths are resolved relative to the working directory of the test run.
	filePath := "../../data/mail_raw.txt" // multipart mail raw data
	wordFilePath := "../../data/word.txt"

	// Open the raw message.
	msgFile, err := os.Open(filePath)
	if err != nil {
		t.Fatalf("opening message file %q: %v", filePath, err)
	}
	defer msgFile.Close()

	// Load the word to search for.
	wordFile, err := os.Open(wordFilePath)
	if err != nil {
		t.Fatalf("opening word file %q: %v", wordFilePath, err)
	}
	defer wordFile.Close()
	tmp, err := io.ReadAll(wordFile)
	if err != nil {
		t.Fatalf("reading search word from %q: %v", wordFilePath, err)
	}
	searchWord := strings.TrimSpace(string(tmp))
	if searchWord == "" {
		// Without a word there is nothing meaningful to search for, so the
		// fixture itself is broken.
		t.Fatalf("search word file %q is empty", wordFilePath)
	}

	// Parse the message.
	mr := FileMsgReader([]byte{}, msgFile)
	p, err := message.Parse(log.Logger, false, mr)
	if err != nil {
		t.Fatalf("parsing message %q: %v", filePath, err)
	}

	// Match. The error is checked separately so a failing search is not
	// misreported as a plain non-match.
	ws := PrepareWordSearch([]string{searchWord}, []string{})
	ok, err := ws.MatchPart(log, &p, true)
	if err != nil {
		t.Fatalf("matching word %q: %v", searchWord, err)
	}
	if !ok {
		t.Fatalf("word %q not matched in %q", searchWord, filePath)
	}
	log.Debug("Check match", slog.String("word", searchWord), slog.Bool("ok", ok))
}