mirror of
https://github.com/mjl-/mox.git
synced 2024-12-26 08:23:48 +03:00
junk filter: fix adjusting word counts after train/untrain
after seeing some junk messages pass the filter, i investigated word counts in junkfilter.db. i had seen suspicious counts that were just around powers of two. did not make sense at the time. more investigating makes it clear: instead of setting new word counts when updating the junk filter, we were adding the new value to the current value (instead of just setting the new value). so the counts got approximately doubled when being updated. users should retrain the junk filter after this update using the "retrain" subcommand. this also adds logging for the hypothetical case where numbers would get decreased below zero (which would wrap around due to uints). and this fixes junk filter tests that were passing wrong parameters to train/untrain...
This commit is contained in:
parent
69a4995449
commit
17baf9a883
3 changed files with 23 additions and 9 deletions
2
ctl.go
2
ctl.go
|
@ -1348,6 +1348,8 @@ func servectlcmd(ctx context.Context, ctl *ctl, shutdown func()) {
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
// todo: can we retrain an account without holding a write lock? perhaps by writing a junkfilter to a new location, and staying informed of message changes while we go through all messages in the account?
|
||||||
|
|
||||||
acc.WithWLock(func() {
|
acc.WithWLock(func() {
|
||||||
conf, _ := acc.Conf()
|
conf, _ := acc.Conf()
|
||||||
if conf.JunkFilter == nil {
|
if conf.JunkFilter == nil {
|
||||||
|
|
|
@ -298,7 +298,7 @@ func (f *Filter) Save() error {
|
||||||
} else if err != nil {
|
} else if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
return tx.Update(&wordscore{w, wc.Ham + ham, wc.Spam + spam})
|
return tx.Update(&wordscore{w, ham, spam})
|
||||||
}
|
}
|
||||||
if err := update("-", f.hams, f.spams); err != nil {
|
if err := update("-", f.hams, f.spams); err != nil {
|
||||||
return fmt.Errorf("storing total ham/spam message count: %s", err)
|
return fmt.Errorf("storing total ham/spam message count: %s", err)
|
||||||
|
@ -621,10 +621,16 @@ func (f *Filter) Untrain(ctx context.Context, ham bool, words map[string]struct{
|
||||||
|
|
||||||
// Modify the message count.
|
// Modify the message count.
|
||||||
f.modified = true
|
f.modified = true
|
||||||
|
var fv *uint32
|
||||||
if ham {
|
if ham {
|
||||||
f.hams--
|
fv = &f.hams
|
||||||
} else {
|
} else {
|
||||||
f.spams--
|
fv = &f.spams
|
||||||
|
}
|
||||||
|
if *fv == 0 {
|
||||||
|
f.log.Error("attempt to decrease ham/spam message count while already zero", slog.Bool("ham", ham))
|
||||||
|
} else {
|
||||||
|
*fv -= 1
|
||||||
}
|
}
|
||||||
|
|
||||||
// Decrease the word counts.
|
// Decrease the word counts.
|
||||||
|
@ -633,10 +639,16 @@ func (f *Filter) Untrain(ctx context.Context, ham bool, words map[string]struct{
|
||||||
if !ok {
|
if !ok {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
var v *uint32
|
||||||
if ham {
|
if ham {
|
||||||
c.Ham--
|
v = &c.Ham
|
||||||
} else {
|
} else {
|
||||||
c.Spam--
|
v = &c.Spam
|
||||||
|
}
|
||||||
|
if *v == 0 {
|
||||||
|
f.log.Error("attempt to decrease ham/spam word count while already zero", slog.String("word", w), slog.Bool("ham", ham))
|
||||||
|
} else {
|
||||||
|
*v -= 1
|
||||||
}
|
}
|
||||||
f.cache[w] = c
|
f.cache[w] = c
|
||||||
f.changed[w] = c
|
f.changed[w] = c
|
||||||
|
|
|
@ -126,7 +126,7 @@ func TestFilter(t *testing.T) {
|
||||||
tcheck(t, err, "train spam message")
|
tcheck(t, err, "train spam message")
|
||||||
_, err = spamf.Seek(0, 0)
|
_, err = spamf.Seek(0, 0)
|
||||||
tcheck(t, err, "seek spam message")
|
tcheck(t, err, "seek spam message")
|
||||||
err = f.TrainMessage(ctxbg, spamf, spamsize, true)
|
err = f.TrainMessage(ctxbg, spamf, spamsize, false)
|
||||||
tcheck(t, err, "train spam message")
|
tcheck(t, err, "train spam message")
|
||||||
|
|
||||||
if !f.modified {
|
if !f.modified {
|
||||||
|
@ -166,16 +166,16 @@ func TestFilter(t *testing.T) {
|
||||||
tcheck(t, err, "untrain ham message")
|
tcheck(t, err, "untrain ham message")
|
||||||
_, err = hamf.Seek(0, 0)
|
_, err = hamf.Seek(0, 0)
|
||||||
tcheck(t, err, "seek ham message")
|
tcheck(t, err, "seek ham message")
|
||||||
err = f.UntrainMessage(ctxbg, hamf, spamsize, true)
|
err = f.UntrainMessage(ctxbg, hamf, hamsize, true)
|
||||||
tcheck(t, err, "untrain ham message")
|
tcheck(t, err, "untrain ham message")
|
||||||
|
|
||||||
_, err = spamf.Seek(0, 0)
|
_, err = spamf.Seek(0, 0)
|
||||||
tcheck(t, err, "seek spam message")
|
tcheck(t, err, "seek spam message")
|
||||||
err = f.UntrainMessage(ctxbg, spamf, spamsize, true)
|
err = f.UntrainMessage(ctxbg, spamf, spamsize, false)
|
||||||
tcheck(t, err, "untrain spam message")
|
tcheck(t, err, "untrain spam message")
|
||||||
_, err = spamf.Seek(0, 0)
|
_, err = spamf.Seek(0, 0)
|
||||||
tcheck(t, err, "seek spam message")
|
tcheck(t, err, "seek spam message")
|
||||||
err = f.UntrainMessage(ctxbg, spamf, spamsize, true)
|
err = f.UntrainMessage(ctxbg, spamf, spamsize, false)
|
||||||
tcheck(t, err, "untrain spam message")
|
tcheck(t, err, "untrain spam message")
|
||||||
|
|
||||||
if !f.modified {
|
if !f.modified {
|
||||||
|
|
Loading…
Reference in a new issue