2023-01-30 16:27:06 +03:00
|
|
|
package store
|
|
|
|
|
|
|
|
import (
|
2023-05-22 15:40:36 +03:00
|
|
|
"context"
|
2023-01-30 16:27:06 +03:00
|
|
|
"errors"
|
2023-02-27 00:21:13 +03:00
|
|
|
"fmt"
|
2024-02-08 16:49:01 +03:00
|
|
|
"log/slog"
|
2023-01-30 16:27:06 +03:00
|
|
|
"os"
|
|
|
|
"path/filepath"
|
|
|
|
|
improve training of junk filter
before, we used heuristics to decide when to train/untrain a message as junk or
nonjunk: the message had to be seen, be in certain mailboxes. then if a message
was marked as junk, it was junk. and otherwise it was nonjunk. this wasn't good
enough: you may want to keep some messages around as neither junk or nonjunk.
and that wasn't possible.
ideally, we would just look at the imap $Junk and $NotJunk flags. the problem
is that mail clients don't set these flags, or don't make it easy. thunderbird
can set the flags based on its own bayesian filter. it has a shortcut for
marking Junk and moving it to the junk folder (good), but the counterpart of
notjunk only marks a message as notjunk without showing in the UI that it was
marked as notjunk. there is also no "move and mark as notjunk" mechanism. e.g.
"archive" does not mark a message as notjunk. ios mail and mutt don't appear to
have any way to see or change the $Junk and $NotJunk flags.
what email clients do have is the ability to move messages to other
mailboxes/folders. so mox now has a mechanism that allows you to configure
mailboxes that automatically set $Junk or $NotJunk (or clear both) when a
message is moved/copied/delivered to that folder. e.g. a mailbox called junk or
spam or rejects marks its messags as junk. inbox, postmaster, dmarc, tlsrpt,
neutral* mark their messages as neither junk or notjunk. other folders mark
their messages as notjunk. e.g. list/*, archive. this functionality is
optional, but enabled with the quickstart and for new accounts.
also, mox now keeps track of the previous training of a message and will only
untrain/train if needed. before, there probably have been duplicate or missing
(un)trainings.
this also includes a new subcommand "retrain" to recreate the junkfilter for an
account. you should run it after updating to this version. and you should
probably also modify your account config to include the AutomaticJunkFlags.
2023-02-12 01:00:12 +03:00
|
|
|
"github.com/mjl-/bstore"
|
|
|
|
|
2023-01-30 16:27:06 +03:00
|
|
|
"github.com/mjl-/mox/config"
|
|
|
|
"github.com/mjl-/mox/junk"
|
|
|
|
"github.com/mjl-/mox/mlog"
|
|
|
|
"github.com/mjl-/mox/mox-"
|
|
|
|
)
|
|
|
|
|
|
|
|
// ErrNoJunkFilter indicates user did not configure/enable a junk filter.
|
|
|
|
var ErrNoJunkFilter = errors.New("junkfilter: not configured")
|
|
|
|
|
|
|
|
// OpenJunkFilter returns an opened junk filter for the account.
|
|
|
|
// If the account does not have a junk filter enabled, ErrNotConfigured is returned.
|
|
|
|
// Do not forget to save the filter after modifying, and to always close the filter when done.
|
|
|
|
// An empty filter is initialized on first access of the filter.
|
2023-12-05 15:35:58 +03:00
|
|
|
func (a *Account) OpenJunkFilter(ctx context.Context, log mlog.Log) (*junk.Filter, *config.JunkFilter, error) {
|
2023-01-30 16:27:06 +03:00
|
|
|
conf, ok := mox.Conf.Account(a.Name)
|
|
|
|
if !ok {
|
|
|
|
return nil, nil, ErrAccountUnknown
|
|
|
|
}
|
|
|
|
jf := conf.JunkFilter
|
|
|
|
if jf == nil {
|
|
|
|
return nil, jf, ErrNoJunkFilter
|
|
|
|
}
|
|
|
|
|
|
|
|
basePath := mox.DataDirPath("accounts")
|
|
|
|
dbPath := filepath.Join(basePath, a.Name, "junkfilter.db")
|
|
|
|
bloomPath := filepath.Join(basePath, a.Name, "junkfilter.bloom")
|
|
|
|
|
|
|
|
if _, xerr := os.Stat(dbPath); xerr != nil && os.IsNotExist(xerr) {
|
2023-05-22 15:40:36 +03:00
|
|
|
f, err := junk.NewFilter(ctx, log, jf.Params, dbPath, bloomPath)
|
2023-01-30 16:27:06 +03:00
|
|
|
return f, jf, err
|
|
|
|
}
|
2023-05-22 15:40:36 +03:00
|
|
|
f, err := junk.OpenFilter(ctx, log, jf.Params, dbPath, bloomPath, false)
|
2023-01-30 16:27:06 +03:00
|
|
|
return f, jf, err
|
|
|
|
}
|
|
|
|
|
improve training of junk filter
before, we used heuristics to decide when to train/untrain a message as junk or
nonjunk: the message had to be seen, be in certain mailboxes. then if a message
was marked as junk, it was junk. and otherwise it was nonjunk. this wasn't good
enough: you may want to keep some messages around as neither junk or nonjunk.
and that wasn't possible.
ideally, we would just look at the imap $Junk and $NotJunk flags. the problem
is that mail clients don't set these flags, or don't make it easy. thunderbird
can set the flags based on its own bayesian filter. it has a shortcut for
marking Junk and moving it to the junk folder (good), but the counterpart of
notjunk only marks a message as notjunk without showing in the UI that it was
marked as notjunk. there is also no "move and mark as notjunk" mechanism. e.g.
"archive" does not mark a message as notjunk. ios mail and mutt don't appear to
have any way to see or change the $Junk and $NotJunk flags.
what email clients do have is the ability to move messages to other
mailboxes/folders. so mox now has a mechanism that allows you to configure
mailboxes that automatically set $Junk or $NotJunk (or clear both) when a
message is moved/copied/delivered to that folder. e.g. a mailbox called junk or
spam or rejects marks its messags as junk. inbox, postmaster, dmarc, tlsrpt,
neutral* mark their messages as neither junk or notjunk. other folders mark
their messages as notjunk. e.g. list/*, archive. this functionality is
optional, but enabled with the quickstart and for new accounts.
also, mox now keeps track of the previous training of a message and will only
untrain/train if needed. before, there probably have been duplicate or missing
(un)trainings.
this also includes a new subcommand "retrain" to recreate the junkfilter for an
account. you should run it after updating to this version. and you should
probably also modify your account config to include the AutomaticJunkFlags.
2023-02-12 01:00:12 +03:00
|
|
|
// RetrainMessages (un)trains messages, if relevant given their flags. Updates
|
|
|
|
// m.TrainedJunk after retraining.
|
2023-12-05 15:35:58 +03:00
|
|
|
func (a *Account) RetrainMessages(ctx context.Context, log mlog.Log, tx *bstore.Tx, msgs []Message, absentOK bool) (rerr error) {
|
2023-01-30 16:27:06 +03:00
|
|
|
if len(msgs) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
var jf *junk.Filter
|
|
|
|
|
improve training of junk filter
before, we used heuristics to decide when to train/untrain a message as junk or
nonjunk: the message had to be seen, be in certain mailboxes. then if a message
was marked as junk, it was junk. and otherwise it was nonjunk. this wasn't good
enough: you may want to keep some messages around as neither junk or nonjunk.
and that wasn't possible.
ideally, we would just look at the imap $Junk and $NotJunk flags. the problem
is that mail clients don't set these flags, or don't make it easy. thunderbird
can set the flags based on its own bayesian filter. it has a shortcut for
marking Junk and moving it to the junk folder (good), but the counterpart of
notjunk only marks a message as notjunk without showing in the UI that it was
marked as notjunk. there is also no "move and mark as notjunk" mechanism. e.g.
"archive" does not mark a message as notjunk. ios mail and mutt don't appear to
have any way to see or change the $Junk and $NotJunk flags.
what email clients do have is the ability to move messages to other
mailboxes/folders. so mox now has a mechanism that allows you to configure
mailboxes that automatically set $Junk or $NotJunk (or clear both) when a
message is moved/copied/delivered to that folder. e.g. a mailbox called junk or
spam or rejects marks its messags as junk. inbox, postmaster, dmarc, tlsrpt,
neutral* mark their messages as neither junk or notjunk. other folders mark
their messages as notjunk. e.g. list/*, archive. this functionality is
optional, but enabled with the quickstart and for new accounts.
also, mox now keeps track of the previous training of a message and will only
untrain/train if needed. before, there probably have been duplicate or missing
(un)trainings.
this also includes a new subcommand "retrain" to recreate the junkfilter for an
account. you should run it after updating to this version. and you should
probably also modify your account config to include the AutomaticJunkFlags.
2023-02-12 01:00:12 +03:00
|
|
|
for i := range msgs {
|
|
|
|
if !msgs[i].NeedsTraining() {
|
2023-01-30 16:27:06 +03:00
|
|
|
continue
|
|
|
|
}
|
improve training of junk filter
before, we used heuristics to decide when to train/untrain a message as junk or
nonjunk: the message had to be seen, be in certain mailboxes. then if a message
was marked as junk, it was junk. and otherwise it was nonjunk. this wasn't good
enough: you may want to keep some messages around as neither junk or nonjunk.
and that wasn't possible.
ideally, we would just look at the imap $Junk and $NotJunk flags. the problem
is that mail clients don't set these flags, or don't make it easy. thunderbird
can set the flags based on its own bayesian filter. it has a shortcut for
marking Junk and moving it to the junk folder (good), but the counterpart of
notjunk only marks a message as notjunk without showing in the UI that it was
marked as notjunk. there is also no "move and mark as notjunk" mechanism. e.g.
"archive" does not mark a message as notjunk. ios mail and mutt don't appear to
have any way to see or change the $Junk and $NotJunk flags.
what email clients do have is the ability to move messages to other
mailboxes/folders. so mox now has a mechanism that allows you to configure
mailboxes that automatically set $Junk or $NotJunk (or clear both) when a
message is moved/copied/delivered to that folder. e.g. a mailbox called junk or
spam or rejects marks its messags as junk. inbox, postmaster, dmarc, tlsrpt,
neutral* mark their messages as neither junk or notjunk. other folders mark
their messages as notjunk. e.g. list/*, archive. this functionality is
optional, but enabled with the quickstart and for new accounts.
also, mox now keeps track of the previous training of a message and will only
untrain/train if needed. before, there probably have been duplicate or missing
(un)trainings.
this also includes a new subcommand "retrain" to recreate the junkfilter for an
account. you should run it after updating to this version. and you should
probably also modify your account config to include the AutomaticJunkFlags.
2023-02-12 01:00:12 +03:00
|
|
|
|
2023-01-30 16:27:06 +03:00
|
|
|
// Lazy open the junk filter.
|
|
|
|
if jf == nil {
|
|
|
|
var err error
|
2023-05-22 15:40:36 +03:00
|
|
|
jf, _, err = a.OpenJunkFilter(ctx, log)
|
2023-01-30 16:27:06 +03:00
|
|
|
if err != nil && errors.Is(err, ErrNoJunkFilter) {
|
|
|
|
// No junk filter configured. Nothing more to do.
|
|
|
|
return nil
|
2023-02-27 00:21:13 +03:00
|
|
|
} else if err != nil {
|
|
|
|
return fmt.Errorf("open junk filter: %v", err)
|
2023-01-30 16:27:06 +03:00
|
|
|
}
|
|
|
|
defer func() {
|
|
|
|
if jf != nil {
|
|
|
|
err := jf.Close()
|
|
|
|
if rerr == nil {
|
|
|
|
rerr = err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
}
|
2023-05-22 15:40:36 +03:00
|
|
|
if err := a.RetrainMessage(ctx, log, tx, jf, &msgs[i], absentOK); err != nil {
|
2023-01-30 16:27:06 +03:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
improve training of junk filter
before, we used heuristics to decide when to train/untrain a message as junk or
nonjunk: the message had to be seen, be in certain mailboxes. then if a message
was marked as junk, it was junk. and otherwise it was nonjunk. this wasn't good
enough: you may want to keep some messages around as neither junk or nonjunk.
and that wasn't possible.
ideally, we would just look at the imap $Junk and $NotJunk flags. the problem
is that mail clients don't set these flags, or don't make it easy. thunderbird
can set the flags based on its own bayesian filter. it has a shortcut for
marking Junk and moving it to the junk folder (good), but the counterpart of
notjunk only marks a message as notjunk without showing in the UI that it was
marked as notjunk. there is also no "move and mark as notjunk" mechanism. e.g.
"archive" does not mark a message as notjunk. ios mail and mutt don't appear to
have any way to see or change the $Junk and $NotJunk flags.
what email clients do have is the ability to move messages to other
mailboxes/folders. so mox now has a mechanism that allows you to configure
mailboxes that automatically set $Junk or $NotJunk (or clear both) when a
message is moved/copied/delivered to that folder. e.g. a mailbox called junk or
spam or rejects marks its messags as junk. inbox, postmaster, dmarc, tlsrpt,
neutral* mark their messages as neither junk or notjunk. other folders mark
their messages as notjunk. e.g. list/*, archive. this functionality is
optional, but enabled with the quickstart and for new accounts.
also, mox now keeps track of the previous training of a message and will only
untrain/train if needed. before, there probably have been duplicate or missing
(un)trainings.
this also includes a new subcommand "retrain" to recreate the junkfilter for an
account. you should run it after updating to this version. and you should
probably also modify your account config to include the AutomaticJunkFlags.
2023-02-12 01:00:12 +03:00
|
|
|
// RetrainMessage untrains and/or trains a message, if relevant given m.TrainedJunk
|
|
|
|
// and m.Junk/m.Notjunk. Updates m.TrainedJunk after retraining.
|
2023-12-05 15:35:58 +03:00
|
|
|
func (a *Account) RetrainMessage(ctx context.Context, log mlog.Log, tx *bstore.Tx, jf *junk.Filter, m *Message, absentOK bool) error {
|
improve training of junk filter
before, we used heuristics to decide when to train/untrain a message as junk or
nonjunk: the message had to be seen, be in certain mailboxes. then if a message
was marked as junk, it was junk. and otherwise it was nonjunk. this wasn't good
enough: you may want to keep some messages around as neither junk or nonjunk.
and that wasn't possible.
ideally, we would just look at the imap $Junk and $NotJunk flags. the problem
is that mail clients don't set these flags, or don't make it easy. thunderbird
can set the flags based on its own bayesian filter. it has a shortcut for
marking Junk and moving it to the junk folder (good), but the counterpart of
notjunk only marks a message as notjunk without showing in the UI that it was
marked as notjunk. there is also no "move and mark as notjunk" mechanism. e.g.
"archive" does not mark a message as notjunk. ios mail and mutt don't appear to
have any way to see or change the $Junk and $NotJunk flags.
what email clients do have is the ability to move messages to other
mailboxes/folders. so mox now has a mechanism that allows you to configure
mailboxes that automatically set $Junk or $NotJunk (or clear both) when a
message is moved/copied/delivered to that folder. e.g. a mailbox called junk or
spam or rejects marks its messags as junk. inbox, postmaster, dmarc, tlsrpt,
neutral* mark their messages as neither junk or notjunk. other folders mark
their messages as notjunk. e.g. list/*, archive. this functionality is
optional, but enabled with the quickstart and for new accounts.
also, mox now keeps track of the previous training of a message and will only
untrain/train if needed. before, there probably have been duplicate or missing
(un)trainings.
this also includes a new subcommand "retrain" to recreate the junkfilter for an
account. you should run it after updating to this version. and you should
probably also modify your account config to include the AutomaticJunkFlags.
2023-02-12 01:00:12 +03:00
|
|
|
untrain := m.TrainedJunk != nil
|
|
|
|
untrainJunk := untrain && *m.TrainedJunk
|
|
|
|
train := m.Junk || m.Notjunk && !(m.Junk && m.Notjunk)
|
|
|
|
trainJunk := m.Junk
|
2023-01-30 16:27:06 +03:00
|
|
|
|
improve training of junk filter
before, we used heuristics to decide when to train/untrain a message as junk or
nonjunk: the message had to be seen, be in certain mailboxes. then if a message
was marked as junk, it was junk. and otherwise it was nonjunk. this wasn't good
enough: you may want to keep some messages around as neither junk or nonjunk.
and that wasn't possible.
ideally, we would just look at the imap $Junk and $NotJunk flags. the problem
is that mail clients don't set these flags, or don't make it easy. thunderbird
can set the flags based on its own bayesian filter. it has a shortcut for
marking Junk and moving it to the junk folder (good), but the counterpart of
notjunk only marks a message as notjunk without showing in the UI that it was
marked as notjunk. there is also no "move and mark as notjunk" mechanism. e.g.
"archive" does not mark a message as notjunk. ios mail and mutt don't appear to
have any way to see or change the $Junk and $NotJunk flags.
what email clients do have is the ability to move messages to other
mailboxes/folders. so mox now has a mechanism that allows you to configure
mailboxes that automatically set $Junk or $NotJunk (or clear both) when a
message is moved/copied/delivered to that folder. e.g. a mailbox called junk or
spam or rejects marks its messags as junk. inbox, postmaster, dmarc, tlsrpt,
neutral* mark their messages as neither junk or notjunk. other folders mark
their messages as notjunk. e.g. list/*, archive. this functionality is
optional, but enabled with the quickstart and for new accounts.
also, mox now keeps track of the previous training of a message and will only
untrain/train if needed. before, there probably have been duplicate or missing
(un)trainings.
this also includes a new subcommand "retrain" to recreate the junkfilter for an
account. you should run it after updating to this version. and you should
probably also modify your account config to include the AutomaticJunkFlags.
2023-02-12 01:00:12 +03:00
|
|
|
if !untrain && !train || (untrain && train && untrainJunk == trainJunk) {
|
2023-01-30 16:27:06 +03:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-12-05 18:06:50 +03:00
|
|
|
log.Debug("updating junk filter",
|
|
|
|
slog.Bool("untrain", untrain),
|
|
|
|
slog.Bool("untrainjunk", untrainJunk),
|
|
|
|
slog.Bool("train", train),
|
|
|
|
slog.Bool("trainjunk", trainJunk))
|
2023-01-30 16:27:06 +03:00
|
|
|
|
improve training of junk filter
before, we used heuristics to decide when to train/untrain a message as junk or
nonjunk: the message had to be seen, be in certain mailboxes. then if a message
was marked as junk, it was junk. and otherwise it was nonjunk. this wasn't good
enough: you may want to keep some messages around as neither junk or nonjunk.
and that wasn't possible.
ideally, we would just look at the imap $Junk and $NotJunk flags. the problem
is that mail clients don't set these flags, or don't make it easy. thunderbird
can set the flags based on its own bayesian filter. it has a shortcut for
marking Junk and moving it to the junk folder (good), but the counterpart of
notjunk only marks a message as notjunk without showing in the UI that it was
marked as notjunk. there is also no "move and mark as notjunk" mechanism. e.g.
"archive" does not mark a message as notjunk. ios mail and mutt don't appear to
have any way to see or change the $Junk and $NotJunk flags.
what email clients do have is the ability to move messages to other
mailboxes/folders. so mox now has a mechanism that allows you to configure
mailboxes that automatically set $Junk or $NotJunk (or clear both) when a
message is moved/copied/delivered to that folder. e.g. a mailbox called junk or
spam or rejects marks its messags as junk. inbox, postmaster, dmarc, tlsrpt,
neutral* mark their messages as neither junk or notjunk. other folders mark
their messages as notjunk. e.g. list/*, archive. this functionality is
optional, but enabled with the quickstart and for new accounts.
also, mox now keeps track of the previous training of a message and will only
untrain/train if needed. before, there probably have been duplicate or missing
(un)trainings.
this also includes a new subcommand "retrain" to recreate the junkfilter for an
account. you should run it after updating to this version. and you should
probably also modify your account config to include the AutomaticJunkFlags.
2023-02-12 01:00:12 +03:00
|
|
|
mr := a.MessageReader(*m)
|
2023-02-16 15:22:00 +03:00
|
|
|
defer func() {
|
|
|
|
err := mr.Close()
|
|
|
|
log.Check(err, "closing message reader after retraining")
|
|
|
|
}()
|
2023-01-30 16:27:06 +03:00
|
|
|
|
|
|
|
p, err := m.LoadPart(mr)
|
|
|
|
if err != nil {
|
|
|
|
log.Errorx("loading part for message", err)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
words, err := jf.ParseMessage(p)
|
|
|
|
if err != nil {
|
2023-12-05 15:35:58 +03:00
|
|
|
log.Errorx("parsing message for updating junk filter", err, slog.Any("parse", ""))
|
2023-01-30 16:27:06 +03:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
if untrain {
|
2023-05-22 15:40:36 +03:00
|
|
|
err := jf.Untrain(ctx, !untrainJunk, words)
|
2023-01-30 16:27:06 +03:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
improve training of junk filter
before, we used heuristics to decide when to train/untrain a message as junk or
nonjunk: the message had to be seen, be in certain mailboxes. then if a message
was marked as junk, it was junk. and otherwise it was nonjunk. this wasn't good
enough: you may want to keep some messages around as neither junk or nonjunk.
and that wasn't possible.
ideally, we would just look at the imap $Junk and $NotJunk flags. the problem
is that mail clients don't set these flags, or don't make it easy. thunderbird
can set the flags based on its own bayesian filter. it has a shortcut for
marking Junk and moving it to the junk folder (good), but the counterpart of
notjunk only marks a message as notjunk without showing in the UI that it was
marked as notjunk. there is also no "move and mark as notjunk" mechanism. e.g.
"archive" does not mark a message as notjunk. ios mail and mutt don't appear to
have any way to see or change the $Junk and $NotJunk flags.
what email clients do have is the ability to move messages to other
mailboxes/folders. so mox now has a mechanism that allows you to configure
mailboxes that automatically set $Junk or $NotJunk (or clear both) when a
message is moved/copied/delivered to that folder. e.g. a mailbox called junk or
spam or rejects marks its messags as junk. inbox, postmaster, dmarc, tlsrpt,
neutral* mark their messages as neither junk or notjunk. other folders mark
their messages as notjunk. e.g. list/*, archive. this functionality is
optional, but enabled with the quickstart and for new accounts.
also, mox now keeps track of the previous training of a message and will only
untrain/train if needed. before, there probably have been duplicate or missing
(un)trainings.
this also includes a new subcommand "retrain" to recreate the junkfilter for an
account. you should run it after updating to this version. and you should
probably also modify your account config to include the AutomaticJunkFlags.
2023-02-12 01:00:12 +03:00
|
|
|
m.TrainedJunk = nil
|
2023-01-30 16:27:06 +03:00
|
|
|
}
|
|
|
|
if train {
|
2023-05-22 15:40:36 +03:00
|
|
|
err := jf.Train(ctx, !trainJunk, words)
|
2023-01-30 16:27:06 +03:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
improve training of junk filter
before, we used heuristics to decide when to train/untrain a message as junk or
nonjunk: the message had to be seen, be in certain mailboxes. then if a message
was marked as junk, it was junk. and otherwise it was nonjunk. this wasn't good
enough: you may want to keep some messages around as neither junk or nonjunk.
and that wasn't possible.
ideally, we would just look at the imap $Junk and $NotJunk flags. the problem
is that mail clients don't set these flags, or don't make it easy. thunderbird
can set the flags based on its own bayesian filter. it has a shortcut for
marking Junk and moving it to the junk folder (good), but the counterpart of
notjunk only marks a message as notjunk without showing in the UI that it was
marked as notjunk. there is also no "move and mark as notjunk" mechanism. e.g.
"archive" does not mark a message as notjunk. ios mail and mutt don't appear to
have any way to see or change the $Junk and $NotJunk flags.
what email clients do have is the ability to move messages to other
mailboxes/folders. so mox now has a mechanism that allows you to configure
mailboxes that automatically set $Junk or $NotJunk (or clear both) when a
message is moved/copied/delivered to that folder. e.g. a mailbox called junk or
spam or rejects marks its messags as junk. inbox, postmaster, dmarc, tlsrpt,
neutral* mark their messages as neither junk or notjunk. other folders mark
their messages as notjunk. e.g. list/*, archive. this functionality is
optional, but enabled with the quickstart and for new accounts.
also, mox now keeps track of the previous training of a message and will only
untrain/train if needed. before, there probably have been duplicate or missing
(un)trainings.
this also includes a new subcommand "retrain" to recreate the junkfilter for an
account. you should run it after updating to this version. and you should
probably also modify your account config to include the AutomaticJunkFlags.
2023-02-12 01:00:12 +03:00
|
|
|
m.TrainedJunk = &trainJunk
|
|
|
|
}
|
|
|
|
if err := tx.Update(m); err != nil && (!absentOK || err != bstore.ErrAbsent) {
|
|
|
|
return err
|
2023-01-30 16:27:06 +03:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
improve training of junk filter
before, we used heuristics to decide when to train/untrain a message as junk or
nonjunk: the message had to be seen, be in certain mailboxes. then if a message
was marked as junk, it was junk. and otherwise it was nonjunk. this wasn't good
enough: you may want to keep some messages around as neither junk or nonjunk.
and that wasn't possible.
ideally, we would just look at the imap $Junk and $NotJunk flags. the problem
is that mail clients don't set these flags, or don't make it easy. thunderbird
can set the flags based on its own bayesian filter. it has a shortcut for
marking Junk and moving it to the junk folder (good), but the counterpart of
notjunk only marks a message as notjunk without showing in the UI that it was
marked as notjunk. there is also no "move and mark as notjunk" mechanism. e.g.
"archive" does not mark a message as notjunk. ios mail and mutt don't appear to
have any way to see or change the $Junk and $NotJunk flags.
what email clients do have is the ability to move messages to other
mailboxes/folders. so mox now has a mechanism that allows you to configure
mailboxes that automatically set $Junk or $NotJunk (or clear both) when a
message is moved/copied/delivered to that folder. e.g. a mailbox called junk or
spam or rejects marks its messags as junk. inbox, postmaster, dmarc, tlsrpt,
neutral* mark their messages as neither junk or notjunk. other folders mark
their messages as notjunk. e.g. list/*, archive. this functionality is
optional, but enabled with the quickstart and for new accounts.
also, mox now keeps track of the previous training of a message and will only
untrain/train if needed. before, there probably have been duplicate or missing
(un)trainings.
this also includes a new subcommand "retrain" to recreate the junkfilter for an
account. you should run it after updating to this version. and you should
probably also modify your account config to include the AutomaticJunkFlags.
2023-02-12 01:00:12 +03:00
|
|
|
|
|
|
|
// TrainMessage trains the junk filter based on the current m.Junk/m.Notjunk flags,
|
|
|
|
// disregarding m.TrainedJunk and not updating that field.
|
2023-12-05 15:35:58 +03:00
|
|
|
func (a *Account) TrainMessage(ctx context.Context, log mlog.Log, jf *junk.Filter, m Message) (bool, error) {
|
improve training of junk filter
before, we used heuristics to decide when to train/untrain a message as junk or
nonjunk: the message had to be seen, be in certain mailboxes. then if a message
was marked as junk, it was junk. and otherwise it was nonjunk. this wasn't good
enough: you may want to keep some messages around as neither junk or nonjunk.
and that wasn't possible.
ideally, we would just look at the imap $Junk and $NotJunk flags. the problem
is that mail clients don't set these flags, or don't make it easy. thunderbird
can set the flags based on its own bayesian filter. it has a shortcut for
marking Junk and moving it to the junk folder (good), but the counterpart of
notjunk only marks a message as notjunk without showing in the UI that it was
marked as notjunk. there is also no "move and mark as notjunk" mechanism. e.g.
"archive" does not mark a message as notjunk. ios mail and mutt don't appear to
have any way to see or change the $Junk and $NotJunk flags.
what email clients do have is the ability to move messages to other
mailboxes/folders. so mox now has a mechanism that allows you to configure
mailboxes that automatically set $Junk or $NotJunk (or clear both) when a
message is moved/copied/delivered to that folder. e.g. a mailbox called junk or
spam or rejects marks its messags as junk. inbox, postmaster, dmarc, tlsrpt,
neutral* mark their messages as neither junk or notjunk. other folders mark
their messages as notjunk. e.g. list/*, archive. this functionality is
optional, but enabled with the quickstart and for new accounts.
also, mox now keeps track of the previous training of a message and will only
untrain/train if needed. before, there probably have been duplicate or missing
(un)trainings.
this also includes a new subcommand "retrain" to recreate the junkfilter for an
account. you should run it after updating to this version. and you should
probably also modify your account config to include the AutomaticJunkFlags.
2023-02-12 01:00:12 +03:00
|
|
|
if !m.Junk && !m.Notjunk || (m.Junk && m.Notjunk) {
|
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
mr := a.MessageReader(m)
|
2023-02-16 15:22:00 +03:00
|
|
|
defer func() {
|
|
|
|
err := mr.Close()
|
|
|
|
log.Check(err, "closing message after training")
|
|
|
|
}()
|
improve training of junk filter
before, we used heuristics to decide when to train/untrain a message as junk or
nonjunk: the message had to be seen, be in certain mailboxes. then if a message
was marked as junk, it was junk. and otherwise it was nonjunk. this wasn't good
enough: you may want to keep some messages around as neither junk or nonjunk.
and that wasn't possible.
ideally, we would just look at the imap $Junk and $NotJunk flags. the problem
is that mail clients don't set these flags, or don't make it easy. thunderbird
can set the flags based on its own bayesian filter. it has a shortcut for
marking Junk and moving it to the junk folder (good), but the counterpart of
notjunk only marks a message as notjunk without showing in the UI that it was
marked as notjunk. there is also no "move and mark as notjunk" mechanism. e.g.
"archive" does not mark a message as notjunk. ios mail and mutt don't appear to
have any way to see or change the $Junk and $NotJunk flags.
what email clients do have is the ability to move messages to other
mailboxes/folders. so mox now has a mechanism that allows you to configure
mailboxes that automatically set $Junk or $NotJunk (or clear both) when a
message is moved/copied/delivered to that folder. e.g. a mailbox called junk or
spam or rejects marks its messags as junk. inbox, postmaster, dmarc, tlsrpt,
neutral* mark their messages as neither junk or notjunk. other folders mark
their messages as notjunk. e.g. list/*, archive. this functionality is
optional, but enabled with the quickstart and for new accounts.
also, mox now keeps track of the previous training of a message and will only
untrain/train if needed. before, there probably have been duplicate or missing
(un)trainings.
this also includes a new subcommand "retrain" to recreate the junkfilter for an
account. you should run it after updating to this version. and you should
probably also modify your account config to include the AutomaticJunkFlags.
2023-02-12 01:00:12 +03:00
|
|
|
|
|
|
|
p, err := m.LoadPart(mr)
|
|
|
|
if err != nil {
|
|
|
|
log.Errorx("loading part for message", err)
|
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
words, err := jf.ParseMessage(p)
|
|
|
|
if err != nil {
|
2023-12-05 15:35:58 +03:00
|
|
|
log.Errorx("parsing message for updating junk filter", err, slog.Any("parse", ""))
|
improve training of junk filter
before, we used heuristics to decide when to train/untrain a message as junk or
nonjunk: the message had to be seen, be in certain mailboxes. then if a message
was marked as junk, it was junk. and otherwise it was nonjunk. this wasn't good
enough: you may want to keep some messages around as neither junk or nonjunk.
and that wasn't possible.
ideally, we would just look at the imap $Junk and $NotJunk flags. the problem
is that mail clients don't set these flags, or don't make it easy. thunderbird
can set the flags based on its own bayesian filter. it has a shortcut for
marking Junk and moving it to the junk folder (good), but the counterpart of
notjunk only marks a message as notjunk without showing in the UI that it was
marked as notjunk. there is also no "move and mark as notjunk" mechanism. e.g.
"archive" does not mark a message as notjunk. ios mail and mutt don't appear to
have any way to see or change the $Junk and $NotJunk flags.
what email clients do have is the ability to move messages to other
mailboxes/folders. so mox now has a mechanism that allows you to configure
mailboxes that automatically set $Junk or $NotJunk (or clear both) when a
message is moved/copied/delivered to that folder. e.g. a mailbox called junk or
spam or rejects marks its messags as junk. inbox, postmaster, dmarc, tlsrpt,
neutral* mark their messages as neither junk or notjunk. other folders mark
their messages as notjunk. e.g. list/*, archive. this functionality is
optional, but enabled with the quickstart and for new accounts.
also, mox now keeps track of the previous training of a message and will only
untrain/train if needed. before, there probably have been duplicate or missing
(un)trainings.
this also includes a new subcommand "retrain" to recreate the junkfilter for an
account. you should run it after updating to this version. and you should
probably also modify your account config to include the AutomaticJunkFlags.
2023-02-12 01:00:12 +03:00
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
|
2023-05-22 15:40:36 +03:00
|
|
|
return true, jf.Train(ctx, m.Notjunk, words)
|
improve training of junk filter
before, we used heuristics to decide when to train/untrain a message as junk or
nonjunk: the message had to be seen, be in certain mailboxes. then if a message
was marked as junk, it was junk. and otherwise it was nonjunk. this wasn't good
enough: you may want to keep some messages around as neither junk or nonjunk.
and that wasn't possible.
ideally, we would just look at the imap $Junk and $NotJunk flags. the problem
is that mail clients don't set these flags, or don't make it easy. thunderbird
can set the flags based on its own bayesian filter. it has a shortcut for
marking Junk and moving it to the junk folder (good), but the counterpart of
notjunk only marks a message as notjunk without showing in the UI that it was
marked as notjunk. there is also no "move and mark as notjunk" mechanism. e.g.
"archive" does not mark a message as notjunk. ios mail and mutt don't appear to
have any way to see or change the $Junk and $NotJunk flags.
what email clients do have is the ability to move messages to other
mailboxes/folders. so mox now has a mechanism that allows you to configure
mailboxes that automatically set $Junk or $NotJunk (or clear both) when a
message is moved/copied/delivered to that folder. e.g. a mailbox called junk or
spam or rejects marks its messags as junk. inbox, postmaster, dmarc, tlsrpt,
neutral* mark their messages as neither junk or notjunk. other folders mark
their messages as notjunk. e.g. list/*, archive. this functionality is
optional, but enabled with the quickstart and for new accounts.
also, mox now keeps track of the previous training of a message and will only
untrain/train if needed. before, there probably have been duplicate or missing
(un)trainings.
this also includes a new subcommand "retrain" to recreate the junkfilter for an
account. you should run it after updating to this version. and you should
probably also modify your account config to include the AutomaticJunkFlags.
2023-02-12 01:00:12 +03:00
|
|
|
}
|