2023-01-30 16:27:06 +03:00
package main
import (
2023-07-01 17:43:20 +03:00
"bufio"
2023-05-22 15:40:36 +03:00
"context"
2023-01-30 16:27:06 +03:00
"encoding/json"
"errors"
"fmt"
"io"
"log"
2023-07-01 17:43:20 +03:00
"net"
2023-01-30 16:27:06 +03:00
"os"
"path/filepath"
"runtime/debug"
"strings"
"time"
2023-06-24 01:24:43 +03:00
"golang.org/x/exp/maps"
2023-07-01 17:43:20 +03:00
"github.com/mjl-/mox/config"
2023-01-30 16:27:06 +03:00
"github.com/mjl-/mox/message"
"github.com/mjl-/mox/metrics"
"github.com/mjl-/mox/mlog"
2023-07-01 17:43:20 +03:00
"github.com/mjl-/mox/mox-"
2023-01-30 16:27:06 +03:00
"github.com/mjl-/mox/store"
)
// todo: add option to trust imported messages, causing us to look at Authentication-Results and Received-SPF headers and add eg verified spf/dkim/dmarc domains to our store, to jumpstart reputation.
2023-02-16 11:57:27 +03:00
// importCommonHelp is the help text shared by the "import maildir" and "import
// mbox" commands; each command concatenates it into its own help string.
const importCommonHelp = `By default, messages will train the junk filter based on their flags and, if
"automatic junk flags" configuration is set, based on mailbox naming.

If the destination mailbox is "Sent", the recipients of the messages are added
to the message metadata, causing later incoming messages from these recipients
to be accepted, unless other reputation signals prevent that.

Users can also import mailboxes/messages through the account web page by
uploading a zip or tgz file with mbox and/or maildirs.
`
func cmdImportMaildir ( c * cmd ) {
c . params = "accountname mailboxname maildir"
c . help = ` Import a maildir into an account .
` + importCommonHelp + `
2023-02-16 11:57:27 +03:00
Mailbox flags , like "seen" , "answered" , will be imported . An optional
dovecot - keywords file can specify additional flags , like Forwarded / Junk / NotJunk .
2023-01-30 16:27:06 +03:00
The maildir files / directories are read by the mox process , so make sure it has
access to the maildir directories / files .
`
args := c . Parse ( )
2023-07-02 14:53:34 +03:00
if len ( args ) != 3 {
c . Usage ( )
}
mustLoadConfig ( )
ctlcmdImport ( xctl ( ) , false , args [ 0 ] , args [ 1 ] , args [ 2 ] )
2023-01-30 16:27:06 +03:00
}
func cmdImportMbox ( c * cmd ) {
c . params = "accountname mailboxname mbox"
c . help = ` Import an mbox into an account .
2023-02-16 11:57:27 +03:00
Using mbox is not recommended , maildir is a better defined format .
2023-01-30 16:27:06 +03:00
` + importCommonHelp + `
The mailbox is read by the mox process , so make sure it has access to the
maildir directories / files .
`
args := c . Parse ( )
if len ( args ) != 3 {
c . Usage ( )
}
2023-02-03 17:54:34 +03:00
mustLoadConfig ( )
2023-07-02 14:53:34 +03:00
ctlcmdImport ( xctl ( ) , true , args [ 0 ] , args [ 1 ] , args [ 2 ] )
2023-07-01 17:43:20 +03:00
}
// cmdXImportMaildir sets up the unlisted variant of the maildir import command
// that takes an account directory instead of an account name, and delegates the
// work to xcmdXImport with mbox set to false.
func cmdXImportMaildir(c *cmd) {
	c.unlisted = true
	c.params = "accountdir mailboxname maildir"
	c.help = `Import a maildir into an account by directly accessing the data directory.

See "mox help import maildir" for details.
`
	xcmdXImport(false, c)
}
// cmdXImportMbox sets up the unlisted variant of the mbox import command that
// takes an account directory instead of an account name, and delegates the work
// to xcmdXImport with mbox set to true.
func cmdXImportMbox(c *cmd) {
	c.unlisted = true
	c.params = "accountdir mailboxname mbox"
	c.help = `Import an mbox into an account by directly accessing the data directory.

See "mox help import mbox" for details.
`
	xcmdXImport(true, c)
}
func xcmdXImport ( mbox bool , c * cmd ) {
args := c . Parse ( )
if len ( args ) != 3 {
c . Usage ( )
}
accountdir := args [ 0 ]
account := filepath . Base ( accountdir )
// Set up the mox config so the account can be opened.
if filepath . Base ( filepath . Dir ( accountdir ) ) != "accounts" {
log . Fatalf ( "accountdir must be of the form .../accounts/<name>" )
}
var err error
mox . Conf . Static . DataDir , err = filepath . Abs ( filepath . Dir ( filepath . Dir ( accountdir ) ) )
xcheckf ( err , "making absolute datadir" )
mox . ConfigStaticPath = "fake.conf"
mox . Conf . DynamicLastCheck = time . Now ( ) . Add ( time . Hour ) // Silence errors about config file.
mox . Conf . Dynamic . Accounts = map [ string ] config . Account {
account : { } ,
}
switchDone := store . Switchboard ( )
defer close ( switchDone )
xlog := mlog . New ( "import" )
cconn , sconn := net . Pipe ( )
clientctl := ctl { conn : cconn , r : bufio . NewReader ( cconn ) , log : xlog }
2023-07-02 14:53:34 +03:00
serverctl := ctl { conn : sconn , r : bufio . NewReader ( sconn ) , log : xlog }
go servectlcmd ( context . Background ( ) , & serverctl , func ( ) { } )
2023-07-01 17:43:20 +03:00
2023-07-02 14:53:34 +03:00
ctlcmdImport ( & clientctl , mbox , account , args [ 1 ] , args [ 2 ] )
2023-07-01 17:43:20 +03:00
}
2023-07-02 14:53:34 +03:00
func ctlcmdImport ( ctl * ctl , mbox bool , account , mailbox , src string ) {
if mbox {
ctl . xwrite ( "importmbox" )
} else {
ctl . xwrite ( "importmaildir" )
}
2023-01-30 16:27:06 +03:00
ctl . xwrite ( account )
2023-07-02 14:53:34 +03:00
if strings . EqualFold ( mailbox , "Inbox" ) {
mailbox = "Inbox"
}
2023-01-30 16:27:06 +03:00
ctl . xwrite ( mailbox )
ctl . xwrite ( src )
ctl . xreadok ( )
fmt . Fprintln ( os . Stderr , "importing..." )
for {
line := ctl . xread ( )
if strings . HasPrefix ( line , "progress " ) {
n := line [ len ( "progress " ) : ]
fmt . Fprintf ( os . Stderr , "%s...\n" , n )
continue
}
if line != "ok" {
log . Fatalf ( "import, expected ok, got %q" , line )
}
break
}
count := ctl . xread ( )
fmt . Fprintf ( os . Stderr , "%s imported\n" , count )
}
2023-05-22 15:40:36 +03:00
func importctl ( ctx context . Context , ctl * ctl , mbox bool ) {
2023-01-30 16:27:06 +03:00
/ * protocol :
> "importmaildir" or "importmbox"
> account
> mailbox
> src ( mbox file or maildir directory )
< "ok" or error
< "progress" count ( zero or more times , once for every 1000 messages )
< "ok" when done , or error
< count ( of total imported messages , only if not error )
* /
account := ctl . xread ( )
mailbox := ctl . xread ( )
src := ctl . xread ( )
kind := "maildir"
if mbox {
kind = "mbox"
}
ctl . log . Info ( "importing messages" , mlog . Field ( "kind" , kind ) , mlog . Field ( "account" , account ) , mlog . Field ( "mailbox" , mailbox ) , mlog . Field ( "source" , src ) )
var err error
var mboxf * os . File
var mdnewf , mdcurf * os . File
2023-02-16 11:57:27 +03:00
var msgreader store . MsgSource
2023-01-30 16:27:06 +03:00
defer func ( ) {
if mboxf != nil {
2023-02-16 15:22:00 +03:00
err := mboxf . Close ( )
ctl . log . Check ( err , "closing mbox file after import" )
2023-01-30 16:27:06 +03:00
}
if mdnewf != nil {
2023-02-16 15:22:00 +03:00
err := mdnewf . Close ( )
ctl . log . Check ( err , "closing maildir new after import" )
2023-01-30 16:27:06 +03:00
}
if mdcurf != nil {
2023-02-16 15:22:00 +03:00
err := mdcurf . Close ( )
ctl . log . Check ( err , "closing maildir cur after import" )
2023-01-30 16:27:06 +03:00
}
} ( )
// Open account, creating a database file if it doesn't exist yet. It must be known
// in the configuration file.
a , err := store . OpenAccount ( account )
ctl . xcheck ( err , "opening account" )
defer func ( ) {
if a != nil {
2023-02-16 15:22:00 +03:00
err := a . Close ( )
ctl . log . Check ( err , "closing account after import" )
2023-01-30 16:27:06 +03:00
}
} ( )
// Messages don't always have a junk flag set. We'll assume anything in a mailbox
// starting with junk or spam is junk mail.
improve training of junk filter
before, we used heuristics to decide when to train/untrain a message as junk or
nonjunk: the message had to be seen, be in certain mailboxes. then if a message
was marked as junk, it was junk. and otherwise it was nonjunk. this wasn't good
enough: you may want to keep some messages around as neither junk or nonjunk.
and that wasn't possible.
ideally, we would just look at the imap $Junk and $NotJunk flags. the problem
is that mail clients don't set these flags, or don't make it easy. thunderbird
can set the flags based on its own bayesian filter. it has a shortcut for
marking Junk and moving it to the junk folder (good), but the counterpart of
notjunk only marks a message as notjunk without showing in the UI that it was
marked as notjunk. there is also no "move and mark as notjunk" mechanism. e.g.
"archive" does not mark a message as notjunk. ios mail and mutt don't appear to
have any way to see or change the $Junk and $NotJunk flags.
what email clients do have is the ability to move messages to other
mailboxes/folders. so mox now has a mechanism that allows you to configure
mailboxes that automatically set $Junk or $NotJunk (or clear both) when a
message is moved/copied/delivered to that folder. e.g. a mailbox called junk or
spam or rejects marks its messags as junk. inbox, postmaster, dmarc, tlsrpt,
neutral* mark their messages as neither junk or notjunk. other folders mark
their messages as notjunk. e.g. list/*, archive. this functionality is
optional, but enabled with the quickstart and for new accounts.
also, mox now keeps track of the previous training of a message and will only
untrain/train if needed. before, there probably have been duplicate or missing
(un)trainings.
this also includes a new subcommand "retrain" to recreate the junkfilter for an
account. you should run it after updating to this version. and you should
probably also modify your account config to include the AutomaticJunkFlags.
2023-02-12 01:00:12 +03:00
2023-01-30 16:27:06 +03:00
// First check if we can access the mbox/maildir.
// Mox needs to be able to access those files, the user running the import command
// may be a different user who can access the files.
if mbox {
mboxf , err = os . Open ( src )
ctl . xcheck ( err , "open mbox file" )
2023-02-16 11:57:27 +03:00
msgreader = store . NewMboxReader ( store . CreateMessageTemp , src , mboxf , ctl . log )
2023-01-30 16:27:06 +03:00
} else {
mdnewf , err = os . Open ( filepath . Join ( src , "new" ) )
ctl . xcheck ( err , "open subdir new of maildir" )
mdcurf , err = os . Open ( filepath . Join ( src , "cur" ) )
ctl . xcheck ( err , "open subdir cur of maildir" )
2023-02-16 11:57:27 +03:00
msgreader = store . NewMaildirReader ( store . CreateMessageTemp , mdnewf , mdcurf , ctl . log )
2023-01-30 16:27:06 +03:00
}
2023-05-22 15:40:36 +03:00
tx , err := a . DB . Begin ( ctx , true )
2023-01-30 16:27:06 +03:00
ctl . xcheck ( err , "begin transaction" )
defer func ( ) {
if tx != nil {
2023-02-16 15:22:00 +03:00
err := tx . Rollback ( )
ctl . log . Check ( err , "rolling back transaction" )
2023-01-30 16:27:06 +03:00
}
} ( )
// All preparations done. Good to go.
ctl . xwriteok ( )
// We will be delivering messages. If we fail halfway, we need to remove the created msg files.
var deliveredIDs [ ] int64
defer func ( ) {
x := recover ( )
if x == nil {
return
}
2023-02-27 00:25:57 +03:00
if x != ctl . x {
ctl . log . Error ( "import error" , mlog . Field ( "panic" , fmt . Errorf ( "%v" , x ) ) )
debug . PrintStack ( )
metrics . PanicInc ( "import" )
} else {
ctl . log . Error ( "import error" )
}
2023-01-30 16:27:06 +03:00
for _ , id := range deliveredIDs {
p := a . MessagePath ( id )
2023-02-16 15:22:00 +03:00
err := os . Remove ( p )
ctl . log . Check ( err , "closing message file after import error" , mlog . Field ( "path" , p ) )
2023-01-30 16:27:06 +03:00
}
2023-02-27 00:25:57 +03:00
ctl . xerror ( fmt . Sprintf ( "import error: %v" , x ) )
2023-01-30 16:27:06 +03:00
} ( )
var changes [ ] store . Change
xdeliver := func ( m * store . Message , mf * os . File ) {
// todo: possibly set dmarcdomain to the domain of the from address? at least for non-spams that have been seen. otherwise user would start without any reputations. the assumption would be that the user has accepted email and deemed it legit, coming from the indicated sender.
const consumeFile = true
isSent := mailbox == "Sent"
const sync = false
2023-02-16 11:57:27 +03:00
const notrain = true
2023-04-20 15:16:56 +03:00
err := a . DeliverMessage ( ctl . log , tx , m , mf , consumeFile , isSent , sync , notrain )
ctl . xcheck ( err , "delivering message" )
2023-01-30 16:27:06 +03:00
deliveredIDs = append ( deliveredIDs , m . ID )
ctl . log . Debug ( "delivered message" , mlog . Field ( "id" , m . ID ) )
2023-06-24 01:24:43 +03:00
changes = append ( changes , store . ChangeAddUID { MailboxID : m . MailboxID , UID : m . UID , Flags : m . Flags , Keywords : m . Keywords } )
2023-01-30 16:27:06 +03:00
}
// todo: one goroutine for reading messages, one for parsing the message, one adding to database, one for junk filter training.
n := 0
a . WithWLock ( func ( ) {
// Ensure mailbox exists.
var mb store . Mailbox
2023-04-20 15:16:56 +03:00
mb , changes , err = a . MailboxEnsure ( tx , mailbox , true )
ctl . xcheck ( err , "ensuring mailbox exists" )
2023-01-30 16:27:06 +03:00
2023-06-24 01:24:43 +03:00
// We ensure keywords in messages make it to the mailbox as well.
mailboxKeywords := map [ string ] bool { }
2023-05-22 15:40:36 +03:00
jf , _ , err := a . OpenJunkFilter ( ctx , ctl . log )
2023-02-16 11:57:27 +03:00
if err != nil && ! errors . Is ( err , store . ErrNoJunkFilter ) {
ctl . xcheck ( err , "open junk filter" )
2023-01-30 16:27:06 +03:00
}
2023-02-16 11:57:27 +03:00
defer func ( ) {
if jf != nil {
err = jf . Close ( )
ctl . xcheck ( err , "close junk filter" )
}
} ( )
conf , _ := a . Conf ( )
2023-01-30 16:27:06 +03:00
process := func ( m * store . Message , msgf * os . File , origPath string ) {
defer func ( ) {
if msgf == nil {
return
}
2023-02-16 15:22:00 +03:00
err := os . Remove ( msgf . Name ( ) )
ctl . log . Check ( err , "removing temporary message after failing to import" )
err = msgf . Close ( )
ctl . log . Check ( err , "closing temporary message after failing to import" )
2023-01-30 16:27:06 +03:00
} ( )
2023-06-24 01:24:43 +03:00
for _ , kw := range m . Keywords {
mailboxKeywords [ kw ] = true
}
2023-01-30 16:27:06 +03:00
// Parse message and store parsed information for later fast retrieval.
p , err := message . EnsurePart ( msgf , m . Size )
if err != nil {
ctl . log . Infox ( "parsing message, continuing" , err , mlog . Field ( "path" , origPath ) )
}
m . ParsedBuf , err = json . Marshal ( p )
ctl . xcheck ( err , "marshal parsed message structure" )
if m . Received . IsZero ( ) {
if p . Envelope != nil && ! p . Envelope . Date . IsZero ( ) {
m . Received = p . Envelope . Date
} else {
m . Received = time . Now ( )
}
}
2023-02-16 11:57:27 +03:00
// We set the flags that Deliver would set now and train ourselves. This prevents
// Deliver from training, which would open the junk filter, change it, and write it
// back to disk, for each message (slow).
m . JunkFlagsForMailbox ( mb . Name , conf )
improve training of junk filter
before, we used heuristics to decide when to train/untrain a message as junk or
nonjunk: the message had to be seen, be in certain mailboxes. then if a message
was marked as junk, it was junk. and otherwise it was nonjunk. this wasn't good
enough: you may want to keep some messages around as neither junk or nonjunk.
and that wasn't possible.
ideally, we would just look at the imap $Junk and $NotJunk flags. the problem
is that mail clients don't set these flags, or don't make it easy. thunderbird
can set the flags based on its own bayesian filter. it has a shortcut for
marking Junk and moving it to the junk folder (good), but the counterpart of
notjunk only marks a message as notjunk without showing in the UI that it was
marked as notjunk. there is also no "move and mark as notjunk" mechanism. e.g.
"archive" does not mark a message as notjunk. ios mail and mutt don't appear to
have any way to see or change the $Junk and $NotJunk flags.
what email clients do have is the ability to move messages to other
mailboxes/folders. so mox now has a mechanism that allows you to configure
mailboxes that automatically set $Junk or $NotJunk (or clear both) when a
message is moved/copied/delivered to that folder. e.g. a mailbox called junk or
spam or rejects marks its messags as junk. inbox, postmaster, dmarc, tlsrpt,
neutral* mark their messages as neither junk or notjunk. other folders mark
their messages as notjunk. e.g. list/*, archive. this functionality is
optional, but enabled with the quickstart and for new accounts.
also, mox now keeps track of the previous training of a message and will only
untrain/train if needed. before, there probably have been duplicate or missing
(un)trainings.
this also includes a new subcommand "retrain" to recreate the junkfilter for an
account. you should run it after updating to this version. and you should
probably also modify your account config to include the AutomaticJunkFlags.
2023-02-12 01:00:12 +03:00
if jf != nil && m . NeedsTraining ( ) {
2023-01-30 16:27:06 +03:00
if words , err := jf . ParseMessage ( p ) ; err != nil {
ctl . log . Infox ( "parsing message for updating junk filter" , err , mlog . Field ( "parse" , "" ) , mlog . Field ( "path" , origPath ) )
} else {
2023-05-22 15:40:36 +03:00
err = jf . Train ( ctx , ! m . Junk , words )
2023-01-30 16:27:06 +03:00
ctl . xcheck ( err , "training junk filter" )
improve training of junk filter
before, we used heuristics to decide when to train/untrain a message as junk or
nonjunk: the message had to be seen, be in certain mailboxes. then if a message
was marked as junk, it was junk. and otherwise it was nonjunk. this wasn't good
enough: you may want to keep some messages around as neither junk or nonjunk.
and that wasn't possible.
ideally, we would just look at the imap $Junk and $NotJunk flags. the problem
is that mail clients don't set these flags, or don't make it easy. thunderbird
can set the flags based on its own bayesian filter. it has a shortcut for
marking Junk and moving it to the junk folder (good), but the counterpart of
notjunk only marks a message as notjunk without showing in the UI that it was
marked as notjunk. there is also no "move and mark as notjunk" mechanism. e.g.
"archive" does not mark a message as notjunk. ios mail and mutt don't appear to
have any way to see or change the $Junk and $NotJunk flags.
what email clients do have is the ability to move messages to other
mailboxes/folders. so mox now has a mechanism that allows you to configure
mailboxes that automatically set $Junk or $NotJunk (or clear both) when a
message is moved/copied/delivered to that folder. e.g. a mailbox called junk or
spam or rejects marks its messags as junk. inbox, postmaster, dmarc, tlsrpt,
neutral* mark their messages as neither junk or notjunk. other folders mark
their messages as notjunk. e.g. list/*, archive. this functionality is
optional, but enabled with the quickstart and for new accounts.
also, mox now keeps track of the previous training of a message and will only
untrain/train if needed. before, there probably have been duplicate or missing
(un)trainings.
this also includes a new subcommand "retrain" to recreate the junkfilter for an
account. you should run it after updating to this version. and you should
probably also modify your account config to include the AutomaticJunkFlags.
2023-02-12 01:00:12 +03:00
m . TrainedJunk = & m . Junk
2023-01-30 16:27:06 +03:00
}
}
m . MailboxID = mb . ID
m . MailboxOrigID = mb . ID
xdeliver ( m , msgf )
2023-02-16 15:22:00 +03:00
err = msgf . Close ( )
ctl . log . Check ( err , "closing message after delivery" )
2023-01-30 16:27:06 +03:00
msgf = nil
n ++
if n % 1000 == 0 {
ctl . xwrite ( fmt . Sprintf ( "progress %d" , n ) )
}
}
for {
m , msgf , origPath , err := msgreader . Next ( )
if err == io . EOF {
break
}
ctl . xcheck ( err , "reading next message" )
process ( m , msgf , origPath )
}
2023-06-24 01:24:43 +03:00
// If there are any new keywords, update the mailbox.
var changed bool
mb . Keywords , changed = store . MergeKeywords ( mb . Keywords , maps . Keys ( mailboxKeywords ) )
if changed {
err := tx . Update ( & mb )
ctl . xcheck ( err , "updating keywords in mailbox" )
}
2023-01-30 16:27:06 +03:00
err = tx . Commit ( )
ctl . xcheck ( err , "commit" )
tx = nil
ctl . log . Info ( "delivered messages through import" , mlog . Field ( "count" , len ( deliveredIDs ) ) )
deliveredIDs = nil
comm := store . RegisterComm ( a )
defer comm . Unregister ( )
comm . Broadcast ( changes )
} )
err = a . Close ( )
ctl . xcheck ( err , "closing account" )
a = nil
ctl . xwriteok ( )
ctl . xwrite ( fmt . Sprintf ( "%d" , n ) )
}