package main

import (
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"log"
	"os"
	"path/filepath"
	"runtime/debug"
	"strings"
	"time"

	"github.com/mjl-/mox/message"
	"github.com/mjl-/mox/metrics"
	"github.com/mjl-/mox/mlog"
	"github.com/mjl-/mox/store"
)

// todo: add option to trust imported messages, causing us to look at Authentication-Results and Received-SPF headers and add eg verified spf/dkim/dmarc domains to our store, to jumpstart reputation.

// importCommonHelp is the help text shared by the maildir and mbox import
// commands. It is embedded in the command-specific help of both.
const importCommonHelp = `By default, messages will train the junk filter based on their flags and, if
"automatic junk flags" configuration is set, based on mailbox naming.

If the destination mailbox is "Sent", the recipients of the messages are added
to the message metadata, causing later incoming messages from these recipients
to be accepted, unless other reputation signals prevent that.

Users can also import mailboxes/messages through the account web page by
uploading a zip or tgz file with mbox and/or maildirs.
`

// cmdImportMaildir is the command-line entry point for importing a maildir. It
// sets the parameter description and help text on c, parses the arguments and
// hands them to xcmdImport.
func cmdImportMaildir(c *cmd) {
	c.params = "accountname mailboxname maildir"
	c.help = `Import a maildir into an account.

` + importCommonHelp + `
Mailbox flags, like "seen", "answered", will be imported. An optional
dovecot-keywords file can specify additional flags, like Forwarded/Junk/NotJunk.

The maildir files/directories are read by the mox process, so make sure it has
access to the maildir directories/files.
`
	args := c.Parse()
	xcmdImport(false, args, c)
}

// cmdImportMbox is the command-line entry point for importing an mbox file. It
// sets the parameter description and help text on c, parses the arguments and
// hands them to xcmdImport.
func cmdImportMbox(c *cmd) {
	c.params = "accountname mailboxname mbox"
	c.help = `Import an mbox into an account.

Using mbox is not recommended, maildir is a better defined format.

` + importCommonHelp + `
The mailbox is read by the mox process, so make sure it has access to the
mbox file.
`
	args := c.Parse()
	xcmdImport(true, args, c)
}

// xcmdImport is the client side of an import. It expects exactly three
// arguments (account name, mailbox name, source path), sends the import request
// over the ctl connection, prints progress lines to stderr as they arrive, and
// finally prints the number of imported messages.
//
// mbox selects the "importmbox" ctl command, otherwise "importmaildir" is used.
func xcmdImport(mbox bool, args []string, c *cmd) {
	if len(args) != 3 {
		// NOTE(review): c.Usage presumably terminates the program; if it returned,
		// the indexing of args below could panic. Verify against its definition.
		c.Usage()
	}

	mustLoadConfig()

	account := args[0]
	mailbox := args[1]
	// Any casing of "inbox" refers to the single mailbox named "Inbox".
	if strings.EqualFold(mailbox, "inbox") {
		mailbox = "Inbox"
	}
	src := args[2]

	var ctlcmd string
	if mbox {
		ctlcmd = "importmbox"
	} else {
		ctlcmd = "importmaildir"
	}

	ctl := xctl()
	ctl.xwrite(ctlcmd)
	ctl.xwrite(account)
	ctl.xwrite(mailbox)
	ctl.xwrite(src)
	ctl.xreadok()
	fmt.Fprintln(os.Stderr, "importing...")

	// Read zero or more "progress <count>" lines, followed by a final "ok".
	for {
		line := ctl.xread()
		if strings.HasPrefix(line, "progress ") {
			n := line[len("progress "):]
			fmt.Fprintf(os.Stderr, "%s...\n", n)
			continue
		}
		if line != "ok" {
			log.Fatalf("import, expected ok, got %q", line)
		}
		break
	}
	count := ctl.xread()
	fmt.Fprintf(os.Stderr, "%s imported\n", count)
}

// importctl is the server side of an import. It reads the account, mailbox and
// source path from ctl, opens the mbox file or the maildir "new" and "cur"
// subdirectories, and delivers every message to the mailbox inside a single
// database transaction. The junk filter, if the account has one, is opened once
// and trained here rather than once per delivered message.
//
// Errors are raised as panics by the ctl.x* and store *X helpers. A deferred
// recover removes the message files of messages that were delivered before the
// failure and reports the error to the client.
func importctl(ctl *ctl, mbox bool) {
	/* protocol:
	> "importmaildir" or "importmbox"
	> account
	> mailbox
	> src (mbox file or maildir directory)
	< "ok" or error
	< "progress" count (zero or more times, once for every 1000 messages)
	< "ok" when done, or error
	< count (of total imported messages, only if not error)
	*/
	account := ctl.xread()
	mailbox := ctl.xread()
	src := ctl.xread()

	kind := "maildir"
	if mbox {
		kind = "mbox"
	}
	ctl.log.Info("importing messages", mlog.Field("kind", kind), mlog.Field("account", account), mlog.Field("mailbox", mailbox), mlog.Field("source", src))

	var err error
	var mboxf *os.File
	var mdnewf, mdcurf *os.File
	var msgreader store.MsgSource

	// Close whichever source files were opened, regardless of how we return.
	defer func() {
		if mboxf != nil {
			if err := mboxf.Close(); err != nil {
				ctl.log.Infox("closing mbox file after import", err)
			}
		}
		if mdnewf != nil {
			if err := mdnewf.Close(); err != nil {
				ctl.log.Infox("closing maildir new after import", err)
			}
		}
		if mdcurf != nil {
			if err := mdcurf.Close(); err != nil {
				ctl.log.Infox("closing maildir cur after import", err)
			}
		}
	}()

	// Open account, creating a database file if it doesn't exist yet. It must be known
	// in the configuration file.
	a, err := store.OpenAccount(account)
	ctl.xcheck(err, "opening account")
	defer func() {
		// a is set to nil once the account has been closed on the regular path.
		if a != nil {
			if err := a.Close(); err != nil {
				ctl.log.Errorx("closing account after import", err)
			}
		}
	}()

	// Messages don't always have a junk flag set. We'll assume anything in a mailbox
	// starting with junk or spam is junk mail.

	// First check if we can access the mbox/maildir.
	// Mox needs to be able to access those files, the user running the import command
	// may be a different user who can access the files.
	if mbox {
		mboxf, err = os.Open(src)
		ctl.xcheck(err, "open mbox file")
		msgreader = store.NewMboxReader(store.CreateMessageTemp, src, mboxf, ctl.log)
	} else {
		mdnewf, err = os.Open(filepath.Join(src, "new"))
		ctl.xcheck(err, "open subdir new of maildir")
		mdcurf, err = os.Open(filepath.Join(src, "cur"))
		ctl.xcheck(err, "open subdir cur of maildir")
		msgreader = store.NewMaildirReader(store.CreateMessageTemp, mdnewf, mdcurf, ctl.log)
	}

	tx, err := a.DB.Begin(true)
	ctl.xcheck(err, "begin transaction")
	defer func() {
		// tx is set to nil after a successful commit.
		if tx != nil {
			tx.Rollback()
		}
	}()

	// All preparations done. Good to go.
	ctl.xwriteok()

	// We will be delivering messages. If we fail halfway, we need to remove the created msg files.
	var deliveredIDs []int64

	// Handle errors from store.*X calls.
	defer func() {
		x := recover()
		if x == nil {
			return
		}

		ctl.log.Error("store error", mlog.Field("panic", x))
		debug.PrintStack()
		metrics.PanicInc("import")

		// deliveredIDs is reset to nil after commit, so this only removes files of
		// messages whose database records will not be committed.
		for _, id := range deliveredIDs {
			p := a.MessagePath(id)
			if err := os.Remove(p); err != nil {
				ctl.log.Errorx("removing message file after import error", err, mlog.Field("path", p))
			}
		}

		ctl.xerror(fmt.Sprintf("%v", x))
	}()

	var changes []store.Change

	// xdeliver delivers a single message and records its ID and the change to
	// broadcast after commit.
	xdeliver := func(m *store.Message, mf *os.File) {
		// todo: possibly set dmarcdomain to the domain of the from address? at least for non-spams that have been seen. otherwise user would start without any reputations. the assumption would be that the user has accepted email and deemed it legit, coming from the indicated sender.

		const consumeFile = true
		isSent := mailbox == "Sent"
		const sync = false
		const notrain = true
		a.DeliverX(ctl.log, tx, m, mf, consumeFile, isSent, sync, notrain)
		deliveredIDs = append(deliveredIDs, m.ID)
		ctl.log.Debug("delivered message", mlog.Field("id", m.ID))
		changes = append(changes, store.ChangeAddUID{MailboxID: m.MailboxID, UID: m.UID, Flags: m.Flags})
	}

	// todo: one goroutine for reading messages, one for parsing the message, one adding to database, one for junk filter training.
	n := 0
	a.WithWLock(func() {
		// Ensure mailbox exists.
		var mb store.Mailbox
		mb, changes = a.MailboxEnsureX(tx, mailbox, true)

		// An account without junk filter is fine, we just skip training then.
		jf, _, err := a.OpenJunkFilter(ctl.log)
		if err != nil && !errors.Is(err, store.ErrNoJunkFilter) {
			ctl.xcheck(err, "open junk filter")
		}
		defer func() {
			if jf != nil {
				err = jf.Close()
				ctl.xcheck(err, "close junk filter")
			}
		}()

		conf, _ := a.Conf()

		// process parses, flags, optionally trains on, and delivers one message.
		// msgf is the temporary file holding the message, origPath is only used
		// for logging.
		process := func(m *store.Message, msgf *os.File, origPath string) {
			defer func() {
				// msgf is set to nil after successful delivery. If it is still set
				// here, the import of this message failed and the temporary file
				// must be cleaned up.
				if msgf == nil {
					return
				}
				// Close before removing: removing a file that is still open does
				// not work on all platforms.
				name := msgf.Name()
				if err := msgf.Close(); err != nil {
					ctl.log.Errorx("closing temporary message after failing to import", err)
				}
				if err := os.Remove(name); err != nil {
					ctl.log.Errorx("removing temporary message after failing to import", err)
				}
			}()

			// Parse message and store parsed information for later fast retrieval.
			p, err := message.EnsurePart(msgf, m.Size)
			if err != nil {
				ctl.log.Infox("parsing message, continuing", err, mlog.Field("path", origPath))
			}
			m.ParsedBuf, err = json.Marshal(p)
			ctl.xcheck(err, "marshal parsed message structure")

			// Without a received time from the source, fall back to the Date from
			// the message envelope, and finally to the current time.
			if m.Received.IsZero() {
				if p.Envelope != nil && !p.Envelope.Date.IsZero() {
					m.Received = p.Envelope.Date
				} else {
					m.Received = time.Now()
				}
			}

			// We set the flags that Deliver would set now and train ourselves. This prevents
			// Deliver from training, which would open the junk filter, change it, and write it
			// back to disk, for each message (slow).
			m.JunkFlagsForMailbox(mb.Name, conf)
			if jf != nil && m.NeedsTraining() {
				if words, err := jf.ParseMessage(p); err != nil {
					ctl.log.Infox("parsing message for updating junk filter", err, mlog.Field("parse", ""), mlog.Field("path", origPath))
				} else {
					err = jf.Train(!m.Junk, words)
					ctl.xcheck(err, "training junk filter")
					m.TrainedJunk = &m.Junk
				}
			}

			m.MailboxID = mb.ID
			m.MailboxOrigID = mb.ID
			xdeliver(m, msgf)
			if err := msgf.Close(); err != nil {
				ctl.log.Errorx("closing message file after delivery", err)
			}
			// Delivery succeeded, prevent the deferred cleanup from touching the file.
			msgf = nil

			n++
			if n%1000 == 0 {
				ctl.xwrite(fmt.Sprintf("progress %d", n))
			}
		}

		for {
			m, msgf, origPath, err := msgreader.Next()
			if err == io.EOF {
				break
			}
			ctl.xcheck(err, "reading next message")

			process(m, msgf, origPath)
		}

		err = tx.Commit()
		ctl.xcheck(err, "commit")
		tx = nil
		ctl.log.Info("delivered messages through import", mlog.Field("count", len(deliveredIDs)))
		// The messages are committed now, their files must no longer be removed on error.
		deliveredIDs = nil

		comm := store.RegisterComm(a)
		defer comm.Unregister()
		comm.Broadcast(changes)
	})

	err = a.Close()
	// Clear a before checking the error, so the deferred cleanup does not close the
	// account a second time when Close failed.
	a = nil
	ctl.xcheck(err, "closing account")

	ctl.xwriteok()
	ctl.xwrite(fmt.Sprintf("%d", n))
}