package store
import (
"archive/tar"
"archive/zip"
"bufio"
"bytes"
"fmt"
"io"
"os"
"path/filepath"
"sort"
"strings"
"time"
"github.com/mjl-/bstore"
"github.com/mjl-/mox/mlog"
)
// Archiver can archive multiple mailboxes and their messages.
//
// Implementations in this file write to a tar file, a zip file or a directory
// in the file system.
type Archiver interface {
	// Create adds a file to the archive. If name ends with a slash, it is created
	// as a directory and the returned io.WriteCloser can be ignored.
	//
	// Size is the number of bytes the caller is going to write for the file, and
	// mtime is the modification time to record for it. Implementations may ignore
	// either. For regular files, the caller must close the returned writer when
	// done writing.
	Create(name string, size int64, mtime time.Time) (io.WriteCloser, error)

	// Close finishes the archive. ExportMessages does not call Close, that is
	// left to the owner of the Archiver.
	Close() error
}
// TarArchiver is an Archiver that writes to a tar ifle.
type TarArchiver struct {
* tar . Writer
}
// Create adds a file header to the tar file.
2023-02-14 00:37:25 +03:00
func ( a TarArchiver ) Create ( name string , size int64 , mtime time . Time ) ( io . WriteCloser , error ) {
2023-02-13 20:04:05 +03:00
hdr := tar . Header {
Name : name ,
Size : size ,
2023-02-14 00:37:25 +03:00
Mode : 0660 ,
2023-02-13 20:04:05 +03:00
ModTime : mtime ,
Format : tar . FormatPAX ,
}
if err := a . WriteHeader ( & hdr ) ; err != nil {
return nil , err
}
2023-02-14 00:37:25 +03:00
return nopCloser { a } , nil
2023-02-13 20:04:05 +03:00
}
// ZipArchiver is an Archiver that writes to a zip file.
type ZipArchiver struct {
* zip . Writer
}
// Create adds a file header to the zip file.
2023-02-14 00:37:25 +03:00
func ( a ZipArchiver ) Create ( name string , size int64 , mtime time . Time ) ( io . WriteCloser , error ) {
2023-02-13 20:04:05 +03:00
hdr := zip . FileHeader {
Name : name ,
Method : zip . Deflate ,
Modified : mtime ,
UncompressedSize64 : uint64 ( size ) ,
}
2023-02-14 00:37:25 +03:00
w , err := a . CreateHeader ( & hdr )
if err != nil {
return nil , err
}
return nopCloser { w } , nil
}
// nopCloser turns an io.Writer into an io.WriteCloser whose Close is a no-op.
// Writes go to the embedded writer unchanged.
type nopCloser struct {
	io.Writer
}

// Ensure nopCloser can be returned where an io.WriteCloser is expected.
var _ io.WriteCloser = nopCloser{}

// Close leaves the underlying writer untouched and always succeeds.
func (nc nopCloser) Close() error {
	return nil
}
// DirArchiver is an Archiver that writes to a directory.
type DirArchiver struct {
	Dir string
}

// Create creates name in the file system, below Dir.
//
// Missing parent directories are created first. If name ends with a slash, a
// directory is created and the returned writer is nil. Otherwise a new file is
// created, it is an error if the file already exists, and the caller must close
// the returned writer. Size and mtime are not used.
//
// On error, the returned writer is always nil.
func (a DirArchiver) Create(name string, size int64, mtime time.Time) (io.WriteCloser, error) {
	isdir := strings.HasSuffix(name, "/")
	name = strings.TrimSuffix(name, "/")
	p := filepath.Join(a.Dir, name)
	if err := os.MkdirAll(filepath.Dir(p), 0770); err != nil {
		return nil, err
	}
	if isdir {
		return nil, os.Mkdir(p, 0770)
	}
	f, err := os.OpenFile(p, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0660)
	if err != nil {
		// Return a literal nil: returning the nil *os.File would result in a non-nil
		// io.WriteCloser.
		return nil, err
	}
	return f, nil
}

// Close on a dir does nothing.
func (a DirArchiver) Close() error {
	return nil
}
// ExportMessages writes messages to archiver. Either in maildir format, or otherwise in
// mbox. If mailboxOpt is empty, all mailboxes are exported, otherwise only the
// named mailbox.
//
// Some errors are not fatal and result in skipped messages. In that happens, a
// file "errors.txt" is added to the archive describing the errors. The goal is to
// let users export (hopefully) most messages even in the face of errors.
2023-02-14 00:37:25 +03:00
func ExportMessages ( log * mlog . Log , db * bstore . DB , accountDir string , archiver Archiver , maildir bool , mailboxOpt string ) error {
2023-02-13 20:04:05 +03:00
// Start transaction without closure, we are going to close it early, but don't
// want to deal with declaring many variables now to be able to assign them in a
// closure and use them afterwards.
2023-02-14 00:37:25 +03:00
tx , err := db . Begin ( false )
2023-02-13 20:04:05 +03:00
if err != nil {
return fmt . Errorf ( "transaction: %v" , err )
}
defer func ( ) {
if tx != nil {
2023-02-16 15:22:00 +03:00
err := tx . Rollback ( )
log . Check ( err , "transaction rollback after export error" )
2023-02-13 20:04:05 +03:00
}
} ( )
start := time . Now ( )
// Set up mailbox names and ids.
id2name := map [ int64 ] string { }
name2id := map [ string ] int64 { }
mailboxes , err := bstore . QueryTx [ Mailbox ] ( tx ) . List ( )
xcheckf ( err , "query mailboxes" )
for _ , mb := range mailboxes {
id2name [ mb . ID ] = mb . Name
name2id [ mb . Name ] = mb . ID
}
var mailboxID int64
if mailboxOpt != "" {
var ok bool
mailboxID , ok = name2id [ mailboxOpt ]
if ! ok {
return fmt . Errorf ( "mailbox not found" )
}
}
var names [ ] string
for _ , name := range id2name {
if mailboxOpt != "" && name != mailboxOpt {
continue
}
names = append ( names , name )
}
// We need to sort the names because maildirs can create subdirs. Ranging over
// id2name directly would randomize the directory names, we would create a sub
// maildir before the parent, and fail with "dir exists" when creating the parent
// dir.
sort . Slice ( names , func ( i , j int ) bool {
return names [ i ] < names [ j ]
} )
mailboxOrder := map [ int64 ] int { }
for i , name := range names {
mbID := name2id [ name ]
mailboxOrder [ mbID ] = i
}
// Fetch all messages. This can take quite a bit of memory if the mailbox is large.
q := bstore . QueryTx [ Message ] ( tx )
if mailboxID > 0 {
q . FilterNonzero ( Message { MailboxID : mailboxID } )
}
msgs , err := q . List ( )
if err != nil {
return fmt . Errorf ( "listing messages: %v" , err )
}
// Close transaction. We don't want to hold it for too long. We are now at risk
// that a message is be removed while we export, or flags changed. At least the
// size won't change. If we cannot open the message later on, we'll skip it and add
// an error message to an errors.txt file in the output archive.
if err := tx . Rollback ( ) ; err != nil {
return fmt . Errorf ( "closing transaction: %v" , err )
}
tx = nil
// Order the messages by mailbox, received time and finally message ID.
sort . Slice ( msgs , func ( i , j int ) bool {
iid := msgs [ i ] . MailboxID
jid := msgs [ j ] . MailboxID
if iid != jid {
return mailboxOrder [ iid ] < mailboxOrder [ jid ]
}
2023-02-13 21:28:28 +03:00
if ! msgs [ i ] . Received . Equal ( msgs [ j ] . Received ) {
return msgs [ i ] . Received . Before ( msgs [ j ] . Received )
2023-02-13 20:04:05 +03:00
}
return msgs [ i ] . ID < msgs [ j ] . ID
} )
// We keep track of errors reading message files. We continue exporting and add an
// errors.txt file to the archive. In case of errors, the user can get (hopefully)
// most of their emails, and see something went wrong. For other errors, like
// writing to the archiver (e.g. a browser), we abort, because we don't want to
// continue with useless work.
var errors string
var curMailboxID int64 // Used to set curMailbox and finish a previous mbox file.
var curMailbox string
var mboxtmp * os . File
var mboxwriter * bufio . Writer
defer func ( ) {
if mboxtmp != nil {
2023-02-16 15:22:00 +03:00
err := mboxtmp . Close ( )
log . Check ( err , "closing mbox temp file" )
2023-02-13 20:04:05 +03:00
}
} ( )
2023-02-14 00:37:25 +03:00
// For dovecot-keyword-style flags not in standard maildir.
maildirFlags := map [ string ] int { }
var maildirFlaglist [ ] string
maildirFlag := func ( flag string ) string {
i , ok := maildirFlags [ flag ]
if ! ok {
if len ( maildirFlags ) >= 26 {
// Max 26 flag characters.
return ""
}
i = len ( maildirFlags )
maildirFlags [ flag ] = i
maildirFlaglist = append ( maildirFlaglist , flag )
}
return string ( rune ( 'a' + i ) )
}
finishMailbox := func ( ) error {
if maildir {
if len ( maildirFlags ) == 0 {
return nil
}
var b bytes . Buffer
for i , flag := range maildirFlaglist {
if _ , err := fmt . Fprintf ( & b , "%d %s\n" , i , flag ) ; err != nil {
return err
}
}
w , err := archiver . Create ( curMailbox + "/dovecot-keywords" , int64 ( b . Len ( ) ) , start )
if err != nil {
return fmt . Errorf ( "adding dovecot-keywords: %v" , err )
}
if _ , err := w . Write ( b . Bytes ( ) ) ; err != nil {
2023-02-16 15:22:00 +03:00
xerr := w . Close ( )
log . Check ( xerr , "closing dovecot-keywords file after closing" )
2023-02-14 00:37:25 +03:00
return fmt . Errorf ( "writing dovecot-keywords: %v" , err )
}
maildirFlags = map [ string ] int { }
maildirFlaglist = nil
return w . Close ( )
}
2023-02-13 20:04:05 +03:00
if mboxtmp == nil {
return nil
}
if err := mboxwriter . Flush ( ) ; err != nil {
return fmt . Errorf ( "flush mbox writer: %v" , err )
}
fi , err := mboxtmp . Stat ( )
if err != nil {
return fmt . Errorf ( "stat temporary mbox file: %v" , err )
}
if _ , err := mboxtmp . Seek ( 0 , 0 ) ; err != nil {
return fmt . Errorf ( "seek to start of temporary mbox file" )
}
w , err := archiver . Create ( curMailbox + ".mbox" , fi . Size ( ) , fi . ModTime ( ) )
if err != nil {
return fmt . Errorf ( "add mbox to archive: %v" , err )
}
if _ , err := io . Copy ( w , mboxtmp ) ; err != nil {
2023-02-16 15:22:00 +03:00
xerr := w . Close ( )
log . Check ( xerr , "closing mbox message file after error" )
2023-02-13 20:04:05 +03:00
return fmt . Errorf ( "copying temp mbox file to archive: %v" , err )
}
2023-02-14 00:37:25 +03:00
if err := w . Close ( ) ; err != nil {
return fmt . Errorf ( "closing message file: %v" , err )
}
2023-02-16 15:22:00 +03:00
err = mboxtmp . Close ( )
log . Check ( err , "closing temporary mbox file" )
2023-02-13 20:04:05 +03:00
mboxwriter = nil
mboxtmp = nil
return nil
}
exportMessage := func ( m Message ) error {
2023-02-14 00:37:25 +03:00
mp := filepath . Join ( accountDir , "msg" , MessagePath ( m . ID ) )
2023-02-13 20:04:05 +03:00
var mr io . ReadCloser
if m . Size == int64 ( len ( m . MsgPrefix ) ) {
mr = io . NopCloser ( bytes . NewReader ( m . MsgPrefix ) )
} else {
2023-02-16 15:22:00 +03:00
mf , err := os . Open ( mp )
2023-02-13 20:04:05 +03:00
if err != nil {
errors += fmt . Sprintf ( "open message file for id %d, path %s: %v (message skipped)\n" , m . ID , mp , err )
return nil
}
2023-02-16 15:22:00 +03:00
defer func ( ) {
err := mf . Close ( )
log . Check ( err , "closing message file after export" )
} ( )
st , err := mf . Stat ( )
2023-02-13 20:04:05 +03:00
if err != nil {
errors += fmt . Sprintf ( "stat message file for id %d, path %s: %v (message skipped)\n" , m . ID , mp , err )
return nil
}
size := st . Size ( ) + int64 ( len ( m . MsgPrefix ) )
if size != m . Size {
errors += fmt . Sprintf ( "message size mismatch for message id %d, database has %d, size is %d+%d=%d, using calculated size\n" , m . ID , m . Size , len ( m . MsgPrefix ) , st . Size ( ) , size )
}
2023-02-16 15:22:00 +03:00
mr = FileMsgReader ( m . MsgPrefix , mf )
2023-02-13 20:04:05 +03:00
}
if maildir {
p := curMailbox
if m . Flags . Seen {
p = filepath . Join ( p , "cur" )
} else {
p = filepath . Join ( p , "new" )
}
name := fmt . Sprintf ( "%d.%d.mox:2," , m . Received . Unix ( ) , m . ID )
2023-02-14 00:37:25 +03:00
// Standard flags. May need to be sorted.
if m . Flags . Draft {
name += "D"
}
if m . Flags . Flagged {
name += "F"
2023-02-13 20:04:05 +03:00
}
if m . Flags . Answered {
name += "R"
}
2023-02-14 00:37:25 +03:00
if m . Flags . Seen {
name += "S"
2023-02-13 20:04:05 +03:00
}
2023-02-14 00:37:25 +03:00
if m . Flags . Deleted {
name += "T"
}
// Non-standard flag. We set them with a dovecot-keywords file.
if m . Flags . Forwarded {
name += maildirFlag ( "$Forwarded" )
}
if m . Flags . Junk {
name += maildirFlag ( "$Junk" )
2023-02-13 20:04:05 +03:00
}
2023-02-14 00:37:25 +03:00
if m . Flags . Notjunk {
name += maildirFlag ( "$NotJunk" )
}
if m . Flags . Phishing {
name += maildirFlag ( "$Phishing" )
}
if m . Flags . MDNSent {
name += maildirFlag ( "$MDNSent" )
}
2023-02-13 20:04:05 +03:00
p = filepath . Join ( p , name )
// We store messages with \r\n, maildir needs without. But we need to know the
// final size. So first convert, then create file with size, and write from buffer.
// todo: for large messages, we should go through a temporary file instead of memory.
var dst bytes . Buffer
r := bufio . NewReader ( mr )
for {
line , rerr := r . ReadBytes ( '\n' )
if rerr != io . EOF && rerr != nil {
errors += fmt . Sprintf ( "reading from message for id %d: %v (message skipped)\n" , m . ID , err )
return nil
}
if len ( line ) > 0 {
if bytes . HasSuffix ( line , [ ] byte ( "\r\n" ) ) {
line = line [ : len ( line ) - 1 ]
line [ len ( line ) - 1 ] = '\n'
}
if _ , err = dst . Write ( line ) ; err != nil {
return fmt . Errorf ( "writing message: %v" , err )
}
}
if rerr == io . EOF {
break
}
}
size := int64 ( dst . Len ( ) )
w , err := archiver . Create ( p , size , m . Received )
if err != nil {
return fmt . Errorf ( "adding message to archive: %v" , err )
}
if _ , err := io . Copy ( w , & dst ) ; err != nil {
2023-02-16 15:22:00 +03:00
xerr := w . Close ( )
log . Check ( xerr , "closing message" )
2023-02-13 20:04:05 +03:00
return fmt . Errorf ( "copying message to archive: %v" , err )
}
2023-02-14 00:37:25 +03:00
return w . Close ( )
2023-02-13 20:04:05 +03:00
}
// todo: should we put status flags in Status or X-Status header inside the message?
// todo: should we do anything with Content-Length headers? changing the escaping could invalidate those. is anything checking that field?
mailfrom := "mox"
if m . MailFrom != "" {
mailfrom = m . MailFrom
}
if _ , err := fmt . Fprintf ( mboxwriter , "From %s %s\n" , mailfrom , m . Received . Format ( time . ANSIC ) ) ; err != nil {
return fmt . Errorf ( "write message line to mbox temp file: %v" , err )
}
r := bufio . NewReader ( mr )
for {
line , rerr := r . ReadBytes ( '\n' )
if rerr != io . EOF && rerr != nil {
return fmt . Errorf ( "reading message: %v" , err )
}
if len ( line ) > 0 {
if bytes . HasSuffix ( line , [ ] byte ( "\r\n" ) ) {
line = line [ : len ( line ) - 1 ]
line [ len ( line ) - 1 ] = '\n'
}
if bytes . HasPrefix ( bytes . TrimLeft ( line , ">" ) , [ ] byte ( "From " ) ) {
if _ , err := fmt . Fprint ( mboxwriter , ">" ) ; err != nil {
return fmt . Errorf ( "writing escaping >: %v" , err )
}
}
if _ , err := mboxwriter . Write ( line ) ; err != nil {
return fmt . Errorf ( "writing line: %v" , err )
}
}
if rerr == io . EOF {
break
}
}
if _ , err := fmt . Fprint ( mboxwriter , "\n" ) ; err != nil {
return fmt . Errorf ( "writing end of message newline: %v" , err )
}
return nil
}
for _ , m := range msgs {
if m . MailboxID != curMailboxID {
2023-02-14 00:37:25 +03:00
if err := finishMailbox ( ) ; err != nil {
2023-02-13 20:04:05 +03:00
return err
}
curMailbox = id2name [ m . MailboxID ]
curMailboxID = m . MailboxID
if maildir {
// Create the directories that show this is a maildir.
if _ , err := archiver . Create ( curMailbox + "/new/" , 0 , start ) ; err != nil {
return fmt . Errorf ( "adding maildir new directory: %v" , err )
}
if _ , err := archiver . Create ( curMailbox + "/cur/" , 0 , start ) ; err != nil {
return fmt . Errorf ( "adding maildir cur directory: %v" , err )
}
if _ , err := archiver . Create ( curMailbox + "/tmp/" , 0 , start ) ; err != nil {
return fmt . Errorf ( "adding maildir tmp directory: %v" , err )
}
} else {
mboxtmp , err = os . CreateTemp ( "" , "mox-mail-export-mbox" )
if err != nil {
return fmt . Errorf ( "creating temp mbox file: %v" , err )
}
// Remove file immediately, so we are sure we don't leave it around.
if err := os . Remove ( mboxtmp . Name ( ) ) ; err != nil {
return fmt . Errorf ( "removing temp file just created: %v" , err )
}
mboxwriter = bufio . NewWriter ( mboxtmp )
}
}
if err := exportMessage ( m ) ; err != nil {
return err
}
}
2023-02-14 00:37:25 +03:00
if err := finishMailbox ( ) ; err != nil {
2023-02-13 20:04:05 +03:00
return err
}
if errors != "" {
w , err := archiver . Create ( "errors.txt" , int64 ( len ( errors ) ) , time . Now ( ) )
if err != nil {
log . Errorx ( "adding errors.txt to archive" , err )
return err
}
if _ , err := w . Write ( [ ] byte ( errors ) ) ; err != nil {
log . Errorx ( "writing errors.txt to archive" , err )
2023-02-16 15:22:00 +03:00
xerr := w . Close ( )
log . Check ( xerr , "closing errors.txt after error" )
2023-02-13 20:04:05 +03:00
return err
}
2023-02-14 00:37:25 +03:00
if err := w . Close ( ) ; err != nil {
return err
}
2023-02-13 20:04:05 +03:00
}
return nil
}