mox/verifydata.go

442 lines
15 KiB
Go
Raw Normal View History

package main
import (
"context"
"errors"
"fmt"
"io/fs"
"log"
"os"
"path/filepath"
"strconv"
"strings"
implement message threading in backend and webmail we match messages to their parents based on the "references" and "in-reply-to" headers (requiring the same base subject), and in absense of those headers we also by only base subject (against messages received max 4 weeks ago). we store a threadid with messages. all messages in a thread have the same threadid. messages also have a "thread parent ids", which holds all id's of parent messages up to the thread root. then there is "thread missing link", which is set when a referenced immediate parent wasn't found (but possibly earlier ancestors can still be found and will be in thread parent ids". threads can be muted: newly delivered messages are automatically marked as read/seen. threads can be marked as collapsed: if set, the webmail collapses the thread to a single item in the basic threading view (default is to expand threads). the muted and collapsed fields are copied from their parent on message delivery. the threading is implemented in the webmail. the non-threading mode still works as before. the new default threading mode "unread" automatically expands only the threads with at least one unread (not seen) meessage. the basic threading mode "on" expands all threads except when explicitly collapsed (as saved in the thread collapsed field). new shortcuts for navigation/interaction threads have been added, e.g. go to previous/next thread root, toggle collapse/expand of thread (or double click), toggle mute of thread. some previous shortcuts have changed, see the help for details. the message threading are added with an explicit account upgrade step, automatically started when an account is opened. the upgrade is done in the background because it will take too long for large mailboxes to block account operations. the upgrade takes two steps: 1. updating all message records in the database to add a normalized message-id and thread base subject (with "re:", "fwd:" and several other schemes stripped). 2. going through all messages in the database again, reading the "references" and "in-reply-to" headers from disk, and matching against their parents. this second step is also done at the end of each import of mbox/maildir mailboxes. new deliveries are matched immediately against other existing messages, currently no attempt is made to rematch previously delivered messages (which could be useful for related messages being delivered out of order). the threading is not yet exposed over imap.
2023-09-13 09:51:50 +03:00
"golang.org/x/exp/slices"
bolt "go.etcd.io/bbolt"
"github.com/mjl-/bstore"
"github.com/mjl-/mox/dmarcdb"
"github.com/mjl-/mox/junk"
"github.com/mjl-/mox/moxvar"
"github.com/mjl-/mox/mtastsdb"
"github.com/mjl-/mox/queue"
"github.com/mjl-/mox/store"
"github.com/mjl-/mox/tlsrptdb"
)
func cmdVerifydata(c *cmd) {
c.params = "data-dir"
c.help = `Verify the contents of a data directory, typically of a backup.
Verifydata checks all database files to see if they are valid BoltDB/bstore
databases. It checks that all messages in the database have a corresponding
on-disk message file and there are no unrecognized files. If option -fix is
specified, unrecognized message files are moved away. This may be needed after
a restore, because messages enqueued or delivered in the future may get those
message sequence numbers assigned and writing the message file would fail.
Consistency of message/mailbox UID, UIDNEXT and UIDVALIDITY is verified as
well.
Because verifydata opens the database files, schema upgrades may automatically
be applied. This can happen if you use a new mox release. It is useful to run
"mox verifydata" with a new binary before attempting an upgrade, but only on a
copy of the database files, as made with "mox backup". Before upgrading, make a
new backup again since "mox verifydata" may have upgraded the database files,
possibly making them potentially no longer readable by the previous version.
`
var fix bool
c.flag.BoolVar(&fix, "fix", false, "fix fixable problems, such as moving away message files not referenced by their database")
// To prevent aborting the upgrade test with v0.0.[45] that had a message with
// incorrect Size.
var skipSizeCheck bool
c.flag.BoolVar(&skipSizeCheck, "skip-size-check", false, "skip the check for message size")
args := c.Parse()
if len(args) != 1 {
c.Usage()
}
dataDir := filepath.Clean(args[0])
ctxbg := context.Background()
// Check whether file exists, or rather, that it doesn't not exist. Other errors
// will return true as well, so the triggered check can give the details.
exists := func(path string) bool {
_, err := os.Stat(path)
return err == nil || !os.IsNotExist(err)
}
// Check for error. If so, write a log line, including the path, and set fail so we
// can warn at the end.
var fail bool
checkf := func(err error, path, format string, args ...any) {
if err == nil {
return
}
fail = true
log.Printf("error: %s: %s: %v", path, fmt.Sprintf(format, args...), err)
}
// When we fix problems, we may have to move files/dirs. We need to ensure the
// directory of the destination path exists before we move. We keep track of
// created dirs so we don't try to create the same directory all the time.
createdDirs := map[string]struct{}{}
ensureDir := func(path string) {
dir := filepath.Dir(path)
if _, ok := createdDirs[dir]; ok {
return
}
err := os.MkdirAll(dir, 0770)
checkf(err, dir, "creating directory")
createdDirs[dir] = struct{}{}
}
// Check a database file by opening it with BoltDB and bstore and lightly checking
// its contents.
checkDB := func(required bool, path string, types []any) {
_, err := os.Stat(path)
if !required && err != nil && errors.Is(err, fs.ErrNotExist) {
return
}
checkf(err, path, "checking if database file exists")
if err != nil {
return
}
bdb, err := bolt.Open(path, 0600, nil)
checkf(err, path, "open database with bolt")
if err != nil {
return
}
// Check BoltDB consistency.
err = bdb.View(func(tx *bolt.Tx) error {
for err := range tx.Check() {
checkf(err, path, "bolt database problem")
}
return nil
})
checkf(err, path, "reading bolt database")
bdb.Close()
db, err := bstore.Open(ctxbg, path, nil, types...)
checkf(err, path, "open database with bstore")
if err != nil {
return
}
defer db.Close()
err = db.Read(ctxbg, func(tx *bstore.Tx) error {
// Check bstore consistency, if it can export all records for all types. This is a
// quick way to get bstore to parse all records.
types, err := tx.Types()
checkf(err, path, "getting bstore types from database")
if err != nil {
return nil
}
for _, t := range types {
var fields []string
err := tx.Records(t, &fields, func(m map[string]any) error {
return nil
})
checkf(err, path, "parsing record for type %q", t)
}
return nil
})
checkf(err, path, "checking database file")
}
checkFile := func(dbpath, path string, prefixSize int, size int64) {
st, err := os.Stat(path)
checkf(err, path, "checking if file exists")
if !skipSizeCheck && err == nil && int64(prefixSize)+st.Size() != size {
filesize := st.Size()
checkf(fmt.Errorf("%s: message size is %d, should be %d (length of MsgPrefix %d + file size %d), see \"mox fixmsgsize\"", path, size, int64(prefixSize)+st.Size(), prefixSize, filesize), dbpath, "checking message size")
}
}
checkQueue := func() {
dbpath := filepath.Join(dataDir, "queue/index.db")
checkDB(true, dbpath, queue.DBTypes)
// Check that all messages present in the database also exist on disk.
seen := map[string]struct{}{}
db, err := bstore.Open(ctxbg, dbpath, &bstore.Options{MustExist: true}, queue.DBTypes...)
checkf(err, dbpath, "opening queue database to check messages")
if err == nil {
err := bstore.QueryDB[queue.Msg](ctxbg, db).ForEach(func(m queue.Msg) error {
mp := store.MessagePath(m.ID)
seen[mp] = struct{}{}
p := filepath.Join(dataDir, "queue", mp)
checkFile(dbpath, p, len(m.MsgPrefix), m.Size)
return nil
})
checkf(err, dbpath, "reading messages in queue database to check files")
}
// Check that there are no files that could be treated as a message.
qdir := filepath.Join(dataDir, "queue")
err = filepath.WalkDir(qdir, func(qpath string, d fs.DirEntry, err error) error {
checkf(err, qpath, "walk")
if err != nil {
return nil
}
if d.IsDir() {
return nil
}
p := qpath[len(qdir)+1:]
if p == "index.db" {
return nil
}
if _, ok := seen[p]; ok {
return nil
}
l := strings.Split(p, string(filepath.Separator))
if len(l) == 1 {
log.Printf("warning: %s: unrecognized file in queue directory, ignoring", qpath)
return nil
}
// If it doesn't look like a message number, there is no risk of it being the name
// of a message enqueued in the future.
if len(l) >= 3 {
if _, err := strconv.ParseInt(l[1], 10, 64); err != nil {
log.Printf("warning: %s: unrecognized file in queue directory, ignoring", qpath)
return nil
}
}
if !fix {
checkf(errors.New("may interfere with messages enqueued in the future"), qpath, "unrecognized file in queue directory (use the -fix flag to move it away)")
return nil
}
npath := filepath.Join(dataDir, "moved", "queue", p)
ensureDir(npath)
err = os.Rename(qpath, npath)
checkf(err, qpath, "moving queue message file away")
if err == nil {
log.Printf("warning: moved %s to %s", qpath, npath)
}
return nil
})
checkf(err, qdir, "walking queue directory")
}
// Check an account, with its database file and messages.
checkAccount := func(name string) {
accdir := filepath.Join(dataDir, "accounts", name)
checkDB(true, filepath.Join(accdir, "index.db"), store.DBTypes)
jfdbpath := filepath.Join(accdir, "junkfilter.db")
jfbloompath := filepath.Join(accdir, "junkfilter.bloom")
if exists(jfdbpath) || exists(jfbloompath) {
checkDB(true, jfdbpath, junk.DBTypes)
}
// todo: add some kind of check for the bloom filter?
// Check that all messages in the database have a message file on disk.
// And check consistency of UIDs with the mailbox UIDNext, and check UIDValidity.
seen := map[string]struct{}{}
dbpath := filepath.Join(accdir, "index.db")
db, err := bstore.Open(ctxbg, dbpath, &bstore.Options{MustExist: true}, store.DBTypes...)
checkf(err, dbpath, "opening account database to check messages")
if err == nil {
uidvalidity := store.NextUIDValidity{ID: 1}
if err := db.Get(ctxbg, &uidvalidity); err != nil {
checkf(err, dbpath, "missing nextuidvalidity")
}
implement message threading in backend and webmail we match messages to their parents based on the "references" and "in-reply-to" headers (requiring the same base subject), and in absense of those headers we also by only base subject (against messages received max 4 weeks ago). we store a threadid with messages. all messages in a thread have the same threadid. messages also have a "thread parent ids", which holds all id's of parent messages up to the thread root. then there is "thread missing link", which is set when a referenced immediate parent wasn't found (but possibly earlier ancestors can still be found and will be in thread parent ids". threads can be muted: newly delivered messages are automatically marked as read/seen. threads can be marked as collapsed: if set, the webmail collapses the thread to a single item in the basic threading view (default is to expand threads). the muted and collapsed fields are copied from their parent on message delivery. the threading is implemented in the webmail. the non-threading mode still works as before. the new default threading mode "unread" automatically expands only the threads with at least one unread (not seen) meessage. the basic threading mode "on" expands all threads except when explicitly collapsed (as saved in the thread collapsed field). new shortcuts for navigation/interaction threads have been added, e.g. go to previous/next thread root, toggle collapse/expand of thread (or double click), toggle mute of thread. some previous shortcuts have changed, see the help for details. the message threading are added with an explicit account upgrade step, automatically started when an account is opened. the upgrade is done in the background because it will take too long for large mailboxes to block account operations. the upgrade takes two steps: 1. updating all message records in the database to add a normalized message-id and thread base subject (with "re:", "fwd:" and several other schemes stripped). 2. going through all messages in the database again, reading the "references" and "in-reply-to" headers from disk, and matching against their parents. this second step is also done at the end of each import of mbox/maildir mailboxes. new deliveries are matched immediately against other existing messages, currently no attempt is made to rematch previously delivered messages (which could be useful for related messages being delivered out of order). the threading is not yet exposed over imap.
2023-09-13 09:51:50 +03:00
up := store.Upgrade{ID: 1}
if err := db.Get(ctxbg, &up); err != nil {
log.Printf("warning: %s: getting upgrade record (continuing, but not checking message threading): %v", dbpath, err)
implement message threading in backend and webmail we match messages to their parents based on the "references" and "in-reply-to" headers (requiring the same base subject), and in absense of those headers we also by only base subject (against messages received max 4 weeks ago). we store a threadid with messages. all messages in a thread have the same threadid. messages also have a "thread parent ids", which holds all id's of parent messages up to the thread root. then there is "thread missing link", which is set when a referenced immediate parent wasn't found (but possibly earlier ancestors can still be found and will be in thread parent ids". threads can be muted: newly delivered messages are automatically marked as read/seen. threads can be marked as collapsed: if set, the webmail collapses the thread to a single item in the basic threading view (default is to expand threads). the muted and collapsed fields are copied from their parent on message delivery. the threading is implemented in the webmail. the non-threading mode still works as before. the new default threading mode "unread" automatically expands only the threads with at least one unread (not seen) meessage. the basic threading mode "on" expands all threads except when explicitly collapsed (as saved in the thread collapsed field). new shortcuts for navigation/interaction threads have been added, e.g. go to previous/next thread root, toggle collapse/expand of thread (or double click), toggle mute of thread. some previous shortcuts have changed, see the help for details. the message threading are added with an explicit account upgrade step, automatically started when an account is opened. the upgrade is done in the background because it will take too long for large mailboxes to block account operations. the upgrade takes two steps: 1. updating all message records in the database to add a normalized message-id and thread base subject (with "re:", "fwd:" and several other schemes stripped). 2. going through all messages in the database again, reading the "references" and "in-reply-to" headers from disk, and matching against their parents. this second step is also done at the end of each import of mbox/maildir mailboxes. new deliveries are matched immediately against other existing messages, currently no attempt is made to rematch previously delivered messages (which could be useful for related messages being delivered out of order). the threading is not yet exposed over imap.
2023-09-13 09:51:50 +03:00
} else if up.Threads != 2 {
log.Printf("warning: %s: no message threading in database, skipping checks for threading consistency", dbpath)
implement message threading in backend and webmail we match messages to their parents based on the "references" and "in-reply-to" headers (requiring the same base subject), and in absense of those headers we also by only base subject (against messages received max 4 weeks ago). we store a threadid with messages. all messages in a thread have the same threadid. messages also have a "thread parent ids", which holds all id's of parent messages up to the thread root. then there is "thread missing link", which is set when a referenced immediate parent wasn't found (but possibly earlier ancestors can still be found and will be in thread parent ids". threads can be muted: newly delivered messages are automatically marked as read/seen. threads can be marked as collapsed: if set, the webmail collapses the thread to a single item in the basic threading view (default is to expand threads). the muted and collapsed fields are copied from their parent on message delivery. the threading is implemented in the webmail. the non-threading mode still works as before. the new default threading mode "unread" automatically expands only the threads with at least one unread (not seen) meessage. the basic threading mode "on" expands all threads except when explicitly collapsed (as saved in the thread collapsed field). new shortcuts for navigation/interaction threads have been added, e.g. go to previous/next thread root, toggle collapse/expand of thread (or double click), toggle mute of thread. some previous shortcuts have changed, see the help for details. the message threading are added with an explicit account upgrade step, automatically started when an account is opened. the upgrade is done in the background because it will take too long for large mailboxes to block account operations. the upgrade takes two steps: 1. updating all message records in the database to add a normalized message-id and thread base subject (with "re:", "fwd:" and several other schemes stripped). 2. going through all messages in the database again, reading the "references" and "in-reply-to" headers from disk, and matching against their parents. this second step is also done at the end of each import of mbox/maildir mailboxes. new deliveries are matched immediately against other existing messages, currently no attempt is made to rematch previously delivered messages (which could be useful for related messages being delivered out of order). the threading is not yet exposed over imap.
2023-09-13 09:51:50 +03:00
}
mailboxes := map[int64]store.Mailbox{}
err := bstore.QueryDB[store.Mailbox](ctxbg, db).ForEach(func(mb store.Mailbox) error {
mailboxes[mb.ID] = mb
if mb.UIDValidity >= uidvalidity.Next {
checkf(errors.New(`inconsistent uidvalidity for mailbox/account, see "mox fixuidmeta"`), dbpath, "mailbox %q (id %d) has uidvalidity %d >= account nextuidvalidity %d", mb.Name, mb.ID, mb.UIDValidity, uidvalidity.Next)
}
return nil
})
checkf(err, dbpath, "reading mailboxes to check uidnext consistency")
add webmail it was far down on the roadmap, but implemented earlier, because it's interesting, and to help prepare for a jmap implementation. for jmap we need to implement more client-like functionality than with just imap. internal data structures need to change. jmap has lots of other requirements, so it's already a big project. by implementing a webmail now, some of the required data structure changes become clear and can be made now, so the later jmap implementation can do things similarly to the webmail code. the webmail frontend and webmail are written together, making their interface/api much smaller and simpler than jmap. one of the internal changes is that we now keep track of per-mailbox total/unread/unseen/deleted message counts and mailbox sizes. keeping this data consistent after any change to the stored messages (through the code base) is tricky, so mox now has a consistency check that verifies the counts are correct, which runs only during tests, each time an internal account reference is closed. we have a few more internal "changes" that are propagated for the webmail frontend (that imap doesn't have a way to propagate on a connection), like changes to the special-use flags on mailboxes, and used keywords in a mailbox. more changes that will be required have revealed themselves while implementing the webmail, and will be implemented next. the webmail user interface is modeled after the mail clients i use or have used: thunderbird, macos mail, mutt; and webmails i normally only use for testing: gmail, proton, yahoo, outlook. a somewhat technical user is assumed, but still the goal is to make this webmail client easy to use for everyone. the user interface looks like most other mail clients: a list of mailboxes, a search bar, a message list view, and message details. there is a top/bottom and a left/right layout for the list/message view, default is automatic based on screen size. the panes can be resized by the user. buttons for actions are just text, not icons. clicking a button briefly shows the shortcut for the action in the bottom right, helping with learning to operate quickly. any text that is underdotted has a title attribute that causes more information to be displayed, e.g. what a button does or a field is about. to highlight potential phishing attempts, any text (anywhere in the webclient) that switches unicode "blocks" (a rough approximation to (language) scripts) within a word is underlined orange. multiple messages can be selected with familiar ui interaction: clicking while holding control and/or shift keys. keyboard navigation works with arrows/page up/down and home/end keys, and also with a few basic vi-like keys for list/message navigation. we prefer showing the text instead of html (with inlined images only) version of a message. html messages are shown in an iframe served from an endpoint with CSP headers to prevent dangerous resources (scripts, external images) from being loaded. the html is also sanitized, with javascript removed. a user can choose to load external resources (e.g. images for tracking purposes). the frontend is just (strict) typescript, no external frameworks. all incoming/outgoing data is typechecked, both the api request parameters and response types, and the data coming in over SSE. the types and checking code are generated with sherpats, which uses the api definitions generated by sherpadoc based on the Go code. so types from the backend are automatically propagated to the frontend. since there is no framework to automatically propagate properties and rerender components, changes coming in over the SSE connection are propagated explicitly with regular function calls. the ui is separated into "views", each with a "root" dom element that is added to the visible document. these views have additional functions for getting changes propagated, often resulting in the view updating its (internal) ui state (dom). we keep the frontend compilation simple, it's just a few typescript files that get compiled (combined and types stripped) into a single js file, no additional runtime code needed or complicated build processes used. the webmail is served is served from a compressed, cachable html file that includes style and the javascript, currently just over 225kb uncompressed, under 60kb compressed (not minified, including comments). we include the generated js files in the repository, to keep Go's easily buildable self-contained binaries. authentication is basic http, as with the account and admin pages. most data comes in over one long-term SSE connection to the backend. api requests signal which mailbox/search/messages are requested over the SSE connection. fetching individual messages, and making changes, are done through api calls. the operations are similar to imap, so some code has been moved from package imapserver to package store. the future jmap implementation will benefit from these changes too. more functionality will probably be moved to the store package in the future. the quickstart enables webmail on the internal listener by default (for new installs). users can enable it on the public listener if they want to. mox localserve enables it too. to enable webmail on existing installs, add settings like the following to the listeners in mox.conf, similar to AccountHTTP(S): WebmailHTTP: Enabled: true WebmailHTTPS: Enabled: true special thanks to liesbeth, gerben, andrii for early user feedback. there is plenty still to do, see the list at the top of webmail/webmail.ts. feedback welcome as always.
2023-08-07 22:57:03 +03:00
mbCounts := map[int64]store.MailboxCounts{}
err = bstore.QueryDB[store.Message](ctxbg, db).ForEach(func(m store.Message) error {
add webmail it was far down on the roadmap, but implemented earlier, because it's interesting, and to help prepare for a jmap implementation. for jmap we need to implement more client-like functionality than with just imap. internal data structures need to change. jmap has lots of other requirements, so it's already a big project. by implementing a webmail now, some of the required data structure changes become clear and can be made now, so the later jmap implementation can do things similarly to the webmail code. the webmail frontend and webmail are written together, making their interface/api much smaller and simpler than jmap. one of the internal changes is that we now keep track of per-mailbox total/unread/unseen/deleted message counts and mailbox sizes. keeping this data consistent after any change to the stored messages (through the code base) is tricky, so mox now has a consistency check that verifies the counts are correct, which runs only during tests, each time an internal account reference is closed. we have a few more internal "changes" that are propagated for the webmail frontend (that imap doesn't have a way to propagate on a connection), like changes to the special-use flags on mailboxes, and used keywords in a mailbox. more changes that will be required have revealed themselves while implementing the webmail, and will be implemented next. the webmail user interface is modeled after the mail clients i use or have used: thunderbird, macos mail, mutt; and webmails i normally only use for testing: gmail, proton, yahoo, outlook. a somewhat technical user is assumed, but still the goal is to make this webmail client easy to use for everyone. the user interface looks like most other mail clients: a list of mailboxes, a search bar, a message list view, and message details. there is a top/bottom and a left/right layout for the list/message view, default is automatic based on screen size. the panes can be resized by the user. buttons for actions are just text, not icons. clicking a button briefly shows the shortcut for the action in the bottom right, helping with learning to operate quickly. any text that is underdotted has a title attribute that causes more information to be displayed, e.g. what a button does or a field is about. to highlight potential phishing attempts, any text (anywhere in the webclient) that switches unicode "blocks" (a rough approximation to (language) scripts) within a word is underlined orange. multiple messages can be selected with familiar ui interaction: clicking while holding control and/or shift keys. keyboard navigation works with arrows/page up/down and home/end keys, and also with a few basic vi-like keys for list/message navigation. we prefer showing the text instead of html (with inlined images only) version of a message. html messages are shown in an iframe served from an endpoint with CSP headers to prevent dangerous resources (scripts, external images) from being loaded. the html is also sanitized, with javascript removed. a user can choose to load external resources (e.g. images for tracking purposes). the frontend is just (strict) typescript, no external frameworks. all incoming/outgoing data is typechecked, both the api request parameters and response types, and the data coming in over SSE. the types and checking code are generated with sherpats, which uses the api definitions generated by sherpadoc based on the Go code. so types from the backend are automatically propagated to the frontend. since there is no framework to automatically propagate properties and rerender components, changes coming in over the SSE connection are propagated explicitly with regular function calls. the ui is separated into "views", each with a "root" dom element that is added to the visible document. these views have additional functions for getting changes propagated, often resulting in the view updating its (internal) ui state (dom). we keep the frontend compilation simple, it's just a few typescript files that get compiled (combined and types stripped) into a single js file, no additional runtime code needed or complicated build processes used. the webmail is served is served from a compressed, cachable html file that includes style and the javascript, currently just over 225kb uncompressed, under 60kb compressed (not minified, including comments). we include the generated js files in the repository, to keep Go's easily buildable self-contained binaries. authentication is basic http, as with the account and admin pages. most data comes in over one long-term SSE connection to the backend. api requests signal which mailbox/search/messages are requested over the SSE connection. fetching individual messages, and making changes, are done through api calls. the operations are similar to imap, so some code has been moved from package imapserver to package store. the future jmap implementation will benefit from these changes too. more functionality will probably be moved to the store package in the future. the quickstart enables webmail on the internal listener by default (for new installs). users can enable it on the public listener if they want to. mox localserve enables it too. to enable webmail on existing installs, add settings like the following to the listeners in mox.conf, similar to AccountHTTP(S): WebmailHTTP: Enabled: true WebmailHTTPS: Enabled: true special thanks to liesbeth, gerben, andrii for early user feedback. there is plenty still to do, see the list at the top of webmail/webmail.ts. feedback welcome as always.
2023-08-07 22:57:03 +03:00
mb := mailboxes[m.MailboxID]
if m.UID >= mb.UIDNext {
checkf(errors.New(`inconsistent uidnext for message/mailbox, see "mox fixuidmeta"`), dbpath, "message id %d in mailbox %q (id %d) has uid %d >= mailbox uidnext %d", m.ID, mb.Name, mb.ID, m.UID, mb.UIDNext)
}
add webmail it was far down on the roadmap, but implemented earlier, because it's interesting, and to help prepare for a jmap implementation. for jmap we need to implement more client-like functionality than with just imap. internal data structures need to change. jmap has lots of other requirements, so it's already a big project. by implementing a webmail now, some of the required data structure changes become clear and can be made now, so the later jmap implementation can do things similarly to the webmail code. the webmail frontend and webmail are written together, making their interface/api much smaller and simpler than jmap. one of the internal changes is that we now keep track of per-mailbox total/unread/unseen/deleted message counts and mailbox sizes. keeping this data consistent after any change to the stored messages (through the code base) is tricky, so mox now has a consistency check that verifies the counts are correct, which runs only during tests, each time an internal account reference is closed. we have a few more internal "changes" that are propagated for the webmail frontend (that imap doesn't have a way to propagate on a connection), like changes to the special-use flags on mailboxes, and used keywords in a mailbox. more changes that will be required have revealed themselves while implementing the webmail, and will be implemented next. the webmail user interface is modeled after the mail clients i use or have used: thunderbird, macos mail, mutt; and webmails i normally only use for testing: gmail, proton, yahoo, outlook. a somewhat technical user is assumed, but still the goal is to make this webmail client easy to use for everyone. the user interface looks like most other mail clients: a list of mailboxes, a search bar, a message list view, and message details. there is a top/bottom and a left/right layout for the list/message view, default is automatic based on screen size. the panes can be resized by the user. buttons for actions are just text, not icons. clicking a button briefly shows the shortcut for the action in the bottom right, helping with learning to operate quickly. any text that is underdotted has a title attribute that causes more information to be displayed, e.g. what a button does or a field is about. to highlight potential phishing attempts, any text (anywhere in the webclient) that switches unicode "blocks" (a rough approximation to (language) scripts) within a word is underlined orange. multiple messages can be selected with familiar ui interaction: clicking while holding control and/or shift keys. keyboard navigation works with arrows/page up/down and home/end keys, and also with a few basic vi-like keys for list/message navigation. we prefer showing the text instead of html (with inlined images only) version of a message. html messages are shown in an iframe served from an endpoint with CSP headers to prevent dangerous resources (scripts, external images) from being loaded. the html is also sanitized, with javascript removed. a user can choose to load external resources (e.g. images for tracking purposes). the frontend is just (strict) typescript, no external frameworks. all incoming/outgoing data is typechecked, both the api request parameters and response types, and the data coming in over SSE. the types and checking code are generated with sherpats, which uses the api definitions generated by sherpadoc based on the Go code. so types from the backend are automatically propagated to the frontend. since there is no framework to automatically propagate properties and rerender components, changes coming in over the SSE connection are propagated explicitly with regular function calls. the ui is separated into "views", each with a "root" dom element that is added to the visible document. these views have additional functions for getting changes propagated, often resulting in the view updating its (internal) ui state (dom). we keep the frontend compilation simple, it's just a few typescript files that get compiled (combined and types stripped) into a single js file, no additional runtime code needed or complicated build processes used. the webmail is served is served from a compressed, cachable html file that includes style and the javascript, currently just over 225kb uncompressed, under 60kb compressed (not minified, including comments). we include the generated js files in the repository, to keep Go's easily buildable self-contained binaries. authentication is basic http, as with the account and admin pages. most data comes in over one long-term SSE connection to the backend. api requests signal which mailbox/search/messages are requested over the SSE connection. fetching individual messages, and making changes, are done through api calls. the operations are similar to imap, so some code has been moved from package imapserver to package store. the future jmap implementation will benefit from these changes too. more functionality will probably be moved to the store package in the future. the quickstart enables webmail on the internal listener by default (for new installs). users can enable it on the public listener if they want to. mox localserve enables it too. to enable webmail on existing installs, add settings like the following to the listeners in mox.conf, similar to AccountHTTP(S): WebmailHTTP: Enabled: true WebmailHTTPS: Enabled: true special thanks to liesbeth, gerben, andrii for early user feedback. there is plenty still to do, see the list at the top of webmail/webmail.ts. feedback welcome as always.
2023-08-07 22:57:03 +03:00
if m.ModSeq < m.CreateSeq {
checkf(errors.New(`inconsistent modseq/createseq for message`), dbpath, "message id %d in mailbox %q (id %d) has modseq %d < createseq %d", m.ID, mb.Name, mb.ID, m.ModSeq, m.CreateSeq)
}
mc := mbCounts[mb.ID]
mc.Add(m.MailboxCounts())
mbCounts[mb.ID] = mc
if m.Expunged {
return nil
}
implement message threading in backend and webmail we match messages to their parents based on the "references" and "in-reply-to" headers (requiring the same base subject), and in absense of those headers we also by only base subject (against messages received max 4 weeks ago). we store a threadid with messages. all messages in a thread have the same threadid. messages also have a "thread parent ids", which holds all id's of parent messages up to the thread root. then there is "thread missing link", which is set when a referenced immediate parent wasn't found (but possibly earlier ancestors can still be found and will be in thread parent ids". threads can be muted: newly delivered messages are automatically marked as read/seen. threads can be marked as collapsed: if set, the webmail collapses the thread to a single item in the basic threading view (default is to expand threads). the muted and collapsed fields are copied from their parent on message delivery. the threading is implemented in the webmail. the non-threading mode still works as before. the new default threading mode "unread" automatically expands only the threads with at least one unread (not seen) meessage. the basic threading mode "on" expands all threads except when explicitly collapsed (as saved in the thread collapsed field). new shortcuts for navigation/interaction threads have been added, e.g. go to previous/next thread root, toggle collapse/expand of thread (or double click), toggle mute of thread. some previous shortcuts have changed, see the help for details. the message threading are added with an explicit account upgrade step, automatically started when an account is opened. the upgrade is done in the background because it will take too long for large mailboxes to block account operations. the upgrade takes two steps: 1. updating all message records in the database to add a normalized message-id and thread base subject (with "re:", "fwd:" and several other schemes stripped). 2. going through all messages in the database again, reading the "references" and "in-reply-to" headers from disk, and matching against their parents. this second step is also done at the end of each import of mbox/maildir mailboxes. new deliveries are matched immediately against other existing messages, currently no attempt is made to rematch previously delivered messages (which could be useful for related messages being delivered out of order). the threading is not yet exposed over imap.
2023-09-13 09:51:50 +03:00
mp := store.MessagePath(m.ID)
seen[mp] = struct{}{}
p := filepath.Join(accdir, "msg", mp)
checkFile(dbpath, p, len(m.MsgPrefix), m.Size)
implement message threading in backend and webmail we match messages to their parents based on the "references" and "in-reply-to" headers (requiring the same base subject), and in absense of those headers we also by only base subject (against messages received max 4 weeks ago). we store a threadid with messages. all messages in a thread have the same threadid. messages also have a "thread parent ids", which holds all id's of parent messages up to the thread root. then there is "thread missing link", which is set when a referenced immediate parent wasn't found (but possibly earlier ancestors can still be found and will be in thread parent ids". threads can be muted: newly delivered messages are automatically marked as read/seen. threads can be marked as collapsed: if set, the webmail collapses the thread to a single item in the basic threading view (default is to expand threads). the muted and collapsed fields are copied from their parent on message delivery. the threading is implemented in the webmail. the non-threading mode still works as before. the new default threading mode "unread" automatically expands only the threads with at least one unread (not seen) meessage. the basic threading mode "on" expands all threads except when explicitly collapsed (as saved in the thread collapsed field). new shortcuts for navigation/interaction threads have been added, e.g. go to previous/next thread root, toggle collapse/expand of thread (or double click), toggle mute of thread. some previous shortcuts have changed, see the help for details. the message threading are added with an explicit account upgrade step, automatically started when an account is opened. the upgrade is done in the background because it will take too long for large mailboxes to block account operations. the upgrade takes two steps: 1. updating all message records in the database to add a normalized message-id and thread base subject (with "re:", "fwd:" and several other schemes stripped). 2. going through all messages in the database again, reading the "references" and "in-reply-to" headers from disk, and matching against their parents. this second step is also done at the end of each import of mbox/maildir mailboxes. new deliveries are matched immediately against other existing messages, currently no attempt is made to rematch previously delivered messages (which could be useful for related messages being delivered out of order). the threading is not yet exposed over imap.
2023-09-13 09:51:50 +03:00
if up.Threads != 2 {
return nil
}
if m.ThreadID <= 0 {
checkf(errors.New(`see "mox reassignthreads"`), dbpath, "message id %d, thread %d in mailbox %q (id %d) has bad threadid", m.ID, m.ThreadID, mb.Name, mb.ID)
}
if len(m.ThreadParentIDs) == 0 {
return nil
}
if slices.Contains(m.ThreadParentIDs, m.ID) {
checkf(errors.New(`see "mox reassignthreads"`), dbpath, "message id %d, thread %d in mailbox %q (id %d) has itself as thread parent", m.ID, m.ThreadID, mb.Name, mb.ID)
}
for i, pid := range m.ThreadParentIDs {
am := store.Message{ID: pid}
if err := db.Get(ctxbg, &am); err == bstore.ErrAbsent {
continue
} else if err != nil {
return fmt.Errorf("get ancestor message: %v", err)
} else if !slices.Equal(m.ThreadParentIDs[i+1:], am.ThreadParentIDs) {
checkf(errors.New(`see "mox reassignthreads"`), dbpath, "message %d, thread %d has ancestor ids %v, and ancestor at index %d with id %d should have the same tail but has %v", m.ID, m.ThreadID, m.ThreadParentIDs, i, am.ID, am.ThreadParentIDs)
} else {
break
}
}
return nil
})
checkf(err, dbpath, "reading messages in account database to check files")
add webmail it was far down on the roadmap, but implemented earlier, because it's interesting, and to help prepare for a jmap implementation. for jmap we need to implement more client-like functionality than with just imap. internal data structures need to change. jmap has lots of other requirements, so it's already a big project. by implementing a webmail now, some of the required data structure changes become clear and can be made now, so the later jmap implementation can do things similarly to the webmail code. the webmail frontend and webmail are written together, making their interface/api much smaller and simpler than jmap. one of the internal changes is that we now keep track of per-mailbox total/unread/unseen/deleted message counts and mailbox sizes. keeping this data consistent after any change to the stored messages (through the code base) is tricky, so mox now has a consistency check that verifies the counts are correct, which runs only during tests, each time an internal account reference is closed. we have a few more internal "changes" that are propagated for the webmail frontend (that imap doesn't have a way to propagate on a connection), like changes to the special-use flags on mailboxes, and used keywords in a mailbox. more changes that will be required have revealed themselves while implementing the webmail, and will be implemented next. the webmail user interface is modeled after the mail clients i use or have used: thunderbird, macos mail, mutt; and webmails i normally only use for testing: gmail, proton, yahoo, outlook. a somewhat technical user is assumed, but still the goal is to make this webmail client easy to use for everyone. the user interface looks like most other mail clients: a list of mailboxes, a search bar, a message list view, and message details. there is a top/bottom and a left/right layout for the list/message view, default is automatic based on screen size. the panes can be resized by the user. buttons for actions are just text, not icons. clicking a button briefly shows the shortcut for the action in the bottom right, helping with learning to operate quickly. any text that is underdotted has a title attribute that causes more information to be displayed, e.g. what a button does or a field is about. to highlight potential phishing attempts, any text (anywhere in the webclient) that switches unicode "blocks" (a rough approximation to (language) scripts) within a word is underlined orange. multiple messages can be selected with familiar ui interaction: clicking while holding control and/or shift keys. keyboard navigation works with arrows/page up/down and home/end keys, and also with a few basic vi-like keys for list/message navigation. we prefer showing the text instead of html (with inlined images only) version of a message. html messages are shown in an iframe served from an endpoint with CSP headers to prevent dangerous resources (scripts, external images) from being loaded. the html is also sanitized, with javascript removed. a user can choose to load external resources (e.g. images for tracking purposes). the frontend is just (strict) typescript, no external frameworks. all incoming/outgoing data is typechecked, both the api request parameters and response types, and the data coming in over SSE. the types and checking code are generated with sherpats, which uses the api definitions generated by sherpadoc based on the Go code. so types from the backend are automatically propagated to the frontend. since there is no framework to automatically propagate properties and rerender components, changes coming in over the SSE connection are propagated explicitly with regular function calls. the ui is separated into "views", each with a "root" dom element that is added to the visible document. these views have additional functions for getting changes propagated, often resulting in the view updating its (internal) ui state (dom). we keep the frontend compilation simple, it's just a few typescript files that get compiled (combined and types stripped) into a single js file, no additional runtime code needed or complicated build processes used. the webmail is served is served from a compressed, cachable html file that includes style and the javascript, currently just over 225kb uncompressed, under 60kb compressed (not minified, including comments). we include the generated js files in the repository, to keep Go's easily buildable self-contained binaries. authentication is basic http, as with the account and admin pages. most data comes in over one long-term SSE connection to the backend. api requests signal which mailbox/search/messages are requested over the SSE connection. fetching individual messages, and making changes, are done through api calls. the operations are similar to imap, so some code has been moved from package imapserver to package store. the future jmap implementation will benefit from these changes too. more functionality will probably be moved to the store package in the future. the quickstart enables webmail on the internal listener by default (for new installs). users can enable it on the public listener if they want to. mox localserve enables it too. to enable webmail on existing installs, add settings like the following to the listeners in mox.conf, similar to AccountHTTP(S): WebmailHTTP: Enabled: true WebmailHTTPS: Enabled: true special thanks to liesbeth, gerben, andrii for early user feedback. there is plenty still to do, see the list at the top of webmail/webmail.ts. feedback welcome as always.
2023-08-07 22:57:03 +03:00
for _, mb := range mailboxes {
// We only check if database doesn't have zero values, i.e. not yet set.
if mb.HaveCounts && mb.MailboxCounts != mbCounts[mb.ID] {
checkf(errors.New(`wrong mailbox counts, see "mox recalculatemailboxcounts"`), dbpath, "mailbox %q (id %d) has wrong counts %s, should be %s", mb.Name, mb.ID, mb.MailboxCounts, mbCounts[mb.ID])
}
}
}
// Walk through all files in the msg directory. Warn about files that weren't in
// the database as message file. Possibly move away files that could cause trouble.
msgdir := filepath.Join(accdir, "msg")
if !exists(msgdir) {
// New accounts with messages don't have a msg directory.
return
}
err = filepath.WalkDir(msgdir, func(msgpath string, d fs.DirEntry, err error) error {
checkf(err, msgpath, "walk")
if err != nil {
return nil
}
if d.IsDir() {
return nil
}
p := msgpath[len(msgdir)+1:]
if _, ok := seen[p]; ok {
return nil
}
l := strings.Split(p, string(filepath.Separator))
if len(l) == 1 {
log.Printf("warning: %s: unrecognized file in message directory, ignoring", msgpath)
return nil
}
if !fix {
checkf(errors.New("may interfere with future account messages"), msgpath, "unrecognized file in account message directory (use the -fix flag to move it away)")
return nil
}
npath := filepath.Join(dataDir, "moved", "accounts", name, "msg", p)
ensureDir(npath)
err = os.Rename(msgpath, npath)
checkf(err, msgpath, "moving account message file away")
if err == nil {
log.Printf("warning: moved %s to %s", msgpath, npath)
}
return nil
})
checkf(err, msgdir, "walking account message directory")
}
// Check everything in the "accounts" directory.
checkAccounts := func() {
accountsDir := filepath.Join(dataDir, "accounts")
entries, err := os.ReadDir(accountsDir)
checkf(err, accountsDir, "reading accounts directory")
for _, e := range entries {
// We treat all directories as accounts. When we were backing up, we only verified
// accounts from the config and made regular file copies of all other files
// (perhaps an old account, but at least not with an open database file). It may
// turn out that that account was/is not valid, generating warnings. Better safe
// than sorry. It should hopefully get the admin to move away such an old account.
if e.IsDir() {
checkAccount(e.Name())
} else {
log.Printf("warning: %s: unrecognized file in accounts directory, ignoring", filepath.Join("accounts", e.Name()))
}
}
}
// Check all files, skipping the known files, queue and accounts directories. Warn
// about unknown files. Skip a "tmp" directory. And a "moved" directory, we
// probably created it ourselves.
backupmoxversion := "(unknown)"
checkOther := func() {
err := filepath.WalkDir(dataDir, func(dpath string, d fs.DirEntry, err error) error {
checkf(err, dpath, "walk")
if err != nil {
return nil
}
if dpath == dataDir {
return nil
}
p := dpath
if dataDir != "." {
p = p[len(dataDir)+1:]
}
switch p {
implement outgoing tls reports we were already accepting, processing and displaying incoming tls reports. now we start tracking TLS connection and security-policy-related errors for outgoing message deliveries as well. we send reports once a day, to the reporting addresses specified in TLSRPT records (rua) of a policy domain. these reports are about MTA-STS policies and/or DANE policies, and about STARTTLS-related failures. sending reports is enabled by default, but can be disabled through setting NoOutgoingTLSReports in mox.conf. only at the end of the implementation process came the realization that the TLSRPT policy domain for DANE (MX) hosts are separate from the TLSRPT policy for the recipient domain, and that MTA-STS and DANE TLS/policy results are typically delivered in separate reports. so MX hosts need their own TLSRPT policies. config for the per-host TLSRPT policy should be added to mox.conf for existing installs, in field HostTLSRPT. it is automatically configured by quickstart for new installs. with a HostTLSRPT config, the "dns records" and "dns check" admin pages now suggest the per-host TLSRPT record. by creating that record, you're requesting TLS reports about your MX host. gathering all the TLS/policy results is somewhat tricky. the tentacles go throughout the code. the positive result is that the TLS/policy-related code had to be cleaned up a bit. for example, the smtpclient TLS modes now reflect reality better, with independent settings about whether PKIX and/or DANE verification has to be done, and/or whether verification errors have to be ignored (e.g. for tls-required: no header). also, cached mtasts policies of mode "none" are now cleaned up once the MTA-STS DNS record goes away.
2023-11-09 19:40:46 +03:00
case "dmarcrpt.db", "dmarceval.db", "mtasts.db", "tlsrpt.db", "tlsrptresult.db", "receivedid.key", "lastknownversion":
return nil
case "acme", "queue", "accounts", "tmp", "moved":
return fs.SkipDir
case "moxversion":
buf, err := os.ReadFile(dpath)
checkf(err, dpath, "reading moxversion")
if err == nil {
backupmoxversion = string(buf)
}
return nil
}
log.Printf("warning: %s: unrecognized other file, ignoring", dpath)
return nil
})
checkf(err, dataDir, "walking data directory")
}
checkDB(true, filepath.Join(dataDir, "dmarcrpt.db"), dmarcdb.ReportsDBTypes)
checkDB(false, filepath.Join(dataDir, "dmarceval.db"), dmarcdb.EvalDBTypes) // After v0.0.7.
checkDB(true, filepath.Join(dataDir, "mtasts.db"), mtastsdb.DBTypes)
implement outgoing tls reports we were already accepting, processing and displaying incoming tls reports. now we start tracking TLS connection and security-policy-related errors for outgoing message deliveries as well. we send reports once a day, to the reporting addresses specified in TLSRPT records (rua) of a policy domain. these reports are about MTA-STS policies and/or DANE policies, and about STARTTLS-related failures. sending reports is enabled by default, but can be disabled through setting NoOutgoingTLSReports in mox.conf. only at the end of the implementation process came the realization that the TLSRPT policy domain for DANE (MX) hosts are separate from the TLSRPT policy for the recipient domain, and that MTA-STS and DANE TLS/policy results are typically delivered in separate reports. so MX hosts need their own TLSRPT policies. config for the per-host TLSRPT policy should be added to mox.conf for existing installs, in field HostTLSRPT. it is automatically configured by quickstart for new installs. with a HostTLSRPT config, the "dns records" and "dns check" admin pages now suggest the per-host TLSRPT record. by creating that record, you're requesting TLS reports about your MX host. gathering all the TLS/policy results is somewhat tricky. the tentacles go throughout the code. the positive result is that the TLS/policy-related code had to be cleaned up a bit. for example, the smtpclient TLS modes now reflect reality better, with independent settings about whether PKIX and/or DANE verification has to be done, and/or whether verification errors have to be ignored (e.g. for tls-required: no header). also, cached mtasts policies of mode "none" are now cleaned up once the MTA-STS DNS record goes away.
2023-11-09 19:40:46 +03:00
checkDB(true, filepath.Join(dataDir, "tlsrpt.db"), tlsrptdb.ReportDBTypes)
checkDB(false, filepath.Join(dataDir, "tlsrptresult.db"), tlsrptdb.ResultDBTypes) // After v0.0.7.
checkQueue()
checkAccounts()
checkOther()
if backupmoxversion != moxvar.Version {
log.Printf("NOTE: The backup was made with mox version %q, while verifydata was run with mox version %q. Database files have probably been modified by running mox verifydata. Make a fresh backup before upgrading.", backupmoxversion, moxvar.Version)
}
if fail {
log.Fatalf("errors were found")
} else {
fmt.Printf("%s: OK\n", dataDir)
}
}