mox/serve_unix.go

547 lines
17 KiB
Go
Raw Normal View History

make mox compile on windows, without "mox serve" but with working "mox localserve" getting mox to compile required changing code in only a few places where package "syscall" was used: for accessing file access times and for umask handling. an open problem is how to start a process as an unprivileged user on windows. that's why "mox serve" isn't implemented yet. and just finding a way to implement it now may not be good enough in the near future: we may want to starting using a more complete privilege separation approach, with a process handling sensitive tasks (handling private keys, authentication), where we may want to pass file descriptors between processes. how would that work on windows? anyway, getting mox to compile for windows doesn't mean it works properly on windows. the largest issue: mox would normally open a file, rename or remove it, and finally close it. this happens during message delivery. that doesn't work on windows, the rename/remove would fail because the file is still open. so this commit swaps many "remove" and "close" calls. renames are a longer story: message delivery had two ways to deliver: with "consuming" the (temporary) message file (which would rename it to its final destination), and without consuming (by hardlinking the file, falling back to copying). the last delivery to a recipient of a message (and the only one in the common case of a single recipient) would consume the message, and the earlier recipients would not. during delivery, the already open message file was used, to parse the message. we still want to use that open message file, and the caller now stays responsible for closing it, but we no longer try to rename (consume) the file. we always hardlink (or copy) during delivery (this works on windows), and the caller is responsible for closing and removing (in that order) the original temporary file. this does cost one syscall more. but it makes the delivery code (responsibilities) a bit simpler. there is one more obvious issue: the file system path separator. mox already used the "filepath" package to join paths in many places, but not everywhere. and it still used strings with slashes for local file access. with this commit, the code now uses filepath.FromSlash for path strings with slashes, uses "filepath" in a few more places where it previously didn't. also switches from "filepath" to regular "path" package when handling mailbox names in a few places, because those always use forward slashes, regardless of local file system conventions. windows can handle forward slashes when opening files, so test code that passes path strings with forward slashes straight to go stdlib file i/o functions are left unchanged to reduce code churn. the regular non-test code, or test code that uses path strings in places other than standard i/o functions, does have the paths converted for consistent paths (otherwise we would end up with paths with mixed forward/backward slashes in log messages). windows cannot dup a listening socket. for "mox localserve", it isn't important, and we can work around the issue. the current approach for "mox serve" (forking a process and passing file descriptors of listening sockets on "privileged" ports) won't work on windows. perhaps it isn't needed on windows, and any user can listen on "privileged" ports? that would be welcome. on windows, os.Open cannot open a directory, so we cannot call Sync on it after message delivery. a cursory internet search indicates that directories cannot be synced on windows. the story is probably much more nuanced than that, with long deep technical details/discussions/disagreement/confusion, like on unix. for "mox localserve" we can get away with making syncdir a no-op.
2023-10-14 11:54:07 +03:00
//go:build !windows
package main
import (
"context"
cryptorand "crypto/rand"
"fmt"
"io/fs"
"net"
"os"
"os/signal"
"path/filepath"
"runtime"
"runtime/debug"
"strings"
"sync"
"syscall"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/mjl-/mox/dns"
"github.com/mjl-/mox/dnsbl"
"github.com/mjl-/mox/message"
"github.com/mjl-/mox/metrics"
"github.com/mjl-/mox/mlog"
"github.com/mjl-/mox/mox-"
"github.com/mjl-/mox/moxvar"
"github.com/mjl-/mox/store"
"github.com/mjl-/mox/updates"
)
func monitorDNSBL(log *mlog.Log) {
defer func() {
// On error, don't bring down the entire server.
x := recover()
if x != nil {
log.Error("monitordnsbl panic", mlog.Field("panic", x))
debug.PrintStack()
metrics.PanicInc(metrics.Serve)
}
}()
l, ok := mox.Conf.Static.Listeners["public"]
if !ok {
log.Info("no listener named public, not monitoring our ips at dnsbls")
return
}
var zones []dns.Domain
for _, zone := range l.SMTP.DNSBLs {
d, err := dns.ParseDomain(zone)
if err != nil {
log.Fatalx("parsing dnsbls zone", err, mlog.Field("zone", zone))
}
zones = append(zones, d)
}
if len(zones) == 0 {
return
}
type key struct {
zone dns.Domain
ip string
}
metrics := map[key]prometheus.GaugeFunc{}
var statusMutex sync.Mutex
statuses := map[key]bool{}
resolver := dns.StrictResolver{Pkg: "dnsblmonitor"}
var sleep time.Duration // No sleep on first iteration.
for {
time.Sleep(sleep)
sleep = 3 * time.Hour
ips, err := mox.IPs(mox.Context, false)
if err != nil {
log.Errorx("listing ips for dnsbl monitor", err)
continue
}
for _, ip := range ips {
if ip.IsLoopback() || ip.IsPrivate() {
continue
}
for _, zone := range zones {
status, expl, err := dnsbl.Lookup(mox.Context, resolver, zone, ip)
if err != nil {
log.Errorx("dnsbl monitor lookup", err, mlog.Field("ip", ip), mlog.Field("zone", zone), mlog.Field("expl", expl), mlog.Field("status", status))
}
k := key{zone, ip.String()}
statusMutex.Lock()
statuses[k] = status == dnsbl.StatusPass
statusMutex.Unlock()
if _, ok := metrics[k]; !ok {
metrics[k] = promauto.NewGaugeFunc(
prometheus.GaugeOpts{
Name: "mox_dnsbl_ips_success",
Help: "DNSBL lookups to configured DNSBLs of our IPs.",
ConstLabels: prometheus.Labels{
"zone": zone.LogString(),
"ip": k.ip,
},
},
func() float64 {
statusMutex.Lock()
defer statusMutex.Unlock()
if statuses[k] {
return 1
}
return 0
},
)
}
time.Sleep(time.Second)
}
}
}
}
// also see localserve.go, code is similar or even shared.
func cmdServe(c *cmd) {
c.help = `Start mox, serving SMTP/IMAP/HTTPS.
Incoming email is accepted over SMTP. Email can be retrieved by users using
IMAP. HTTP listeners are started for the admin/account web interfaces, and for
automated TLS configuration. Missing essential TLS certificates are immediately
requested, other TLS certificates are requested on demand.
Only implemented on unix systems, not Windows.
`
args := c.Parse()
if len(args) != 0 {
c.Usage()
}
// Set debug logging until config is fully loaded.
mlog.Logfmt = true
mox.Conf.Log[""] = mlog.LevelDebug
mlog.SetConfig(mox.Conf.Log)
checkACMEHosts := os.Getuid() != 0
log := mlog.New("serve")
if os.Getuid() == 0 {
mox.MustLoadConfig(true, checkACMEHosts)
// No need to potentially start and keep multiple processes. As root, we just need
// to start the child process.
runtime.GOMAXPROCS(1)
moxconf, err := filepath.Abs(mox.ConfigStaticPath)
log.Check(err, "finding absolute mox.conf path")
domainsconf, err := filepath.Abs(mox.ConfigDynamicPath)
log.Check(err, "finding absolute domains.conf path")
log.Print("starting as root, initializing network listeners", mlog.Field("version", moxvar.Version), mlog.Field("pid", os.Getpid()), mlog.Field("moxconf", moxconf), mlog.Field("domainsconf", domainsconf))
if os.Getenv("MOX_SOCKETS") != "" {
log.Fatal("refusing to start as root with $MOX_SOCKETS set")
}
if os.Getenv("MOX_FILES") != "" {
log.Fatal("refusing to start as root with $MOX_FILES set")
}
if !mox.Conf.Static.NoFixPermissions {
// Fix permissions now that we have privilege to do so. Useful for update of v0.0.1
// that was running directly as mox-user.
workdir, err := os.Getwd()
if err != nil {
log.Printx("get working dir, continuing without potentially fixing up permissions", err)
} else {
configdir := filepath.Dir(mox.ConfigStaticPath)
datadir := mox.DataDirPath(".")
err := fixperms(log, workdir, configdir, datadir, mox.Conf.Static.UID, mox.Conf.Static.GID)
if err != nil {
log.Fatalx("fixing permissions", err)
}
}
}
} else {
mox.RestorePassedFiles()
mox.MustLoadConfig(true, checkACMEHosts)
log.Print("starting as unprivileged user", mlog.Field("user", mox.Conf.Static.User), mlog.Field("uid", mox.Conf.Static.UID), mlog.Field("gid", mox.Conf.Static.GID), mlog.Field("pid", os.Getpid()))
}
syscall.Umask(syscall.Umask(007) | 007)
// Initialize key and random buffer for creating opaque SMTP
// transaction IDs based on "cid"s.
recvidpath := mox.DataDirPath("receivedid.key")
recvidbuf, err := os.ReadFile(recvidpath)
if err != nil || len(recvidbuf) != 16+8 {
recvidbuf = make([]byte, 16+8)
if _, err := cryptorand.Read(recvidbuf); err != nil {
log.Fatalx("reading random recvid data", err)
}
if err := os.WriteFile(recvidpath, recvidbuf, 0660); err != nil {
log.Fatalx("writing recvidpath", err, mlog.Field("path", recvidpath))
}
err := os.Chown(recvidpath, int(mox.Conf.Static.UID), 0)
log.Check(err, "chown receveidid.key", mlog.Field("path", recvidpath), mlog.Field("uid", mox.Conf.Static.UID), mlog.Field("gid", 0))
err = os.Chmod(recvidpath, 0640)
log.Check(err, "chmod receveidid.key to 0640", mlog.Field("path", recvidpath))
}
if err := mox.ReceivedIDInit(recvidbuf[:16], recvidbuf[16:]); err != nil {
log.Fatalx("init receivedid", err)
}
// Start mox. If running as root, this will bind/listen on network sockets, and
// fork and exec itself as unprivileged user, then waits for the child to stop and
// exit. When running as root, this function never returns. But the new
// unprivileged user will get here again, with network sockets prepared.
//
// We listen to the unix domain ctl socket afterwards, which we always remove
// before listening. We need to do that because we may not have cleaned up our
// control socket during unexpected shutdown. We don't want to remove and listen on
// the unix domain socket first. If we would, we would make the existing instance
// unreachable over its ctl socket, and then fail because the network addresses are
// taken.
const mtastsdbRefresher = true
const skipForkExec = false
if err := start(mtastsdbRefresher, !mox.Conf.Static.NoOutgoingDMARCReports, skipForkExec); err != nil {
make mox compile on windows, without "mox serve" but with working "mox localserve" getting mox to compile required changing code in only a few places where package "syscall" was used: for accessing file access times and for umask handling. an open problem is how to start a process as an unprivileged user on windows. that's why "mox serve" isn't implemented yet. and just finding a way to implement it now may not be good enough in the near future: we may want to starting using a more complete privilege separation approach, with a process handling sensitive tasks (handling private keys, authentication), where we may want to pass file descriptors between processes. how would that work on windows? anyway, getting mox to compile for windows doesn't mean it works properly on windows. the largest issue: mox would normally open a file, rename or remove it, and finally close it. this happens during message delivery. that doesn't work on windows, the rename/remove would fail because the file is still open. so this commit swaps many "remove" and "close" calls. renames are a longer story: message delivery had two ways to deliver: with "consuming" the (temporary) message file (which would rename it to its final destination), and without consuming (by hardlinking the file, falling back to copying). the last delivery to a recipient of a message (and the only one in the common case of a single recipient) would consume the message, and the earlier recipients would not. during delivery, the already open message file was used, to parse the message. we still want to use that open message file, and the caller now stays responsible for closing it, but we no longer try to rename (consume) the file. we always hardlink (or copy) during delivery (this works on windows), and the caller is responsible for closing and removing (in that order) the original temporary file. this does cost one syscall more. but it makes the delivery code (responsibilities) a bit simpler. there is one more obvious issue: the file system path separator. mox already used the "filepath" package to join paths in many places, but not everywhere. and it still used strings with slashes for local file access. with this commit, the code now uses filepath.FromSlash for path strings with slashes, uses "filepath" in a few more places where it previously didn't. also switches from "filepath" to regular "path" package when handling mailbox names in a few places, because those always use forward slashes, regardless of local file system conventions. windows can handle forward slashes when opening files, so test code that passes path strings with forward slashes straight to go stdlib file i/o functions are left unchanged to reduce code churn. the regular non-test code, or test code that uses path strings in places other than standard i/o functions, does have the paths converted for consistent paths (otherwise we would end up with paths with mixed forward/backward slashes in log messages). windows cannot dup a listening socket. for "mox localserve", it isn't important, and we can work around the issue. the current approach for "mox serve" (forking a process and passing file descriptors of listening sockets on "privileged" ports) won't work on windows. perhaps it isn't needed on windows, and any user can listen on "privileged" ports? that would be welcome. on windows, os.Open cannot open a directory, so we cannot call Sync on it after message delivery. a cursory internet search indicates that directories cannot be synced on windows. the story is probably much more nuanced than that, with long deep technical details/discussions/disagreement/confusion, like on unix. for "mox localserve" we can get away with making syncdir a no-op.
2023-10-14 11:54:07 +03:00
log.Fatalx("start", err)
}
log.Print("ready to serve")
if mox.Conf.Static.CheckUpdates {
checkUpdates := func() time.Duration {
next := 24 * time.Hour
current, lastknown, mtime, err := mox.LastKnown()
if err != nil {
log.Infox("determining own version before checking for updates, trying again in 24h", err)
return next
}
// We don't want to check for updates at every startup. So we sleep based on file
// mtime. But file won't exist initially.
if !mtime.IsZero() && time.Since(mtime) < 24*time.Hour {
d := 24*time.Hour - time.Since(mtime)
log.Debug("sleeping for next check for updates", mlog.Field("sleep", d))
time.Sleep(d)
next = 0
}
now := time.Now()
if err := os.Chtimes(mox.DataDirPath("lastknownversion"), now, now); err != nil {
if !os.IsNotExist(err) {
log.Infox("setting mtime on lastknownversion file, continuing", err)
}
}
log.Debug("checking for updates", mlog.Field("lastknown", lastknown))
updatesctx, updatescancel := context.WithTimeout(mox.Context, time.Minute)
latest, _, changelog, err := updates.Check(updatesctx, dns.StrictResolver{}, dns.Domain{ASCII: changelogDomain}, lastknown, changelogURL, changelogPubKey)
updatescancel()
if err != nil {
log.Infox("checking for updates", err, mlog.Field("latest", latest))
return next
}
if !latest.After(lastknown) {
log.Debug("no new version available")
return next
}
if len(changelog.Changes) == 0 {
log.Info("new version available, but changelog is empty, ignoring", mlog.Field("latest", latest))
return next
}
var cl string
for _, c := range changelog.Changes {
cl += "----\n\n" + strings.TrimSpace(c.Text) + "\n\n"
}
cl += "----"
a, err := store.OpenAccount(mox.Conf.Static.Postmaster.Account)
if err != nil {
log.Infox("open account for postmaster changelog delivery", err)
return next
}
defer func() {
err := a.Close()
log.Check(err, "closing account")
}()
f, err := store.CreateMessageTemp("changelog")
if err != nil {
log.Infox("making temporary message file for changelog delivery", err)
return next
}
defer func() {
name := f.Name()
err = f.Close()
log.Check(err, "closing temp changelog file")
err := os.Remove(name)
log.Check(err, "removing temp changelog file", mlog.Field("path", name))
}()
m := &store.Message{
Received: time.Now(),
Flags: store.Flags{Flagged: true},
}
n, err := fmt.Fprintf(f, "Date: %s\r\nSubject: mox %s available\r\nContent-Type: text/plain; charset=utf-8\r\nContent-Transfer-Encoding: 8-bit\r\n\r\nHi!\r\n\r\nVersion %s of mox is available, this install is at %s.\r\n\r\nChanges:\r\n\r\n%s\r\n\r\nRemember to make a backup with \"mox backup\" before upgrading.\r\nPlease report any issues at https://github.com/mjl-/mox, thanks!\r\n\r\nCheers,\r\nmox\r\n", time.Now().Format(message.RFC5322Z), latest, latest, current, strings.ReplaceAll(cl, "\n", "\r\n"))
if err != nil {
log.Infox("writing temporary message file for changelog delivery", err)
return next
}
m.Size = int64(n)
if err := a.DeliverMailbox(log, mox.Conf.Static.Postmaster.Mailbox, m, f); err != nil {
log.Errorx("changelog delivery", err)
return next
}
log.Info("delivered changelog", mlog.Field("current", current), mlog.Field("lastknown", lastknown), mlog.Field("latest", latest))
if err := mox.StoreLastKnown(latest); err != nil {
// This will be awkward, we'll keep notifying the postmaster once every 24h...
log.Infox("updating last known version", err)
}
return next
}
go func() {
for {
next := checkUpdates()
time.Sleep(next)
}
}()
}
go monitorDNSBL(log)
ctlpath := mox.DataDirPath("ctl")
_ = os.Remove(ctlpath)
ctl, err := net.Listen("unix", ctlpath)
if err != nil {
log.Fatalx("listen on ctl unix domain socket", err)
}
go func() {
for {
conn, err := ctl.Accept()
if err != nil {
log.Printx("accept for ctl", err)
continue
}
cid := mox.Cid()
ctx := context.WithValue(mox.Context, mlog.CidKey, cid)
go servectl(ctx, log.WithCid(cid), conn, func() { shutdown(log) })
}
}()
// Remove old temporary files that somehow haven't been cleaned up.
tmpdir := mox.DataDirPath("tmp")
os.MkdirAll(tmpdir, 0770)
tmps, err := os.ReadDir(tmpdir)
if err != nil {
log.Errorx("listing files in tmpdir", err)
} else {
now := time.Now()
for _, e := range tmps {
if fi, err := e.Info(); err != nil {
log.Errorx("stat tmp file", err, mlog.Field("filename", e.Name()))
} else if now.Sub(fi.ModTime()) > 7*24*time.Hour && !fi.IsDir() {
p := filepath.Join(tmpdir, e.Name())
if err := os.Remove(p); err != nil {
log.Errorx("removing stale temporary file", err, mlog.Field("path", p))
} else {
log.Info("removed stale temporary file", mlog.Field("path", p))
}
}
}
}
// Graceful shutdown.
sigc := make(chan os.Signal, 1)
signal.Notify(sigc, os.Interrupt, syscall.SIGTERM)
sig := <-sigc
log.Print("shutting down, waiting max 3s for existing connections", mlog.Field("signal", sig))
shutdown(log)
if num, ok := sig.(syscall.Signal); ok {
os.Exit(int(num))
} else {
os.Exit(1)
}
}
// Set correct permissions for mox working directory, binary, config and data and service file.
//
// We require being able to stat the basic non-optional paths. Then we'll try to
// fix up permissions. If an error occurs when fixing permissions, we log and
// continue (could not be an actual problem).
func fixperms(log *mlog.Log, workdir, configdir, datadir string, moxuid, moxgid uint32) (rerr error) {
type fserr struct{ Err error }
defer func() {
x := recover()
if x == nil {
return
}
e, ok := x.(fserr)
if ok {
rerr = e.Err
} else {
panic(x)
}
}()
checkf := func(err error, format string, args ...any) {
if err != nil {
panic(fserr{fmt.Errorf(format, args...)})
}
}
// Changes we have to make. We collect them first, then apply.
type change struct {
path string
uid, gid *uint32
olduid, oldgid uint32
mode *fs.FileMode
oldmode fs.FileMode
}
var changes []change
ensure := func(p string, uid, gid uint32, perm fs.FileMode) bool {
fi, err := os.Stat(p)
checkf(err, "stat %s", p)
st, ok := fi.Sys().(*syscall.Stat_t)
if !ok {
checkf(fmt.Errorf("got %T", st), "stat sys, expected syscall.Stat_t")
}
var ch change
if st.Uid != uid || st.Gid != gid {
ch.uid = &uid
ch.gid = &gid
ch.olduid = st.Uid
ch.oldgid = st.Gid
}
if perm != fi.Mode()&(fs.ModeSetgid|0777) {
ch.mode = &perm
ch.oldmode = fi.Mode() & (fs.ModeSetgid | 0777)
}
var zerochange change
if ch == zerochange {
return false
}
ch.path = p
changes = append(changes, ch)
return true
}
xexists := func(p string) bool {
_, err := os.Stat(p)
if err != nil && !os.IsNotExist(err) {
checkf(err, "stat %s", p)
}
return err == nil
}
// We ensure these permissions:
//
// $workdir root:mox 0751
// $configdir mox:root 0750 + setgid, and recursively (but files 0640)
// $datadir mox:root 0750 + setgid, and recursively (but files 0640)
// $workdir/mox (binary, optional) root:mox 0750
// $workdir/mox.service (systemd service file, optional) root:root 0644
const root = 0
ensure(workdir, root, moxgid, 0751)
fixconfig := ensure(configdir, moxuid, 0, fs.ModeSetgid|0750)
fixdata := ensure(datadir, moxuid, 0, fs.ModeSetgid|0750)
// Binary and systemd service file do not exist (there) when running under docker.
binary := filepath.Join(workdir, "mox")
if xexists(binary) {
ensure(binary, root, moxgid, 0750)
}
svc := filepath.Join(workdir, "mox.service")
if xexists(svc) {
ensure(svc, root, root, 0644)
}
if len(changes) == 0 {
return
}
// Apply changes.
log.Print("fixing up permissions, will continue on errors")
for _, ch := range changes {
if ch.uid != nil {
err := os.Chown(ch.path, int(*ch.uid), int(*ch.gid))
log.Printx("chown, fixing uid/gid", err, mlog.Field("path", ch.path), mlog.Field("olduid", ch.olduid), mlog.Field("oldgid", ch.oldgid), mlog.Field("newuid", *ch.uid), mlog.Field("newgid", *ch.gid))
}
if ch.mode != nil {
err := os.Chmod(ch.path, *ch.mode)
log.Printx("chmod, fixing permissions", err, mlog.Field("path", ch.path), mlog.Field("oldmode", fmt.Sprintf("%03o", ch.oldmode)), mlog.Field("newmode", fmt.Sprintf("%03o", *ch.mode)))
}
}
walkchange := func(dir string) {
err := filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error {
if err != nil {
log.Printx("walk error, continuing", err, mlog.Field("path", path))
return nil
}
fi, err := d.Info()
if err != nil {
log.Printx("stat during walk, continuing", err, mlog.Field("path", path))
return nil
}
st, ok := fi.Sys().(*syscall.Stat_t)
if !ok {
log.Printx("syscall stat during walk, continuing", err, mlog.Field("path", path))
return nil
}
if st.Uid != moxuid || st.Gid != root {
err := os.Chown(path, int(moxuid), root)
log.Printx("walk chown, fixing uid/gid", err, mlog.Field("path", path), mlog.Field("olduid", st.Uid), mlog.Field("oldgid", st.Gid), mlog.Field("newuid", moxuid), mlog.Field("newgid", root))
}
omode := fi.Mode() & (fs.ModeSetgid | 0777)
var nmode fs.FileMode
if fi.IsDir() {
nmode = fs.ModeSetgid | 0750
} else {
nmode = 0640
}
if omode != nmode {
err := os.Chmod(path, nmode)
log.Printx("walk chmod, fixing permissions", err, mlog.Field("path", path), mlog.Field("oldmode", fmt.Sprintf("%03o", omode)), mlog.Field("newmode", fmt.Sprintf("%03o", nmode)))
}
return nil
})
log.Check(err, "walking dir to fix permissions", mlog.Field("dir", dir))
}
// If config or data dir needed fixing, also set uid/gid and mode and files/dirs
// inside, recursively. We don't always recurse, data probably contains many files.
if fixconfig {
log.Print("fixing permissions in config dir", mlog.Field("configdir", configdir))
walkchange(configdir)
}
if fixdata {
log.Print("fixing permissions in data dir", mlog.Field("configdir", configdir))
walkchange(datadir)
}
return nil
}