// Package autotls automatically configures TLS (for SMTP, IMAP, HTTP) by
// requesting certificates with ACME, typically from Let's Encrypt.
package autotls

// We do tls-alpn-01, and also http-01. For dns-01 we would need a third-party tool
// with an API that can make the DNS changes, as we don't want to link dozens of
// bespoke APIs for DNS record manipulation into mox.

import (
	"bytes"
	"context"
	"crypto"
	"crypto/ecdsa"
	"crypto/elliptic"
	cryptorand "crypto/rand"
	"crypto/rsa"
	"crypto/tls"
	"crypto/x509"
	"encoding/pem"
	"errors"
	"fmt"
	"io"
	"log/slog"
	"net"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"sync"
	"time"

	"golang.org/x/crypto/acme"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"

	"github.com/mjl-/autocert"

	"github.com/mjl-/mox/dns"
	"github.com/mjl-/mox/mlog"
	"github.com/mjl-/mox/moxvar"
)

var (
	// metricCertput counts calls to dirCache.Put. It is incremented before the
	// underlying cache write is attempted, so it includes failed puts and puts of
	// "+token" entries.
	metricCertput = promauto.NewCounter(
		prometheus.CounterOpts{
			Name: "mox_autotls_certput_total",
			Help: "Number of certificate store puts.",
		},
	)
)

// Manager is in charge of a single ACME identity, and automatically requests
// certificates for allowlisted hosts.
//
// The embedded sync.Mutex protects the hosts field; the methods in this file
// lock it around every read and replacement of hosts.
type Manager struct {
	ACMETLSConfig *tls.Config // For serving HTTPS on port 443, which is required for certificate requests to succeed.
	// Underlying autocert manager. Load sets its HostPolicy to this Manager's
	// HostPolicy method.
	Manager       *autocert.Manager

	// When closed, HostPolicy rejects all hosts with a "shutting down" error.
	shutdown <-chan struct{}

	sync.Mutex
	// Allowlisted hosts. Replaced as a whole by SetAllowedHostnames.
	hosts map[dns.Domain]struct{}
}

// Load returns an initialized autotls manager for "name" (used for the ACME key
// file and requested certs and their keys). All files are stored within acmeDir.
//
// The ACME identity key is read from acmeDir/name.key, which must contain a PEM
// "PRIVATE KEY" block holding a PKCS#8-encoded ECDSA or RSA key. If the file does
// not exist, a new ECDSA P-256 key is generated and written to that path with
// mode 0660. Certificates and their keys are cached in acmeDir/keycerts/name.
//
// contactEmail must be a valid email address to which notifications about ACME can
// be sent. directoryURL is the ACME starting point. Both must be non-empty,
// otherwise an error is returned.
//
// eabKeyID and eabKey are for external account binding when making a new account,
// which some ACME providers require. They are only used if eabKeyID is non-empty.
//
// getPrivateKey is called to get the private key for the host and key type. It
// can be used to deliver a specific (e.g. always the same) private key for a
// host, or a newly generated key.
//
// When shutdown is closed, no new TLS connections can be created.
func Load(name, acmeDir, contactEmail, directoryURL string, eabKeyID string, eabKey []byte, getPrivateKey func(host string, keyType autocert.KeyType) (crypto.Signer, error), shutdown <-chan struct{}) (*Manager, error) {
	if directoryURL == "" {
		return nil, fmt.Errorf("empty ACME directory URL")
	}
	if contactEmail == "" {
		return nil, fmt.Errorf("empty contact email")
	}

	// Load identity key if it exists. Otherwise, create a new key.
	p := filepath.Join(acmeDir, name+".key")
	var key crypto.Signer
	f, err := os.Open(p)
	if f != nil {
		defer f.Close()
	}
	switch {
	case err == nil:
		// Existing key file: expect a single PKCS#8 "PRIVATE KEY" PEM block.
		buf, err := io.ReadAll(f)
		if err != nil {
			return nil, fmt.Errorf("reading identity key: %w", err)
		}
		block, _ := pem.Decode(buf)
		if block == nil {
			return nil, fmt.Errorf("no pem data")
		} else if block.Type != "PRIVATE KEY" {
			return nil, fmt.Errorf("got PEM block %q, expected \"PRIVATE KEY\"", block.Type)
		}
		privKey, err := x509.ParsePKCS8PrivateKey(block.Bytes)
		if err != nil {
			return nil, fmt.Errorf("parsing PKCS8 private key: %w", err)
		}
		switch k := privKey.(type) {
		case *ecdsa.PrivateKey:
			key = k
		case *rsa.PrivateKey:
			key = k
		default:
			// Report the type of the parsed key, not of the still-unset signer.
			return nil, fmt.Errorf("unsupported private key type %T", privKey)
		}

	case errors.Is(err, os.ErrNotExist):
		// No key file yet: generate a new identity key and store it.
		ecKey, err := ecdsa.GenerateKey(elliptic.P256(), cryptorand.Reader)
		if err != nil {
			return nil, fmt.Errorf("generating ecdsa identity key: %w", err)
		}
		der, err := x509.MarshalPKCS8PrivateKey(ecKey)
		if err != nil {
			return nil, fmt.Errorf("marshal identity key: %w", err)
		}
		block := &pem.Block{
			Type: "PRIVATE KEY",
			Headers: map[string]string{
				"Note": fmt.Sprintf("PEM PKCS8 ECDSA private key generated for ACME provider %s by mox", name),
			},
			Bytes: der,
		}
		b := &bytes.Buffer{}
		if err := pem.Encode(b, block); err != nil {
			return nil, fmt.Errorf("pem encode: %w", err)
		}
		if err := os.WriteFile(p, b.Bytes(), 0660); err != nil {
			return nil, fmt.Errorf("writing identity key: %w", err)
		}
		key = ecKey

	default:
		return nil, fmt.Errorf("open identity key file: %w", err)
	}

	m := &autocert.Manager{
		Cache:  dirCache(filepath.Join(acmeDir, "keycerts", name)),
		Prompt: autocert.AcceptTOS,
		Email:  contactEmail,
		Client: &acme.Client{
			DirectoryURL: directoryURL,
			Key:          key,
			UserAgent:    "mox/" + moxvar.Version,
		},
		GetPrivateKey: getPrivateKey,
		// HostPolicy set below.
	}
	// If external account binding key is provided, use it for registering a new account.
	// todo: ideally the key and its id are provided temporarily by the admin when registering a new account. but we don't do that interactive setup yet. in the future, an interactive setup/quickstart would ask for the key once to register a new acme account.
	if eabKeyID != "" {
		m.ExternalAccountBinding = &acme.ExternalAccountBinding{
			KID: eabKeyID,
			Key: eabKey,
		}
	}

	a := &Manager{
		Manager:  m,
		shutdown: shutdown,
		hosts:    map[dns.Domain]struct{}{},
	}
	m.HostPolicy = a.HostPolicy

	// Start from autocert's TLS config, but route certificate lookups through our
	// logging wrapper, without any fallback hostname. Clone is used instead of a
	// struct copy because tls.Config contains locks that must not be copied by value.
	acmeTLSConfig := m.TLSConfig().Clone()
	acmeTLSConfig.GetCertificate = func(hello *tls.ClientHelloInfo) (*tls.Certificate, error) {
		return a.loggingGetCertificate(hello, dns.Domain{}, false, false)
	}
	a.ACMETLSConfig = acmeTLSConfig
	return a, nil
}

// loggingGetCertificate is a helper to implement crypto/tls.Config.GetCertificate.
// It logs the outcome of the lookup, and optionally falls back to a certificate
// for fallbackHostname when SNI is absent (fallbackNoSNI) or when the SNI hostname
// is not allowlisted (fallbackUnknownSNI).
//
// If we can't find a certificate (depending on fallback parameters), we return a
// nil certificate and nil error, which crypto/tls turns into a TLS alert
// "unrecognized name", which can be interpreted by clients as a hint that they are
// using the wrong hostname, or a certificate is missing.
//
// Note that hello.ServerName is overwritten with the fallback hostname when a
// fallback is applied.
func (m *Manager) loggingGetCertificate(hello *tls.ClientHelloInfo, fallbackHostname dns.Domain, fallbackNoSNI, fallbackUnknownSNI bool) (*tls.Certificate, error) {
	log := mlog.New("autotls", nil).WithContext(hello.Context())

	if hello.ServerName == "" {
		if fallbackNoSNI {
			hello.ServerName = fallbackHostname.ASCII
		}
		// Still no name to look up: reject here, to prevent logging an error below.
		if hello.ServerName == "" {
			log.Debug("tls request without sni servername, rejecting", slog.Any("localaddr", hello.Conn.LocalAddr()), slog.Any("supportedprotos", hello.SupportedProtos))
			return nil, nil
		}
	}

	cert, err := m.Manager.GetCertificate(hello)
	switch {
	case err == nil:
		return cert, nil
	case !errors.Is(err, errHostNotAllowed):
		// A failure other than the host policy rejecting the name.
		log.Errorx("requesting certificate", err, slog.String("host", hello.ServerName))
		return cert, err
	case !fallbackUnknownSNI:
		// Unknown hostname and no fallback requested: signal "unrecognized name".
		log.Debugx("requesting certificate", err, slog.String("host", hello.ServerName))
		return nil, nil
	}

	// Unknown hostname, retry with the fallback hostname.
	log.Debug("certificate for unknown hostname, using fallback hostname", slog.String("host", hello.ServerName))
	hello.ServerName = fallbackHostname.ASCII
	cert, err = m.Manager.GetCertificate(hello)
	hostAttr := slog.String("host", hello.ServerName)
	if err == nil {
		log.Debugx("requesting certificate for fallback hostname", err, hostAttr)
	} else {
		log.Errorx("requesting certificate for fallback hostname", err, hostAttr)
	}
	return cert, err
}

// TLSConfig returns a new TLS server config whose GetCertificate looks up
// certificates through this manager, optionally using a certificate for
// fallbackHostname if no SNI was done, or for an unknown hostname.
//
// If fallbackNoSNI is set, TLS connections without SNI will use a certificate for
// fallbackHostname. Otherwise, connections without SNI will fail with a message
// that no TLS certificate is available.
//
// If fallbackUnknownSNI is set, TLS connections with an SNI hostname that is not
// allowlisted will instead use a certificate for fallbackHostname. Otherwise, such
// TLS connections will fail.
func (m *Manager) TLSConfig(fallbackHostname dns.Domain, fallbackNoSNI, fallbackUnknownSNI bool) *tls.Config {
	getCert := func(hello *tls.ClientHelloInfo) (*tls.Certificate, error) {
		return m.loggingGetCertificate(hello, fallbackHostname, fallbackNoSNI, fallbackUnknownSNI)
	}
	config := &tls.Config{}
	config.GetCertificate = getCert
	return config
}

// CertAvailable checks whether a currently valid (already valid, not yet expired)
// ECDSA certificate is available in the cache for host. No other checks than the
// validity period are done.
//
// A cache miss is not an error: it results in false with a nil error. Other cache
// errors, and cached data that cannot be parsed, result in an error.
func (m *Manager) CertAvailable(ctx context.Context, log mlog.Log, host dns.Domain) (bool, error) {
	// The cache key is the plain hostname. Would be "+rsa" for rsa keys.
	cacheKey := host.ASCII
	data, err := m.Manager.Cache.Get(ctx, cacheKey)
	if err != nil {
		if errors.Is(err, autocert.ErrCacheMiss) {
			return false, nil
		}
		return false, fmt.Errorf("attempt to get certificate from cache: %v", err)
	}

	// The cached keycert is of the form: private key, leaf certificate, intermediate certificates...
	keyBlock, rest := pem.Decode(data)
	if keyBlock == nil {
		return false, fmt.Errorf("missing private key in cached keycert file")
	}
	certBlock, _ := pem.Decode(rest)
	switch {
	case certBlock == nil:
		return false, fmt.Errorf("missing certificate in cached keycert file")
	case certBlock.Type != "CERTIFICATE":
		return false, fmt.Errorf("second pem block is %q, expected CERTIFICATE", certBlock.Type)
	}

	leaf, err := x509.ParseCertificate(certBlock.Bytes)
	if err != nil {
		return false, fmt.Errorf("parsing certificate from cached keycert file: %v", err)
	}

	// We assume the certificate has a matching hostname, and is properly CA-signed. We
	// only check that the current time is within the validity period.
	valid := time.Until(leaf.NotBefore) <= 0 && time.Since(leaf.NotAfter) <= 0
	return valid, nil
}

// SetAllowedHostnames sets a new list of allowed hostnames for automatic TLS,
// replacing the previous list. The hostnames map is stored as is, not copied.
//
// After setting the host names, a goroutine is started to check that newly added
// host names are fully served by publicIPs. This only happens if checkHosts is
// set, at least one host name was added, and publicIPs is non-empty and contains
// no unspecified address. If a host resolves to an IP that is not in publicIPs, a
// warning is logged that ACME validation is likely to fail. The DNS lookups for
// all added hosts share a single 10 second timeout.
func (m *Manager) SetAllowedHostnames(log mlog.Log, resolver dns.Resolver, hostnames map[dns.Domain]struct{}, publicIPs []string, checkHosts bool) {
	m.Lock()
	defer m.Unlock()

	// Collect a sorted slice for logging, and the hosts that weren't allowed before.
	sorted := make([]dns.Domain, 0, len(hostnames))
	var added []dns.Domain
	for d := range hostnames {
		sorted = append(sorted, d)
		if _, known := m.hosts[d]; !known {
			added = append(added, d)
		}
	}
	sort.Slice(sorted, func(i, j int) bool {
		return sorted[i].Name() < sorted[j].Name()
	})
	log.Debug("autotls setting allowed hostnames", slog.Any("hostnames", sorted), slog.Any("publicips", publicIPs))

	m.hosts = hostnames

	if !checkHosts || len(added) == 0 || len(publicIPs) == 0 {
		return
	}
	// When listening on an unspecified address, any IP of the host may be served,
	// so there is nothing to check.
	for _, ip := range publicIPs {
		if net.ParseIP(ip).IsUnspecified() {
			return
		}
	}

	checkIPs := func() {
		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
		defer cancel()

		listening := make(map[string]struct{}, len(publicIPs))
		for _, ip := range publicIPs {
			listening[ip] = struct{}{}
		}

		log.Debug("checking ips of hosts configured for acme tls cert validation")
		for _, h := range added {
			ips, _, err := resolver.LookupIP(ctx, "ip", h.ASCII+".")
			if err != nil {
				log.Warnx("acme tls cert validation for host may fail due to dns lookup error", err, slog.Any("host", h))
				continue
			}
			for _, ip := range ips {
				if _, ok := listening[ip.String()]; ok {
					continue
				}
				log.Warn("acme tls cert validation for host is likely to fail because not all its ips are being listened on",
					slog.Any("hostname", h),
					slog.Any("listenedips", publicIPs),
					slog.Any("hostips", ips),
					slog.Any("missingip", ip))
			}
		}
	}
	go checkIPs()
}

// Hostnames returns the allowed host names for use with ACME, in unspecified
// order. The result is nil if no host names are allowed.
func (m *Manager) Hostnames() []dns.Domain {
	m.Lock()
	defer m.Unlock()

	if len(m.hosts) == 0 {
		return nil
	}
	names := make([]dns.Domain, 0, len(m.hosts))
	for d := range m.hosts {
		names = append(names, d)
	}
	return names
}

// errHostNotAllowed is returned (wrapped) by HostPolicy for a host that is not in
// the allowlist. loggingGetCertificate checks for it with errors.Is to decide
// whether to reject the connection or use the fallback hostname.
var errHostNotAllowed = errors.New("autotls: host not in allowlist")

// HostPolicy decides if a host is allowed for use with ACME, i.e. whether a
// certificate will be returned if present and/or will be requested if not yet
// present. Only hosts added with SetAllowedHostnames are allowed, for which nil is
// returned. For other hosts, the returned error wraps errHostNotAllowed. During
// shutdown, no new connections are allowed.
//
// The result is logged at debug level, with the host as it was evaluated (i.e.
// without port number).
func (m *Manager) HostPolicy(ctx context.Context, host string) (rerr error) {
	log := mlog.New("autotls", nil).WithContext(ctx)
	defer func() {
		log.Debugx("autotls hostpolicy result", rerr, slog.String("host", host))
	}()

	// Don't request new TLS certs when we are shutting down.
	select {
	case <-m.shutdown:
		return fmt.Errorf("shutting down")
	default:
	}

	// For http-01, host may include a port number. Strip it if present.
	if hostOnly, _, err := net.SplitHostPort(host); err == nil {
		host = hostOnly
	}

	domain, err := dns.ParseDomain(host)
	if err != nil {
		return fmt.Errorf("invalid host: %v", err)
	}

	m.Lock()
	_, allowed := m.hosts[domain]
	m.Unlock()
	if allowed {
		return nil
	}
	return fmt.Errorf("%w: %q", errHostNotAllowed, domain)
}

// dirCache is an autocert.DirCache whose Delete, Get and Put methods delegate to
// the underlying autocert.DirCache and log the operation and its result. Put also
// increments the metricCertput counter.
type dirCache autocert.DirCache

// Delete removes the entry for name from the underlying directory cache, logging
// the result. Successful deletes are logged at info level, except for entries
// with a name ending in "+token".
func (d dirCache) Delete(ctx context.Context, name string) (rerr error) {
	log := mlog.New("autotls", nil).WithContext(ctx)
	defer func() {
		log.Debugx("dircache delete result", rerr, slog.String("name", name))
	}()

	rerr = autocert.DirCache(d).Delete(ctx, name)
	switch {
	case rerr != nil:
		log.Errorx("deleting cert from dir cache", rerr, slog.String("name", name))
	case !strings.HasSuffix(name, "+token"):
		log.Info("autotls cert delete", slog.String("name", name))
	}
	return rerr
}

// Get returns the data stored for name in the underlying directory cache, logging
// the result. A cache miss is logged at info level, other errors at error level.
// Successful gets are logged at debug level, except for entries with a name
// ending in "+token".
func (d dirCache) Get(ctx context.Context, name string) (rbuf []byte, rerr error) {
	log := mlog.New("autotls", nil).WithContext(ctx)
	defer func() {
		log.Debugx("dircache get result", rerr, slog.String("name", name))
	}()

	rbuf, rerr = autocert.DirCache(d).Get(ctx, name)
	switch {
	case rerr == nil:
		if !strings.HasSuffix(name, "+token") {
			log.Debug("autotls cert get", slog.String("name", name))
		}
	case errors.Is(rerr, autocert.ErrCacheMiss):
		log.Infox("getting cert from dir cache", rerr, slog.String("name", name))
	default:
		log.Errorx("getting cert from dir cache", rerr, slog.String("name", name))
	}
	return rbuf, rerr
}

// Put stores data for name in the underlying directory cache, logging the result.
// The put counter metric is incremented for every call, before the write is
// attempted. Successful puts are logged at info level, except for entries with a
// name ending in "+token".
func (d dirCache) Put(ctx context.Context, name string, data []byte) (rerr error) {
	log := mlog.New("autotls", nil).WithContext(ctx)
	defer func() {
		log.Debugx("dircache put result", rerr, slog.String("name", name))
	}()

	metricCertput.Inc()
	rerr = autocert.DirCache(d).Put(ctx, name, data)
	switch {
	case rerr != nil:
		log.Errorx("storing cert in dir cache", rerr, slog.String("name", name))
	case !strings.HasSuffix(name, "+token"):
		log.Info("autotls cert store", slog.String("name", name))
	}
	return rerr
}