From fc2ff9155cc53393ac29885f6de83ed87093b274 Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Sun, 4 Feb 2018 00:58:27 -0700 Subject: [PATCH 1/8] tls: Restructure and improve certificate management - Expose the list of Caddy instances through caddy.Instances() - Added arbitrary storage to caddy.Instance - The cache of loaded certificates is no longer global; now scoped per-instance, meaning upon reload (like SIGUSR1) the old cert cache will be discarded entirely, whereas before, aggressively reloading config that added and removed lots of sites would cause unnecessary build-up in the cache over time. - Key certificates in the cache by their SHA-256 hash instead of by their names. This means certificates will not be duplicated in memory (within each instance), making Caddy much more memory-efficient for large-scale deployments with thousands of sites sharing certs. - Perform name-to-certificate lookups scoped per caddytls.Config instead of a single global lookup. This prevents certificates from stepping on each other when they overlap in their names. - Do not allow TLS configurations keyed by the same hostname to be different; this now throws an error. - Updated relevant tests, with a stark awareness that more tests are needed. - Change the NewContext function signature to include an *Instance. - Strongly recommend (basically require) use of caddytls.NewConfig() to create a new *caddytls.Config, to ensure pointers to the instance certificate cache are initialized properly. - Update the TLS-SNI challenge solver (even though TLS-SNI is disabled currently on the CA side). Store temporary challenge cert in instance cache, but do so directly by the ACME challenge name, not the hash. Modified the getCertificate function to check the cache directly for a name match if one isn't found otherwise. This will allow any caddytls.Config to be able to help solve a TLS-SNI challenge, with one extra side-effect that might actually be kind of interesting (and useless): clients could send a certificate's hash as the SNI and Caddy would be able to serve that certificate for the handshake. - Do not attempt to match a "default" (random) certificate when SNI is present but unrecognized; return no certificate so a TLS alert happens instead. - Store an Instance in the list of instances even while the instance is still starting up (this allows access to the cert cache for performing renewals at startup, etc). Will be removed from list again if instance startup fails. - Laid groundwork for ACMEv2 and Let's Encrypt wildcard support. Server type plugins will need to be updated slightly to accommodate minor adjustments to their API (like passing in an Instance). This commit includes the changes for the HTTP server. Certain Caddyfile configurations might error out with this change, if they configured different TLS settings for the same hostname. This change trades some complexity for other complexity, but ultimately this new complexity is more correct and robust than earlier logic. Fixes #1991 Fixes #1994 Fixes #1303 --- caddy.go | 56 ++++- caddyhttp/httpserver/https.go | 7 +- caddyhttp/httpserver/plugin.go | 24 +- caddyhttp/httpserver/plugin_test.go | 8 +- caddytls/certificates.go | 323 +++++++++++++++----------- caddytls/certificates_test.go | 70 +++--- caddytls/client.go | 2 +- caddytls/config.go | 116 +++++++++- caddytls/crypto.go | 8 +- caddytls/handshake.go | 127 +++++++--- caddytls/handshake_test.go | 38 +-- caddytls/maintain.go | 346 +++++++++++++++------------- caddytls/setup.go | 17 +- caddytls/setup_test.go | 44 +++- caddytls/tls.go | 22 +- controller.go | 20 +- plugins.go | 2 +- 17 files changed, 801 insertions(+), 429 deletions(-) diff --git a/caddy.go b/caddy.go index 8da6d4db8..dd2d473a9 100644 --- a/caddy.go +++ b/caddy.go @@ -79,6 +79,8 @@ var ( // Instance contains the state of servers created as a result of // calling Start and can be used to access or control those servers. +// It is literally an instance of a server type. Instance values +// should NOT be copied. Use *Instance for safety. type Instance struct { // serverType is the name of the instance's server type serverType string @@ -89,10 +91,11 @@ type Instance struct { // wg is used to wait for all servers to shut down wg *sync.WaitGroup - // context is the context created for this instance. + // context is the context created for this instance, + // used to coordinate the setting up of the server type context Context - // servers is the list of servers with their listeners. + // servers is the list of servers with their listeners servers []ServerListener // these callbacks execute when certain events occur @@ -101,6 +104,18 @@ type Instance struct { onRestart []func() error // before restart commences onShutdown []func() error // stopping, even as part of a restart onFinalShutdown []func() error // stopping, not as part of a restart + + // storing values on an instance is preferable to + // global state because these will get garbage- + // collected after in-process reloads when the + // old instances are destroyed; use StorageMu + // to access this value safely + Storage map[interface{}]interface{} + StorageMu sync.RWMutex +} + +func Instances() []*Instance { + return instances } // Servers returns the ServerListeners in i. @@ -196,7 +211,7 @@ func (i *Instance) Restart(newCaddyfile Input) (*Instance, error) { } // create new instance; if the restart fails, it is simply discarded - newInst := &Instance{serverType: newCaddyfile.ServerType(), wg: i.wg} + newInst := &Instance{serverType: newCaddyfile.ServerType(), wg: i.wg, Storage: make(map[interface{}]interface{})} // attempt to start new instance err := startWithListenerFds(newCaddyfile, newInst, restartFds) @@ -455,7 +470,7 @@ func (i *Instance) Caddyfile() Input { // // This function blocks until all the servers are listening. func Start(cdyfile Input) (*Instance, error) { - inst := &Instance{serverType: cdyfile.ServerType(), wg: new(sync.WaitGroup)} + inst := &Instance{serverType: cdyfile.ServerType(), wg: new(sync.WaitGroup), Storage: make(map[interface{}]interface{})} err := startWithListenerFds(cdyfile, inst, nil) if err != nil { return inst, err @@ -468,11 +483,34 @@ func Start(cdyfile Input) (*Instance, error) { } func startWithListenerFds(cdyfile Input, inst *Instance, restartFds map[string]restartTriple) error { + // save this instance in the list now so that + // plugins can access it if need be, for example + // the caddytls package, so it can perform cert + // renewals while starting up; we just have to + // remove the instance from the list later if + // it fails + instancesMu.Lock() + instances = append(instances, inst) + instancesMu.Unlock() + var err error + defer func() { + if err != nil { + instancesMu.Lock() + for i, otherInst := range instances { + if otherInst == inst { + instances = append(instances[:i], instances[i+1:]...) + break + } + } + instancesMu.Unlock() + } + }() + if cdyfile == nil { cdyfile = CaddyfileInput{} } - err := ValidateAndExecuteDirectives(cdyfile, inst, false) + err = ValidateAndExecuteDirectives(cdyfile, inst, false) if err != nil { return err } @@ -504,10 +542,6 @@ func startWithListenerFds(cdyfile Input, inst *Instance, restartFds map[string]r return err } - instancesMu.Lock() - instances = append(instances, inst) - instancesMu.Unlock() - // run any AfterStartup callbacks if this is not // part of a restart; then show file descriptor notice if restartFds == nil { @@ -546,7 +580,7 @@ func startWithListenerFds(cdyfile Input, inst *Instance, restartFds map[string]r func ValidateAndExecuteDirectives(cdyfile Input, inst *Instance, justValidate bool) error { // If parsing only inst will be nil, create an instance for this function call only. if justValidate { - inst = &Instance{serverType: cdyfile.ServerType(), wg: new(sync.WaitGroup)} + inst = &Instance{serverType: cdyfile.ServerType(), wg: new(sync.WaitGroup), Storage: make(map[interface{}]interface{})} } stypeName := cdyfile.ServerType() @@ -563,7 +597,7 @@ func ValidateAndExecuteDirectives(cdyfile Input, inst *Instance, justValidate bo return err } - inst.context = stype.NewContext() + inst.context = stype.NewContext(inst) if inst.context == nil { return fmt.Errorf("server type %s produced a nil Context", stypeName) } diff --git a/caddyhttp/httpserver/https.go b/caddyhttp/httpserver/https.go index a1c84f11b..a12d9982c 100644 --- a/caddyhttp/httpserver/https.go +++ b/caddyhttp/httpserver/https.go @@ -27,7 +27,7 @@ func activateHTTPS(cctx caddy.Context) error { operatorPresent := !caddy.Started() if !caddy.Quiet && operatorPresent { - fmt.Print("Activating privacy features...") + fmt.Print("Activating privacy features... ") } ctx := cctx.(*httpContext) @@ -69,7 +69,7 @@ func activateHTTPS(cctx caddy.Context) error { } if !caddy.Quiet && operatorPresent { - fmt.Println(" done.") + fmt.Println("done.") } return nil @@ -163,6 +163,7 @@ func redirPlaintextHost(cfg *SiteConfig) *SiteConfig { if redirPort == DefaultHTTPSPort { redirPort = "" // default port is redundant } + redirMiddleware := func(next Handler) Handler { return HandlerFunc(func(w http.ResponseWriter, r *http.Request) (int, error) { // Construct the URL to which to redirect. Note that the Host in a request might @@ -184,9 +185,11 @@ func redirPlaintextHost(cfg *SiteConfig) *SiteConfig { return 0, nil }) } + host := cfg.Addr.Host port := HTTPPort addr := net.JoinHostPort(host, port) + return &SiteConfig{ Addr: Address{Original: addr, Host: host, Port: port}, ListenHost: cfg.ListenHost, diff --git a/caddyhttp/httpserver/plugin.go b/caddyhttp/httpserver/plugin.go index 643eea7f7..ea31a58d8 100644 --- a/caddyhttp/httpserver/plugin.go +++ b/caddyhttp/httpserver/plugin.go @@ -91,11 +91,13 @@ func hideCaddyfile(cctx caddy.Context) error { return nil } -func newContext() caddy.Context { - return &httpContext{keysToSiteConfigs: make(map[string]*SiteConfig)} +func newContext(inst *caddy.Instance) caddy.Context { + return &httpContext{instance: inst, keysToSiteConfigs: make(map[string]*SiteConfig)} } type httpContext struct { + instance *caddy.Instance + // keysToSiteConfigs maps an address at the top of a // server block (a "key") to its SiteConfig. Not all // SiteConfigs will be represented here, only ones @@ -146,15 +148,19 @@ func (h *httpContext) InspectServerBlocks(sourceFile string, serverBlocks []cadd altTLSSNIPort = HTTPSPort } + // Make our caddytls.Config, which has a pointer to the + // instance's certificate cache and enough information + // to use automatic HTTPS when the time comes + caddytlsConfig := caddytls.NewConfig(h.instance) + caddytlsConfig.Hostname = addr.Host + caddytlsConfig.AltHTTPPort = altHTTPPort + caddytlsConfig.AltTLSSNIPort = altTLSSNIPort + // Save the config to our master list, and key it for lookups cfg := &SiteConfig{ - Addr: addr, - Root: Root, - TLS: &caddytls.Config{ - Hostname: addr.Host, - AltHTTPPort: altHTTPPort, - AltTLSSNIPort: altTLSSNIPort, - }, + Addr: addr, + Root: Root, + TLS: caddytlsConfig, originCaddyfile: sourceFile, IndexPages: staticfiles.DefaultIndexPages, } diff --git a/caddyhttp/httpserver/plugin_test.go b/caddyhttp/httpserver/plugin_test.go index 31eafd8f2..5a60f2e83 100644 --- a/caddyhttp/httpserver/plugin_test.go +++ b/caddyhttp/httpserver/plugin_test.go @@ -137,7 +137,7 @@ func TestAddressString(t *testing.T) { func TestInspectServerBlocksWithCustomDefaultPort(t *testing.T) { Port = "9999" filename := "Testfile" - ctx := newContext().(*httpContext) + ctx := newContext(&caddy.Instance{Storage: make(map[interface{}]interface{})}).(*httpContext) input := strings.NewReader(`localhost`) sblocks, err := caddyfile.Parse(filename, input, nil) if err != nil { @@ -155,7 +155,7 @@ func TestInspectServerBlocksWithCustomDefaultPort(t *testing.T) { func TestInspectServerBlocksCaseInsensitiveKey(t *testing.T) { filename := "Testfile" - ctx := newContext().(*httpContext) + ctx := newContext(&caddy.Instance{Storage: make(map[interface{}]interface{})}).(*httpContext) input := strings.NewReader("localhost {\n}\nLOCALHOST {\n}") sblocks, err := caddyfile.Parse(filename, input, nil) if err != nil { @@ -207,7 +207,7 @@ func TestDirectivesList(t *testing.T) { } func TestContextSaveConfig(t *testing.T) { - ctx := newContext().(*httpContext) + ctx := newContext(&caddy.Instance{Storage: make(map[interface{}]interface{})}).(*httpContext) ctx.saveConfig("foo", new(SiteConfig)) if _, ok := ctx.keysToSiteConfigs["foo"]; !ok { t.Error("Expected config to be saved, but it wasn't") @@ -226,7 +226,7 @@ func TestContextSaveConfig(t *testing.T) { // Test to make sure we are correctly hiding the Caddyfile func TestHideCaddyfile(t *testing.T) { - ctx := newContext().(*httpContext) + ctx := newContext(&caddy.Instance{Storage: make(map[interface{}]interface{})}).(*httpContext) ctx.saveConfig("test", &SiteConfig{ Root: Root, originCaddyfile: "Testfile", diff --git a/caddytls/certificates.go b/caddytls/certificates.go index 05af914fe..2df576ff3 100644 --- a/caddytls/certificates.go +++ b/caddytls/certificates.go @@ -15,9 +15,11 @@ package caddytls import ( + "crypto/sha256" "crypto/tls" "crypto/x509" "errors" + "fmt" "io/ioutil" "log" "strings" @@ -27,24 +29,14 @@ import ( "golang.org/x/crypto/ocsp" ) -// certCache stores certificates in memory, -// keying certificates by name. Certificates -// should not overlap in the names they serve, -// because a name only maps to one certificate. -var certCache = make(map[string]Certificate) -var certCacheMu sync.RWMutex - // Certificate is a tls.Certificate with associated metadata tacked on. // Even if the metadata can be obtained by parsing the certificate, -// we can be more efficient by extracting the metadata once so it's -// just there, ready to use. +// we are more efficient by extracting the metadata onto this struct. type Certificate struct { tls.Certificate // Names is the list of names this certificate is written for. // The first is the CommonName (if any), the rest are SAN. - // This should be the exact list of keys by which this cert - // is accessed in the cache, careful to avoid overlap. Names []string // NotAfter is when the certificate expires. @@ -53,59 +45,91 @@ type Certificate struct { // OCSP contains the certificate's parsed OCSP response. OCSP *ocsp.Response - // Config is the configuration with which the certificate was - // loaded or obtained and with which it should be maintained. - Config *Config + // The hex-encoded hash of this cert's chain's bytes. + Hash string + + // configs is the list of configs that use or refer to + // The first one is assumed to be the config that is + // "in charge" of this certificate (i.e. determines + // whether it is managed, how it is managed, etc). + // This field will be populated by cacheCertificate. + // Only meddle with it if you know what you're doing! + configs []*Config } -// getCertificate gets a certificate that matches name (a server name) -// from the in-memory cache. If there is no exact match for name, it -// will be checked against names of the form '*.example.com' (wildcard -// certificates) according to RFC 6125. If a match is found, matched will -// be true. If no matches are found, matched will be false and a default -// certificate will be returned with defaulted set to true. If no default -// certificate is set, defaulted will be set to false. +// certificateCache is to be an instance-wide cache of certs +// that site-specific TLS configs can refer to. Using a +// central map like this avoids duplication of certs in +// memory when the cert is used by multiple sites, and makes +// maintenance easier. Because these are not to be global, +// the cache will get garbage collected after a config reload +// (a new instance will take its place). +type certificateCache struct { + sync.RWMutex + cache map[string]Certificate // keyed by certificate hash +} + +// replaceCertificate replaces oldCert with newCert in the cache, and +// updates all configs that are pointing to the old certificate to +// point to the new one instead. newCert must already be loaded into +// the cache (this method does NOT load it into the cache). // -// The logic in this function is adapted from the Go standard library, -// which is by the Go Authors. +// Note that all the names on the old certificate will be deleted +// from the name lookup maps of each config, then all the names on +// the new certificate will be added to the lookup maps as long as +// they do not overwrite any entries. // -// This function is safe for concurrent use. -func getCertificate(name string) (cert Certificate, matched, defaulted bool) { - var ok bool +// The newCert may be modified and its cache entry updated. +// +// This method is safe for concurrent use. +func (certCache *certificateCache) replaceCertificate(oldCert, newCert Certificate) error { + certCache.Lock() + defer certCache.Unlock() - // Not going to trim trailing dots here since RFC 3546 says, - // "The hostname is represented ... without a trailing dot." - // Just normalize to lowercase. - name = strings.ToLower(name) + // have all the configs that are pointing to the old + // certificate point to the new certificate instead + for _, cfg := range oldCert.configs { + // first delete all the name lookup entries that + // pointed to the old certificate + for name, certKey := range cfg.Certificates { + if certKey == oldCert.Hash { + delete(cfg.Certificates, name) + } + } - certCacheMu.RLock() - defer certCacheMu.RUnlock() - - // exact match? great, let's use it - if cert, ok = certCache[name]; ok { - matched = true - return - } - - // try replacing labels in the name with wildcards until we get a match - labels := strings.Split(name, ".") - for i := range labels { - labels[i] = "*" - candidate := strings.Join(labels, ".") - if cert, ok = certCache[candidate]; ok { - matched = true - return + // then add name lookup entries for the names + // on the new certificate, but don't overwrite + // entries that may already exist, not only as + // a courtesy, but importantly: because if we + // overwrote a value here, and this config no + // longer pointed to a certain certificate in + // the cache, that certificate's list of configs + // referring to it would be incorrect; so just + // insert entries, don't overwrite any + for _, name := range newCert.Names { + if _, ok := cfg.Certificates[name]; !ok { + cfg.Certificates[name] = newCert.Hash + } } } - // if nothing matches, use the default certificate or bust - cert, defaulted = certCache[""] - return + // since caching a new certificate attaches only the config + // that loaded it, the new certificate needs to be given the + // list of all the configs that use it, so copy the list + // over from the old certificate to the new certificate + // in the cache + newCert.configs = oldCert.configs + certCache.cache[newCert.Hash] = newCert + + // finally, delete the old certificate from the cache + delete(certCache.cache, oldCert.Hash) + + return nil } // CacheManagedCertificate loads the certificate for domain into the -// cache, flagging it as Managed and, if onDemand is true, as "OnDemand" -// (meaning that it was obtained or loaded during a TLS handshake). +// cache, from the TLS storage for managed certificates. It returns a +// copy of the Certificate that was put into the cache. // // This method is safe for concurrent use. func (cfg *Config) CacheManagedCertificate(domain string) (Certificate, error) { @@ -117,39 +141,24 @@ func (cfg *Config) CacheManagedCertificate(domain string) (Certificate, error) { if err != nil { return Certificate{}, err } - cert, err := makeCertificate(siteData.Cert, siteData.Key) + cert, err := makeCertificateWithOCSP(siteData.Cert, siteData.Key) if err != nil { return cert, err } - cert.Config = cfg - cacheCertificate(cert) - return cert, nil + return cfg.cacheCertificate(cert), nil } // cacheUnmanagedCertificatePEMFile loads a certificate for host using certFile // and keyFile, which must be in PEM format. It stores the certificate in -// memory after evicting any other entries in the cache keyed by the names -// on this certificate. In other words, it replaces existing certificates keyed -// by the names on this certificate. The Managed and OnDemand flags of the -// certificate will be set to false. +// the in-memory cache. // // This function is safe for concurrent use. -func cacheUnmanagedCertificatePEMFile(certFile, keyFile string) error { - cert, err := makeCertificateFromDisk(certFile, keyFile) +func (cfg *Config) cacheUnmanagedCertificatePEMFile(certFile, keyFile string) error { + cert, err := makeCertificateFromDiskWithOCSP(certFile, keyFile) if err != nil { return err } - - // since this is manually managed, this call might be part of a reload after - // the owner renewed a certificate; so clear cache of any previous cert first, - // otherwise the renewed certificate may never be loaded - certCacheMu.Lock() - for _, name := range cert.Names { - delete(certCache, name) - } - certCacheMu.Unlock() - - cacheCertificate(cert) + cfg.cacheCertificate(cert) return nil } @@ -157,20 +166,20 @@ func cacheUnmanagedCertificatePEMFile(certFile, keyFile string) error { // of the certificate and key, then caches it in memory. // // This function is safe for concurrent use. -func cacheUnmanagedCertificatePEMBytes(certBytes, keyBytes []byte) error { - cert, err := makeCertificate(certBytes, keyBytes) +func (cfg *Config) cacheUnmanagedCertificatePEMBytes(certBytes, keyBytes []byte) error { + cert, err := makeCertificateWithOCSP(certBytes, keyBytes) if err != nil { return err } - cacheCertificate(cert) + cfg.cacheCertificate(cert) return nil } -// makeCertificateFromDisk makes a Certificate by loading the +// makeCertificateFromDiskWithOCSP makes a Certificate by loading the // certificate and key files. It fills out all the fields in // the certificate except for the Managed and OnDemand flags. -// (It is up to the caller to set those.) -func makeCertificateFromDisk(certFile, keyFile string) (Certificate, error) { +// (It is up to the caller to set those.) It staples OCSP. +func makeCertificateFromDiskWithOCSP(certFile, keyFile string) (Certificate, error) { certPEMBlock, err := ioutil.ReadFile(certFile) if err != nil { return Certificate{}, err @@ -179,13 +188,14 @@ func makeCertificateFromDisk(certFile, keyFile string) (Certificate, error) { if err != nil { return Certificate{}, err } - return makeCertificate(certPEMBlock, keyPEMBlock) + return makeCertificateWithOCSP(certPEMBlock, keyPEMBlock) } // makeCertificate turns a certificate PEM bundle and a key PEM block into -// a Certificate, with OCSP and other relevant metadata tagged with it, -// except for the OnDemand and Managed flags. It is up to the caller to -// set those properties. +// a Certificate with necessary metadata from parsing its bytes filled into +// its struct fields for convenience (except for the OnDemand and Managed +// flags; it is up to the caller to set those properties!). This function +// does NOT staple OCSP. func makeCertificate(certPEMBlock, keyPEMBlock []byte) (Certificate, error) { var cert Certificate @@ -195,16 +205,26 @@ func makeCertificate(certPEMBlock, keyPEMBlock []byte) (Certificate, error) { return cert, err } - // Extract relevant metadata and staple OCSP + // Extract necessary metadata err = fillCertFromLeaf(&cert, tlsCert) if err != nil { return cert, err } + + return cert, nil +} + +// makeCertificateWithOCSP is the same as makeCertificate except that it also +// staples OCSP to the certificate. +func makeCertificateWithOCSP(certPEMBlock, keyPEMBlock []byte) (Certificate, error) { + cert, err := makeCertificate(certPEMBlock, keyPEMBlock) + if err != nil { + return cert, err + } err = stapleOCSP(&cert, certPEMBlock) if err != nil { log.Printf("[WARNING] Stapling OCSP: %v", err) } - return cert, nil } @@ -243,65 +263,104 @@ func fillCertFromLeaf(cert *Certificate, tlsCert tls.Certificate) error { return errors.New("certificate has no names") } + // save the hash of this certificate (chain) and + // expiration date, for necessity and efficiency + cert.Hash = hashCertificateChain(cert.Certificate.Certificate) cert.NotAfter = leaf.NotAfter return nil } -// cacheCertificate adds cert to the in-memory cache. If the cache is -// empty, cert will be used as the default certificate. If the cache is -// full, random entries are deleted until there is room to map all the -// names on the certificate. +// hashCertificateChain computes the unique hash of certChain, +// which is the chain of DER-encoded bytes. It returns the +// hex encoding of the hash. +func hashCertificateChain(certChain [][]byte) string { + h := sha256.New() + for _, certInChain := range certChain { + h.Write(certInChain) + } + return fmt.Sprintf("%x", h.Sum(nil)) +} + +// managedCertInStorageExpiresSoon returns true if cert (being a +// managed certificate) is expiring within RenewDurationBefore. +// It returns false if there was an error checking the expiration +// of the certificate as found in storage, or if the certificate +// in storage is NOT expiring soon. A certificate that is expiring +// soon in our cache but is not expiring soon in storage probably +// means that another instance renewed the certificate in the +// meantime, and it would be a good idea to simply load the cert +// into our cache rather than repeating the renewal process again. +func managedCertInStorageExpiresSoon(cert Certificate) (bool, error) { + if len(cert.configs) == 0 { + return false, fmt.Errorf("no configs for certificate") + } + storage, err := cert.configs[0].StorageFor(cert.configs[0].CAUrl) + if err != nil { + return false, err + } + siteData, err := storage.LoadSite(cert.Names[0]) + if err != nil { + return false, err + } + tlsCert, err := tls.X509KeyPair(siteData.Cert, siteData.Key) + if err != nil { + return false, err + } + leaf, err := x509.ParseCertificate(tlsCert.Certificate[0]) + if err != nil { + return false, err + } + timeLeft := leaf.NotAfter.Sub(time.Now().UTC()) + return timeLeft < RenewDurationBefore, nil +} + +// cacheCertificate adds cert to the in-memory cache. If a certificate +// with the same hash is already cached, it is NOT overwritten; instead, +// cfg is added to the existing certificate's list of configs if not +// already in the list. Then all the names on cert are used to add +// entries to cfg.Certificates (the config's name lookup map). +// Then the certificate is stored/updated in the cache. It returns +// a copy of the certificate that ends up being stored in the cache. // -// This certificate will be keyed to the names in cert.Names. Any names -// already used as a cache key will NOT be replaced by this cert; in -// other words, no overlap is allowed, and this certificate will not -// service those pre-existing names. +// It is VERY important, even for some test cases, that the Hash field +// of the cert be set properly. // // This function is safe for concurrent use. -func cacheCertificate(cert Certificate) { - if cert.Config == nil { - cert.Config = new(Config) +func (cfg *Config) cacheCertificate(cert Certificate) Certificate { + cfg.certCache.Lock() + defer cfg.certCache.Unlock() + + // if this certificate already exists in the cache, + // use it instead of overwriting it -- very important! + if existingCert, ok := cfg.certCache.cache[cert.Hash]; ok { + cert = existingCert } - certCacheMu.Lock() - if _, ok := certCache[""]; !ok { - // use as default - must be *appended* to end of list, or bad things happen! - cert.Names = append(cert.Names, "") - } - for len(certCache)+len(cert.Names) > 10000 { - // for simplicity, just remove random elements - for key := range certCache { - if key == "" { // ... but not the default cert - continue - } - delete(certCache, key) + + // attach this config to the certificate so we know which + // configs are referencing/using the certificate, but don't + // duplicate entries + var found bool + for _, c := range cert.configs { + if c == cfg { + found = true break } } - for i := 0; i < len(cert.Names); i++ { - name := cert.Names[i] - if _, ok := certCache[name]; ok { - // do not allow certificates to overlap in the names they serve; - // this ambiguity causes problems because it is confusing while - // maintaining certificates; see OCSP maintenance code and - // https://caddy.community/t/random-ocsp-response-errors-for-random-clients/2473?u=matt. - log.Printf("[NOTICE] There is already a certificate loaded for %s, "+ - "so certificate for %v will not service that name", - name, cert.Names) - cert.Names = append(cert.Names[:i], cert.Names[i+1:]...) - i-- - continue - } - certCache[name] = cert + if !found { + cert.configs = append(cert.configs, cfg) } - certCacheMu.Unlock() -} -// uncacheCertificate deletes name's certificate from the -// cache. If name is not a key in the certificate cache, -// this function does nothing. -func uncacheCertificate(name string) { - certCacheMu.Lock() - delete(certCache, name) - certCacheMu.Unlock() + // key the certificate by all its names for this config only, + // this is how we find the certificate during handshakes + // (yes, if certs overlap in the names they serve, one will + // overwrite another here, but that's just how it goes) + for _, name := range cert.Names { + cfg.Certificates[name] = cert.Hash + } + + // store the certificate + cfg.certCache.cache[cert.Hash] = cert + + return cert } diff --git a/caddytls/certificates_test.go b/caddytls/certificates_test.go index ce848d10b..817d16496 100644 --- a/caddytls/certificates_test.go +++ b/caddytls/certificates_test.go @@ -17,57 +17,71 @@ package caddytls import "testing" func TestUnexportedGetCertificate(t *testing.T) { - defer func() { certCache = make(map[string]Certificate) }() + certCache := &certificateCache{cache: make(map[string]Certificate)} + cfg := &Config{Certificates: make(map[string]string), certCache: certCache} // When cache is empty - if _, matched, defaulted := getCertificate("example.com"); matched || defaulted { + if _, matched, defaulted := cfg.getCertificate("example.com"); matched || defaulted { t.Errorf("Got a certificate when cache was empty; matched=%v, defaulted=%v", matched, defaulted) } - // When cache has one certificate in it (also is default) - defaultCert := Certificate{Names: []string{"example.com", ""}} - certCache[""] = defaultCert - certCache["example.com"] = defaultCert - if cert, matched, defaulted := getCertificate("Example.com"); !matched || defaulted || cert.Names[0] != "example.com" { + // When cache has one certificate in it + firstCert := Certificate{Names: []string{"example.com"}} + certCache.cache["0xdeadbeef"] = firstCert + cfg.Certificates["example.com"] = "0xdeadbeef" + if cert, matched, defaulted := cfg.getCertificate("Example.com"); !matched || defaulted || cert.Names[0] != "example.com" { t.Errorf("Didn't get a cert for 'Example.com' or got the wrong one: %v, matched=%v, defaulted=%v", cert, matched, defaulted) } - if cert, matched, defaulted := getCertificate(""); !matched || defaulted || cert.Names[0] != "example.com" { - t.Errorf("Didn't get a cert for '' or got the wrong one: %v, matched=%v, defaulted=%v", cert, matched, defaulted) + if cert, matched, defaulted := cfg.getCertificate("example.com"); !matched || defaulted || cert.Names[0] != "example.com" { + t.Errorf("Didn't get a cert for 'example.com' or got the wrong one: %v, matched=%v, defaulted=%v", cert, matched, defaulted) } // When retrieving wildcard certificate - certCache["*.example.com"] = Certificate{Names: []string{"*.example.com"}} - if cert, matched, defaulted := getCertificate("sub.example.com"); !matched || defaulted || cert.Names[0] != "*.example.com" { + certCache.cache["0xb01dface"] = Certificate{Names: []string{"*.example.com"}} + cfg.Certificates["*.example.com"] = "0xb01dface" + if cert, matched, defaulted := cfg.getCertificate("sub.example.com"); !matched || defaulted || cert.Names[0] != "*.example.com" { t.Errorf("Didn't get wildcard cert for 'sub.example.com' or got the wrong one: %v, matched=%v, defaulted=%v", cert, matched, defaulted) } - // When no certificate matches, the default is returned - if cert, matched, defaulted := getCertificate("nomatch"); matched || !defaulted { + // When no certificate matches and SNI is provided, return no certificate (should be TLS alert) + if cert, matched, defaulted := cfg.getCertificate("nomatch"); matched || defaulted { + t.Errorf("Expected matched=false, defaulted=false; but got matched=%v, defaulted=%v (cert: %v)", matched, defaulted, cert) + } + + // When no certificate matches and SNI is NOT provided, a random is returned + if cert, matched, defaulted := cfg.getCertificate(""); matched || !defaulted { t.Errorf("Expected matched=false, defaulted=true; but got matched=%v, defaulted=%v (cert: %v)", matched, defaulted, cert) - } else if cert.Names[0] != "example.com" { - t.Errorf("Expected default cert, got: %v", cert) } } func TestCacheCertificate(t *testing.T) { - defer func() { certCache = make(map[string]Certificate) }() + certCache := &certificateCache{cache: make(map[string]Certificate)} + cfg := &Config{Certificates: make(map[string]string), certCache: certCache} - cacheCertificate(Certificate{Names: []string{"example.com", "sub.example.com"}}) - if _, ok := certCache["example.com"]; !ok { - t.Error("Expected first cert to be cached by key 'example.com', but it wasn't") + cfg.cacheCertificate(Certificate{Names: []string{"example.com", "sub.example.com"}, Hash: "foobar"}) + if len(certCache.cache) != 1 { + t.Errorf("Expected length of certificate cache to be 1") } - if _, ok := certCache["sub.example.com"]; !ok { - t.Error("Expected first cert to be cached by key 'sub.example.com', but it wasn't") + if _, ok := certCache.cache["foobar"]; !ok { + t.Error("Expected first cert to be cached by key 'foobar', but it wasn't") } - if cert, ok := certCache[""]; !ok || cert.Names[2] != "" { - t.Error("Expected first cert to be cached additionally as the default certificate with empty name added, but it wasn't") + if _, ok := cfg.Certificates["example.com"]; !ok { + t.Error("Expected first cert to be keyed by 'example.com', but it wasn't") + } + if _, ok := cfg.Certificates["sub.example.com"]; !ok { + t.Error("Expected first cert to be keyed by 'sub.example.com', but it wasn't") } - cacheCertificate(Certificate{Names: []string{"example2.com"}}) - if _, ok := certCache["example2.com"]; !ok { - t.Error("Expected second cert to be cached by key 'exmaple2.com', but it wasn't") + // different config, but using same cache; and has cert with overlapping name, + // but different hash + cfg2 := &Config{Certificates: make(map[string]string), certCache: certCache} + cfg2.cacheCertificate(Certificate{Names: []string{"example.com"}, Hash: "barbaz"}) + if _, ok := certCache.cache["barbaz"]; !ok { + t.Error("Expected second cert to be cached by key 'barbaz.com', but it wasn't") } - if cert, ok := certCache[""]; ok && cert.Names[0] == "example2.com" { - t.Error("Expected second cert to NOT be cached as default, but it was") + if hash, ok := cfg2.Certificates["example.com"]; !ok { + t.Error("Expected second cert to be keyed by 'example.com', but it wasn't") + } else if hash != "barbaz" { + t.Errorf("Expected second cert to map to 'barbaz' but it was %s instead", hash) } } diff --git a/caddytls/client.go b/caddytls/client.go index 26ef6a3c5..4775a2d18 100644 --- a/caddytls/client.go +++ b/caddytls/client.go @@ -160,7 +160,7 @@ var newACMEClient = func(config *Config, allowPrompts bool) (*ACMEClient, error) // See if TLS challenge needs to be handled by our own facilities if caddy.HasListenerWithAddress(net.JoinHostPort(config.ListenHost, useTLSSNIPort)) { - c.acmeClient.SetChallengeProvider(acme.TLSSNI01, tlsSniSolver{}) + c.acmeClient.SetChallengeProvider(acme.TLSSNI01, tlsSNISolver{certCache: config.certCache}) } // Disable any challenges that should not be used diff --git a/caddytls/config.go b/caddytls/config.go index d3468e348..0b64f3575 100644 --- a/caddytls/config.go +++ b/caddytls/config.go @@ -134,7 +134,12 @@ type Config struct { // Protocol Negotiation (ALPN). ALPN []string - tlsConfig *tls.Config // the final tls.Config created with buildStandardTLSConfig() + // The map of hostname to certificate hash. This is used to complete + // handshakes and serve the right certificate given the SNI. + Certificates map[string]string + + certCache *certificateCache // pointer to the Instance's certificate store + tlsConfig *tls.Config // the final tls.Config created with buildStandardTLSConfig() } // OnDemandState contains some state relevant for providing @@ -155,6 +160,25 @@ type OnDemandState struct { AskURL *url.URL } +// NewConfig returns a new Config with a pointer to the instance's +// certificate cache. You will usually need to set Other fields on +// the returned Config for successful practical use. +func NewConfig(inst *caddy.Instance) *Config { + inst.StorageMu.RLock() + certCache, ok := inst.Storage[CertCacheInstStorageKey].(*certificateCache) + inst.StorageMu.RUnlock() + if !ok || certCache == nil { + certCache = &certificateCache{cache: make(map[string]Certificate)} + inst.StorageMu.Lock() + inst.Storage[CertCacheInstStorageKey] = certCache + inst.StorageMu.Unlock() + } + cfg := new(Config) + cfg.Certificates = make(map[string]string) + cfg.certCache = certCache + return cfg +} + // ObtainCert obtains a certificate for name using c, as long // as a certificate does not already exist in storage for that // name. The name must qualify and c must be flagged as Managed. @@ -330,7 +354,9 @@ func (c *Config) buildStandardTLSConfig() error { // MakeTLSConfig makes a tls.Config from configs. The returned // tls.Config is programmed to load the matching caddytls.Config -// based on the hostname in SNI, but that's all. +// based on the hostname in SNI, but that's all. This is used +// to create a single TLS configuration for a listener (a group +// of sites). func MakeTLSConfig(configs []*Config) (*tls.Config, error) { if len(configs) == 0 { return nil, nil @@ -358,15 +384,28 @@ func MakeTLSConfig(configs []*Config) (*tls.Config, error) { configs[i-1].Hostname, lastConfProto, cfg.Hostname, thisConfProto) } - // convert each caddytls.Config into a tls.Config + // convert this caddytls.Config into a tls.Config if err := cfg.buildStandardTLSConfig(); err != nil { return nil, err } - // Key this config by its hostname (overwriting - // configs with the same hostname pattern); during - // TLS handshakes, configs are loaded based on - // the hostname pattern, according to client's SNI. + // if an existing config with this hostname was already + // configured, then they must be identical (or at least + // compatible), otherwise that is a configuration error + if otherConfig, ok := configMap[cfg.Hostname]; ok { + if err := assertConfigsCompatible(cfg, otherConfig); err != nil { + return nil, fmt.Errorf("incompabile TLS configurations for the same SNI "+ + "name (%s) on the same listener: %v", + cfg.Hostname, err) + } + } + + // key this config by its hostname (overwrites + // configs with the same hostname pattern; should + // be OK since we already asserted they are roughly + // the same); during TLS handshakes, configs are + // loaded based on the hostname pattern, according + // to client's SNI configMap[cfg.Hostname] = cfg } @@ -383,6 +422,63 @@ func MakeTLSConfig(configs []*Config) (*tls.Config, error) { }, nil } +// assertConfigsCompatible returns an error if the two Configs +// do not have the same (or roughly compatible) configurations. +// If one of the tlsConfig pointers on either Config is nil, +// an error will be returned. If both are nil, no error. +func assertConfigsCompatible(cfg1, cfg2 *Config) error { + c1, c2 := cfg1.tlsConfig, cfg2.tlsConfig + + if (c1 == nil && c2 != nil) || (c1 != nil && c2 == nil) { + return fmt.Errorf("one config is not made") + } + if c1 == nil && c2 == nil { + return nil + } + + if len(c1.CipherSuites) != len(c2.CipherSuites) { + return fmt.Errorf("different number of allowed cipher suites") + } + for i, ciph := range c1.CipherSuites { + if c2.CipherSuites[i] != ciph { + return fmt.Errorf("different cipher suites or different order") + } + } + + if len(c1.CurvePreferences) != len(c2.CurvePreferences) { + return fmt.Errorf("different number of allowed cipher suites") + } + for i, curve := range c1.CurvePreferences { + if c2.CurvePreferences[i] != curve { + return fmt.Errorf("different curve preferences or different order") + } + } + + if len(c1.NextProtos) != len(c2.NextProtos) { + return fmt.Errorf("different number of ALPN (NextProtos) values") + } + for i, proto := range c1.NextProtos { + if c2.NextProtos[i] != proto { + return fmt.Errorf("different ALPN (NextProtos) values or different order") + } + } + + if c1.PreferServerCipherSuites != c2.PreferServerCipherSuites { + return fmt.Errorf("one prefers server cipher suites, the other does not") + } + if c1.MinVersion != c2.MinVersion { + return fmt.Errorf("minimum TLS version mismatch") + } + if c1.MaxVersion != c2.MaxVersion { + return fmt.Errorf("maximum TLS version mismatch") + } + if c1.ClientAuth != c2.ClientAuth { + return fmt.Errorf("client authentication policy mismatch") + } + + return nil +} + // ConfigGetter gets a Config keyed by key. type ConfigGetter func(c *caddy.Controller) *Config @@ -522,7 +618,7 @@ var supportedCurvesMap = map[string]tls.CurveID{ "P521": tls.CurveP521, } -// List of all the curves we want to use by default +// List of all the curves we want to use by default. // // This list should only include curves which are fast by design (e.g. X25519) // and those for which an optimized assembly implementation exists (e.g. P256). @@ -548,4 +644,8 @@ const ( // be capable of proxying or forwarding the request to this // alternate port. DefaultHTTPAlternatePort = "5033" + + // CertCacheInstStorageKey is the name of the key for + // accessing the certificate storage on the *caddy.Instance. + CertCacheInstStorageKey = "tls_cert_cache" ) diff --git a/caddytls/crypto.go b/caddytls/crypto.go index 3036834c4..b2107f152 100644 --- a/caddytls/crypto.go +++ b/caddytls/crypto.go @@ -237,15 +237,17 @@ func makeSelfSignedCert(config *Config) error { return fmt.Errorf("could not create certificate: %v", err) } - cacheCertificate(Certificate{ + chain := [][]byte{derBytes} + + config.cacheCertificate(Certificate{ Certificate: tls.Certificate{ - Certificate: [][]byte{derBytes}, + Certificate: chain, PrivateKey: privKey, Leaf: cert, }, Names: cert.DNSNames, NotAfter: cert.NotAfter, - Config: config, + Hash: hashCertificateChain(chain), }) return nil diff --git a/caddytls/handshake.go b/caddytls/handshake.go index c50e8ab63..2f3f34af3 100644 --- a/caddytls/handshake.go +++ b/caddytls/handshake.go @@ -59,15 +59,7 @@ func (cg configGroup) getConfig(name string) *Config { } } - // as a fallback, try a config that serves all names - if config, ok := cg[""]; ok { - return config - } - - // as a last resort, use a random config - // (even if the config isn't for that hostname, - // it should help us serve clients without SNI - // or at least defer TLS alerts to the cert) + // no matches, so just serve up a random config for _, config := range cg { return config } @@ -102,6 +94,86 @@ func (cfg *Config) GetCertificate(clientHello *tls.ClientHelloInfo) (*tls.Certif return &cert.Certificate, err } +// getCertificate gets a certificate that matches name (a server name) +// from the in-memory cache, according to the lookup table associated with +// cfg. The lookup then points to a certificate in the Instance certificate +// cache. +// +// If there is no exact match for name, it will be checked against names of +// the form '*.example.com' (wildcard certificates) according to RFC 6125. +// If a match is found, matched will be true. If no matches are found, matched +// will be false and a "default" certificate will be returned with defaulted +// set to true. If defaulted is false, then no certificates were available. +// +// The logic in this function is adapted from the Go standard library, +// which is by the Go Authors. +// +// This function is safe for concurrent use. +func (cfg *Config) getCertificate(name string) (cert Certificate, matched, defaulted bool) { + var certKey string + var ok bool + + // Not going to trim trailing dots here since RFC 3546 says, + // "The hostname is represented ... without a trailing dot." + // Just normalize to lowercase. + name = strings.ToLower(name) + + cfg.certCache.RLock() + defer cfg.certCache.RUnlock() + + // exact match? great, let's use it + if certKey, ok = cfg.Certificates[name]; ok { + cert = cfg.certCache.cache[certKey] + matched = true + return + } + + // try replacing labels in the name with wildcards until we get a match + labels := strings.Split(name, ".") + for i := range labels { + labels[i] = "*" + candidate := strings.Join(labels, ".") + if certKey, ok = cfg.Certificates[candidate]; ok { + cert = cfg.certCache.cache[certKey] + matched = true + return + } + } + + // check the certCache directly to see if the SNI name is + // already the key of the certificate it wants! this is vital + // for supporting the TLS-SNI challenge, since the tlsSNISolver + // just puts the temporary certificate in the instance cache, + // with no regard for configs; this also means that the SNI + // can contain the hash of a specific cert (chain) it wants + // and we will still be able to serve it up + // (this behavior, by the way, could be controversial as to + // whether it complies with RFC 6066 about SNI, but I think + // it does soooo...) + // NOTE/TODO: TLS-SNI challenge is changing, as of Jan. 2018 + // but what will be different, if it ever returns, is unclear + if directCert, ok := cfg.certCache.cache[name]; ok { + cert = directCert + matched = true + return + } + + // if nothing matches and SNI was not provided, use a random + // certificate; at least there's a chance this older client + // can connect, and in the future we won't need this provision + // (if SNI is present, it's probably best to just raise a TLS + // alert by not serving a certificate) + if name == "" { + for _, certKey := range cfg.Certificates { + defaulted = true + cert = cfg.certCache.cache[certKey] + return + } + } + + return +} + // getCertDuringHandshake will get a certificate for name. It first tries // the in-memory cache. If no certificate for name is in the cache, the // config most closely corresponding to name will be loaded. If that config @@ -115,7 +187,7 @@ func (cfg *Config) GetCertificate(clientHello *tls.ClientHelloInfo) (*tls.Certif // This function is safe for concurrent use. func (cfg *Config) getCertDuringHandshake(name string, loadIfNecessary, obtainIfNecessary bool) (Certificate, error) { // First check our in-memory cache to see if we've already loaded it - cert, matched, defaulted := getCertificate(name) + cert, matched, defaulted := cfg.getCertificate(name) if matched { return cert, nil } @@ -258,7 +330,7 @@ func (cfg *Config) obtainOnDemandCertificate(name string) (Certificate, error) { obtainCertWaitChans[name] = wait obtainCertWaitChansMu.Unlock() - // do the obtain + // obtain the certificate log.Printf("[INFO] Obtaining new certificate for %s", name) err := cfg.ObtainCert(name, false) @@ -317,9 +389,9 @@ func (cfg *Config) handshakeMaintenance(name string, cert Certificate) (Certific // quite common considering not all certs have issuer URLs that support it. log.Printf("[ERROR] Getting OCSP for %s: %v", name, err) } - certCacheMu.Lock() - certCache[name] = cert - certCacheMu.Unlock() + cfg.certCache.Lock() + cfg.certCache.cache[cert.Hash] = cert + cfg.certCache.Unlock() } } @@ -348,29 +420,22 @@ func (cfg *Config) renewDynamicCertificate(name string, currentCert Certificate) obtainCertWaitChans[name] = wait obtainCertWaitChansMu.Unlock() - // do the renew and reload the certificate + // renew and reload the certificate log.Printf("[INFO] Renewing certificate for %s", name) err := cfg.RenewCert(name, false) if err == nil { - // immediately flush this certificate from the cache so - // the name doesn't overlap when we try to replace it, - // which would fail, because overlapping existing cert - // names isn't allowed - certCacheMu.Lock() - for _, certName := range currentCert.Names { - delete(certCache, certName) - } - certCacheMu.Unlock() - // even though the recursive nature of the dynamic cert loading // would just call this function anyway, we do it here to - // make the replacement as atomic as possible. (TODO: similar - // to the note in maintain.go, it'd be nice if the clearing of - // the cache entries above and this load function were truly - // atomic...) - _, err := currentCert.Config.CacheManagedCertificate(name) + // make the replacement as atomic as possible. + newCert, err := currentCert.configs[0].CacheManagedCertificate(name) if err != nil { - log.Printf("[ERROR] loading renewed certificate: %v", err) + log.Printf("[ERROR] loading renewed certificate for %s: %v", name, err) + } else { + // replace the old certificate with the new one + err = cfg.certCache.replaceCertificate(currentCert, newCert) + if err != nil { + log.Printf("[ERROR] Replacing certificate for %s: %v", name, err) + } } } diff --git a/caddytls/handshake_test.go b/caddytls/handshake_test.go index 63a6c1dba..f0b8f7be2 100644 --- a/caddytls/handshake_test.go +++ b/caddytls/handshake_test.go @@ -21,9 +21,8 @@ import ( ) func TestGetCertificate(t *testing.T) { - defer func() { certCache = make(map[string]Certificate) }() - - cfg := new(Config) + certCache := &certificateCache{cache: make(map[string]Certificate)} + cfg := &Config{Certificates: make(map[string]string), certCache: certCache} hello := &tls.ClientHelloInfo{ServerName: "example.com"} helloSub := &tls.ClientHelloInfo{ServerName: "sub.example.com"} @@ -38,33 +37,40 @@ func TestGetCertificate(t *testing.T) { t.Errorf("GetCertificate should return error when cache is empty even if server name is blank, got: %v", cert) } - // When cache has one certificate in it (also is default) - defaultCert := Certificate{Names: []string{"example.com", ""}, Certificate: tls.Certificate{Leaf: &x509.Certificate{DNSNames: []string{"example.com"}}}} - certCache[""] = defaultCert - certCache["example.com"] = defaultCert + // When cache has one certificate in it + firstCert := Certificate{Names: []string{"example.com"}, Certificate: tls.Certificate{Leaf: &x509.Certificate{DNSNames: []string{"example.com"}}}} + cfg.cacheCertificate(firstCert) if cert, err := cfg.GetCertificate(hello); err != nil { t.Errorf("Got an error but shouldn't have, when cert exists in cache: %v", err) } else if cert.Leaf.DNSNames[0] != "example.com" { t.Errorf("Got wrong certificate with exact match; expected 'example.com', got: %v", cert) } - if cert, err := cfg.GetCertificate(helloNoSNI); err != nil { + if _, err := cfg.GetCertificate(helloNoSNI); err != nil { t.Errorf("Got an error with no SNI but shouldn't have, when cert exists in cache: %v", err) - } else if cert.Leaf.DNSNames[0] != "example.com" { - t.Errorf("Got wrong certificate for no SNI; expected 'example.com' as default, got: %v", cert) } // When retrieving wildcard certificate - certCache["*.example.com"] = Certificate{Names: []string{"*.example.com"}, Certificate: tls.Certificate{Leaf: &x509.Certificate{DNSNames: []string{"*.example.com"}}}} + wildcardCert := Certificate{ + Names: []string{"*.example.com"}, + Certificate: tls.Certificate{Leaf: &x509.Certificate{DNSNames: []string{"*.example.com"}}}, + Hash: "(don't overwrite the first one)", + } + cfg.cacheCertificate(wildcardCert) if cert, err := cfg.GetCertificate(helloSub); err != nil { t.Errorf("Didn't get wildcard cert, got: cert=%v, err=%v ", cert, err) } else if cert.Leaf.DNSNames[0] != "*.example.com" { t.Errorf("Got wrong certificate, expected wildcard: %v", cert) } - // When no certificate matches, the default is returned - if cert, err := cfg.GetCertificate(helloNoMatch); err != nil { - t.Errorf("Expected default certificate with no error when no matches, got err: %v", err) - } else if cert.Leaf.DNSNames[0] != "example.com" { - t.Errorf("Expected default cert with no matches, got: %v", cert) + // When cache is NOT empty but there's no SNI + if cert, err := cfg.GetCertificate(helloNoSNI); err != nil { + t.Errorf("Expected random certificate with no error when no SNI, got err: %v", err) + } else if cert == nil || len(cert.Leaf.DNSNames) == 0 { + t.Errorf("Expected random cert with no matches, got: %v", cert) + } + + // When no certificate matches, raise an alert + if _, err := cfg.GetCertificate(helloNoMatch); err == nil { + t.Errorf("Expected an error when no certificate matched the SNI, got: %v", err) } } diff --git a/caddytls/maintain.go b/caddytls/maintain.go index 9e42fc87c..7ce6c5e26 100644 --- a/caddytls/maintain.go +++ b/caddytls/maintain.go @@ -87,119 +87,163 @@ func maintainAssets(stopChan chan struct{}) { // RenewManagedCertificates renews managed certificates, // including ones loaded on-demand. func RenewManagedCertificates(allowPrompts bool) (err error) { - var renewQueue, deleteQueue []Certificate - visitedNames := make(map[string]struct{}) - - certCacheMu.RLock() - for name, cert := range certCache { - if !cert.Config.Managed || cert.Config.SelfSigned { + for _, inst := range caddy.Instances() { + inst.StorageMu.RLock() + certCache, ok := inst.Storage[CertCacheInstStorageKey].(*certificateCache) + inst.StorageMu.RUnlock() + if !ok || certCache == nil { continue } - // the list of names on this cert should never be empty... - if cert.Names == nil || len(cert.Names) == 0 { - log.Printf("[WARNING] Certificate keyed by '%s' has no names: %v - removing from cache", name, cert.Names) - deleteQueue = append(deleteQueue, cert) - continue - } + // we use the queues for a very important reason: to do any and all + // operations that could require an exclusive write lock outside + // of the read lock! otherwise we get a deadlock, yikes. in other + // words, our first iteration through the certificate cache does NOT + // perform any operations--only queues them--so that more fine-grained + // write locks may be obtained during the actual operations. + var renewQueue, reloadQueue, deleteQueue []Certificate - // skip names whose certificate we've already renewed - if _, ok := visitedNames[name]; ok { - continue - } - for _, name := range cert.Names { - visitedNames[name] = struct{}{} - } - - // if its time is up or ending soon, we need to try to renew it - timeLeft := cert.NotAfter.Sub(time.Now().UTC()) - if timeLeft < RenewDurationBefore { - log.Printf("[INFO] Certificate for %v expires in %v; attempting renewal", cert.Names, timeLeft) - - if cert.Config == nil { - log.Printf("[ERROR] %s: No associated TLS config; unable to renew", name) + certCache.RLock() + for certKey, cert := range certCache.cache { + if len(cert.configs) == 0 { + // this is bad if this happens, probably a programmer error (oops) + log.Printf("[ERROR] No associated TLS config for certificate with names %v; unable to manage", cert.Names) + continue + } + if !cert.configs[0].Managed || cert.configs[0].SelfSigned { continue } - // queue for renewal when we aren't in a read lock anymore - // (the TLS-SNI challenge will need a write lock in order to - // present the certificate, so we renew outside of read lock) - renewQueue = append(renewQueue, cert) - } - } - certCacheMu.RUnlock() - - // Perform renewals that are queued - for _, cert := range renewQueue { - // Get the name which we should use to renew this certificate; - // we only support managing certificates with one name per cert, - // so this should be easy. We can't rely on cert.Config.Hostname - // because it may be a wildcard value from the Caddyfile (e.g. - // *.something.com) which, as of Jan. 2017, is not supported by ACME. - var renewName string - for _, name := range cert.Names { - if name != "" { - renewName = name - break - } - } - - // perform renewal - err := cert.Config.RenewCert(renewName, allowPrompts) - if err != nil { - if allowPrompts { - // Certificate renewal failed and the operator is present. See a discussion - // about this in issue 642. For a while, we only stopped if the certificate - // was expired, but in reality, there is no difference between reporting - // it now versus later, except that there's somebody present to deal with - // it right now. - timeLeft := cert.NotAfter.Sub(time.Now().UTC()) - if timeLeft < RenewDurationBeforeAtStartup { - // See issue 1680. Only fail at startup if the certificate is dangerously - // close to expiration. - return err - } - } - log.Printf("[ERROR] %v", err) - if cert.Config.OnDemand { - // loaded dynamically, removed dynamically + // the list of names on this cert should never be empty... programmer error? + if cert.Names == nil || len(cert.Names) == 0 { + log.Printf("[WARNING] Certificate keyed by '%s' has no names: %v - removing from cache", certKey, cert.Names) deleteQueue = append(deleteQueue, cert) + continue } - } else { + + // if time is up or expires soon, we need to try to renew it + timeLeft := cert.NotAfter.Sub(time.Now().UTC()) + if timeLeft < RenewDurationBefore { + // see if the certificate in storage has already been renewed, possibly by another + // instance of Caddy that didn't coordinate with this one; if so, just load it (this + // might happen if another instance already renewed it - kinda sloppy but checking disk + // first is a simple way to possibly drastically reduce rate limit problems) + storedCertExpiring, err := managedCertInStorageExpiresSoon(cert) + if err != nil { + // hmm, weird, but not a big deal, maybe it was deleted or something + log.Printf("[NOTICE] Error while checking if certificate for %v in storage is also expiring soon: %v", + cert.Names, err) + } else if !storedCertExpiring { + // if the certificate is NOT expiring soon and there was no error, then we + // are good to just reload the certificate from storage instead of repeating + // a likely-unnecessary renewal procedure + reloadQueue = append(reloadQueue, cert) + continue + } + + // the certificate in storage has not been renewed yet, so we will do it + // NOTE 1: This is not correct 100% of the time, if multiple Caddy instances + // happen to run their maintenance checks at approximately the same times; + // both might start renewal at about the same time and do two renewals and one + // will overwrite the other. Hence TLS storage plugins. This is sort of a TODO. + // NOTE 2: It is super-important to note that the TLS-SNI challenge requires + // a write lock on the cache in order to complete its challenge, so it is extra + // vital that this renew operation does not happen inside our read lock! + renewQueue = append(renewQueue, cert) + } + } + certCache.RUnlock() + + // Reload certificates that merely need to be updated in memory + for _, oldCert := range reloadQueue { + timeLeft := oldCert.NotAfter.Sub(time.Now().UTC()) + log.Printf("[INFO] Certificate for %v expires in %v, but is already renewed in storage; reloading stored certificate", + oldCert.Names, timeLeft) + + // get the certificate from storage and cache it + newCert, err := oldCert.configs[0].CacheManagedCertificate(oldCert.Names[0]) + if err != nil { + log.Printf("[ERROR] Unable to reload certificate for %v into cache: %v", oldCert.Names, err) + continue + } + + // and replace the old certificate with the new one + err = certCache.replaceCertificate(oldCert, newCert) + if err != nil { + log.Printf("[ERROR] Replacing certificate: %v", err) + } + } + + // Renewal queue + for _, oldCert := range renewQueue { + timeLeft := oldCert.NotAfter.Sub(time.Now().UTC()) + log.Printf("[INFO] Certificate for %v expires in %v; attempting renewal", oldCert.Names, timeLeft) + + // Get the name which we should use to renew this certificate; + // we only support managing certificates with one name per cert, + // so this should be easy. We can't rely on cert.Config.Hostname + // because it may be a wildcard value from the Caddyfile (e.g. + // *.something.com) which, as of Jan. 2017, is not supported by ACME. + // TODO: ^ ^ ^ (wildcards) + renewName := oldCert.Names[0] + + // perform renewal + err := oldCert.configs[0].RenewCert(renewName, allowPrompts) + if err != nil { + if allowPrompts { + // Certificate renewal failed and the operator is present. See a discussion + // about this in issue 642. For a while, we only stopped if the certificate + // was expired, but in reality, there is no difference between reporting + // it now versus later, except that there's somebody present to deal with + // it right now. Follow-up: See issue 1680. Only fail in this case if the + // certificate is dangerously close to expiration. + timeLeft := oldCert.NotAfter.Sub(time.Now().UTC()) + if timeLeft < RenewDurationBeforeAtStartup { + return err + } + } + log.Printf("[ERROR] %v", err) + if oldCert.configs[0].OnDemand { + // loaded dynamically, remove dynamically + deleteQueue = append(deleteQueue, oldCert) + } + continue + } + // successful renewal, so update in-memory cache by loading // renewed certificate so it will be used with handshakes - // we must delete all the names this cert services from the cache - // so that we can replace the certificate, because replacing names - // already in the cache is not allowed, to avoid later conflicts - // with renewals. - // TODO: It would be nice if this whole operation were idempotent; - // i.e. a thread-safe function to replace a certificate in the cache, - // see also handshake.go for on-demand maintenance. - certCacheMu.Lock() - for _, name := range cert.Names { - delete(certCache, name) - } - certCacheMu.Unlock() - // put the certificate in the cache - _, err := cert.Config.CacheManagedCertificate(cert.Names[0]) + newCert, err := oldCert.configs[0].CacheManagedCertificate(renewName) if err != nil { if allowPrompts { return err // operator is present, so report error immediately } log.Printf("[ERROR] %v", err) } - } - } - // Apply queued deletion changes to the cache - for _, cert := range deleteQueue { - certCacheMu.Lock() - for _, name := range cert.Names { - delete(certCache, name) + // replace the old certificate with the new one + err = certCache.replaceCertificate(oldCert, newCert) + if err != nil { + log.Printf("[ERROR] Replacing certificate: %v", err) + } + } + + // Deletion queue + for _, cert := range deleteQueue { + certCache.Lock() + // remove any pointers to this certificate from Configs + for _, cfg := range cert.configs { + for name, certKey := range cfg.Certificates { + if certKey == cert.Hash { + delete(cfg.Certificates, name) + } + } + } + // then delete the certificate from the cache + delete(certCache.cache, cert.Hash) + certCache.Unlock() } - certCacheMu.Unlock() } return nil @@ -212,91 +256,75 @@ func RenewManagedCertificates(allowPrompts bool) (err error) { // Ryan Sleevi's recommendations for good OCSP support: // https://gist.github.com/sleevi/5efe9ef98961ecfb4da8 func UpdateOCSPStaples() { - // Create a temporary place to store updates - // until we release the potentially long-lived - // read lock and use a short-lived write lock. - type ocspUpdate struct { - rawBytes []byte - parsed *ocsp.Response - } - updated := make(map[string]ocspUpdate) - - // A single SAN certificate maps to multiple names, so we use this - // set to make sure we don't waste cycles checking OCSP for the same - // certificate multiple times. - visited := make(map[string]struct{}) - - certCacheMu.RLock() - for name, cert := range certCache { - // skip this certificate if we've already visited it, - // and if not, mark all the names as visited - if _, ok := visited[name]; ok { - continue - } - for _, n := range cert.Names { - visited[n] = struct{}{} - } - - // no point in updating OCSP for expired certificates - if time.Now().After(cert.NotAfter) { + for _, inst := range caddy.Instances() { + inst.StorageMu.RLock() + certCache, ok := inst.Storage[CertCacheInstStorageKey].(*certificateCache) + inst.StorageMu.RUnlock() + if !ok || certCache == nil { continue } - var lastNextUpdate time.Time - if cert.OCSP != nil { - lastNextUpdate = cert.OCSP.NextUpdate - if freshOCSP(cert.OCSP) { - // no need to update staple if ours is still fresh + // Create a temporary place to store updates + // until we release the potentially long-lived + // read lock and use a short-lived write lock + // on the certificate cache. + type ocspUpdate struct { + rawBytes []byte + parsed *ocsp.Response + } + updated := make(map[string]ocspUpdate) + + certCache.RLock() + for certHash, cert := range certCache.cache { + // no point in updating OCSP for expired certificates + if time.Now().After(cert.NotAfter) { continue } - } - err := stapleOCSP(&cert, nil) - if err != nil { + var lastNextUpdate time.Time if cert.OCSP != nil { - // if there was no staple before, that's fine; otherwise we should log the error - log.Printf("[ERROR] Checking OCSP: %v", err) + lastNextUpdate = cert.OCSP.NextUpdate + if freshOCSP(cert.OCSP) { + continue // no need to update staple if ours is still fresh + } } - continue - } - // By this point, we've obtained the latest OCSP response. - // If there was no staple before, or if the response is updated, make - // sure we apply the update to all names on the certificate. - if cert.OCSP != nil && (lastNextUpdate.IsZero() || lastNextUpdate != cert.OCSP.NextUpdate) { - log.Printf("[INFO] Advancing OCSP staple for %v from %s to %s", - cert.Names, lastNextUpdate, cert.OCSP.NextUpdate) - for _, n := range cert.Names { - // BUG: If this certificate has names on it that appear on another - // certificate in the cache, AND the other certificate is keyed by - // that name in the cache, then this method of 'queueing' the staple - // update will cause this certificate's new OCSP to be stapled to - // a different certificate! See: - // https://caddy.community/t/random-ocsp-response-errors-for-random-clients/2473?u=matt - // This problem should be avoided if names on certificates in the - // cache don't overlap with regards to the cache keys. - // (This is isn't a bug anymore, since we're careful when we add - // certificates to the cache by skipping keying when key already exists.) - updated[n] = ocspUpdate{rawBytes: cert.Certificate.OCSPStaple, parsed: cert.OCSP} + err := stapleOCSP(&cert, nil) + if err != nil { + if cert.OCSP != nil { + // if there was no staple before, that's fine; otherwise we should log the error + log.Printf("[ERROR] Checking OCSP: %v", err) + } + continue + } + + // By this point, we've obtained the latest OCSP response. + // If there was no staple before, or if the response is updated, make + // sure we apply the update to all names on the certificate. + if cert.OCSP != nil && (lastNextUpdate.IsZero() || lastNextUpdate != cert.OCSP.NextUpdate) { + log.Printf("[INFO] Advancing OCSP staple for %v from %s to %s", + cert.Names, lastNextUpdate, cert.OCSP.NextUpdate) + updated[certHash] = ocspUpdate{rawBytes: cert.Certificate.OCSPStaple, parsed: cert.OCSP} } } - } - certCacheMu.RUnlock() + certCache.RUnlock() - // This write lock should be brief since we have all the info we need now. - certCacheMu.Lock() - for name, update := range updated { - cert := certCache[name] - cert.OCSP = update.parsed - cert.Certificate.OCSPStaple = update.rawBytes - certCache[name] = cert + // These write locks should be brief since we have all the info we need now. + for certKey, update := range updated { + certCache.Lock() + cert := certCache.cache[certKey] + cert.OCSP = update.parsed + cert.Certificate.OCSPStaple = update.rawBytes + certCache.cache[certKey] = cert + certCache.Unlock() + } } - certCacheMu.Unlock() } // DeleteOldStapleFiles deletes cached OCSP staples that have expired. // TODO: Should we do this for certificates too? func DeleteOldStapleFiles() { + // TODO: Upgrade caddytls.Storage to support OCSP operations too files, err := ioutil.ReadDir(ocspFolder) if err != nil { // maybe just hasn't been created yet; no big deal diff --git a/caddytls/setup.go b/caddytls/setup.go index cbc2baca1..63c2a9e6d 100644 --- a/caddytls/setup.go +++ b/caddytls/setup.go @@ -38,6 +38,7 @@ func init() { // are specified by the user in the config file. All the automatic HTTPS // stuff comes later outside of this function. func setupTLS(c *caddy.Controller) error { + // obtain the configGetter, which loads the config we're, uh, configuring configGetter, ok := configGetters[c.ServerType()] if !ok { return fmt.Errorf("no caddytls.ConfigGetter for %s server type; must call RegisterConfigGetter", c.ServerType()) @@ -47,6 +48,14 @@ func setupTLS(c *caddy.Controller) error { return fmt.Errorf("no caddytls.Config to set up for %s", c.Key) } + // the certificate cache is tied to the current caddy.Instance; get a pointer to it + certCache, ok := c.Get(CertCacheInstStorageKey).(*certificateCache) + if !ok || certCache == nil { + certCache = &certificateCache{cache: make(map[string]Certificate)} + c.Set(CertCacheInstStorageKey, certCache) + } + config.certCache = certCache + config.Enabled = true for c.Next() { @@ -237,7 +246,7 @@ func setupTLS(c *caddy.Controller) error { // load a single certificate and key, if specified if certificateFile != "" && keyFile != "" { - err := cacheUnmanagedCertificatePEMFile(certificateFile, keyFile) + err := config.cacheUnmanagedCertificatePEMFile(certificateFile, keyFile) if err != nil { return c.Errf("Unable to load certificate and key files for '%s': %v", c.Key, err) } @@ -246,7 +255,7 @@ func setupTLS(c *caddy.Controller) error { // load a directory of certificates, if specified if loadDir != "" { - err := loadCertsInDir(c, loadDir) + err := loadCertsInDir(config, c, loadDir) if err != nil { return err } @@ -273,7 +282,7 @@ func setupTLS(c *caddy.Controller) error { // https://cbonte.github.io/haproxy-dconv/configuration-1.5.html#5.1-crt // // This function may write to the log as it walks the directory tree. -func loadCertsInDir(c *caddy.Controller, dir string) error { +func loadCertsInDir(cfg *Config, c *caddy.Controller, dir string) error { return filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { if err != nil { log.Printf("[WARNING] Unable to traverse into %s; skipping", path) @@ -336,7 +345,7 @@ func loadCertsInDir(c *caddy.Controller, dir string) error { return c.Errf("%s: no private key block found", path) } - err = cacheUnmanagedCertificatePEMBytes(certPEMBytes, keyPEMBytes) + err = cfg.cacheUnmanagedCertificatePEMBytes(certPEMBytes, keyPEMBytes) if err != nil { return c.Errf("%s: failed to load cert and key for '%s': %v", path, c.Key, err) } diff --git a/caddytls/setup_test.go b/caddytls/setup_test.go index ee8a709bd..b93b1fc5f 100644 --- a/caddytls/setup_test.go +++ b/caddytls/setup_test.go @@ -46,9 +46,12 @@ func TestMain(m *testing.M) { } func TestSetupParseBasic(t *testing.T) { - cfg := new(Config) + certCache := &certificateCache{cache: make(map[string]Certificate)} + cfg := &Config{Certificates: make(map[string]string), certCache: certCache} + RegisterConfigGetter("", func(c *caddy.Controller) *Config { return cfg }) c := caddy.NewTestController("", `tls `+certFile+` `+keyFile+``) + c.Set(CertCacheInstStorageKey, certCache) err := setupTLS(c) if err != nil { @@ -124,9 +127,12 @@ func TestSetupParseWithOptionalParams(t *testing.T) { must_staple alpn http/1.1 }` - cfg := new(Config) + certCache := &certificateCache{cache: make(map[string]Certificate)} + cfg := &Config{Certificates: make(map[string]string), certCache: certCache} + RegisterConfigGetter("", func(c *caddy.Controller) *Config { return cfg }) c := caddy.NewTestController("", params) + c.Set(CertCacheInstStorageKey, certCache) err := setupTLS(c) if err != nil { @@ -158,9 +164,11 @@ func TestSetupDefaultWithOptionalParams(t *testing.T) { params := `tls { ciphers RSA-3DES-EDE-CBC-SHA }` - cfg := new(Config) + certCache := &certificateCache{cache: make(map[string]Certificate)} + cfg := &Config{Certificates: make(map[string]string), certCache: certCache} RegisterConfigGetter("", func(c *caddy.Controller) *Config { return cfg }) c := caddy.NewTestController("", params) + c.Set(CertCacheInstStorageKey, certCache) err := setupTLS(c) if err != nil { @@ -176,9 +184,12 @@ func TestSetupParseWithWrongOptionalParams(t *testing.T) { params := `tls ` + certFile + ` ` + keyFile + ` { protocols ssl tls }` - cfg := new(Config) + certCache := &certificateCache{cache: make(map[string]Certificate)} + cfg := &Config{Certificates: make(map[string]string), certCache: certCache} RegisterConfigGetter("", func(c *caddy.Controller) *Config { return cfg }) c := caddy.NewTestController("", params) + c.Set(CertCacheInstStorageKey, certCache) + err := setupTLS(c) if err == nil { t.Errorf("Expected errors, but no error returned") @@ -191,6 +202,7 @@ func TestSetupParseWithWrongOptionalParams(t *testing.T) { cfg = new(Config) RegisterConfigGetter("", func(c *caddy.Controller) *Config { return cfg }) c = caddy.NewTestController("", params) + c.Set(CertCacheInstStorageKey, certCache) err = setupTLS(c) if err == nil { t.Error("Expected errors, but no error returned") @@ -215,6 +227,7 @@ func TestSetupParseWithWrongOptionalParams(t *testing.T) { cfg = new(Config) RegisterConfigGetter("", func(c *caddy.Controller) *Config { return cfg }) c = caddy.NewTestController("", params) + c.Set(CertCacheInstStorageKey, certCache) err = setupTLS(c) if err == nil { t.Error("Expected errors, but no error returned") @@ -226,7 +239,8 @@ func TestSetupParseWithClientAuth(t *testing.T) { params := `tls ` + certFile + ` ` + keyFile + ` { clients }` - cfg := new(Config) + certCache := &certificateCache{cache: make(map[string]Certificate)} + cfg := &Config{Certificates: make(map[string]string), certCache: certCache} RegisterConfigGetter("", func(c *caddy.Controller) *Config { return cfg }) c := caddy.NewTestController("", params) err := setupTLS(c) @@ -259,9 +273,11 @@ func TestSetupParseWithClientAuth(t *testing.T) { clients verify_if_given }`, tls.VerifyClientCertIfGiven, true, noCAs}, } { - cfg := new(Config) + certCache := &certificateCache{cache: make(map[string]Certificate)} + cfg := &Config{Certificates: make(map[string]string), certCache: certCache} RegisterConfigGetter("", func(c *caddy.Controller) *Config { return cfg }) c := caddy.NewTestController("", caseData.params) + c.Set(CertCacheInstStorageKey, certCache) err := setupTLS(c) if caseData.expectedErr { if err == nil { @@ -311,9 +327,11 @@ func TestSetupParseWithCAUrl(t *testing.T) { ca 1 2 }`, true, ""}, } { - cfg := new(Config) + certCache := &certificateCache{cache: make(map[string]Certificate)} + cfg := &Config{Certificates: make(map[string]string), certCache: certCache} RegisterConfigGetter("", func(c *caddy.Controller) *Config { return cfg }) c := caddy.NewTestController("", caseData.params) + c.Set(CertCacheInstStorageKey, certCache) err := setupTLS(c) if caseData.expectedErr { if err == nil { @@ -335,9 +353,11 @@ func TestSetupParseWithKeyType(t *testing.T) { params := `tls { key_type p384 }` - cfg := new(Config) + certCache := &certificateCache{cache: make(map[string]Certificate)} + cfg := &Config{Certificates: make(map[string]string), certCache: certCache} RegisterConfigGetter("", func(c *caddy.Controller) *Config { return cfg }) c := caddy.NewTestController("", params) + c.Set(CertCacheInstStorageKey, certCache) err := setupTLS(c) if err != nil { @@ -353,9 +373,11 @@ func TestSetupParseWithCurves(t *testing.T) { params := `tls { curves x25519 p256 p384 p521 }` - cfg := new(Config) + certCache := &certificateCache{cache: make(map[string]Certificate)} + cfg := &Config{Certificates: make(map[string]string), certCache: certCache} RegisterConfigGetter("", func(c *caddy.Controller) *Config { return cfg }) c := caddy.NewTestController("", params) + c.Set(CertCacheInstStorageKey, certCache) err := setupTLS(c) if err != nil { @@ -380,9 +402,11 @@ func TestSetupParseWithOneTLSProtocol(t *testing.T) { params := `tls { protocols tls1.2 }` - cfg := new(Config) + certCache := &certificateCache{cache: make(map[string]Certificate)} + cfg := &Config{Certificates: make(map[string]string), certCache: certCache} RegisterConfigGetter("", func(c *caddy.Controller) *Config { return cfg }) c := caddy.NewTestController("", params) + c.Set(CertCacheInstStorageKey, certCache) err := setupTLS(c) if err != nil { diff --git a/caddytls/tls.go b/caddytls/tls.go index 9a17ddd3d..bf1a8301e 100644 --- a/caddytls/tls.go +++ b/caddytls/tls.go @@ -88,30 +88,38 @@ func Revoke(host string) error { return client.Revoke(host) } -// tlsSniSolver is a type that can solve tls-sni challenges using +// tlsSNISolver is a type that can solve TLS-SNI challenges using // an existing listener and our custom, in-memory certificate cache. -type tlsSniSolver struct{} +type tlsSNISolver struct { + certCache *certificateCache +} // Present adds the challenge certificate to the cache. -func (s tlsSniSolver) Present(domain, token, keyAuth string) error { +func (s tlsSNISolver) Present(domain, token, keyAuth string) error { cert, acmeDomain, err := acme.TLSSNI01ChallengeCert(keyAuth) if err != nil { return err } - cacheCertificate(Certificate{ + certHash := hashCertificateChain(cert.Certificate) + s.certCache.Lock() + s.certCache.cache[acmeDomain] = Certificate{ Certificate: cert, Names: []string{acmeDomain}, - }) + Hash: certHash, // perhaps not necesssary + } + s.certCache.Unlock() return nil } // CleanUp removes the challenge certificate from the cache. -func (s tlsSniSolver) CleanUp(domain, token, keyAuth string) error { +func (s tlsSNISolver) CleanUp(domain, token, keyAuth string) error { _, acmeDomain, err := acme.TLSSNI01ChallengeCert(keyAuth) if err != nil { return err } - uncacheCertificate(acmeDomain) + s.certCache.Lock() + delete(s.certCache.cache, acmeDomain) + s.certCache.Unlock() return nil } diff --git a/controller.go b/controller.go index e162280c0..6015d210f 100644 --- a/controller.go +++ b/controller.go @@ -103,6 +103,20 @@ func (c *Controller) Context() Context { return c.instance.context } +// Get safely gets a value from the Instance's storage. +func (c *Controller) Get(key interface{}) interface{} { + c.instance.StorageMu.RLock() + defer c.instance.StorageMu.RUnlock() + return c.instance.Storage[key] +} + +// Set safely sets a value on the Instance's storage. +func (c *Controller) Set(key, val interface{}) { + c.instance.StorageMu.Lock() + c.instance.Storage[key] = val + c.instance.StorageMu.Unlock() +} + // NewTestController creates a new Controller for // the server type and input specified. The filename // is "Testfile". If the server type is not empty and @@ -113,12 +127,12 @@ func (c *Controller) Context() Context { // Used only for testing, but exported so plugins can // use this for convenience. func NewTestController(serverType, input string) *Controller { - var ctx Context + testInst := &Instance{serverType: serverType, Storage: make(map[interface{}]interface{})} if stype, err := getServerType(serverType); err == nil { - ctx = stype.NewContext() + testInst.context = stype.NewContext(testInst) } return &Controller{ - instance: &Instance{serverType: serverType, context: ctx}, + instance: testInst, Dispenser: caddyfile.NewDispenser("Testfile", strings.NewReader(input)), OncePerServerBlock: func(f func() error) error { return f() }, } diff --git a/plugins.go b/plugins.go index f5372184e..f7d14f86b 100644 --- a/plugins.go +++ b/plugins.go @@ -191,7 +191,7 @@ type ServerType struct { // startup phases before this one. It's a way to keep // each set of server instances separate and to reduce // the amount of global state you need. - NewContext func() Context + NewContext func(inst *Instance) Context } // Plugin is a type which holds information about a plugin. From 08028714b57540a46209b6ab094c0a9cc103830f Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Tue, 13 Feb 2018 13:23:09 -0700 Subject: [PATCH 2/8] tls: Synchronize renewals between Caddy instances sharing file storage Also introduce caddy.OnProcessExit which is a list of functions that run before exiting the process cleanly; these do not count as shutdown callbacks, so they do not return errors and must execute quickly. --- caddy.go | 8 +++ caddytls/certificates.go | 76 ++++++++++++++-------- caddytls/client.go | 54 +++++----------- caddytls/filestorage.go | 7 +- caddytls/filestoragesync.go | 123 ++++++++++++++++++++++++++++++++++++ caddytls/maintain.go | 25 ++------ caddytls/storage.go | 4 ++ caddytls/sync_locker.go | 57 ----------------- plugins.go | 8 +++ sigtrap.go | 9 +-- sigtrap_posix.go | 8 +-- 11 files changed, 227 insertions(+), 152 deletions(-) create mode 100644 caddytls/filestoragesync.go delete mode 100644 caddytls/sync_locker.go diff --git a/caddy.go b/caddy.go index dd2d473a9..628673a73 100644 --- a/caddy.go +++ b/caddy.go @@ -77,6 +77,14 @@ var ( mu sync.Mutex ) +func init() { + OnProcessExit = append(OnProcessExit, func() { + if PidFile != "" { + os.Remove(PidFile) + } + }) +} + // Instance contains the state of servers created as a result of // calling Start and can be used to access or control those servers. // It is literally an instance of a server type. Instance values diff --git a/caddytls/certificates.go b/caddytls/certificates.go index 2df576ff3..29c0c8c21 100644 --- a/caddytls/certificates.go +++ b/caddytls/certificates.go @@ -29,34 +29,6 @@ import ( "golang.org/x/crypto/ocsp" ) -// Certificate is a tls.Certificate with associated metadata tacked on. -// Even if the metadata can be obtained by parsing the certificate, -// we are more efficient by extracting the metadata onto this struct. -type Certificate struct { - tls.Certificate - - // Names is the list of names this certificate is written for. - // The first is the CommonName (if any), the rest are SAN. - Names []string - - // NotAfter is when the certificate expires. - NotAfter time.Time - - // OCSP contains the certificate's parsed OCSP response. - OCSP *ocsp.Response - - // The hex-encoded hash of this cert's chain's bytes. - Hash string - - // configs is the list of configs that use or refer to - // The first one is assumed to be the config that is - // "in charge" of this certificate (i.e. determines - // whether it is managed, how it is managed, etc). - // This field will be populated by cacheCertificate. - // Only meddle with it if you know what you're doing! - configs []*Config -} - // certificateCache is to be an instance-wide cache of certs // that site-specific TLS configs can refer to. Using a // central map like this avoids duplication of certs in @@ -127,6 +99,54 @@ func (certCache *certificateCache) replaceCertificate(oldCert, newCert Certifica return nil } +// reloadManagedCertificate reloads the certificate corresponding to the name(s) +// on oldCert into the cache, from storage. This also replaces the old certificate +// with the new one, so that all configurations that used the old cert now point +// to the new cert. +func (certCache *certificateCache) reloadManagedCertificate(oldCert Certificate) error { + // get the certificate from storage and cache it + newCert, err := oldCert.configs[0].CacheManagedCertificate(oldCert.Names[0]) + if err != nil { + return fmt.Errorf("unable to reload certificate for %v into cache: %v", oldCert.Names, err) + } + + // and replace the old certificate with the new one + err = certCache.replaceCertificate(oldCert, newCert) + if err != nil { + return fmt.Errorf("replacing certificate %v: %v", oldCert.Names, err) + } + + return nil +} + +// Certificate is a tls.Certificate with associated metadata tacked on. +// Even if the metadata can be obtained by parsing the certificate, +// we are more efficient by extracting the metadata onto this struct. +type Certificate struct { + tls.Certificate + + // Names is the list of names this certificate is written for. + // The first is the CommonName (if any), the rest are SAN. + Names []string + + // NotAfter is when the certificate expires. + NotAfter time.Time + + // OCSP contains the certificate's parsed OCSP response. + OCSP *ocsp.Response + + // The hex-encoded hash of this cert's chain's bytes. + Hash string + + // configs is the list of configs that use or refer to + // The first one is assumed to be the config that is + // "in charge" of this certificate (i.e. determines + // whether it is managed, how it is managed, etc). + // This field will be populated by cacheCertificate. + // Only meddle with it if you know what you're doing! + configs []*Config +} + // CacheManagedCertificate loads the certificate for domain into the // cache, from the TLS storage for managed certificates. It returns a // copy of the Certificate that was put into the cache. diff --git a/caddytls/client.go b/caddytls/client.go index 4775a2d18..cdd715b97 100644 --- a/caddytls/client.go +++ b/caddytls/client.go @@ -39,7 +39,7 @@ type ACMEClient struct { AllowPrompts bool config *Config acmeClient *acme.Client - locker Locker + storage Storage } // newACMEClient creates a new ACMEClient given an email and whether @@ -121,10 +121,7 @@ var newACMEClient = func(config *Config, allowPrompts bool) (*ACMEClient, error) AllowPrompts: allowPrompts, config: config, acmeClient: client, - locker: &syncLock{ - nameLocks: make(map[string]*sync.WaitGroup), - nameLocksMu: sync.Mutex{}, - }, + storage: storage, } if config.DNSProvider == "" { @@ -209,13 +206,7 @@ var newACMEClient = func(config *Config, allowPrompts bool) (*ACMEClient, error) // Callers who have access to a Config value should use the ObtainCert // method on that instead of this lower-level method. func (c *ACMEClient) Obtain(name string) error { - // Get access to ACME storage - storage, err := c.config.StorageFor(c.config.CAUrl) - if err != nil { - return err - } - - waiter, err := c.locker.TryLock(name) + waiter, err := c.storage.TryLock(name) if err != nil { return err } @@ -225,7 +216,7 @@ func (c *ACMEClient) Obtain(name string) error { return nil // we assume the process with the lock succeeded, rather than hammering this execution path again } defer func() { - if err := c.locker.Unlock(name); err != nil { + if err := c.storage.Unlock(name); err != nil { log.Printf("[ERROR] Unable to unlock obtain call for %s: %v", name, err) } }() @@ -268,7 +259,7 @@ Attempts: } // Success - immediately save the certificate resource - err = saveCertResource(storage, certificate) + err = saveCertResource(c.storage, certificate) if err != nil { return fmt.Errorf("error saving assets for %v: %v", name, err) } @@ -279,35 +270,30 @@ Attempts: return nil } -// Renew renews the managed certificate for name. This function is -// safe for concurrent use. +// Renew renews the managed certificate for name. It puts the renewed +// certificate into storage (not the cache). This function is safe for +// concurrent use. // // Callers who have access to a Config value should use the RenewCert // method on that instead of this lower-level method. func (c *ACMEClient) Renew(name string) error { - // Get access to ACME storage - storage, err := c.config.StorageFor(c.config.CAUrl) - if err != nil { - return err - } - - waiter, err := c.locker.TryLock(name) + waiter, err := c.storage.TryLock(name) if err != nil { return err } if waiter != nil { log.Printf("[INFO] Certificate for %s is already being renewed elsewhere and stored; waiting", name) waiter.Wait() - return nil // we assume the process with the lock succeeded, rather than hammering this execution path again + return nil // assume that the worker that renewed the cert succeeded; avoid hammering this path over and over } defer func() { - if err := c.locker.Unlock(name); err != nil { + if err := c.storage.Unlock(name); err != nil { log.Printf("[ERROR] Unable to unlock renew call for %s: %v", name, err) } }() // Prepare for renewal (load PEM cert, key, and meta) - siteData, err := storage.LoadSite(name) + siteData, err := c.storage.LoadSite(name) if err != nil { return err } @@ -350,21 +336,15 @@ func (c *ACMEClient) Renew(name string) error { return errors.New("too many renewal attempts; last error: " + err.Error()) } - // Executes Cert renew events caddy.EmitEvent(caddy.CertRenewEvent, name) - return saveCertResource(storage, newCertMeta) + return saveCertResource(c.storage, newCertMeta) } -// Revoke revokes the certificate for name and deltes +// Revoke revokes the certificate for name and deletes // it from storage. func (c *ACMEClient) Revoke(name string) error { - storage, err := c.config.StorageFor(c.config.CAUrl) - if err != nil { - return err - } - - siteExists, err := storage.SiteExists(name) + siteExists, err := c.storage.SiteExists(name) if err != nil { return err } @@ -373,7 +353,7 @@ func (c *ACMEClient) Revoke(name string) error { return errors.New("no certificate and key for " + name) } - siteData, err := storage.LoadSite(name) + siteData, err := c.storage.LoadSite(name) if err != nil { return err } @@ -383,7 +363,7 @@ func (c *ACMEClient) Revoke(name string) error { return err } - err = storage.DeleteSite(name) + err = c.storage.DeleteSite(name) if err != nil { return errors.New("certificate revoked, but unable to delete certificate file: " + err.Error()) } diff --git a/caddytls/filestorage.go b/caddytls/filestorage.go index 67084ef45..9dd2d8494 100644 --- a/caddytls/filestorage.go +++ b/caddytls/filestorage.go @@ -38,9 +38,9 @@ var storageBasePath = filepath.Join(caddy.AssetsPath(), "acme") // Storage instance backed by the local disk. The resulting Storage // instance is guaranteed to be non-nil if there is no error. func NewFileStorage(caURL *url.URL) (Storage, error) { - return &FileStorage{ - Path: filepath.Join(storageBasePath, caURL.Host), - }, nil + storage := &FileStorage{Path: filepath.Join(storageBasePath, caURL.Host)} + storage.Locker = &fileStorageLock{caURL: caURL.Host, storage: storage} + return storage, nil } // FileStorage facilitates forming file paths derived from a root @@ -48,6 +48,7 @@ func NewFileStorage(caURL *url.URL) (Storage, error) { // cross-platform way or persisting ACME assets on the file system. type FileStorage struct { Path string + Locker } // sites gets the directory that stores site certificate and keys. diff --git a/caddytls/filestoragesync.go b/caddytls/filestoragesync.go new file mode 100644 index 000000000..4c81ca02e --- /dev/null +++ b/caddytls/filestoragesync.go @@ -0,0 +1,123 @@ +// Copyright 2015 Light Code Labs, LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package caddytls + +import ( + "fmt" + "os" + "sync" + "time" + + "github.com/mholt/caddy" +) + +func init() { + // be sure to remove lock files when exiting the process! + caddy.OnProcessExit = append(caddy.OnProcessExit, func() { + fileStorageNameLocksMu.Lock() + defer fileStorageNameLocksMu.Unlock() + for key, fw := range fileStorageNameLocks { + os.Remove(fw.filename) + delete(fileStorageNameLocks, key) + } + }) +} + +// fileStorageLock facilitates ACME-related locking by using +// the associated FileStorage, so multiple processes can coordinate +// renewals on the certificates on a shared file system. +type fileStorageLock struct { + caURL string + storage *FileStorage +} + +// TryLock attempts to get a lock for name, otherwise it returns +// a Waiter value to wait until the other process is finished. +func (s *fileStorageLock) TryLock(name string) (Waiter, error) { + fileStorageNameLocksMu.Lock() + defer fileStorageNameLocksMu.Unlock() + + // see if lock already exists within this process + fw, ok := fileStorageNameLocks[s.caURL+name] + if ok { + // lock already created within process, let caller wait on it + return fw, nil + } + + // attempt to persist lock to disk by creating lock file + fw = &fileWaiter{ + filename: s.storage.siteCertFile(name) + ".lock", + wg: new(sync.WaitGroup), + } + lf, err := os.OpenFile(fw.filename, os.O_CREATE|os.O_EXCL, 0644) + if err != nil { + if os.IsExist(err) { + // another process has the lock; use it to wait + return fw, nil + } + // otherwise, this was some unexpected error + return nil, err + } + lf.Close() + + // looks like we get the lock + fw.wg.Add(1) + fileStorageNameLocks[s.caURL+name] = fw + + return nil, nil +} + +// Unlock unlocks name. +func (s *fileStorageLock) Unlock(name string) error { + fileStorageNameLocksMu.Lock() + defer fileStorageNameLocksMu.Unlock() + fw, ok := fileStorageNameLocks[s.caURL+name] + if !ok { + return fmt.Errorf("FileStorage: no lock to release for %s", name) + } + os.Remove(fw.filename) + fw.wg.Done() + delete(fileStorageNameLocks, s.caURL+name) + return nil +} + +// fileWaiter waits for a file to disappear; it polls +// the file system to check for the existence of a file. +// It also has a WaitGroup which will be faster than +// polling, for when locking need only happen within this +// process. +type fileWaiter struct { + filename string + wg *sync.WaitGroup +} + +// Wait waits until the lock is released. +func (fw *fileWaiter) Wait() { + start := time.Now() + fw.wg.Wait() + for time.Since(start) < 1*time.Hour { + _, err := os.Stat(fw.filename) + if os.IsNotExist(err) { + return + } + time.Sleep(1 * time.Second) + } +} + +var fileStorageNameLocks = make(map[string]*fileWaiter) // keyed by CA + name +var fileStorageNameLocksMu sync.Mutex + +var _ Locker = &fileStorageLock{} +var _ Waiter = &fileWaiter{} diff --git a/caddytls/maintain.go b/caddytls/maintain.go index 7ce6c5e26..5e867d4b8 100644 --- a/caddytls/maintain.go +++ b/caddytls/maintain.go @@ -160,17 +160,12 @@ func RenewManagedCertificates(allowPrompts bool) (err error) { log.Printf("[INFO] Certificate for %v expires in %v, but is already renewed in storage; reloading stored certificate", oldCert.Names, timeLeft) - // get the certificate from storage and cache it - newCert, err := oldCert.configs[0].CacheManagedCertificate(oldCert.Names[0]) + err = certCache.reloadManagedCertificate(oldCert) if err != nil { - log.Printf("[ERROR] Unable to reload certificate for %v into cache: %v", oldCert.Names, err) - continue - } - - // and replace the old certificate with the new one - err = certCache.replaceCertificate(oldCert, newCert) - if err != nil { - log.Printf("[ERROR] Replacing certificate: %v", err) + if allowPrompts { + return err // operator is present, so report error immediately + } + log.Printf("[ERROR] Loading renewed certificate: %v", err) } } @@ -212,21 +207,13 @@ func RenewManagedCertificates(allowPrompts bool) (err error) { // successful renewal, so update in-memory cache by loading // renewed certificate so it will be used with handshakes - - // put the certificate in the cache - newCert, err := oldCert.configs[0].CacheManagedCertificate(renewName) + err = certCache.reloadManagedCertificate(oldCert) if err != nil { if allowPrompts { return err // operator is present, so report error immediately } log.Printf("[ERROR] %v", err) } - - // replace the old certificate with the new one - err = certCache.replaceCertificate(oldCert, newCert) - if err != nil { - log.Printf("[ERROR] Replacing certificate: %v", err) - } } // Deletion queue diff --git a/caddytls/storage.go b/caddytls/storage.go index 8587dd026..05606ed92 100644 --- a/caddytls/storage.go +++ b/caddytls/storage.go @@ -107,6 +107,10 @@ type Storage interface { // in StoreUser. The result is an empty string if there are no // persisted users in storage. MostRecentUserEmail() string + + // Locker is necessary because synchronizing certificate maintenance + // depends on how storage is implemented. + Locker } // ErrNotExist is returned by Storage implementations when diff --git a/caddytls/sync_locker.go b/caddytls/sync_locker.go deleted file mode 100644 index 693f3b875..000000000 --- a/caddytls/sync_locker.go +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2015 Light Code Labs, LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package caddytls - -import ( - "fmt" - "sync" -) - -var _ Locker = &syncLock{} - -type syncLock struct { - nameLocks map[string]*sync.WaitGroup - nameLocksMu sync.Mutex -} - -// TryLock attempts to get a lock for name, otherwise it returns -// a Waiter value to wait until the other process is finished. -func (s *syncLock) TryLock(name string) (Waiter, error) { - s.nameLocksMu.Lock() - defer s.nameLocksMu.Unlock() - wg, ok := s.nameLocks[name] - if ok { - // lock already obtained, let caller wait on it - return wg, nil - } - // caller gets lock - wg = new(sync.WaitGroup) - wg.Add(1) - s.nameLocks[name] = wg - return nil, nil -} - -// Unlock unlocks name. -func (s *syncLock) Unlock(name string) error { - s.nameLocksMu.Lock() - defer s.nameLocksMu.Unlock() - wg, ok := s.nameLocks[name] - if !ok { - return fmt.Errorf("FileStorage: no lock to release for %s", name) - } - wg.Done() - delete(s.nameLocks, name) - return nil -} diff --git a/plugins.go b/plugins.go index f7d14f86b..ba1114034 100644 --- a/plugins.go +++ b/plugins.go @@ -383,6 +383,14 @@ func loadCaddyfileInput(serverType string) (Input, error) { return caddyfileToUse, nil } +// OnProcessExit is a list of functions to run when the process +// exits -- they are ONLY for cleanup and should not block, +// return errors, or do anything fancy. They will be run with +// every signal, even if "shutdown callbacks" are not executed. +// This variable must only be modified in the main goroutine +// from init() functions. +var OnProcessExit []func() + // caddyfileLoader pairs the name of a loader to the loader. type caddyfileLoader struct { name string diff --git a/sigtrap.go b/sigtrap.go index a10cf0f09..ac61c59c0 100644 --- a/sigtrap.go +++ b/sigtrap.go @@ -44,16 +44,17 @@ func trapSignalsCrossPlatform() { if i > 0 { log.Println("[INFO] SIGINT: Force quit") - if PidFile != "" { - os.Remove(PidFile) + for _, f := range OnProcessExit { + f() // important cleanup actions only } os.Exit(2) } log.Println("[INFO] SIGINT: Shutting down") - if PidFile != "" { - os.Remove(PidFile) + // important cleanup actions before shutdown callbacks + for _, f := range OnProcessExit { + f() } go func() { diff --git a/sigtrap_posix.go b/sigtrap_posix.go index 71b6969af..38aaa774c 100644 --- a/sigtrap_posix.go +++ b/sigtrap_posix.go @@ -33,8 +33,8 @@ func trapSignalsPosix() { switch sig { case syscall.SIGTERM: log.Println("[INFO] SIGTERM: Terminating process") - if PidFile != "" { - os.Remove(PidFile) + for _, f := range OnProcessExit { + f() // only perform important cleanup actions } os.Exit(0) @@ -46,8 +46,8 @@ func trapSignalsPosix() { log.Printf("[ERROR] SIGQUIT stop: %v", err) exitCode = 3 } - if PidFile != "" { - os.Remove(PidFile) + for _, f := range OnProcessExit { + f() // only perform important cleanup actions } os.Exit(exitCode) From 4b2e22289d9c7423eea2f9b44430bfefed074d8f Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Tue, 13 Feb 2018 13:27:08 -0700 Subject: [PATCH 3/8] sigtrap: Ensure cleanup actions happen before too many things go wrong --- sigtrap_posix.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sigtrap_posix.go b/sigtrap_posix.go index 2a0a0de57..cc65ccb46 100644 --- a/sigtrap_posix.go +++ b/sigtrap_posix.go @@ -41,14 +41,14 @@ func trapSignalsPosix() { case syscall.SIGTERM: log.Println("[INFO] SIGTERM: Shutting down servers then terminating") exitCode := executeShutdownCallbacks("SIGTERM") + for _, f := range OnProcessExit { + f() // only perform important cleanup actions + } err := Stop() if err != nil { log.Printf("[ERROR] SIGTERM stop: %v", err) exitCode = 3 } - for _, f := range OnProcessExit { - f() // only perform important cleanup actions - } os.Exit(exitCode) case syscall.SIGUSR1: From ef585ed810a6913e7174c19ed383ddba9c3949ed Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Wed, 14 Feb 2018 13:32:16 -0700 Subject: [PATCH 4/8] tls: Ensure parent dir exists before creating lock file --- caddytls/filestoragesync.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/caddytls/filestoragesync.go b/caddytls/filestoragesync.go index 4c81ca02e..a8e7b9291 100644 --- a/caddytls/filestoragesync.go +++ b/caddytls/filestoragesync.go @@ -61,6 +61,10 @@ func (s *fileStorageLock) TryLock(name string) (Waiter, error) { filename: s.storage.siteCertFile(name) + ".lock", wg: new(sync.WaitGroup), } + // parent dir must exist + if err := os.MkdirAll(s.storage.site(name), 0700); err != nil { + return nil, err + } lf, err := os.OpenFile(fw.filename, os.O_CREATE|os.O_EXCL, 0644) if err != nil { if os.IsExist(err) { From be96cc0e656de2cac5913508f1dfab79872bfd7e Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Thu, 15 Feb 2018 00:04:31 -0700 Subject: [PATCH 5/8] httpserver: Raise error when adjusted site addresses clash at startup See discussion on #2015 for how this situation was discovered. For a Caddyfile like this: localhost { ... } :2015 { ... } Running Caddy like this: caddy -host localhost Produces two sites both defined as `localhost:2015` because the flag changes the default host value to be `localhost`. This should be an error since the sites are not distinct and it is confusing. It can also cause issues with TLS handshakes loading the wrong cert, as the linked discussion shows. --- caddy.go | 2 +- caddyhttp/httpserver/plugin.go | 21 ++++++++++++++++++++- caddyhttp/httpserver/plugin_test.go | 17 +++++++++++++++++ 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/caddy.go b/caddy.go index 628673a73..917a5d069 100644 --- a/caddy.go +++ b/caddy.go @@ -612,7 +612,7 @@ func ValidateAndExecuteDirectives(cdyfile Input, inst *Instance, justValidate bo sblocks, err = inst.context.InspectServerBlocks(cdyfile.Path(), sblocks) if err != nil { - return err + return fmt.Errorf("error inspecting server blocks: %v", err) } return executeDirectives(inst, cdyfile.Path(), stype.Directives(), sblocks, justValidate) diff --git a/caddyhttp/httpserver/plugin.go b/caddyhttp/httpserver/plugin.go index ea31a58d8..93811abcb 100644 --- a/caddyhttp/httpserver/plugin.go +++ b/caddyhttp/httpserver/plugin.go @@ -117,12 +117,14 @@ func (h *httpContext) saveConfig(key string, cfg *SiteConfig) { // executing directives and otherwise prepares the directives to // be parsed and executed. func (h *httpContext) InspectServerBlocks(sourceFile string, serverBlocks []caddyfile.ServerBlock) ([]caddyfile.ServerBlock, error) { + siteAddrs := make(map[string]string) + // For each address in each server block, make a new config for _, sb := range serverBlocks { for _, key := range sb.Keys { key = strings.ToLower(key) if _, dup := h.keysToSiteConfigs[key]; dup { - return serverBlocks, fmt.Errorf("duplicate site address: %s", key) + return serverBlocks, fmt.Errorf("duplicate site key: %s", key) } addr, err := standardizeAddress(key) if err != nil { @@ -138,6 +140,23 @@ func (h *httpContext) InspectServerBlocks(sourceFile string, serverBlocks []cadd addr.Port = Port } + // Make sure the adjusted site address is distinct + addrCopy := addr // make copy so we don't disturb the original, carefully-parsed address struct + if addrCopy.Port == "" && Port == DefaultPort { + addrCopy.Port = Port + } + addrStr := strings.ToLower(addrCopy.String()) + if otherSiteKey, dup := siteAddrs[addrStr]; dup { + err := fmt.Errorf("duplicate site address: %s", addrStr) + if (addrCopy.Host == Host && Host != DefaultHost) || + (addrCopy.Port == Port && Port != DefaultPort) { + err = fmt.Errorf("site defined as %s is a duplicate of %s because of modified "+ + "default host and/or port values (usually via -host or -port flags)", key, otherSiteKey) + } + return serverBlocks, err + } + siteAddrs[addrStr] = key + // If default HTTP or HTTPS ports have been customized, // make sure the ACME challenge ports match var altHTTPPort, altTLSSNIPort string diff --git a/caddyhttp/httpserver/plugin_test.go b/caddyhttp/httpserver/plugin_test.go index 5a60f2e83..f7b9cfc00 100644 --- a/caddyhttp/httpserver/plugin_test.go +++ b/caddyhttp/httpserver/plugin_test.go @@ -153,6 +153,23 @@ func TestInspectServerBlocksWithCustomDefaultPort(t *testing.T) { } } +// See discussion on PR #2015 +func TestInspectServerBlocksWithAdjustedAddress(t *testing.T) { + Port = DefaultPort + Host = "example.com" + filename := "Testfile" + ctx := newContext(&caddy.Instance{Storage: make(map[interface{}]interface{})}).(*httpContext) + input := strings.NewReader("example.com {\n}\n:2015 {\n}") + sblocks, err := caddyfile.Parse(filename, input, nil) + if err != nil { + t.Fatalf("Expected no error setting up test, got: %v", err) + } + _, err = ctx.InspectServerBlocks(filename, sblocks) + if err == nil { + t.Fatalf("Expected an error because site definitions should overlap, got: %v", err) + } +} + func TestInspectServerBlocksCaseInsensitiveKey(t *testing.T) { filename := "Testfile" ctx := newContext(&caddy.Instance{Storage: make(map[interface{}]interface{})}).(*httpContext) From 896dc6bc690bce51884bd43da91180882ce90cab Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Thu, 15 Feb 2018 08:48:05 -0700 Subject: [PATCH 6/8] tls: Try empty name if no matches for getting config during handshake See discussion on #2015; the initial change had removed this check, and I can't remember why I removed it or if it was accidental. Anyway, it's back now. --- caddytls/handshake.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/caddytls/handshake.go b/caddytls/handshake.go index 2f3f34af3..841d06cd0 100644 --- a/caddytls/handshake.go +++ b/caddytls/handshake.go @@ -59,6 +59,14 @@ func (cg configGroup) getConfig(name string) *Config { } } + // try a config that serves all names (this + // is basically the same as a config defined + // for "*" -- I think -- but the above loop + // doesn't try an empty string) + if config, ok := cg[""]; ok { + return config + } + // no matches, so just serve up a random config for _, config := range cg { return config From 8db80c4a88e53c9b710dc7753fd2561fa974ffb2 Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Fri, 16 Feb 2018 12:05:34 -0700 Subject: [PATCH 7/8] tls: Fix HTTP->HTTPS redirects and HTTP challenge when using custom port --- caddyhttp/httpserver/https.go | 30 +++++++++++++++++------------- caddyhttp/httpserver/server.go | 22 ++-------------------- caddytls/config.go | 13 +++++++------ caddytls/httphandler.go | 15 ++++++++++----- caddytls/httphandler_test.go | 4 ++-- 5 files changed, 38 insertions(+), 46 deletions(-) diff --git a/caddyhttp/httpserver/https.go b/caddyhttp/httpserver/https.go index a12d9982c..3d1de7499 100644 --- a/caddyhttp/httpserver/https.go +++ b/caddyhttp/httpserver/https.go @@ -159,25 +159,29 @@ func hostHasOtherPort(allConfigs []*SiteConfig, thisConfigIdx int, otherPort str // be the HTTPS configuration. The returned configuration is set // to listen on HTTPPort. The TLS field of cfg must not be nil. func redirPlaintextHost(cfg *SiteConfig) *SiteConfig { - redirPort := cfg.Addr.Port - if redirPort == DefaultHTTPSPort { - redirPort = "" // default port is redundant - } - redirMiddleware := func(next Handler) Handler { return HandlerFunc(func(w http.ResponseWriter, r *http.Request) (int, error) { - // Construct the URL to which to redirect. Note that the Host in a request might - // contain a port, but we just need the hostname; we'll set the port if needed. + // Construct the URL to which to redirect. Note that the Host in a + // request might contain a port, but we just need the hostname. toURL := "https://" requestHost, _, err := net.SplitHostPort(r.Host) if err != nil { - requestHost = r.Host // Host did not contain a port; great - } - if redirPort == "" { - toURL += requestHost - } else { - toURL += net.JoinHostPort(requestHost, redirPort) + requestHost = r.Host // host did not contain a port; okay } + + // The rest of the URL will consist of the hostname and the URI. + // We do not append a port because if the HTTPSPort is changed + // from the default value, it is probably because there is port + // forwarding going on; and we do not need to specify the default + // HTTPS port in the redirect. Serving HTTPS on a port other than + // 443 is unusual, and is considered an advanced use case. If port + // forwarding IS happening, then redirecting the external client to + // this internal port will cause the connection to fail; and it + // definitely causes ACME HTTP-01 challenges to fail, because it + // only allows redirecting to port 80 or 443 (as of Feb 2018). + // If a user wants to redirect HTTP to HTTPS on an external port + // other than 443, they can easily configure that themselves. + toURL += requestHost toURL += r.URL.RequestURI() w.Header().Set("Connection", "close") diff --git a/caddyhttp/httpserver/server.go b/caddyhttp/httpserver/server.go index 92f2b6fd7..9d3ae0389 100644 --- a/caddyhttp/httpserver/server.go +++ b/caddyhttp/httpserver/server.go @@ -389,7 +389,7 @@ func (s *Server) serveHTTP(w http.ResponseWriter, r *http.Request) (int, error) if vhost == nil { // check for ACME challenge even if vhost is nil; // could be a new host coming online soon - if caddytls.HTTPChallengeHandler(w, r, "localhost", caddytls.DefaultHTTPAlternatePort) { + if caddytls.HTTPChallengeHandler(w, r, "localhost") { return 0, nil } // otherwise, log the error and write a message to the client @@ -405,7 +405,7 @@ func (s *Server) serveHTTP(w http.ResponseWriter, r *http.Request) (int, error) // we still check for ACME challenge if the vhost exists, // because we must apply its HTTP challenge config settings - if s.proxyHTTPChallenge(vhost, w, r) { + if caddytls.HTTPChallengeHandler(w, r, vhost.ListenHost) { return 0, nil } @@ -422,24 +422,6 @@ func (s *Server) serveHTTP(w http.ResponseWriter, r *http.Request) (int, error) return vhost.middlewareChain.ServeHTTP(w, r) } -// proxyHTTPChallenge solves the ACME HTTP challenge if r is the HTTP -// request for the challenge. If it is, and if the request has been -// fulfilled (response written), true is returned; false otherwise. -// If you don't have a vhost, just call the challenge handler directly. -func (s *Server) proxyHTTPChallenge(vhost *SiteConfig, w http.ResponseWriter, r *http.Request) bool { - if vhost.Addr.Port != caddytls.HTTPChallengePort { - return false - } - if vhost.TLS != nil && vhost.TLS.Manual { - return false - } - altPort := caddytls.DefaultHTTPAlternatePort - if vhost.TLS != nil && vhost.TLS.AltHTTPPort != "" { - altPort = vhost.TLS.AltHTTPPort - } - return caddytls.HTTPChallengeHandler(w, r, vhost.ListenHost, altPort) -} - // Address returns the address s was assigned to listen on. func (s *Server) Address() string { return s.Server.Addr diff --git a/caddytls/config.go b/caddytls/config.go index 0b64f3575..938cb08ca 100644 --- a/caddytls/config.go +++ b/caddytls/config.go @@ -93,16 +93,17 @@ type Config struct { // an ACME challenge ListenHost string - // The alternate port (ONLY port, not host) - // to use for the ACME HTTP challenge; this - // port will be used if we proxy challenges - // coming in on port 80 to this alternate port + // The alternate port (ONLY port, not host) to + // use for the ACME HTTP challenge; if non-empty, + // this port will be used instead of + // HTTPChallengePort to spin up a listener for + // the HTTP challenge AltHTTPPort string // The alternate port (ONLY port, not host) // to use for the ACME TLS-SNI challenge. - // The system must forward the standard port - // for the TLS-SNI challenge to this port. + // The system must forward TLSSNIChallengePort + // to this port for challenge to succeed AltTLSSNIPort string // The string identifier of the DNS provider diff --git a/caddytls/httphandler.go b/caddytls/httphandler.go index ca356cd43..663e2eb02 100644 --- a/caddytls/httphandler.go +++ b/caddytls/httphandler.go @@ -27,10 +27,11 @@ import ( const challengeBasePath = "/.well-known/acme-challenge" // HTTPChallengeHandler proxies challenge requests to ACME client if the -// request path starts with challengeBasePath. It returns true if it -// handled the request and no more needs to be done; it returns false -// if this call was a no-op and the request still needs handling. -func HTTPChallengeHandler(w http.ResponseWriter, r *http.Request, listenHost, altPort string) bool { +// request path starts with challengeBasePath, if the HTTP challenge is not +// disabled, and if we are known to be obtaining a certificate for the name. +// It returns true if it handled the request and no more needs to be done; +// it returns false if this call was a no-op and the request still needs handling. +func HTTPChallengeHandler(w http.ResponseWriter, r *http.Request, listenHost string) bool { if !strings.HasPrefix(r.URL.Path, challengeBasePath) { return false } @@ -50,7 +51,11 @@ func HTTPChallengeHandler(w http.ResponseWriter, r *http.Request, listenHost, al listenHost = "localhost" } - upstream, err := url.Parse(fmt.Sprintf("%s://%s:%s", scheme, listenHost, altPort)) + // always proxy to the DefaultHTTPAlternatePort because obviously the + // ACME challenge request already got into one of our HTTP handlers, so + // it means we must have started a HTTP listener on the alternate + // port instead; which is only accessible via listenHost + upstream, err := url.Parse(fmt.Sprintf("%s://%s:%s", scheme, listenHost, DefaultHTTPAlternatePort)) if err != nil { w.WriteHeader(http.StatusInternalServerError) log.Printf("[ERROR] ACME proxy handler: %v", err) diff --git a/caddytls/httphandler_test.go b/caddytls/httphandler_test.go index 451c0cf4f..cae65ac8c 100644 --- a/caddytls/httphandler_test.go +++ b/caddytls/httphandler_test.go @@ -39,7 +39,7 @@ func TestHTTPChallengeHandlerNoOp(t *testing.T) { t.Fatalf("Could not craft request, got error: %v", err) } rw := httptest.NewRecorder() - if HTTPChallengeHandler(rw, req, "", DefaultHTTPAlternatePort) { + if HTTPChallengeHandler(rw, req, "") { t.Errorf("Got true with this URL, but shouldn't have: %s", url) } } @@ -76,7 +76,7 @@ func TestHTTPChallengeHandlerSuccess(t *testing.T) { } rw := httptest.NewRecorder() - HTTPChallengeHandler(rw, req, "", DefaultHTTPAlternatePort) + HTTPChallengeHandler(rw, req, "") if !proxySuccess { t.Fatal("Expected request to be proxied, but it wasn't") From a03eba6fbc2770ef6bbe7defa184ec4a6817f0c2 Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Fri, 16 Feb 2018 12:36:28 -0700 Subject: [PATCH 8/8] tls: In HTTP->HTTPS redirects, preserve redir port in some circumstances Only strip the port from the Location URL value if the port is NOT the HTTPSPort (before, we compared against DefaultHTTPSPort instead of HTTPSPort). The HTTPSPort can be changed, but is done so for port forwarding, since in reality you can't 'change' the standard HTTPS port, you can only forward it. --- caddyhttp/httpserver/https.go | 39 ++++++++++++++++++------------ caddyhttp/httpserver/https_test.go | 2 +- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/caddyhttp/httpserver/https.go b/caddyhttp/httpserver/https.go index 3d1de7499..ae3c4e902 100644 --- a/caddyhttp/httpserver/https.go +++ b/caddyhttp/httpserver/https.go @@ -159,29 +159,38 @@ func hostHasOtherPort(allConfigs []*SiteConfig, thisConfigIdx int, otherPort str // be the HTTPS configuration. The returned configuration is set // to listen on HTTPPort. The TLS field of cfg must not be nil. func redirPlaintextHost(cfg *SiteConfig) *SiteConfig { + redirPort := cfg.Addr.Port + if redirPort == HTTPSPort { + // By default, HTTPSPort should be DefaultHTTPSPort, + // which of course doesn't need to be explicitly stated + // in the Location header. Even if HTTPSPort is changed + // so that it is no longer DefaultHTTPSPort, we shouldn't + // append it to the URL in the Location because changing + // the HTTPS port is assumed to be an internal-only change + // (in other words, we assume port forwarding is going on); + // but redirects go back to a presumably-external client. + // (If redirect clients are also internal, that is more + // advanced, and the user should configure HTTP->HTTPS + // redirects themselves.) + redirPort = "" + } + redirMiddleware := func(next Handler) Handler { return HandlerFunc(func(w http.ResponseWriter, r *http.Request) (int, error) { // Construct the URL to which to redirect. Note that the Host in a - // request might contain a port, but we just need the hostname. + // request might contain a port, but we just need the hostname from + // it; and we'll set the port if needed. toURL := "https://" requestHost, _, err := net.SplitHostPort(r.Host) if err != nil { - requestHost = r.Host // host did not contain a port; okay + requestHost = r.Host // Host did not contain a port, so use the whole value + } + if redirPort == "" { + toURL += requestHost + } else { + toURL += net.JoinHostPort(requestHost, redirPort) } - // The rest of the URL will consist of the hostname and the URI. - // We do not append a port because if the HTTPSPort is changed - // from the default value, it is probably because there is port - // forwarding going on; and we do not need to specify the default - // HTTPS port in the redirect. Serving HTTPS on a port other than - // 443 is unusual, and is considered an advanced use case. If port - // forwarding IS happening, then redirecting the external client to - // this internal port will cause the connection to fail; and it - // definitely causes ACME HTTP-01 challenges to fail, because it - // only allows redirecting to port 80 or 443 (as of Feb 2018). - // If a user wants to redirect HTTP to HTTPS on an external port - // other than 443, they can easily configure that themselves. - toURL += requestHost toURL += r.URL.RequestURI() w.Header().Set("Connection", "close") diff --git a/caddyhttp/httpserver/https_test.go b/caddyhttp/httpserver/https_test.go index 3e9fe915a..043249445 100644 --- a/caddyhttp/httpserver/https_test.go +++ b/caddyhttp/httpserver/https_test.go @@ -53,7 +53,7 @@ func TestRedirPlaintextHost(t *testing.T) { }, { Host: "foohost", - Port: "443", // since this is the default HTTPS port, should not be included in Location value + Port: HTTPSPort, // since this is the 'default' HTTPS port, should not be included in Location value }, { Host: "*.example.com",