diagnostics: Add/remove metrics

This commit is contained in:
Matthew Holt 2018-03-21 17:01:14 -06:00
parent 385ea53309
commit 4df8028bc3
No known key found for this signature in database
GPG key ID: 2A349DD577D586A5
12 changed files with 178 additions and 59 deletions

View file

@ -123,6 +123,7 @@ type Instance struct {
StorageMu sync.RWMutex
}
// Instances returns the list of instances.
//
// The returned slice is the package's own instances
// value, not a copy.
// NOTE(review): nothing visible here guards the read
// or stops callers from modifying the slice; confirm
// how callers are expected to synchronize.
func Instances() []*Instance {
return instances
}
@ -616,7 +617,7 @@ func ValidateAndExecuteDirectives(cdyfile Input, inst *Instance, justValidate bo
return fmt.Errorf("error inspecting server blocks: %v", err)
}
diagnostics.Set("num_server_blocks", len(sblocks))
diagnostics.Set("http_num_server_blocks", len(sblocks))
return executeDirectives(inst, cdyfile.Path(), stype.Directives(), sblocks, justValidate)
}
@ -872,7 +873,7 @@ func Stop() error {
// explicitly like a common local hostname. addr must only
// be a host or a host:port combination.
func IsLoopback(addr string) bool {
host, _, err := net.SplitHostPort(addr)
host, _, err := net.SplitHostPort(strings.ToLower(addr))
if err != nil {
host = addr // happens if the addr is just a hostname
}

View file

@ -51,6 +51,9 @@ type tlsHandler struct {
// Halderman, et al. in "The Security Impact of HTTPS Interception" (NDSS '17):
// https://jhalderm.com/pub/papers/interception-ndss17.pdf
func (h *tlsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
// TODO: one request per connection, we should report UA in connection with
// handshake (reported in caddytls package) and our MITM assessment
if h.listener == nil {
h.next.ServeHTTP(w, r)
return
@ -100,12 +103,12 @@ func (h *tlsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
if checked {
r = r.WithContext(context.WithValue(r.Context(), MitmCtxKey, mitm))
if mitm {
go diagnostics.AppendUnique("mitm", "likely")
go diagnostics.AppendUnique("http_mitm", "likely")
} else {
go diagnostics.AppendUnique("mitm", "unlikely")
go diagnostics.AppendUnique("http_mitm", "unlikely")
}
} else {
go diagnostics.AppendUnique("mitm", "unknown")
go diagnostics.AppendUnique("http_mitm", "unknown")
}
if mitm && h.closeOnMITM {

View file

@ -29,6 +29,7 @@ import (
"github.com/mholt/caddy/caddyfile"
"github.com/mholt/caddy/caddyhttp/staticfiles"
"github.com/mholt/caddy/caddytls"
"github.com/mholt/caddy/diagnostics"
)
const serverType = "http"
@ -205,9 +206,34 @@ func (h *httpContext) InspectServerBlocks(sourceFile string, serverBlocks []cadd
// MakeServers uses the newly-created siteConfigs to
// create and return a list of server instances.
func (h *httpContext) MakeServers() ([]caddy.Server, error) {
// make sure TLS is disabled for explicitly-HTTP sites
// (necessary when HTTP address shares a block containing tls)
// make a rough estimate as to whether we're in a "production
// environment/system" - start by assuming that most production
// servers will set their default CA endpoint to a public,
// trusted CA (obviously not a perfect heuristic)
var looksLikeProductionCA bool
for _, publicCAEndpoint := range caddytls.KnownACMECAs {
if strings.Contains(caddytls.DefaultCAUrl, publicCAEndpoint) {
looksLikeProductionCA = true
break
}
}
var atLeastOneSiteLooksLikeProduction bool
for _, cfg := range h.siteConfigs {
// if we aren't sure yet whether it's a "production" server,
// continue to see if all the addresses (both sites and
// listeners) are loopback
if !atLeastOneSiteLooksLikeProduction {
if !caddy.IsLoopback(cfg.Addr.Host) &&
!caddy.IsLoopback(cfg.ListenHost) &&
(caddytls.QualifiesForManagedTLS(cfg) ||
caddytls.HostQualifies(cfg.Addr.Host)) {
atLeastOneSiteLooksLikeProduction = true
}
}
// make sure TLS is disabled for explicitly-HTTP sites
// (necessary when HTTP address shares a block containing tls)
if !cfg.TLS.Enabled {
continue
}
@ -246,6 +272,18 @@ func (h *httpContext) MakeServers() ([]caddy.Server, error) {
servers = append(servers, s)
}
// NOTE: This value is only a "good" guess. Quite often, development
// environments will use internal DNS or a local hosts file to serve
// real-looking domains in local development. We can't easily tell
// which without doing a DNS lookup, so this guess is definitely naive,
// and if we ever want a better guess, we will have to do DNS lookups.
deploymentGuess := "dev"
if looksLikeProductionCA && atLeastOneSiteLooksLikeProduction {
deploymentGuess = "production"
}
diagnostics.Set("http_deployment_guess", deploymentGuess)
diagnostics.Set("http_num_sites", len(h.siteConfigs))
return servers, nil
}

View file

@ -346,7 +346,9 @@ func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
}
}()
go diagnostics.AppendUnique("user_agent", r.Header.Get("User-Agent"))
// TODO: Somehow report UA string in conjunction with TLS handshake, if any (and just once per connection)
go diagnostics.AppendUnique("http_user_agent", r.Header.Get("User-Agent"))
go diagnostics.Increment("http_request_count")
// copy the original, unchanged URL into the context
// so it can be referenced by middlewares

View file

@ -26,6 +26,7 @@ import (
"sync"
"time"
"github.com/mholt/caddy/diagnostics"
"golang.org/x/crypto/ocsp"
)
@ -165,6 +166,7 @@ func (cfg *Config) CacheManagedCertificate(domain string) (Certificate, error) {
if err != nil {
return cert, err
}
diagnostics.Increment("tls_managed_cert_count")
return cfg.cacheCertificate(cert), nil
}
@ -179,6 +181,7 @@ func (cfg *Config) cacheUnmanagedCertificatePEMFile(certFile, keyFile string) er
return err
}
cfg.cacheCertificate(cert)
diagnostics.Increment("tls_manual_cert_count")
return nil
}
@ -192,6 +195,7 @@ func (cfg *Config) cacheUnmanagedCertificatePEMBytes(certBytes, keyBytes []byte)
return err
}
cfg.cacheCertificate(cert)
diagnostics.Increment("tls_manual_cert_count")
return nil
}

View file

@ -268,7 +268,7 @@ Attempts:
break
}
go diagnostics.Increment("acme_certificates_obtained")
go diagnostics.Increment("tls_acme_certs_obtained")
return nil
}
@ -340,8 +340,7 @@ func (c *ACMEClient) Renew(name string) error {
}
caddy.EmitEvent(caddy.CertRenewEvent, name)
go diagnostics.Increment("acme_certificates_obtained")
go diagnostics.Increment("acme_certificates_renewed")
go diagnostics.Increment("tls_acme_certs_renewed")
return saveCertResource(c.storage, newCertMeta)
}
@ -368,6 +367,8 @@ func (c *ACMEClient) Revoke(name string) error {
return err
}
go diagnostics.Increment("tls_acme_certs_revoked")
err = c.storage.DeleteSite(name)
if err != nil {
return errors.New("certificate revoked, but unable to delete certificate file: " + err.Error())
@ -419,3 +420,10 @@ func (c *nameCoordinator) Has(name string) bool {
c.mu.RUnlock()
return ok
}
// KnownACMECAs is a list of ACME directory endpoints of
// known, public, and trusted ACME-compatible certificate
// authorities. Each entry is a full directory URL; the
// only one listed so far is the Let's Encrypt ACME v2
// directory.
var KnownACMECAs = []string{
"https://acme-v02.api.letsencrypt.org/directory",
}

View file

@ -100,24 +100,31 @@ func (cg configGroup) GetConfigForClient(clientHello *tls.ClientHelloInfo) (*tls
//
// This method is safe for use as a tls.Config.GetCertificate callback.
func (cfg *Config) GetCertificate(clientHello *tls.ClientHelloInfo) (*tls.Certificate, error) {
go diagnostics.Append("client_hello", struct {
NoSNI bool `json:"no_sni,omitempty"`
CipherSuites []uint16 `json:"cipher_suites,omitempty"`
SupportedCurves []tls.CurveID `json:"curves,omitempty"`
SupportedPoints []uint8 `json:"points,omitempty"`
SignatureSchemes []tls.SignatureScheme `json:"sig_scheme,omitempty"`
ALPN []string `json:"alpn,omitempty"`
SupportedVersions []uint16 `json:"versions,omitempty"`
}{
NoSNI: clientHello.ServerName == "",
CipherSuites: clientHello.CipherSuites,
SupportedCurves: clientHello.SupportedCurves,
SupportedPoints: clientHello.SupportedPoints,
SignatureSchemes: clientHello.SignatureSchemes,
ALPN: clientHello.SupportedProtos,
SupportedVersions: clientHello.SupportedVersions,
})
// TODO: We need to collect this in a heavily de-duplicating way
// It would also be nice to associate a handshake with the UA string (but that is only for HTTP server type)
// go diagnostics.Append("tls_client_hello", struct {
// NoSNI bool `json:"no_sni,omitempty"`
// CipherSuites []uint16 `json:"cipher_suites,omitempty"`
// SupportedCurves []tls.CurveID `json:"curves,omitempty"`
// SupportedPoints []uint8 `json:"points,omitempty"`
// SignatureSchemes []tls.SignatureScheme `json:"sig_scheme,omitempty"`
// ALPN []string `json:"alpn,omitempty"`
// SupportedVersions []uint16 `json:"versions,omitempty"`
// }{
// NoSNI: clientHello.ServerName == "",
// CipherSuites: clientHello.CipherSuites,
// SupportedCurves: clientHello.SupportedCurves,
// SupportedPoints: clientHello.SupportedPoints,
// SignatureSchemes: clientHello.SignatureSchemes,
// ALPN: clientHello.SupportedProtos,
// SupportedVersions: clientHello.SupportedVersions,
// })
cert, err := cfg.getCertDuringHandshake(strings.ToLower(clientHello.ServerName), true, true)
if err == nil {
go diagnostics.Increment("tls_handshake_count")
} else {
go diagnostics.Append("tls_handshake_error", err.Error())
}
return &cert.Certificate, err
}

View file

@ -28,6 +28,7 @@ import (
"strings"
"github.com/mholt/caddy"
"github.com/mholt/caddy/diagnostics"
)
func init() {
@ -174,9 +175,11 @@ func setupTLS(c *caddy.Controller) error {
case "max_certs":
c.Args(&maxCerts)
config.OnDemand = true
diagnostics.Increment("tls_on_demand_count")
case "ask":
c.Args(&askURL)
config.OnDemand = true
diagnostics.Increment("tls_on_demand_count")
case "dns":
args := c.RemainingArgs()
if len(args) != 1 {
@ -251,6 +254,7 @@ func setupTLS(c *caddy.Controller) error {
return c.Errf("Unable to load certificate and key files for '%s': %v", c.Key, err)
}
log.Printf("[INFO] Successfully loaded TLS assets from %s and %s", certificateFile, keyFile)
diagnostics.Increment("tls_manual_cert_count")
}
// load a directory of certificates, if specified
@ -270,6 +274,7 @@ func setupTLS(c *caddy.Controller) error {
if err != nil {
return fmt.Errorf("self-signed: %v", err)
}
diagnostics.Increment("tls_self_signed_count")
}
return nil
@ -350,6 +355,7 @@ func loadCertsInDir(cfg *Config, c *caddy.Controller, dir string) error {
return c.Errf("%s: failed to load cert and key for '%s': %v", path, c.Key, err)
}
log.Printf("[INFO] Successfully loaded TLS assets from %s", path)
diagnostics.Increment("tls_manual_cert_count")
}
return nil
})

View file

@ -33,7 +33,7 @@ func Init(instanceID uuid.UUID) {
panic("already initialized")
}
if str := instanceID.String(); str == "" ||
instanceID.String() == "00000000-0000-0000-0000-000000000000" {
str == "00000000-0000-0000-0000-000000000000" {
panic("empty UUID")
}
instanceUUID = instanceID
@ -73,6 +73,10 @@ func StartEmitting() {
//
// It is a no-op if the package was never initialized
// or if emitting was never started.
//
// NOTE: This function is blocking. Run in a goroutine if
// you want to guarantee no blocking at critical times
// like exiting the program.
func StopEmitting() {
if !enabled {
return
@ -83,7 +87,12 @@ func StopEmitting() {
return
}
updateTimerMu.Unlock()
logEmit(true)
logEmit(true) // likely too early; may take minutes to return
}
// Reset empties the current payload buffer. It calls
// resetBuffer and discards the returned value, so
// whatever had been buffered is not emitted by this call.
func Reset() {
resetBuffer()
}
// Set puts a value in the buffer to be included
@ -142,7 +151,7 @@ func Append(key string, value interface{}) {
bufferMu.Unlock()
}
// AppendUnique adds value to a set namedkey.
// AppendUnique adds value to a set named key.
// Set items are unordered. Values in the set
// are unique, but how many times they are
// appended is counted.
@ -178,24 +187,23 @@ func AppendUnique(key string, value interface{}) {
bufferMu.Unlock()
}
// Increment adds 1 to a value named key.
// Add adds amount to a value named key.
// If it does not exist, it is created. If key
// maps to a type that is not an integer, a panic
// is logged, and this is a no-op.
// The work is delegated to atomicAdd, whose own
// comment says amount may be negative to subtract.
//
// NOTE(review): this comment used to say a new key is
// created "with a value of 1", which looks carried over
// from Increment; presumably a new key starts at amount.
// Confirm against atomicAdd.
func Add(key string, amount int) {
atomicAdd(key, amount)
}
// Increment is a shortcut for Add(key, 1).
func Increment(key string) {
incrementOrDecrement(key, true)
atomicAdd(key, 1)
}
// Decrement is the same as increment except
// it subtracts 1.
func Decrement(key string) {
incrementOrDecrement(key, false)
}
// inc == true: increment
// inc == false: decrement
func incrementOrDecrement(key string, inc bool) {
// atomicAdd adds amount (negative to subtract)
// to key.
func atomicAdd(key string, amount int) {
if !enabled {
return
}
@ -214,10 +222,6 @@ func incrementOrDecrement(key string, inc bool) {
}
bufferItemCount++
}
if inc {
buffer[key] = intVal + 1
} else {
buffer[key] = intVal - 1
}
buffer[key] = intVal + amount
bufferMu.Unlock()
}

View file

@ -48,14 +48,16 @@ import (
)
// logEmit calls emit and then logs the error, if any.
// See docs for emit.
func logEmit(final bool) {
err := emit(final)
if err != nil {
log.Printf("[ERROR] Sending diganostics: %v", err)
log.Printf("[ERROR] Sending diagnostics: %v", err)
}
}
// emit sends an update to the diagnostics server.
// Set final to true if this is the last call to emit.
// If final is true, no future updates will be scheduled.
// Otherwise, the next update will be scheduled.
func emit(final bool) error {
@ -136,9 +138,11 @@ func emit(final bool) error {
reply.NextUpdate = time.Duration(ra) * time.Second
}
}
log.Printf("[NOTICE] Sending diagnostics: we were too early; waiting %s before trying again", reply.NextUpdate)
time.Sleep(reply.NextUpdate)
continue
if !final {
log.Printf("[NOTICE] Sending diagnostics: we were too early; waiting %s before trying again", reply.NextUpdate)
time.Sleep(reply.NextUpdate)
continue
}
} else if resp.StatusCode >= 400 {
err = fmt.Errorf("diagnostics server returned status code %d", resp.StatusCode)
continue
@ -146,7 +150,7 @@ func emit(final bool) error {
break
}
if err == nil {
if err == nil && !final {
// (remember, if there was an error, we return it
// below, so it WILL get logged if it's supposed to)
log.Println("[INFO] Sending diagnostics: success")
@ -181,13 +185,7 @@ func emit(final bool) error {
// resulting byte slice is lost, the payload is
// gone with it.
func makePayloadAndResetBuffer() ([]byte, error) {
// make a local pointer to the buffer, then reset
// the buffer to an empty map to clear it out
bufferMu.Lock()
bufCopy := buffer
buffer = make(map[string]interface{})
bufferItemCount = 0
bufferMu.Unlock()
bufCopy := resetBuffer()
// encode payload in preparation for transmission
payload := Payload{
@ -198,6 +196,21 @@ func makePayloadAndResetBuffer() ([]byte, error) {
return json.Marshal(payload)
}
// resetBuffer swaps the shared buffer for a fresh,
// empty map and zeroes the buffer item count, all
// while holding bufferMu. The map that was in place
// before the swap is returned (the same map, not a
// copy of its contents) so the caller can keep using
// the old data locally.
func resetBuffer() map[string]interface{} {
	bufferMu.Lock()
	defer bufferMu.Unlock()

	previous := buffer
	buffer, bufferItemCount = make(map[string]interface{}), 0
	return previous
}
// Response contains the body of a response from the
// diagnostics server.
type Response struct {
@ -222,10 +235,28 @@ type Payload struct {
// The UTC timestamp of the transmission
Timestamp time.Time `json:"timestamp"`
// The timestamp before which the next update is expected
// (NOT populated by client - the server fills this in
// before it stores the data)
ExpectNext time.Time `json:"expect_next,omitempty"`
// The metrics
Data map[string]interface{} `json:"data,omitempty"`
}
// Int returns the value of the data keyed by key
// if it is an integer; otherwise it returns 0.
// A float64 value is also accepted and truncated
// toward zero, because integers come back as float64
// after a JSON round trip. A missing key, or a value
// of any other type, yields 0.
func (p Payload) Int(key string) int {
	// single lookup; the type switch binds the already-asserted value
	switch v := p.Data[key].(type) {
	case int:
		return v
	case float64: // after JSON-decoding, int becomes float64...
		return int(v)
	default:
		return 0
	}
}
// countingSet implements a set that counts how many
// times a key is inserted. It marshals to JSON in a
// way such that keys are converted to values next
@ -272,6 +303,7 @@ var (
// instanceUUID is the ID of the current instance.
// This MUST be set to emit diagnostics.
// This MUST NOT be openly exposed to clients, for privacy.
instanceUUID uuid.UUID
// enabled indicates whether the package has

View file

@ -19,6 +19,8 @@ import (
"os"
"os/signal"
"sync"
"github.com/mholt/caddy/diagnostics"
)
// TrapSignals creates signal handlers for all applicable signals for this
@ -52,6 +54,9 @@ func trapSignalsCrossPlatform() {
log.Println("[INFO] SIGINT: Shutting down")
diagnostics.AppendUnique("sigtrap", "SIGINT")
go diagnostics.StopEmitting() // not guaranteed to finish in time; that's OK (just don't block!)
// important cleanup actions before shutdown callbacks
for _, f := range OnProcessExit {
f()

View file

@ -21,6 +21,8 @@ import (
"os"
"os/signal"
"syscall"
"github.com/mholt/caddy/diagnostics"
)
// trapSignalsPosix captures POSIX-only signals.
@ -49,10 +51,15 @@ func trapSignalsPosix() {
log.Printf("[ERROR] SIGTERM stop: %v", err)
exitCode = 3
}
diagnostics.AppendUnique("sigtrap", "SIGTERM")
go diagnostics.StopEmitting() // won't finish in time, but that's OK - just don't block
os.Exit(exitCode)
case syscall.SIGUSR1:
log.Println("[INFO] SIGUSR1: Reloading")
go diagnostics.AppendUnique("sigtrap", "SIGUSR1")
// Start with the existing Caddyfile
caddyfileToUse, inst, err := getCurrentCaddyfile()
@ -84,12 +91,14 @@ func trapSignalsPosix() {
case syscall.SIGUSR2:
log.Println("[INFO] SIGUSR2: Upgrading")
go diagnostics.AppendUnique("sigtrap", "SIGUSR2")
if err := Upgrade(); err != nil {
log.Printf("[ERROR] SIGUSR2: upgrading: %v", err)
}
case syscall.SIGHUP:
// ignore; this signal is sometimes sent outside of the user's control
go diagnostics.AppendUnique("sigtrap", "SIGHUP")
}
}
}()