1
1
Fork 0
mirror of https://github.com/mjl-/mox.git synced 2025-04-21 21:40:01 +03:00

Add Prometheus metrics for errors when getting certificates through ACME (typically from Let's Encrypt)

and add an alerting rule for it.
We certainly want a heads-up when there are issues with the certificates.
This commit is contained in:
Mechiel Lukkien 2025-02-06 15:12:36 +01:00
parent 1277d78cb1
commit e5e15a3965
No known key found for this signature in database
2 changed files with 29 additions and 2 deletions

View file

@ -42,6 +42,24 @@ import (
)
var (
metricMissingServerName = promauto.NewCounter(
prometheus.CounterOpts{
Name: "mox_autotls_missing_servername_total",
Help: "Number of failed TLS connection attempts with missing SNI where no fallback hostname was configured.",
},
)
metricUnknownServerName = promauto.NewCounter(
prometheus.CounterOpts{
Name: "mox_autotls_unknown_servername_total",
Help: "Number of failed TLS connection attempts with an unrecognized SNI name where no fallback hostname was configured.",
},
)
metricCertRequestErrors = promauto.NewCounter(
prometheus.CounterOpts{
Name: "mox_autotls_cert_request_errors_total",
Help: "Number of errors trying to retrieve a certificate for a hostname, possibly ACME verification errors.",
},
)
metricCertput = promauto.NewCounter(
prometheus.CounterOpts{
Name: "mox_autotls_certput_total",
@ -171,7 +189,7 @@ func Load(name, acmeDir, contactEmail, directoryURL string, eabKeyID string, eab
return a, nil
}
// logigngGetCertificate is a helper to implement crypto/tls.Config.GetCertificate,
// loggingGetCertificate is a helper to implement crypto/tls.Config.GetCertificate,
// optionally falling back to a certificate for fallbackHostname in case SNI is
// absent or for an unknown hostname.
func (m *Manager) loggingGetCertificate(hello *tls.ClientHelloInfo, fallbackHostname dns.Domain, fallbackNoSNI, fallbackUnknownSNI bool) (*tls.Certificate, error) {
@ -188,6 +206,7 @@ func (m *Manager) loggingGetCertificate(hello *tls.ClientHelloInfo, fallbackHost
// Handle missing SNI to prevent logging an error below.
if hello.ServerName == "" {
metricMissingServerName.Inc()
log.Debug("tls request without sni servername, rejecting", slog.Any("localaddr", hello.Conn.LocalAddr()), slog.Any("supportedprotos", hello.SupportedProtos))
return nil, nil
}
@ -195,6 +214,7 @@ func (m *Manager) loggingGetCertificate(hello *tls.ClientHelloInfo, fallbackHost
cert, err := m.Manager.GetCertificate(hello)
if err != nil && errors.Is(err, errHostNotAllowed) {
if !fallbackUnknownSNI {
metricUnknownServerName.Inc()
log.Debugx("requesting certificate", err, slog.String("host", hello.ServerName))
return nil, nil
}
@ -203,12 +223,14 @@ func (m *Manager) loggingGetCertificate(hello *tls.ClientHelloInfo, fallbackHost
hello.ServerName = fallbackHostname.ASCII
cert, err = m.Manager.GetCertificate(hello)
if err != nil {
metricCertRequestErrors.Inc()
log.Errorx("requesting certificate for fallback hostname", err, slog.String("host", hello.ServerName))
} else {
log.Debugx("requesting certificate for fallback hostname", err, slog.String("host", hello.ServerName))
log.Debug("using certificate for fallback hostname", slog.String("host", hello.ServerName))
}
return cert, err
} else if err != nil {
metricCertRequestErrors.Inc()
log.Errorx("requesting certificate", err, slog.String("host", hello.ServerName))
}
return cert, err

View file

@ -8,6 +8,11 @@ groups:
annotations:
summary: unhandled panic
- alert: mox-acme-request-cert-errors
expr: increase(mox_autotls_cert_request_errors_total[1h]) > 0
annotations:
summary: errors requesting tls certificates with acme
- alert: mox-ip-on-dns-blocklist
expr: mox_dnsbl_ips_success < 1
annotations: