mirror of
https://github.com/mjl-/mox.git
synced 2024-12-26 16:33:47 +03:00
initialize metric mox_panic_total with 0, so the alerting rule also catches the first panic for a label
Prometheus' increase() and rate() don't seem to assume a previous value of 0 when a vector gets its first value for a label. You would think that an increase() on a first-value mox_panic_total{"..."}=1 would return 1, and similarly for rate(), but that doesn't appear to be the behaviour. So we just explicitly initialize the count to 0 for each possible label value. Mox has more vector metrics, but panics feel like the most important, and it's too much code to initialize them all, for all combinations of label values. There is probably a better way that fixes this for all cases...
This commit is contained in:
parent
af71e9855b
commit
3620d6f05e
16 changed files with 75 additions and 22 deletions
2
ctl.go
2
ctl.go
|
@ -282,7 +282,7 @@ func servectl(ctx context.Context, log *mlog.Log, conn net.Conn, shutdown func()
|
||||||
}
|
}
|
||||||
log.Error("servectl panic", mlog.Field("err", x), mlog.Field("cmd", ctl.cmd))
|
log.Error("servectl panic", mlog.Field("err", x), mlog.Field("cmd", ctl.cmd))
|
||||||
debug.PrintStack()
|
debug.PrintStack()
|
||||||
metrics.PanicInc("ctl")
|
metrics.PanicInc(metrics.Ctl)
|
||||||
}()
|
}()
|
||||||
|
|
||||||
defer conn.Close()
|
defer conn.Close()
|
||||||
|
|
|
@ -41,7 +41,7 @@ var (
|
||||||
metricDKIMSign = promauto.NewCounterVec(
|
metricDKIMSign = promauto.NewCounterVec(
|
||||||
prometheus.CounterOpts{
|
prometheus.CounterOpts{
|
||||||
Name: "mox_dkim_sign_total",
|
Name: "mox_dkim_sign_total",
|
||||||
Help: "DKIM messages signings.",
|
Help: "DKIM messages signings, label key is the type of key, rsa or ed25519.",
|
||||||
},
|
},
|
||||||
[]string{
|
[]string{
|
||||||
"key",
|
"key",
|
||||||
|
|
|
@ -682,7 +682,7 @@ func serve(listenerName string, cid int64, tlsConfig *tls.Config, nc net.Conn, x
|
||||||
} else {
|
} else {
|
||||||
c.log.Error("unhandled panic", mlog.Field("err", x))
|
c.log.Error("unhandled panic", mlog.Field("err", x))
|
||||||
debug.PrintStack()
|
debug.PrintStack()
|
||||||
metrics.PanicInc("imapserver")
|
metrics.PanicInc(metrics.Imapserver)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
|
|
@ -256,7 +256,7 @@ func importctl(ctx context.Context, ctl *ctl, mbox bool) {
|
||||||
if x != ctl.x {
|
if x != ctl.x {
|
||||||
ctl.log.Error("import error", mlog.Field("panic", fmt.Errorf("%v", x)))
|
ctl.log.Error("import error", mlog.Field("panic", fmt.Errorf("%v", x)))
|
||||||
debug.PrintStack()
|
debug.PrintStack()
|
||||||
metrics.PanicInc("import")
|
metrics.PanicInc(metrics.Import)
|
||||||
} else {
|
} else {
|
||||||
ctl.log.Error("import error")
|
ctl.log.Error("import error")
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,6 +15,59 @@ var metricPanic = promauto.NewCounterVec(
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
func PanicInc(pkg string) {
|
type Panic string
|
||||||
metricPanic.WithLabelValues(pkg).Inc()
|
|
||||||
|
const (
|
||||||
|
Ctl Panic = "ctl"
|
||||||
|
Import Panic = "import"
|
||||||
|
Serve Panic = "serve"
|
||||||
|
Imapserver Panic = "imapserver"
|
||||||
|
Mtastsdb Panic = "mtastsdb"
|
||||||
|
Queue Panic = "queue"
|
||||||
|
Smtpclient Panic = "smtpclient"
|
||||||
|
Smtpserver Panic = "smtpserver"
|
||||||
|
Dkimverify Panic = "dkimverify"
|
||||||
|
Spfverify Panic = "spfverify"
|
||||||
|
Upgradethreads Panic = "upgradethreads"
|
||||||
|
Importmanage Panic = "importmanage"
|
||||||
|
Importmessages Panic = "importmessages"
|
||||||
|
Webadmin Panic = "webadmin"
|
||||||
|
Webmailsendevent Panic = "webmailsendevent"
|
||||||
|
Webmail Panic = "webmail"
|
||||||
|
Webmailrequest Panic = "webmailrequest"
|
||||||
|
Webmailquery Panic = "webmailquery"
|
||||||
|
Webmailhandle Panic = "webmailhandle"
|
||||||
|
)
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
// Ensure the panic counts are initialized to 0, so the query for change also picks
|
||||||
|
// up the first panic.
|
||||||
|
names := []Panic{
|
||||||
|
Ctl,
|
||||||
|
Import,
|
||||||
|
Serve,
|
||||||
|
Imapserver,
|
||||||
|
Mtastsdb,
|
||||||
|
Queue,
|
||||||
|
Smtpclient,
|
||||||
|
Smtpserver,
|
||||||
|
Dkimverify,
|
||||||
|
Spfverify,
|
||||||
|
Upgradethreads,
|
||||||
|
Importmanage,
|
||||||
|
Importmessages,
|
||||||
|
Webadmin,
|
||||||
|
Webmailsendevent,
|
||||||
|
Webmail,
|
||||||
|
Webmailrequest,
|
||||||
|
Webmailquery,
|
||||||
|
Webmailhandle,
|
||||||
|
}
|
||||||
|
for _, name := range names {
|
||||||
|
metricPanic.WithLabelValues(string(name)).Add(0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func PanicInc(name Panic) {
|
||||||
|
metricPanic.WithLabelValues(string(name)).Inc()
|
||||||
}
|
}
|
||||||
|
|
|
@ -112,7 +112,7 @@ func refreshDomain(ctx context.Context, db *bstore.DB, resolver dns.Resolver, pr
|
||||||
// Should not happen, but make sure errors don't take down the application.
|
// Should not happen, but make sure errors don't take down the application.
|
||||||
log.Error("refresh1", mlog.Field("panic", x))
|
log.Error("refresh1", mlog.Field("panic", x))
|
||||||
debug.PrintStack()
|
debug.PrintStack()
|
||||||
metrics.PanicInc("mtastsdb")
|
metrics.PanicInc(metrics.Mtastsdb)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
|
|
@ -497,7 +497,7 @@ func deliver(resolver dns.Resolver, m Msg) {
|
||||||
if x != nil {
|
if x != nil {
|
||||||
qlog.Error("deliver panic", mlog.Field("panic", x))
|
qlog.Error("deliver panic", mlog.Field("panic", x))
|
||||||
debug.PrintStack()
|
debug.PrintStack()
|
||||||
metrics.PanicInc("queue")
|
metrics.PanicInc(metrics.Queue)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
|
2
serve.go
2
serve.go
|
@ -44,7 +44,7 @@ func monitorDNSBL(log *mlog.Log) {
|
||||||
if x != nil {
|
if x != nil {
|
||||||
log.Error("monitordnsbl panic", mlog.Field("panic", x))
|
log.Error("monitordnsbl panic", mlog.Field("panic", x))
|
||||||
debug.PrintStack()
|
debug.PrintStack()
|
||||||
metrics.PanicInc("serve")
|
metrics.PanicInc(metrics.Serve)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
|
|
@ -457,7 +457,7 @@ func (c *Client) recover(rerr *error) {
|
||||||
}
|
}
|
||||||
cerr, ok := x.(Error)
|
cerr, ok := x.(Error)
|
||||||
if !ok {
|
if !ok {
|
||||||
metrics.PanicInc("smtpclient")
|
metrics.PanicInc(metrics.Smtpclient)
|
||||||
panic(x)
|
panic(x)
|
||||||
}
|
}
|
||||||
*rerr = cerr
|
*rerr = cerr
|
||||||
|
|
|
@ -595,7 +595,7 @@ func serve(listenerName string, cid int64, hostname dns.Domain, tlsConfig *tls.C
|
||||||
} else {
|
} else {
|
||||||
c.log.Error("unhandled panic", mlog.Field("err", x))
|
c.log.Error("unhandled panic", mlog.Field("err", x))
|
||||||
debug.PrintStack()
|
debug.PrintStack()
|
||||||
metrics.PanicInc("smtpserver")
|
metrics.PanicInc(metrics.Smtpserver)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
@ -1903,7 +1903,7 @@ func (c *conn) deliver(ctx context.Context, recvHdrFor func(string) string, msgW
|
||||||
if x != nil {
|
if x != nil {
|
||||||
c.log.Error("dkim verify panic", mlog.Field("err", x))
|
c.log.Error("dkim verify panic", mlog.Field("err", x))
|
||||||
debug.PrintStack()
|
debug.PrintStack()
|
||||||
metrics.PanicInc("dkimverify")
|
metrics.PanicInc(metrics.Dkimverify)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
|
@ -1939,7 +1939,7 @@ func (c *conn) deliver(ctx context.Context, recvHdrFor func(string) string, msgW
|
||||||
if x != nil {
|
if x != nil {
|
||||||
c.log.Error("spf verify panic", mlog.Field("err", x))
|
c.log.Error("spf verify panic", mlog.Field("err", x))
|
||||||
debug.PrintStack()
|
debug.PrintStack()
|
||||||
metrics.PanicInc("spfverify")
|
metrics.PanicInc(metrics.Spfverify)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
|
|
|
@ -855,7 +855,7 @@ func OpenAccountDB(accountDir, accountName string) (a *Account, rerr error) {
|
||||||
if x != nil {
|
if x != nil {
|
||||||
xlog.Error("upgradeThreads panic", mlog.Field("err", x))
|
xlog.Error("upgradeThreads panic", mlog.Field("err", x))
|
||||||
debug.PrintStack()
|
debug.PrintStack()
|
||||||
metrics.PanicInc("upgradeThreads")
|
metrics.PanicInc(metrics.Upgradethreads)
|
||||||
acc.threadsErr = fmt.Errorf("panic during upgradeThreads: %v", x)
|
acc.threadsErr = fmt.Errorf("panic during upgradeThreads: %v", x)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -69,7 +69,7 @@ func ImportManage() {
|
||||||
if x := recover(); x != nil {
|
if x := recover(); x != nil {
|
||||||
log.Error("import manage panic", mlog.Field("err", x))
|
log.Error("import manage panic", mlog.Field("err", x))
|
||||||
debug.PrintStack()
|
debug.PrintStack()
|
||||||
metrics.PanicInc("importmanage")
|
metrics.PanicInc(metrics.Importmanage)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
@ -342,7 +342,7 @@ func importMessages(ctx context.Context, log *mlog.Log, token string, acc *store
|
||||||
} else {
|
} else {
|
||||||
log.Error("import panic", mlog.Field("err", x))
|
log.Error("import panic", mlog.Field("err", x))
|
||||||
debug.PrintStack()
|
debug.PrintStack()
|
||||||
metrics.PanicInc("importmessages")
|
metrics.PanicInc(metrics.Importmessages)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
|
|
@ -367,7 +367,7 @@ func logPanic(ctx context.Context) {
|
||||||
log := xlog.WithContext(ctx)
|
log := xlog.WithContext(ctx)
|
||||||
log.Error("recover from panic", mlog.Field("panic", x))
|
log.Error("recover from panic", mlog.Field("panic", x))
|
||||||
debug.PrintStack()
|
debug.PrintStack()
|
||||||
metrics.PanicInc("webadmin")
|
metrics.PanicInc(metrics.Webadmin)
|
||||||
}
|
}
|
||||||
|
|
||||||
// return IPs we may be listening on.
|
// return IPs we may be listening on.
|
||||||
|
|
|
@ -84,7 +84,7 @@ func (ew *eventWriter) xsendEvent(ctx context.Context, log *mlog.Log, name strin
|
||||||
if x != nil {
|
if x != nil {
|
||||||
log.WithContext(ctx).Error("writeEvent panic", mlog.Field("err", x))
|
log.WithContext(ctx).Error("writeEvent panic", mlog.Field("err", x))
|
||||||
debug.PrintStack()
|
debug.PrintStack()
|
||||||
metrics.PanicInc("webmail-sendEvent")
|
metrics.PanicInc(metrics.Webmailsendevent)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
|
|
@ -568,7 +568,7 @@ func serveEvents(ctx context.Context, log *mlog.Log, w http.ResponseWriter, r *h
|
||||||
} else {
|
} else {
|
||||||
log.WithContext(ctx).Error("serveEvents panic", mlog.Field("err", x))
|
log.WithContext(ctx).Error("serveEvents panic", mlog.Field("err", x))
|
||||||
debug.PrintStack()
|
debug.PrintStack()
|
||||||
metrics.PanicInc("webmail")
|
metrics.PanicInc(metrics.Webmail)
|
||||||
panic(x)
|
panic(x)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
@ -1216,7 +1216,7 @@ func viewRequestTx(ctx context.Context, log *mlog.Log, acc *store.Account, tx *b
|
||||||
if x != nil {
|
if x != nil {
|
||||||
log.WithContext(ctx).Error("viewRequestTx panic", mlog.Field("err", x))
|
log.WithContext(ctx).Error("viewRequestTx panic", mlog.Field("err", x))
|
||||||
debug.PrintStack()
|
debug.PrintStack()
|
||||||
metrics.PanicInc("webmail-request")
|
metrics.PanicInc(metrics.Webmailrequest)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
@ -1300,7 +1300,7 @@ func queryMessages(ctx context.Context, log *mlog.Log, acc *store.Account, tx *b
|
||||||
log.WithContext(ctx).Error("queryMessages panic", mlog.Field("err", x))
|
log.WithContext(ctx).Error("queryMessages panic", mlog.Field("err", x))
|
||||||
debug.PrintStack()
|
debug.PrintStack()
|
||||||
mrc <- msgResp{err: fmt.Errorf("query failed")}
|
mrc <- msgResp{err: fmt.Errorf("query failed")}
|
||||||
metrics.PanicInc("webmail-query")
|
metrics.PanicInc(metrics.Webmailquery)
|
||||||
}
|
}
|
||||||
|
|
||||||
close(mrc)
|
close(mrc)
|
||||||
|
|
|
@ -381,7 +381,7 @@ func handle(apiHandler http.Handler, w http.ResponseWriter, r *http.Request) {
|
||||||
if !ok {
|
if !ok {
|
||||||
log.WithContext(ctx).Error("handle panic", mlog.Field("err", x))
|
log.WithContext(ctx).Error("handle panic", mlog.Field("err", x))
|
||||||
debug.PrintStack()
|
debug.PrintStack()
|
||||||
metrics.PanicInc("webmail-handle")
|
metrics.PanicInc(metrics.Webmailhandle)
|
||||||
panic(x)
|
panic(x)
|
||||||
}
|
}
|
||||||
if strings.HasPrefix(err.Code, "user:") {
|
if strings.HasPrefix(err.Code, "user:") {
|
||||||
|
|
Loading…
Reference in a new issue