---
# example prometheus alerting rules file for mox.
#
# most rules below have the form "increase(<counter>[1h]) > 0": they fire when
# the counter grew during the last hour.

groups:
  - name: mox
    rules:
      - alert: mox-panic
        expr: increase(mox_panic_total[1h]) > 0
        annotations:
          summary: unhandled panic

      - alert: mox-ip-on-dns-blocklist
        expr: mox_dnsbl_ips_success < 1
        annotations:
          summary: ip is on dns blocklist

      # the attempt!~"[123]" matcher excludes attempts 1, 2 and 3, so only a
      # non-ok result of the 4th or a later attempt is counted.
      - alert: mox-queue-failing-delivery
        expr: increase(mox_queue_delivery_duration_seconds_count{attempt!~"[123]",result!="ok"}[1h]) > 0
        annotations:
          summary: delivery from queue had a 4th or later attempt fail

      - alert: mox-smtpserver-errors
        expr: increase(mox_smtpserver_errors_total[1h]) > 0
        annotations:
          summary: errors in smtpserver operation

      # code=~"5.." selects three-character status codes starting with 5.
      - alert: mox-webserver-errors
        expr: increase(mox_httpserver_request_duration_seconds_count{code=~"5.."}[1h]) > 0
        annotations:
          summary: http 5xx responses from webserver

      # "for: 2h" keeps the alert pending until the expression has been true
      # for two hours without interruption.
      - alert: mox-queue-hold
        expr: mox_queue_hold > 0
        for: 2h
        annotations:
          summary: messages on hold in queue for at least two hours

      # result=~".*error" matches every result label value ending in "error".
      - alert: mox-submission-errors
        expr: increase(mox_smtpserver_submission_total{result=~".*error"}[1h]) > 0
        annotations:
          summary: smtp submission errors

      - alert: mox-delivery-errors
        expr: increase(mox_smtpserver_delivery_total{result=~".*error"}[1h]) > 0
        annotations:
          summary: smtp delivery errors

      - alert: mox-webmail-errors
        expr: increase(mox_webmail_errors_total[1h]) > 0
        annotations:
          summary: errors in webmail operation

      - alert: mox-webmailsubmission-errors
        expr: increase(mox_webmail_submission_total{result=~".*error"}[1h]) > 0
        annotations:
          summary: webmail submission errors

      # restricted to apis whose name starts with "mox" and to error codes
      # starting with "server:".
      - alert: mox-sherpa-server-errors
        expr: increase(sherpa_errors_total{api=~"mox.*",code=~"server:.*"}[1h]) > 0
        annotations:
          summary: sherpa web api server errors

      # the alerts below can be used to keep a closer eye or when starting to
      # use mox, but can be noisy, or you may not be able to prevent them.

      # the expression is folded over several lines for readability; yaml joins
      # the lines with single spaces, giving one promql expression.
      - alert: mox-incoming-delivery-starttls-errors
        expr: >-
          sum by (instance) (increase(mox_smtpserver_delivery_starttls_errors_total[1h]))
          /
          sum by (instance) (increase(mox_smtpserver_delivery_starttls_total[1h]))
          > 0.1
        annotations:
          summary: starttls handshake errors for >10% of incoming smtp delivery connections

      # change period to match your expected incoming message rate.
      # NOTE(review): when no series with result="delivered" exists, the
      # expression returns no data and the alert cannot fire - confirm whether
      # that case needs an additional absent() check.
      - alert: mox-no-deliveries
        expr: sum by (instance) (rate(mox_smtpserver_delivery_total{result="delivered"}[6h])) == 0
        annotations:
          summary: no mail delivered for 6 hours

      # may be noisy. anyone can send these reports. you may want to silence it.
      - alert: mox-tlsrpt-errors
        expr: increase(mox_tlsrptdb_session_total{type!="success"}[1h]) > 0
        annotations:
          summary: tls reports about unsuccessful tls connections

      # may be noisy. can be caused by someone trying to send email as you. and
      # anyone can send these reports. you are not in control over when this
      # fires, so you may want to silence it.
      - alert: mox-dmarc-rejects
        expr: increase(mox_dmarcdb_policy_evaluated_total{disposition!="none"}[1h]) > 0
        annotations:
          summary: dmarc reports about rejects/quarantines due to failing dmarc check

      # may be noisy.
      - alert: mox-auth-ratelimited
        expr: increase(mox_authentication_ratelimited_total[1h]) > 0
        annotations:
          summary: authentication connections/requests were rate limited