2019-09-03 21:10:11 +03:00
|
|
|
// Copyright 2015 Matthew Holt and The Caddy Authors
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package reverseproxy
|
|
|
|
|
|
|
|
import (
|
2019-09-05 22:14:39 +03:00
|
|
|
"context"
|
2019-09-03 21:10:11 +03:00
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"io/ioutil"
|
|
|
|
"net"
|
|
|
|
"net/http"
|
|
|
|
"net/url"
|
|
|
|
"regexp"
|
|
|
|
"strconv"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/caddyserver/caddy/v2"
|
|
|
|
"github.com/caddyserver/caddy/v2/modules/caddyhttp"
|
2019-10-30 01:02:40 +03:00
|
|
|
"go.uber.org/zap"
|
2019-09-03 21:10:11 +03:00
|
|
|
)
|
|
|
|
|
2019-09-04 01:56:09 +03:00
|
|
|
// HealthChecks holds configuration related to health checking.
|
2019-09-03 21:10:11 +03:00
|
|
|
type HealthChecks struct {
|
|
|
|
Active *ActiveHealthChecks `json:"active,omitempty"`
|
|
|
|
Passive *PassiveHealthChecks `json:"passive,omitempty"`
|
|
|
|
}
|
|
|
|
|
2019-09-04 01:56:09 +03:00
|
|
|
// ActiveHealthChecks holds configuration related to active
|
|
|
|
// health checks (that is, health checks which occur in a
|
|
|
|
// background goroutine independently).
|
2019-09-03 21:10:11 +03:00
|
|
|
type ActiveHealthChecks struct {
|
|
|
|
Path string `json:"path,omitempty"`
|
|
|
|
Port int `json:"port,omitempty"`
|
2019-10-05 02:21:38 +03:00
|
|
|
Headers http.Header `json:"headers,omitempty"`
|
2019-09-03 21:10:11 +03:00
|
|
|
Interval caddy.Duration `json:"interval,omitempty"`
|
|
|
|
Timeout caddy.Duration `json:"timeout,omitempty"`
|
|
|
|
MaxSize int64 `json:"max_size,omitempty"`
|
|
|
|
ExpectStatus int `json:"expect_status,omitempty"`
|
|
|
|
ExpectBody string `json:"expect_body,omitempty"`
|
|
|
|
|
|
|
|
stopChan chan struct{}
|
|
|
|
httpClient *http.Client
|
|
|
|
bodyRegexp *regexp.Regexp
|
2019-10-30 01:02:40 +03:00
|
|
|
logger *zap.Logger
|
2019-09-03 21:10:11 +03:00
|
|
|
}
|
|
|
|
|
2019-09-04 01:56:09 +03:00
|
|
|
// PassiveHealthChecks holds configuration related to passive
|
|
|
|
// health checks (that is, health checks which occur during
|
|
|
|
// the normal flow of request proxying).
|
2019-09-03 21:10:11 +03:00
|
|
|
type PassiveHealthChecks struct {
|
|
|
|
MaxFails int `json:"max_fails,omitempty"`
|
|
|
|
FailDuration caddy.Duration `json:"fail_duration,omitempty"`
|
|
|
|
UnhealthyRequestCount int `json:"unhealthy_request_count,omitempty"`
|
|
|
|
UnhealthyStatus []int `json:"unhealthy_status,omitempty"`
|
|
|
|
UnhealthyLatency caddy.Duration `json:"unhealthy_latency,omitempty"`
|
2019-10-30 01:02:40 +03:00
|
|
|
|
|
|
|
logger *zap.Logger
|
2019-09-03 21:10:11 +03:00
|
|
|
}
|
|
|
|
|
2019-09-04 04:06:54 +03:00
|
|
|
// CircuitBreaker is implemented by types that can serve the
// health checker as an early-warning system, signalling that
// a backend is becoming overloaded.
type CircuitBreaker interface {
	// OK reports the circuit breaker's current verdict.
	OK() bool

	// RecordMetric feeds the status code and latency of one
	// response into the circuit breaker.
	RecordMetric(statusCode int, latency time.Duration)
}
|
|
|
|
|
2019-09-04 01:56:09 +03:00
|
|
|
// activeHealthChecker runs active health checks on a
|
|
|
|
// regular basis and blocks until
|
|
|
|
// h.HealthChecks.Active.stopChan is closed.
|
2019-09-03 21:10:11 +03:00
|
|
|
func (h *Handler) activeHealthChecker() {
|
|
|
|
ticker := time.NewTicker(time.Duration(h.HealthChecks.Active.Interval))
|
|
|
|
h.doActiveHealthChecksForAllHosts()
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-ticker.C:
|
|
|
|
h.doActiveHealthChecksForAllHosts()
|
|
|
|
case <-h.HealthChecks.Active.stopChan:
|
|
|
|
ticker.Stop()
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-09-04 01:56:09 +03:00
|
|
|
// doActiveHealthChecksForAllHosts immediately performs a
|
|
|
|
// health checks for all hosts in the global repository.
|
2019-09-03 21:10:11 +03:00
|
|
|
func (h *Handler) doActiveHealthChecksForAllHosts() {
|
|
|
|
hosts.Range(func(key, value interface{}) bool {
|
2019-09-05 22:14:39 +03:00
|
|
|
networkAddr := key.(string)
|
2019-09-03 21:10:11 +03:00
|
|
|
host := value.(Host)
|
|
|
|
|
2019-09-05 22:14:39 +03:00
|
|
|
go func(networkAddr string, host Host) {
|
2019-11-12 01:33:38 +03:00
|
|
|
addr, err := caddy.ParseNetworkAddress(networkAddr)
|
2019-09-03 21:10:11 +03:00
|
|
|
if err != nil {
|
2019-10-30 01:02:40 +03:00
|
|
|
h.HealthChecks.Active.logger.Error("bad network address",
|
|
|
|
zap.String("address", networkAddr),
|
|
|
|
zap.Error(err),
|
|
|
|
)
|
2019-09-05 22:14:39 +03:00
|
|
|
return
|
2019-09-03 21:10:11 +03:00
|
|
|
}
|
2019-11-12 01:33:38 +03:00
|
|
|
if addr.PortRangeSize() != 1 {
|
2019-10-30 01:02:40 +03:00
|
|
|
h.HealthChecks.Active.logger.Error("multiple addresses (upstream must map to only one address)",
|
|
|
|
zap.String("address", networkAddr),
|
|
|
|
)
|
2019-09-05 22:14:39 +03:00
|
|
|
return
|
|
|
|
}
|
2019-11-12 01:33:38 +03:00
|
|
|
hostAddr := addr.JoinHostPort(0)
|
|
|
|
if addr.Network == "unix" || addr.Network == "unixgram" || addr.Network == "unixpacket" {
|
2019-09-05 22:14:39 +03:00
|
|
|
// this will be used as the Host portion of a http.Request URL, and
|
|
|
|
// paths to socket files would produce an error when creating URL,
|
2019-09-14 22:25:26 +03:00
|
|
|
// so use a fake Host value instead; unix sockets are usually local
|
|
|
|
hostAddr = "localhost"
|
2019-09-05 22:14:39 +03:00
|
|
|
}
|
2019-11-12 01:33:38 +03:00
|
|
|
err = h.doActiveHealthCheck(DialInfo{Network: addr.Network, Address: hostAddr}, hostAddr, host)
|
2019-09-05 22:14:39 +03:00
|
|
|
if err != nil {
|
2019-10-30 01:02:40 +03:00
|
|
|
h.HealthChecks.Active.logger.Error("active health check failed",
|
|
|
|
zap.String("address", networkAddr),
|
|
|
|
zap.Error(err),
|
|
|
|
)
|
2019-09-05 22:14:39 +03:00
|
|
|
}
|
|
|
|
}(networkAddr, host)
|
2019-09-03 21:10:11 +03:00
|
|
|
|
|
|
|
// continue to iterate all hosts
|
|
|
|
return true
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
// doActiveHealthCheck performs a health check to host which
|
|
|
|
// can be reached at address hostAddr. The actual address for
|
|
|
|
// the request will be built according to active health checker
|
|
|
|
// config. The health status of the host will be updated
|
|
|
|
// according to whether it passes the health check. An error is
|
|
|
|
// returned only if the health check fails to occur or if marking
|
|
|
|
// the host's health status fails.
|
2019-09-05 22:14:39 +03:00
|
|
|
func (h *Handler) doActiveHealthCheck(dialInfo DialInfo, hostAddr string, host Host) error {
|
|
|
|
// create the URL for the request that acts as a health check
|
|
|
|
scheme := "http"
|
|
|
|
if ht, ok := h.Transport.(*http.Transport); ok && ht.TLSClientConfig != nil {
|
|
|
|
// this is kind of a hacky way to know if we should use HTTPS, but whatever
|
|
|
|
scheme = "https"
|
2019-09-03 21:10:11 +03:00
|
|
|
}
|
2019-09-05 22:14:39 +03:00
|
|
|
u := &url.URL{
|
|
|
|
Scheme: scheme,
|
|
|
|
Host: hostAddr,
|
|
|
|
Path: h.HealthChecks.Active.Path,
|
2019-09-03 21:10:11 +03:00
|
|
|
}
|
2019-09-05 22:14:39 +03:00
|
|
|
|
|
|
|
// adjust the port, if configured to be different
|
2019-09-03 21:10:11 +03:00
|
|
|
if h.HealthChecks.Active.Port != 0 {
|
|
|
|
portStr := strconv.Itoa(h.HealthChecks.Active.Port)
|
2019-09-05 22:14:39 +03:00
|
|
|
host, _, err := net.SplitHostPort(hostAddr)
|
|
|
|
if err != nil {
|
|
|
|
host = hostAddr
|
|
|
|
}
|
|
|
|
u.Host = net.JoinHostPort(host, portStr)
|
2019-09-03 21:10:11 +03:00
|
|
|
}
|
|
|
|
|
2019-09-05 22:14:39 +03:00
|
|
|
// attach dialing information to this request
|
|
|
|
ctx := context.Background()
|
|
|
|
ctx = context.WithValue(ctx, caddy.ReplacerCtxKey, caddy.NewReplacer())
|
|
|
|
ctx = context.WithValue(ctx, DialInfoCtxKey, dialInfo)
|
|
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil)
|
2019-09-03 21:10:11 +03:00
|
|
|
if err != nil {
|
2019-09-05 22:14:39 +03:00
|
|
|
return fmt.Errorf("making request: %v", err)
|
2019-09-03 21:10:11 +03:00
|
|
|
}
|
2019-10-05 02:21:38 +03:00
|
|
|
for key, hdrs := range h.HealthChecks.Active.Headers {
|
|
|
|
req.Header[key] = hdrs
|
|
|
|
}
|
2019-09-03 21:10:11 +03:00
|
|
|
|
2019-09-05 22:14:39 +03:00
|
|
|
// do the request, being careful to tame the response body
|
2019-09-03 21:10:11 +03:00
|
|
|
resp, err := h.HealthChecks.Active.httpClient.Do(req)
|
|
|
|
if err != nil {
|
2019-10-30 01:02:40 +03:00
|
|
|
h.HealthChecks.Active.logger.Info("HTTP request failed",
|
|
|
|
zap.String("host", hostAddr),
|
|
|
|
zap.Error(err),
|
|
|
|
)
|
2019-09-03 21:10:11 +03:00
|
|
|
_, err2 := host.SetHealthy(false)
|
|
|
|
if err2 != nil {
|
|
|
|
return fmt.Errorf("marking unhealthy: %v", err2)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
var body io.Reader = resp.Body
|
|
|
|
if h.HealthChecks.Active.MaxSize > 0 {
|
|
|
|
body = io.LimitReader(body, h.HealthChecks.Active.MaxSize)
|
|
|
|
}
|
|
|
|
defer func() {
|
2019-09-05 22:14:39 +03:00
|
|
|
// drain any remaining body so connection could be re-used
|
2019-09-03 21:10:11 +03:00
|
|
|
io.Copy(ioutil.Discard, body)
|
|
|
|
resp.Body.Close()
|
|
|
|
}()
|
|
|
|
|
|
|
|
// if status code is outside criteria, mark down
|
|
|
|
if h.HealthChecks.Active.ExpectStatus > 0 {
|
|
|
|
if !caddyhttp.StatusCodeMatches(resp.StatusCode, h.HealthChecks.Active.ExpectStatus) {
|
2019-10-30 01:02:40 +03:00
|
|
|
h.HealthChecks.Active.logger.Info("unexpected status code",
|
|
|
|
zap.Int("status_code", resp.StatusCode),
|
|
|
|
zap.String("host", hostAddr),
|
|
|
|
)
|
2019-09-03 21:10:11 +03:00
|
|
|
_, err := host.SetHealthy(false)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("marking unhealthy: %v", err)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
} else if resp.StatusCode < 200 || resp.StatusCode >= 400 {
|
2019-10-30 01:02:40 +03:00
|
|
|
h.HealthChecks.Active.logger.Info("status code out of tolerances",
|
|
|
|
zap.Int("status_code", resp.StatusCode),
|
|
|
|
zap.String("host", hostAddr),
|
|
|
|
)
|
2019-09-03 21:10:11 +03:00
|
|
|
_, err := host.SetHealthy(false)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("marking unhealthy: %v", err)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// if body does not match regex, mark down
|
|
|
|
if h.HealthChecks.Active.bodyRegexp != nil {
|
|
|
|
bodyBytes, err := ioutil.ReadAll(body)
|
|
|
|
if err != nil {
|
2019-10-30 01:02:40 +03:00
|
|
|
h.HealthChecks.Active.logger.Info("failed to read response body",
|
|
|
|
zap.String("host", hostAddr),
|
|
|
|
zap.Error(err),
|
|
|
|
)
|
2019-09-03 21:10:11 +03:00
|
|
|
_, err := host.SetHealthy(false)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("marking unhealthy: %v", err)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
if !h.HealthChecks.Active.bodyRegexp.Match(bodyBytes) {
|
2019-10-30 01:02:40 +03:00
|
|
|
h.HealthChecks.Active.logger.Info("response body failed expectations",
|
|
|
|
zap.String("host", hostAddr),
|
|
|
|
)
|
2019-09-03 21:10:11 +03:00
|
|
|
_, err := host.SetHealthy(false)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("marking unhealthy: %v", err)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// passed health check parameters, so mark as healthy
|
|
|
|
swapped, err := host.SetHealthy(true)
|
|
|
|
if swapped {
|
2019-10-30 01:02:40 +03:00
|
|
|
h.HealthChecks.Active.logger.Info("host is up",
|
|
|
|
zap.String("host", hostAddr),
|
|
|
|
)
|
2019-09-03 21:10:11 +03:00
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("marking healthy: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// countFailure is used with passive health checks. It
|
|
|
|
// remembers 1 failure for upstream for the configured
|
|
|
|
// duration. If passive health checks are disabled or
|
|
|
|
// failure expiry is 0, this is a no-op.
|
2019-09-04 04:06:54 +03:00
|
|
|
func (h *Handler) countFailure(upstream *Upstream) {
|
2019-09-03 21:10:11 +03:00
|
|
|
// only count failures if passive health checking is enabled
|
|
|
|
// and if failures are configured have a non-zero expiry
|
|
|
|
if h.HealthChecks == nil || h.HealthChecks.Passive == nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
failDuration := time.Duration(h.HealthChecks.Passive.FailDuration)
|
|
|
|
if failDuration == 0 {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// count failure immediately
|
|
|
|
err := upstream.Host.CountFail(1)
|
|
|
|
if err != nil {
|
2019-10-30 01:02:40 +03:00
|
|
|
h.HealthChecks.Passive.logger.Error("could not count failure",
|
|
|
|
zap.String("host", upstream.Dial),
|
|
|
|
zap.Error(err),
|
|
|
|
)
|
2019-09-03 21:10:11 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// forget it later
|
|
|
|
go func(host Host, failDuration time.Duration) {
|
|
|
|
time.Sleep(failDuration)
|
|
|
|
err := host.CountFail(-1)
|
|
|
|
if err != nil {
|
2019-10-30 01:02:40 +03:00
|
|
|
h.HealthChecks.Passive.logger.Error("could not forget failure",
|
|
|
|
zap.String("host", upstream.Dial),
|
|
|
|
zap.Error(err),
|
|
|
|
)
|
2019-09-03 21:10:11 +03:00
|
|
|
}
|
|
|
|
}(upstream.Host, failDuration)
|
|
|
|
}
|