caddyhttp: Honor grace period in background (#5043)

* caddyhttp: Honor grace period in background

This avoids blocking during config reloads.

* Don't quit process until servers shut down

* Make tests more likely to pass on fast CI (#5045)

* caddyhttp: Even faster shutdowns

Simultaneously shut down all HTTP servers, rather than one at a time.

In practice there usually won't be more than 1 that lingers. But this
code ensures that they all Shutdown() in their own goroutine
and then we wait for them at the end (if exiting).

We also wait for them to start up so we can be fairly confident the
shutdowns have begun; i.e. old servers no longer
accepting new connections.

* Fix comment typo

* Pull functions out of loop, for readability
This commit is contained in:
Matt Holt 2022-09-19 21:54:47 -06:00 committed by GitHub
parent 0950ba4f0b
commit da8b7fe58f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 78 additions and 10 deletions

View file

@ -31,6 +31,7 @@ import (
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/caddyserver/caddy/v2/notify"
@ -676,6 +677,10 @@ func Validate(cfg *Config) error {
// Errors are logged along the way, and an appropriate exit
// code is emitted.
func exitProcess(ctx context.Context, logger *zap.Logger) {
// let the rest of the program know we're quitting
atomic.StoreInt32(exiting, 1)
// give the OS or service/process manager our 2 weeks' notice: we quit
if err := notify.Stopping(); err != nil {
Log().Error("unable to notify service manager of stopping state", zap.Error(err))
}
@ -739,6 +744,12 @@ func exitProcess(ctx context.Context, logger *zap.Logger) {
}()
}
var exiting = new(int32) // accessed atomically
// Exiting returns true if the process is exiting.
// EXPERIMENTAL API: subject to change or removal.
func Exiting() bool { return atomic.LoadInt32(exiting) == 1 }
// Duration can be an integer or a string. An integer is
// interpreted as nanoseconds. If a string, it is a Go
// time.Duration value such as `300ms`, `1.5h`, or `2h45m`;

View file

@ -43,7 +43,7 @@ type Defaults struct {
// Default testing values
var Default = Defaults{
AdminPort: 2019,
AdminPort: 2999, // different from what a real server also running on a developer's machine might be
Certifcates: []string{"/caddy.localhost.crt", "/caddy.localhost.key"},
TestRequestTimeout: 5 * time.Second,
LoadRequestTimeout: 5 * time.Second,

View file

@ -16,6 +16,7 @@ func TestRespond(t *testing.T) {
{
http_port 9080
https_port 9443
grace_period 1
}
localhost:9080 {
@ -37,6 +38,7 @@ func TestRedirect(t *testing.T) {
{
http_port 9080
https_port 9443
grace_period 1
}
localhost:9080 {
@ -86,6 +88,7 @@ func TestReadCookie(t *testing.T) {
{
http_port 9080
https_port 9443
grace_period 1
}
localhost:9080 {
@ -109,6 +112,7 @@ func TestReplIndex(t *testing.T) {
{
http_port 9080
https_port 9443
grace_period 1
}
localhost:9080 {

View file

@ -13,6 +13,7 @@ func TestBrowse(t *testing.T) {
{
http_port 9080
https_port 9443
grace_period 1
}
http://localhost:9080 {
file_server browse

View file

@ -13,6 +13,7 @@ func TestMap(t *testing.T) {
tester.InitServer(`{
http_port 9080
https_port 9443
grace_period 1
}
localhost:9080 {

View file

@ -18,6 +18,7 @@ func TestSRVReverseProxy(t *testing.T) {
{
"apps": {
"http": {
"grace_period": 1,
"servers": {
"srv0": {
"listen": [
@ -50,6 +51,7 @@ func TestSRVWithDial(t *testing.T) {
{
"apps": {
"http": {
"grace_period": 1,
"servers": {
"srv0": {
"listen": [
@ -115,6 +117,7 @@ func TestDialWithPlaceholderUnix(t *testing.T) {
{
"apps": {
"http": {
"grace_period": 1,
"servers": {
"srv0": {
"listen": [
@ -156,6 +159,7 @@ func TestReverseProxyWithPlaceholderDialAddress(t *testing.T) {
{
"apps": {
"http": {
"grace_period": 1,
"servers": {
"srv0": {
"listen": [
@ -239,6 +243,7 @@ func TestReverseProxyWithPlaceholderTCPDialAddress(t *testing.T) {
{
"apps": {
"http": {
"grace_period": 1,
"servers": {
"srv0": {
"listen": [
@ -321,6 +326,7 @@ func TestSRVWithActiveHealthcheck(t *testing.T) {
{
"apps": {
"http": {
"grace_period": 1,
"servers": {
"srv0": {
"listen": [

View file

@ -15,6 +15,7 @@ func TestDefaultSNI(t *testing.T) {
"http": {
"http_port": 9080,
"https_port": 9443,
"grace_period": 1,
"servers": {
"srv0": {
"listen": [
@ -112,6 +113,7 @@ func TestDefaultSNIWithNamedHostAndExplicitIP(t *testing.T) {
"http": {
"http_port": 9080,
"https_port": 9443,
"grace_period": 1,
"servers": {
"srv0": {
"listen": [
@ -212,6 +214,7 @@ func TestDefaultSNIWithPortMappingOnly(t *testing.T) {
"http": {
"http_port": 9080,
"https_port": 9443,
"grace_period": 1,
"servers": {
"srv0": {
"listen": [

View file

@ -27,6 +27,7 @@ func TestH2ToH2CStream(t *testing.T) {
"http": {
"http_port": 9080,
"https_port": 9443,
"grace_period": 1,
"servers": {
"srv0": {
"listen": [
@ -216,6 +217,7 @@ func TestH2ToH1ChunkedResponse(t *testing.T) {
"http": {
"http_port": 9080,
"https_port": 9443,
"grace_period": 1,
"servers": {
"srv0": {
"listen": [

View file

@ -505,15 +505,34 @@ func (app *App) Stop() error {
app.logger.Debug("servers shutting down with eternal grace period")
}
// shut down servers
for _, server := range app.Servers {
// goroutines aren't guaranteed to be scheduled right away,
// so we'll use one WaitGroup to wait for all the goroutines
// to start their server shutdowns, and another to wait for
// them to finish; we'll always block for them to start so
// that when we return the caller can be confident* that the
// old servers are no longer accepting new connections
// (* the scheduler might still pause them right before
// calling Shutdown(), but it's unlikely)
var startedShutdown, finishedShutdown sync.WaitGroup
// these will run in goroutines
stopServer := func(server *Server) {
defer finishedShutdown.Done()
startedShutdown.Done()
if err := server.server.Shutdown(ctx); err != nil {
app.logger.Error("server shutdown",
zap.Error(err),
zap.Strings("addresses", server.Listen))
}
}
stopH3Server := func(server *Server) {
defer finishedShutdown.Done()
startedShutdown.Done()
if server.h3server != nil {
if server.h3server == nil {
return
}
// TODO: CloseGracefully, once implemented upstream (see https://github.com/lucas-clemente/quic-go/issues/2103)
if err := server.h3server.Close(); err != nil {
app.logger.Error("HTTP/3 server shutdown",
@ -521,6 +540,27 @@ func (app *App) Stop() error {
zap.Strings("addresses", server.Listen))
}
}
for _, server := range app.Servers {
startedShutdown.Add(2)
finishedShutdown.Add(2)
go stopServer(server)
go stopH3Server(server)
}
// block until all the goroutines have been run by the scheduler;
// this means that they have likely called Shutdown() by now
startedShutdown.Wait()
// if the process is exiting, we need to block here and wait
// for the grace periods to complete, otherwise the process will
// terminate before the servers are finished shutting down; but
// we don't really need to wait for the grace period to finish
// if the process isn't exiting (but note that frequent config
// reloads with long grace periods for a sustained length of time
// may deplete resources)
if caddy.Exiting() {
finishedShutdown.Wait()
}
return nil