Merge pull request #807 from mholt/graceful-inproc-restart

Restart gracefully with in-process restart
This commit is contained in:
Matt Holt 2016-05-07 08:58:18 -06:00
commit 1f29c52151
5 changed files with 27 additions and 207 deletions

View file

@ -4,8 +4,7 @@
// To use this package, follow a few simple steps:
//
// 1. Set the AppName and AppVersion variables.
// 2. Call LoadCaddyfile() to get the Caddyfile (it
// might have been piped in as part of a restart).
// 2. Call LoadCaddyfile() to get the Caddyfile.
// You should pass in your own Caddyfile loader.
// 3. Call caddy.Start() to start Caddy, caddy.Stop()
// to stop it, or caddy.Restart() to restart it.
@ -16,7 +15,6 @@ package caddy
import (
"bytes"
"encoding/gob"
"errors"
"fmt"
"io/ioutil"
@ -26,7 +24,6 @@ import (
"path"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/mholt/caddy/caddy/https"
@ -52,11 +49,6 @@ var (
// GracefulTimeout is the maximum duration of a graceful shutdown.
GracefulTimeout time.Duration
// RestartMode is the mode used for restart,
// "inproc" will restart in process,
// otherwise default behavior is used (inproc on Windows, fork on Linux).
RestartMode = ""
)
var (
@ -66,10 +58,6 @@ var (
// caddyfileMu protects caddyfile during changes
caddyfileMu sync.Mutex
// errIncompleteRestart occurs if this process is a fork
// of the parent but no Caddyfile was piped in
errIncompleteRestart = errors.New("incomplete restart")
// servers is a list of all the currently-listening servers
servers []*server.Server
@ -79,11 +67,8 @@ var (
// wg is used to wait for all servers to shut down
wg sync.WaitGroup
// loadedGob is used if this is a child process as part of
// a graceful restart; it is used to map listeners to their
// index in the list of inherited file descriptors. This
// variable is not safe for concurrent access.
loadedGob caddyfileGob
// restartFds keeps the servers' sockets for graceful in-process restart
restartFds = make(map[string]*os.File)
// startedBefore should be set to true if caddy has been started
// at least once (does not indicate whether currently running).
@ -104,31 +89,7 @@ const (
// one.
//
// This function blocks until all the servers are listening.
//
// Note (POSIX): If Start is called in the child process of a
// restart more than once within the duration of the graceful
// cutoff (i.e. the child process called Start a first time,
// then called Stop, then Start again within the first 5 seconds
// or however long GracefulTimeout is) and the Caddyfiles have
// at least one listener address in common, the second Start
// may fail with "address already in use" as there's no
// guarantee that the parent process has relinquished the
// address before the grace period ends.
func Start(cdyfile Input) (err error) {
// If we return with no errors, we must do two things: tell the
// parent that we succeeded and write to the pidfile.
defer func() {
if err == nil {
signalSuccessToParent() // TODO: Is doing this more than once per process a bad idea? Start could get called more than once in other apps.
if PidFile != "" {
err := writePidFile()
if err != nil {
log.Printf("[ERROR] Could not write pidfile: %v", err)
}
}
}
}()
// Input must never be nil; try to load something
if cdyfile == nil {
cdyfile, err = LoadCaddyfile(nil)
@ -158,10 +119,11 @@ func Start(cdyfile Input) (err error) {
if err != nil {
return err
}
startedBefore = true
showInitializationOutput(groupings)
startedBefore = true
return nil
}
@ -193,8 +155,8 @@ func showInitializationOutput(groupings bindingGroup) {
// startServers starts all the servers in groupings,
// taking into account whether or not this process is
// a child from a graceful restart or not. It blocks
// until the servers are listening.
// from a graceful restart or not. It blocks until
// the servers are listening.
func startServers(groupings bindingGroup) error {
var startupWg sync.WaitGroup
errChan := make(chan error, len(groupings)) // must be buffered to allow Serve functions below to return if stopped later
@ -213,12 +175,9 @@ func startServers(groupings bindingGroup) error {
}
var ln server.ListenerFile
if IsRestart() {
// Look up this server's listener in the map of inherited file descriptors;
// if we don't have one, we must make a new one (later).
if fdIndex, ok := loadedGob.ListenerFds[s.Addr]; ok {
file := os.NewFile(fdIndex, "")
if len(restartFds) > 0 {
// Reuse the listeners for in-process restart
if file, ok := restartFds[s.Addr]; ok {
fln, err := net.FileListener(file)
if err != nil {
return err
@ -230,7 +189,7 @@ func startServers(groupings bindingGroup) error {
}
file.Close()
delete(loadedGob.ListenerFds, s.Addr)
delete(restartFds, s.Addr)
}
}
@ -240,7 +199,7 @@ func startServers(groupings bindingGroup) error {
// run startup functions that should only execute when
// the original parent process is starting.
if !IsRestart() && !startedBefore {
if !startedBefore {
err := s.RunFirstStartupFuncs()
if err != nil {
errChan <- err
@ -268,10 +227,10 @@ func startServers(groupings bindingGroup) error {
}
// Close the remaining (unused) file descriptors to free up resources
if IsRestart() {
for key, fdIndex := range loadedGob.ListenerFds {
os.NewFile(fdIndex, "").Close()
delete(loadedGob.ListenerFds, key)
if len(restartFds) > 0 {
for key, file := range restartFds {
file.Close()
delete(restartFds, key)
}
}
@ -314,25 +273,11 @@ func Wait() {
wg.Wait()
}
// LoadCaddyfile loads a Caddyfile, prioritizing a Caddyfile
// piped from stdin as part of a restart (only happens on first call
// to LoadCaddyfile). If it is not a restart, this function tries
// calling the user's loader function, and if that returns nil, then
// this function resorts to the default configuration. Thus, if there
// are no other errors, this function always returns at least the
// default Caddyfile.
// LoadCaddyfile loads a Caddyfile by calling the user's loader function,
// and if that returns nil, then this function resorts to the default
// configuration. Thus, if there are no other errors, this function
// always returns at least the default Caddyfile.
func LoadCaddyfile(loader func() (Input, error)) (cdyfile Input, err error) {
// If we are a fork, finishing the restart is highest priority;
// piped input is required in this case.
if IsRestart() {
err := gob.NewDecoder(os.Stdin).Decode(&loadedGob)
if err != nil {
return nil, err
}
cdyfile = loadedGob.Caddyfile
atomic.StoreInt32(https.OnDemandIssuedCount, loadedGob.OnDemandTLSCertsIssued)
}
// Try user's loader
if cdyfile == nil && loader != nil {
cdyfile, err = loader()

View file

@ -4,13 +4,11 @@ import (
"bytes"
"fmt"
"io/ioutil"
"log"
"os"
"os/exec"
"runtime"
"strconv"
"strings"
"sync"
)
// isLocalhost returns true if host looks explicitly like a localhost address.
@ -35,46 +33,10 @@ func checkFdlimit() {
}
}
// signalSuccessToParent tells the parent our status using pipe at index 3.
// If this process is not a restart, this function does nothing.
// Calling this function once this process has successfully initialized
// is vital so that the parent process can unblock and kill itself.
// This function is idempotent; it executes at most once per process.
func signalSuccessToParent() {
signalParentOnce.Do(func() {
if IsRestart() {
ppipe := os.NewFile(3, "") // parent is reading from pipe at index 3
_, err := ppipe.Write([]byte("success")) // we must send some bytes to the parent
if err != nil {
log.Printf("[ERROR] Communicating successful init to parent: %v", err)
}
ppipe.Close()
}
})
}
// signalParentOnce is used to make sure that the parent is only
// signaled once; doing so more than once breaks whatever socket is
// at fd 4 (the reason for this is still unclear - to reproduce,
// call Stop() and Start() in succession at least once after a
// restart, then try loading first host of Caddyfile in the browser).
// Do not use this directly - call signalSuccessToParent instead.
var signalParentOnce sync.Once
// caddyfileGob maps bind address to index of the file descriptor
// in the Files array passed to the child process. It also contains
// the caddyfile contents and other state needed by the new process.
// Used only during graceful restarts where a new process is spawned.
type caddyfileGob struct {
ListenerFds map[string]uintptr
Caddyfile Input
OnDemandTLSCertsIssued int32
}
// IsRestart returns whether this process is, according
// to env variables, a fork as part of a graceful restart.
func IsRestart() bool {
return os.Getenv("CADDY_RESTART") == "true"
return startedBefore
}
// writePidFile writes the process ID to the file at PidFile, if specified.

View file

@ -4,23 +4,14 @@ package caddy
import (
"bytes"
"encoding/gob"
"errors"
"io/ioutil"
"log"
"net"
"os"
"os/exec"
"path/filepath"
"sync/atomic"
"github.com/mholt/caddy/caddy/https"
)
func init() {
gob.Register(CaddyfileInput{})
}
// Restart restarts the entire application; gracefully with zero
// downtime if on a POSIX-compatible system, or forcefully if on
// Windows but with imperceptibly-short downtime.
@ -52,87 +43,14 @@ func Restart(newCaddyfile Input) error {
return errors.New("TLS preload: " + err.Error())
}
if RestartMode == "inproc" {
return restartInProc(newCaddyfile)
}
if len(os.Args) == 0 { // this should never happen, but...
os.Args = []string{""}
}
// Tell the child that it's a restart
os.Setenv("CADDY_RESTART", "true")
// Prepare our payload to the child process
cdyfileGob := caddyfileGob{
ListenerFds: make(map[string]uintptr),
Caddyfile: newCaddyfile,
OnDemandTLSCertsIssued: atomic.LoadInt32(https.OnDemandIssuedCount),
}
// Prepare a pipe to the fork's stdin so it can get the Caddyfile
rpipe, wpipe, err := os.Pipe()
if err != nil {
return err
}
// Prepare a pipe that the child process will use to communicate
// its success with us by sending > 0 bytes
sigrpipe, sigwpipe, err := os.Pipe()
if err != nil {
return err
}
// Pass along relevant file descriptors to child process; ordering
// is very important since we rely on these being in certain positions.
extraFiles := []*os.File{sigwpipe} // fd 3
// Add file descriptors of all the sockets
// Add file descriptors of all the sockets for new instance
serversMu.Lock()
for i, s := range servers {
extraFiles = append(extraFiles, s.ListenerFd())
cdyfileGob.ListenerFds[s.Addr] = uintptr(4 + i) // 4 fds come before any of the listeners
for _, s := range servers {
restartFds[s.Addr] = s.ListenerFd()
}
serversMu.Unlock()
// Set up the command
cmd := exec.Command(os.Args[0], os.Args[1:]...)
cmd.Stdin = rpipe // fd 0
cmd.Stdout = os.Stdout // fd 1
cmd.Stderr = os.Stderr // fd 2
cmd.ExtraFiles = extraFiles
// Spawn the child process
err = cmd.Start()
if err != nil {
return err
}
// Immediately close our dup'ed fds and the write end of our signal pipe
for _, f := range extraFiles {
f.Close()
}
// Feed Caddyfile to the child
err = gob.NewEncoder(wpipe).Encode(cdyfileGob)
if err != nil {
return err
}
wpipe.Close()
// Determine whether child startup succeeded
answer, readErr := ioutil.ReadAll(sigrpipe)
if answer == nil || len(answer) == 0 {
cmdErr := cmd.Wait() // get exit status
log.Printf("[ERROR] Restart: child failed to initialize (%v) - changes not applied", cmdErr)
if readErr != nil {
log.Printf("[ERROR] Restart: additionally, error communicating with child process: %v", readErr)
}
return errIncompleteRestart
}
// Looks like child is successful; we can exit gracefully.
return Stop()
return restartInProc(newCaddyfile)
}
func getCertsForNewCaddyfile(newCaddyfile Input) error {

View file

@ -5,6 +5,7 @@ import "log"
// restartInProc restarts Caddy forcefully in process using newCaddyfile.
func restartInProc(newCaddyfile Input) error {
wg.Add(1) // barrier so Wait() doesn't unblock
defer wg.Done()
err := Stop()
if err != nil {
@ -20,13 +21,8 @@ func restartInProc(newCaddyfile Input) error {
// revert to old Caddyfile
if oldErr := Start(oldCaddyfile); oldErr != nil {
log.Printf("[ERROR] Restart: in-process restart failed and cannot revert to old Caddyfile: %v", oldErr)
} else {
wg.Done() // take down our barrier
}
return err
}
wg.Done() // take down our barrier
return nil
return err
}

View file

@ -33,7 +33,6 @@ func init() {
flag.StringVar(&caddy.PidFile, "pidfile", "", "Path to write pid file")
flag.StringVar(&caddy.Port, "port", caddy.DefaultPort, "Default port")
flag.BoolVar(&caddy.Quiet, "quiet", false, "Quiet mode (no initialization output)")
flag.StringVar(&caddy.RestartMode, "restart", "", "Restart mode (inproc for in process restart)")
flag.StringVar(&revoke, "revoke", "", "Hostname for which to revoke the certificate")
flag.StringVar(&caddy.Root, "root", caddy.DefaultRoot, "Root path to default site")
flag.BoolVar(&version, "version", false, "Show version")