Clean up provisioned modules on error; refactor Run(); add Validate()

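Modules that return an error during provisioning should still be cleaned
up so that they don't leak any resources they may have allocated before
the error occurred. Cleanup must therefore be able to run even if
Provision does not complete fully.

Run() now delegates to an internal run() helper that takes a start flag,
so the new Validate() can load, provision, and validate a config without
starting its apps.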
This commit is contained in:
Matthew Holt 2019-09-30 09:16:01 -06:00
parent 1e66226217
commit 8eb2c37251
No known key found for this signature in database
GPG key ID: 2A349DD577D586A5
4 changed files with 138 additions and 101 deletions

View file

@ -54,7 +54,33 @@ func Run(newCfg *Config) error {
currentCfgMu.Lock() currentCfgMu.Lock()
defer currentCfgMu.Unlock() defer currentCfgMu.Unlock()
if newCfg != nil { // run the new config and start all its apps
err := run(newCfg, true)
if err != nil {
return err
}
// swap old config with the new one
oldCfg := currentCfg
currentCfg = newCfg
// Stop, Cleanup each old app
unsyncedStop(oldCfg)
return nil
}
// run runs newCfg and starts all its apps if
// start is true. If any errors happen, cleanup
// is performed if any modules were provisioned;
// apps that were started already will be stopped,
// so this function should not leak resources if
// an error is returned.
func run(newCfg *Config, start bool) error {
if newCfg == nil {
return nil
}
// because we will need to roll back any state // because we will need to roll back any state
// modifications if this function errors, we // modifications if this function errors, we
// keep a single error value and scope all // keep a single error value and scope all
@ -121,12 +147,18 @@ func Run(newCfg *Config) error {
return err return err
} }
if !start {
return nil
}
// Start // Start
err = func() error { return func() error {
var started []string var started []string
for name, a := range newCfg.apps { for name, a := range newCfg.apps {
err := a.Start() err := a.Start()
if err != nil { if err != nil {
// an app failed to start, so we need to stop
// all other apps that were already started
for _, otherAppName := range started { for _, otherAppName := range started {
err2 := newCfg.apps[otherAppName].Stop() err2 := newCfg.apps[otherAppName].Stop()
if err2 != nil { if err2 != nil {
@ -140,19 +172,6 @@ func Run(newCfg *Config) error {
} }
return nil return nil
}() }()
if err != nil {
return err
}
}
// swap old config with the new one
oldCfg := currentCfg
currentCfg = newCfg
// Stop, Cleanup each old app
unsyncedStop(oldCfg)
return nil
} }
// Stop stops running the current configuration. // Stop stops running the current configuration.
@ -168,26 +187,34 @@ func Stop() error {
return nil return nil
} }
// unsyncedStop stops oldCfg from running, but if // unsyncedStop stops cfg from running, but if
// applicable, you need to acquire locks yourself. // applicable, you need to acquire locks yourself.
// It is a no-op if oldCfg is nil. If any app // It is a no-op if cfg is nil. If any app
// returns an error when stopping, it is logged // returns an error when stopping, it is logged
// and the function continues with the next app. // and the function continues with the next app.
func unsyncedStop(oldCfg *Config) { // This function assumes all apps in cfg were
if oldCfg == nil { // successfully started.
func unsyncedStop(cfg *Config) {
if cfg == nil {
return return
} }
// stop each app // stop each app
for name, a := range oldCfg.apps { for name, a := range cfg.apps {
err := a.Stop() err := a.Stop()
if err != nil { if err != nil {
log.Printf("[ERROR] stop %s: %v", name, err) log.Printf("[ERROR] stop %s: %v", name, err)
} }
} }
// clean up all old modules // clean up all modules
oldCfg.cancelFunc() cfg.cancelFunc()
}
// Validate loads, provisions, and validates
// cfg, but does not start running it.
func Validate(cfg *Config) error {
return run(cfg, false)
} }
// Duration is a JSON-string-unmarshable duration type. // Duration is a JSON-string-unmarshable duration type.

View file

@ -131,6 +131,14 @@ func (ctx Context) LoadModule(name string, rawMsg json.RawMessage) (interface{},
if prov, ok := val.(Provisioner); ok { if prov, ok := val.(Provisioner); ok {
err := prov.Provision(ctx) err := prov.Provision(ctx)
if err != nil { if err != nil {
// incomplete provisioning could have left state
// dangling, so make sure it gets cleaned up
if cleanerUpper, ok := val.(CleanerUpper); ok {
err2 := cleanerUpper.Cleanup()
if err2 != nil {
err = fmt.Errorf("%v; additionally, cleanup: %v", err, err2)
}
}
return nil, fmt.Errorf("provision %s: %v", mod.Name, err) return nil, fmt.Errorf("provision %s: %v", mod.Name, err)
} }
} }
@ -138,6 +146,7 @@ func (ctx Context) LoadModule(name string, rawMsg json.RawMessage) (interface{},
if validator, ok := val.(Validator); ok { if validator, ok := val.(Validator); ok {
err := validator.Validate() err := validator.Validate()
if err != nil { if err != nil {
// since the module was already provisioned, make sure we clean up
if cleanerUpper, ok := val.(CleanerUpper); ok { if cleanerUpper, ok := val.(CleanerUpper); ok {
err2 := cleanerUpper.Cleanup() err2 := cleanerUpper.Cleanup()
if err2 != nil { if err2 != nil {

View file

@ -253,9 +253,10 @@ type Validator interface {
// CleanerUpper is implemented by modules which may have side-effects // CleanerUpper is implemented by modules which may have side-effects
// such as opened files, spawned goroutines, or allocated some sort // such as opened files, spawned goroutines, or allocated some sort
// of non-local state when they were provisioned. This method should // of non-stack state when they were provisioned. This method should
// deallocate/cleanup those resources to prevent memory leaks. Cleanup // deallocate/cleanup those resources to prevent memory leaks. Cleanup
// should be fast and efficient. // should be fast and efficient. Cleanup should work even if Provision
// returns an error, to allow cleaning up from partial provisionings.
type CleanerUpper interface { type CleanerUpper interface {
Cleanup() error Cleanup() error
} }

View file

@ -65,9 +65,9 @@ func gracefulStop(sigName string) {
os.Exit(exitCode) os.Exit(exitCode)
} }
// Exit codes. Generally, you will want to avoid // Exit codes. Generally, you should NOT
// automatically restarting the process if the // automatically restart the process if the
// exit code is 1. // exit code is ExitCodeFailedStartup (1).
const ( const (
ExitCodeSuccess = iota ExitCodeSuccess = iota
ExitCodeFailedStartup ExitCodeFailedStartup