Fix daemon shutdown on error after rework of daemon startup

Currently the daemon does not stop on an init error because the blocking
serve API job keeps the main thread from ever reaching the channel wait
for daemon init.  A better approach is to run the blocking serve API job
in a goroutine and send any error back to the main daemon thread over the
already existing channel, so that a clean shutdown can occur on error.

Docker-DCO-1.1-Signed-off-by: Phil Estes <estesp@linux.vnet.ibm.com> (github: estesp)
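
The change below boils down to a small concurrency pattern: run the blocking serve-API call in a goroutine and have both it and daemon init report errors over channels, so the main thread can always shut down cleanly. The following is a minimal, self-contained Go sketch of that pattern only; initDaemon, serveAPI, and shutdown are illustrative stand-ins, not Docker's actual daemon or engine APIs.

// Sketch of the startup/shutdown pattern described in the commit message.
package main

import (
	"errors"
	"log"
	"time"
)

func initDaemon() error {
	time.Sleep(100 * time.Millisecond) // simulate daemon initialization
	return nil                         // return an error here to exercise the failure path
}

func serveAPI() error {
	// Simulate a server loop that blocks until it fails; in the real daemon
	// this call would normally never return.
	time.Sleep(500 * time.Millisecond)
	return errors.New("accept: listener closed")
}

func shutdown() {
	log.Print("running registered shutdown handlers")
}

func main() {
	// Run the blocking API server in a goroutine so main stays free to react
	// to init errors; the server's result comes back over a channel.
	serveAPIWait := make(chan error)
	go func() {
		serveAPIWait <- serveAPI()
	}()

	// Initialize the daemon in the background, reporting errors the same way.
	daemonInitWait := make(chan error)
	go func() {
		daemonInitWait <- initDaemon()
	}()

	// Wait on init first: on failure, run cleanup and exit instead of letting
	// the still-running API server keep the process alive forever.
	if err := <-daemonInitWait; err != nil {
		shutdown()
		log.Fatalf("shutting down daemon due to errors: %v", err)
	}
	log.Print("daemon has completed initialization")

	// Init succeeded: block on the API server and clean up when it stops.
	if err := <-serveAPIWait; err != nil {
		log.Printf("shutting down due to ServeAPI error: %v", err)
	}
	shutdown()
}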
Author: Phil Estes <estesp@linux.vnet.ibm.com>
Date:   2015-03-11 10:33:06 -04:00
commit 459e58ffc9
parent 44e9715e46
4 changed files with 93 additions and 29 deletions

daemon/daemon.go

@@ -817,6 +817,12 @@ func NewDaemonFromDirectory(config *Config, eng *engine.Engine) (*Daemon, error)
     }
     config.DisableNetwork = config.BridgeIface == disableNetworkBridge
+    // register portallocator release on shutdown
+    eng.OnShutdown(func() {
+        if err := portallocator.ReleaseAll(); err != nil {
+            log.Errorf("portallocator.ReleaseAll(): %s", err)
+        }
+    })
     // Claim the pidfile first, to avoid any and all unexpected race conditions.
     // Some of the init doesn't need a pidfile lock - but let's not try to be smart.
     if config.Pidfile != "" {
@@ -879,6 +885,12 @@ func NewDaemonFromDirectory(config *Config, eng *engine.Engine) (*Daemon, error)
         return nil, fmt.Errorf("error intializing graphdriver: %v", err)
     }
     log.Debugf("Using graph driver %s", driver)
+    // register cleanup for graph driver
+    eng.OnShutdown(func() {
+        if err := driver.Cleanup(); err != nil {
+            log.Errorf("Error during graph storage driver.Cleanup(): %v", err)
+        }
+    })
     // As Docker on btrfs and SELinux are incompatible at present, error on both being enabled
     if selinuxEnabled() && config.EnableSelinuxSupport && driver.String() == "btrfs" {
@@ -956,6 +968,12 @@ func NewDaemonFromDirectory(config *Config, eng *engine.Engine) (*Daemon, error)
     if err != nil {
         return nil, err
     }
+    // register graph close on shutdown
+    eng.OnShutdown(func() {
+        if err := graph.Close(); err != nil {
+            log.Errorf("Error during container graph.Close(): %v", err)
+        }
+    })
     localCopy := path.Join(config.Root, "init", fmt.Sprintf("dockerinit-%s", dockerversion.VERSION))
     sysInitPath := utils.DockerInitPath(localCopy)
@@ -1003,22 +1021,9 @@ func NewDaemonFromDirectory(config *Config, eng *engine.Engine) (*Daemon, error)
         statsCollector: newStatsCollector(1 * time.Second),
     }
     // Setup shutdown handlers
-    // FIXME: can these shutdown handlers be registered closer to their source?
     eng.OnShutdown(func() {
-        // FIXME: if these cleanup steps can be called concurrently, register
-        // them as separate handlers to speed up total shutdown time
         if err := daemon.shutdown(); err != nil {
-            log.Errorf("daemon.shutdown(): %s", err)
-        }
-        if err := portallocator.ReleaseAll(); err != nil {
-            log.Errorf("portallocator.ReleaseAll(): %s", err)
-        }
-        if err := daemon.driver.Cleanup(); err != nil {
-            log.Errorf("daemon.driver.Cleanup(): %v", err)
-        }
-        if err := daemon.containerGraph.Close(); err != nil {
-            log.Errorf("daemon.containerGraph.Close(): %v", err)
+            log.Errorf("Error during daemon.shutdown(): %v", err)
         }
     })

docker/daemon.go

@@ -7,6 +7,7 @@ import (
     "io"
     "os"
     "path/filepath"
+    "strings"
     log "github.com/Sirupsen/logrus"
     "github.com/docker/docker/autogen/dockerversion"
@@ -101,13 +102,11 @@ func mainDaemon() {
     // load the daemon in the background so we can immediately start
     // the http api so that connections don't fail while the daemon
     // is booting
-    daemonWait := make(chan struct{})
+    daemonInitWait := make(chan error)
     go func() {
-        defer close(daemonWait)
        d, err := daemon.NewDaemon(daemonCfg, eng)
        if err != nil {
-            log.Error(err)
+            daemonInitWait <- err
            return
        }
@@ -119,7 +118,7 @@ func mainDaemon() {
        )
        if err := d.Install(eng); err != nil {
-            daemonInitWait <- err
+            daemonInitWait <- err
            return
        }
@@ -129,11 +128,10 @@ func mainDaemon() {
        // after the daemon is done setting up we can tell the api to start
        // accepting connections
        if err := eng.Job("acceptconnections").Run(); err != nil {
-            log.Error(err)
+            daemonInitWait <- err
            return
        }
-        log.Debugf("daemon finished")
+        daemonInitWait <- nil
    }()
    // Serve api
@@ -150,16 +148,46 @@ func mainDaemon() {
    job.Setenv("TlsCert", *flCert)
    job.Setenv("TlsKey", *flKey)
    job.SetenvBool("BufferRequests", true)
-    err := job.Run()
+    // The serve API job never exits unless an error occurs
+    // We need to start it as a goroutine and wait on it so
+    // daemon doesn't exit
+    serveAPIWait := make(chan error)
+    go func() {
+        if err := job.Run(); err != nil {
+            log.Errorf("ServeAPI error: %v", err)
+            serveAPIWait <- err
+            return
+        }
+        serveAPIWait <- nil
+    }()
    // Wait for the daemon startup goroutine to finish
    // This makes sure we can actually cleanly shutdown the daemon
-    log.Infof("waiting for daemon to initialize")
-    <-daemonWait
-    eng.Shutdown()
-    if err != nil {
-        // log errors here so the log output looks more consistent
-        log.Fatalf("shutting down daemon due to errors: %v", err)
+    log.Debug("waiting for daemon to initialize")
+    errDaemon := <-daemonInitWait
+    if errDaemon != nil {
+        eng.Shutdown()
+        outStr := fmt.Sprintf("Shutting down daemon due to errors: %v", errDaemon)
+        if strings.Contains(errDaemon.Error(), "engine is shutdown") {
+            // if the error is "engine is shutdown", we've already reported (or
+            // will report below in API server errors) the error
+            outStr = "Shutting down daemon due to reported errors"
+        }
+        // we must "fatal" exit here as the API server may be happy to
+        // continue listening forever if the error had no impact to API
+        log.Fatal(outStr)
+    } else {
+        log.Info("Daemon has completed initialization")
+    }
+    // Daemon is fully initialized and handling API traffic
+    // Wait for serve API job to complete
+    errAPI := <-serveAPIWait
+    // If we have an error here it is unique to API (as daemonErr would have
+    // exited the daemon process above)
+    if errAPI != nil {
+        log.Errorf("Shutting down due to ServeAPI error: %v", errAPI)
    }
+    eng.Shutdown()
 }

integration-cli/docker_cli_daemon_test.go

@@ -481,6 +481,33 @@ func TestDaemonUpgradeWithVolumes(t *testing.T) {
    logDone("daemon - volumes from old(pre 1.3) daemon work")
 }
+// GH#11320 - verify that the daemon exits on failure properly
+// Note that this explicitly tests the conflict of {-b,--bridge} and {--bip} options as the means
+// to get a daemon init failure; no other tests for -b/--bip conflict are therefore required
+func TestDaemonExitOnFailure(t *testing.T) {
+    d := NewDaemon(t)
+    defer d.Stop()
+    //attempt to start daemon with incorrect flags (we know -b and --bip conflict)
+    if err := d.Start("--bridge", "nosuchbridge", "--bip", "1.1.1.1"); err != nil {
+        //verify we got the right error
+        if !strings.Contains(err.Error(), "Daemon exited and never started") {
+            t.Fatalf("Expected daemon not to start, got %v", err)
+        }
+        // look in the log and make sure we got the message that daemon is shutting down
+        runCmd := exec.Command("grep", "Shutting down daemon due to", d.LogfileName())
+        if out, _, err := runCommandWithOutput(runCmd); err != nil {
+            t.Fatalf("Expected 'shutting down daemon due to error' message; but doesn't exist in log: %q, err: %v", out, err)
+        }
+    } else {
+        //if we didn't get an error and the daemon is running, this is a failure
+        d.Stop()
+        t.Fatal("Conflicting options should cause the daemon to error out with a failure")
+    }
+    logDone("daemon - verify no start on daemon init errors")
+}
 func TestDaemonUlimitDefaults(t *testing.T) {
    testRequires(t, NativeExecDriver)
    d := NewDaemon(t)

integration-cli/docker_utils.go

@@ -267,6 +267,10 @@ func (d *Daemon) Cmd(name string, arg ...string) (string, error) {
    return string(b), err
 }
+func (d *Daemon) LogfileName() string {
+    return d.logFile.Name()
+}
 func daemonHost() string {
    daemonUrlStr := "unix://" + api.DEFAULTUNIXSOCKET
    if daemonHostVar := os.Getenv("DOCKER_HOST"); daemonHostVar != "" {