From 95fcf76cc64a4acf95c168e8d8607e3acf405c13 Mon Sep 17 00:00:00 2001
From: Phil Estes
Date: Tue, 21 Apr 2015 00:24:24 -0400
Subject: [PATCH] Add SIGUSR1 handler for dumping stack/goroutine traces

Add handler for SIGUSR1 based on feedback regarding when to dump
goroutine stacks. This will also dump goroutine stack traces on SIGQUIT
followed by a hard-exit from the daemon.

Docker-DCO-1.1-Signed-off-by: Phil Estes (github: estesp)
---
 daemon/daemon.go                |  3 +++
 daemon/debugtrap.go             | 21 +++++++++++++++++++++
 daemon/debugtrap_unsupported.go |  7 +++++++
 pkg/signal/trap.go              | 41 ++++++++++++++++++++++++++++++-----------
 4 files changed, 61 insertions(+), 11 deletions(-)
 create mode 100644 daemon/debugtrap.go
 create mode 100644 daemon/debugtrap_unsupported.go

diff --git a/daemon/daemon.go b/daemon/daemon.go
index e64da1cde9..9c8a205411 100644
--- a/daemon/daemon.go
+++ b/daemon/daemon.go
@@ -747,6 +747,9 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
 		return nil, err
 	}
 
+	// set up SIGUSR1 handler to dump goroutine stacks
+	setupSigusr1Trap()
+
 	// set up the tmpDir to use a canonical path
 	tmp, err := tempDir(config.Root)
 	if err != nil {
diff --git a/daemon/debugtrap.go b/daemon/debugtrap.go
new file mode 100644
index 0000000000..949bf3d6fe
--- /dev/null
+++ b/daemon/debugtrap.go
@@ -0,0 +1,21 @@
+// +build linux darwin freebsd
+
+package daemon
+
+import (
+	"os"
+	"os/signal"
+	"syscall"
+
+	psignal "github.com/docker/docker/pkg/signal"
+)
+
+func setupSigusr1Trap() {
+	c := make(chan os.Signal, 1)
+	signal.Notify(c, syscall.SIGUSR1)
+	go func() {
+		for range c {
+			psignal.DumpStacks()
+		}
+	}()
+}
diff --git a/daemon/debugtrap_unsupported.go b/daemon/debugtrap_unsupported.go
new file mode 100644
index 0000000000..6bac39cb1c
--- /dev/null
+++ b/daemon/debugtrap_unsupported.go
@@ -0,0 +1,7 @@
+// +build !linux,!darwin,!freebsd
+
+package daemon
+
+func setupSigusr1Trap() {
+	// SIGUSR1 stack dumps are not wired up on this platform; nothing to do.
+}
diff --git a/pkg/signal/trap.go b/pkg/signal/trap.go
index 7469dbcc21..3772db5e18 100644
--- a/pkg/signal/trap.go
+++ b/pkg/signal/trap.go
@@ -3,6 +3,7 @@ package signal
 import (
 	"os"
 	gosignal "os/signal"
+	"runtime"
 	"sync/atomic"
 	"syscall"
 
@@ -14,41 +15,59 @@ import (
 // (and the Docker engine in particular).
 //
 // * If SIGINT or SIGTERM are received, `cleanup` is called, then the process is terminated.
-// * If SIGINT or SIGTERM are repeated 3 times before cleanup is complete, then cleanup is
-// skipped and the process terminated directly.
-// * If "DEBUG" is set in the environment, SIGQUIT causes an exit without cleanup.
+// * If SIGINT or SIGTERM are received 3 times before cleanup is complete, then cleanup is
+// skipped and the process is terminated immediately (allows force quit of stuck daemon)
+// * A SIGQUIT always causes an exit without cleanup, with a goroutine dump preceding exit.
 //
 func Trap(cleanup func()) {
 	c := make(chan os.Signal, 1)
-	signals := []os.Signal{os.Interrupt, syscall.SIGTERM}
-	if os.Getenv("DEBUG") == "" {
-		signals = append(signals, syscall.SIGQUIT)
-	}
+	// we will handle INT, TERM, QUIT here
+	signals := []os.Signal{os.Interrupt, syscall.SIGTERM, syscall.SIGQUIT}
 	gosignal.Notify(c, signals...)
 	go func() {
 		interruptCount := uint32(0)
 		for sig := range c {
 			go func(sig os.Signal) {
-				logrus.Infof("Received signal '%v', starting shutdown of docker...", sig)
+				logrus.Infof("Processing signal '%v'", sig)
 				switch sig {
 				case os.Interrupt, syscall.SIGTERM:
-					// If the user really wants to interrupt, let him do so.
 					if atomic.LoadUint32(&interruptCount) < 3 {
 						// Initiate the cleanup only once
 						if atomic.AddUint32(&interruptCount, 1) == 1 {
-							// Call cleanup handler
+							// Call the provided cleanup handler
 							cleanup()
 							os.Exit(0)
 						} else {
 							return
 						}
 					} else {
-						logrus.Infof("Force shutdown of docker, interrupting cleanup")
+						// 3 SIGTERM/INT signals received; force exit without cleanup
+						logrus.Infof("Forcing docker daemon shutdown without cleanup; 3 interrupts received")
 					}
 				case syscall.SIGQUIT:
+					DumpStacks()
+					logrus.Infof("Forcing docker daemon shutdown without cleanup on SIGQUIT")
 				}
+				// for the SIGINT/TERM, and SIGQUIT non-clean shutdown case, exit with 128 + signal #
 				os.Exit(128 + int(sig.(syscall.Signal)))
 			}(sig)
 		}
 	}()
 }
+
+// DumpStacks logs the stack traces of all goroutines at info level.
+func DumpStacks() {
+	// runtime.Stack truncates silently when the buffer is too small, so grow it until the dump fits.
+	buf := make([]byte, 16384)
+	for {
+		n := runtime.Stack(buf, true)
+		if n < len(buf) {
+			buf = buf[:n]
+			break
+		}
+		buf = make([]byte, 2*len(buf))
+	}
+	// Note that if the daemon is started with a less-verbose log-level than "info" (the default), the goroutine
+	// traces won't show up in the log.
+	logrus.Infof("=== BEGIN goroutine stack dump ===\n%s\n=== END goroutine stack dump ===", buf)
+}