Add config parameter to change stop timeout during daemon shutdown

This fix adds a daemon config parameter `--shutdown-timeout`
that specifies how long (in seconds) the daemon waits for containers
to stop gracefully (before SIGKILL) during daemon shutdown. The default
value is 15s.

The `--shutdown-timeout` parameter is added to both the daemon options
and the config file. Its value is also updated on daemon reload.

Additional test cases have been added to cover the change.

This fixes #22471.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
This commit is contained in:
Yong Tang 2016-05-26 14:07:30 -07:00
parent 15ea28f6db
commit d7be6b2deb
6 changed files with 85 additions and 7 deletions

View File

@ -37,6 +37,10 @@ const (
disableNetworkBridge = "none"
)
// defaultShutdownTimeout is the default value, in seconds, used for the
// daemon's shutdown-timeout option when none is specified.
const defaultShutdownTimeout = 15
// flatOptions contains configuration keys
// that MUST NOT be parsed as deep structures.
// Use this to differentiate these options
@ -123,6 +127,10 @@ type CommonConfig struct {
// may take place at a time for each push.
MaxConcurrentUploads *int `json:"max-concurrent-uploads,omitempty"`
// ShutdownTimeout is the timeout value (in seconds) the daemon will wait for the container
// to stop when the daemon is being shut down
ShutdownTimeout int `json:"shutdown-timeout,omitempty"`
Debug bool `json:"debug,omitempty"`
Hosts []string `json:"hosts,omitempty"`
LogLevel string `json:"log-level,omitempty"`
@ -176,6 +184,7 @@ func (config *Config) InstallCommonFlags(flags *pflag.FlagSet) {
flags.StringVar(&config.CorsHeaders, "api-cors-header", "", "Set CORS headers in the remote API")
flags.IntVar(&maxConcurrentDownloads, "max-concurrent-downloads", defaultMaxConcurrentDownloads, "Set the max concurrent downloads for each pull")
flags.IntVar(&maxConcurrentUploads, "max-concurrent-uploads", defaultMaxConcurrentUploads, "Set the max concurrent uploads for each push")
flags.IntVar(&config.ShutdownTimeout, "shutdown-timeout", defaultShutdownTimeout, "Set the default shutdown timeout")
flags.StringVar(&config.SwarmDefaultAdvertiseAddr, "swarm-default-advertise-addr", "", "Set default address or interface for swarm advertised address")

View File

@ -732,12 +732,13 @@ func (daemon *Daemon) shutdownContainer(c *container.Container) error {
return nil
}
// ShutdownTimeout returns the shutdown timeout based on the max stopTimeout of the containers
// ShutdownTimeout returns the shutdown timeout based on the max stopTimeout of the containers,
// and is limited by daemon's ShutdownTimeout.
func (daemon *Daemon) ShutdownTimeout() int {
// By default we use container.DefaultStopTimeout + 5s, which is 15s.
// TODO (yongtang): Will need to allow shutdown-timeout once #23036 is in place.
// By default we use daemon's ShutdownTimeout.
shutdownTimeout := daemon.configStore.ShutdownTimeout
graceTimeout := 5
shutdownTimeout := container.DefaultStopTimeout + graceTimeout
if daemon.containers != nil {
for _, c := range daemon.containers.List() {
if shutdownTimeout >= 0 {
@ -769,7 +770,7 @@ func (daemon *Daemon) Shutdown() error {
}
if daemon.containers != nil {
logrus.Debug("starting clean shutdown of all containers...")
logrus.Debugf("start clean shutdown of all containers with a %d seconds timeout...", daemon.configStore.ShutdownTimeout)
daemon.containers.ApplyAll(func(c *container.Container) {
if !c.IsRunning() {
return
@ -995,6 +996,7 @@ func (daemon *Daemon) initDiscovery(config *Config) error {
// - Daemon max concurrent uploads
// - Cluster discovery (reconfigure and restart).
// - Daemon live restore
// - Daemon shutdown timeout (in seconds).
func (daemon *Daemon) Reload(config *Config) (err error) {
daemon.configStore.reloadLock.Lock()
@ -1055,6 +1057,11 @@ func (daemon *Daemon) Reload(config *Config) (err error) {
daemon.uploadManager.SetConcurrency(*daemon.configStore.MaxConcurrentUploads)
}
if config.IsValueSet("shutdown-timeout") {
daemon.configStore.ShutdownTimeout = config.ShutdownTimeout
logrus.Debugf("Reset Shutdown Timeout: %d", daemon.configStore.ShutdownTimeout)
}
// We emit daemon reload event here with updatable configurations
attributes["debug"] = fmt.Sprintf("%t", daemon.configStore.Debug)
attributes["live-restore"] = fmt.Sprintf("%t", daemon.configStore.LiveRestoreEnabled)
@ -1074,6 +1081,7 @@ func (daemon *Daemon) Reload(config *Config) (err error) {
}
attributes["max-concurrent-downloads"] = fmt.Sprintf("%d", *daemon.configStore.MaxConcurrentDownloads)
attributes["max-concurrent-uploads"] = fmt.Sprintf("%d", *daemon.configStore.MaxConcurrentUploads)
attributes["shutdown-timeout"] = fmt.Sprintf("%d", daemon.configStore.ShutdownTimeout)
return nil
}

View File

@ -64,6 +64,7 @@ Options:
--raw-logs Full timestamps without ANSI coloring
--registry-mirror value Preferred Docker registry mirror (default [])
--selinux-enabled Enable selinux support
--shutdown-timeout=15 Set the shutdown timeout value in seconds
-s, --storage-driver string Storage driver to use
--storage-opt value Storage driver options (default [])
--swarm-default-advertise-addr string Set default address or interface for swarm advertised address
@ -1118,6 +1119,7 @@ This is a full example of the allowed configuration options on Linux:
"cluster-advertise": "",
"max-concurrent-downloads": 3,
"max-concurrent-uploads": 5,
"shutdown-timeout": 15,
"debug": true,
"hosts": [],
"log-level": "",
@ -1194,6 +1196,7 @@ This is a full example of the allowed configuration options on Windows:
"graph": "",
"cluster-store": "",
"cluster-advertise": "",
"shutdown-timeout": 15,
"debug": true,
"hosts": [],
"log-level": "",

View File

@ -2920,3 +2920,57 @@ func (s *DockerDaemonSuite) TestDaemonWithUserlandProxyPath(c *check.C) {
c.Assert(out, checker.Contains, "driver failed programming external connectivity on endpoint")
c.Assert(out, checker.Contains, "/does/not/exist: no such file or directory")
}
// TestDaemonShutdownTimeout is a test case for #22471. It starts the daemon
// with --shutdown-timeout=3, runs a container, interrupts the daemon, and
// checks that the daemon log reports a clean shutdown using the 3 second
// timeout.
func (s *DockerDaemonSuite) TestDaemonShutdownTimeout(c *check.C) {
	testRequires(c, SameHostDaemon)

	c.Assert(s.d.StartWithBusybox("--shutdown-timeout=3"), check.IsNil)

	_, err := s.d.Cmd("run", "-d", "busybox", "top")
	c.Assert(err, check.IsNil)

	// If the signal cannot be delivered the daemon never starts shutting
	// down, so fail here instead of on a confusing log mismatch below.
	c.Assert(syscall.Kill(s.d.cmd.Process.Pid, syscall.SIGINT), check.IsNil)

	// Wait for the daemon to exit, but for no more than 5 seconds. Timing out
	// is not treated as a failure: the check below only needs the shutdown
	// log line to have been written.
	select {
	case <-s.d.wait:
	case <-time.After(5 * time.Second):
	}

	expectedMessage := `level=debug msg="start clean shutdown of all containers with a 3 seconds timeout..."`
	content, err := ioutil.ReadFile(s.d.logFile.Name())
	c.Assert(err, check.IsNil)
	c.Assert(string(content), checker.Contains, expectedMessage)
}
// TestDaemonShutdownTimeoutWithConfigFile is a test case for #22471. It
// starts the daemon with a config file that sets shutdown-timeout to 8,
// rewrites the file with a value of 5, sends SIGHUP to the daemon, and checks
// that the daemon log reports the shutdown timeout being reset to 5.
func (s *DockerDaemonSuite) TestDaemonShutdownTimeoutWithConfigFile(c *check.C) {
	testRequires(c, SameHostDaemon)

	// daemon config file
	configFilePath := "test.json"
	defer os.Remove(configFilePath)

	daemonConfig := `{ "shutdown-timeout" : 8 }`
	c.Assert(ioutil.WriteFile(configFilePath, []byte(daemonConfig), 0666), checker.IsNil)
	c.Assert(s.d.Start(fmt.Sprintf("--config-file=%s", configFilePath)), check.IsNil)

	// Replace the configuration on disk before asking the daemon to reload it.
	daemonConfig = `{ "shutdown-timeout" : 5 }`
	c.Assert(ioutil.WriteFile(configFilePath, []byte(daemonConfig), 0666), checker.IsNil)

	// A failed signal means no reload happens; report that directly.
	c.Assert(syscall.Kill(s.d.cmd.Process.Pid, syscall.SIGHUP), checker.IsNil)

	// NOTE(review): the daemon is presumably not expected to exit on SIGHUP,
	// so this select most likely serves as a bounded (3 second) wait for the
	// reload to be logged -- confirm.
	select {
	case <-s.d.wait:
	case <-time.After(3 * time.Second):
	}

	expectedMessage := `level=debug msg="Reset Shutdown Timeout: 5"`
	content, err := ioutil.ReadFile(s.d.logFile.Name())
	c.Assert(err, checker.IsNil)
	c.Assert(string(content), checker.Contains, expectedMessage)
}

View File

@ -418,7 +418,7 @@ func (s *DockerDaemonSuite) TestDaemonEvents(c *check.C) {
configFile, err = os.Create(configFilePath)
c.Assert(err, checker.IsNil)
daemonConfig = `{"max-concurrent-downloads":1,"labels":["bar=foo"]}`
daemonConfig = `{"max-concurrent-downloads":1,"labels":["bar=foo"], "shutdown-timeout": 10}`
fmt.Fprintf(configFile, "%s", daemonConfig)
configFile.Close()
@ -429,7 +429,7 @@ func (s *DockerDaemonSuite) TestDaemonEvents(c *check.C) {
out, err = s.d.Cmd("events", "--since=0", "--until", daemonUnixTime(c))
c.Assert(err, checker.IsNil)
c.Assert(out, checker.Contains, fmt.Sprintf("daemon reload %s (cluster-advertise=, cluster-store=, cluster-store-opts={}, debug=true, default-runtime=runc, labels=[\"bar=foo\"], live-restore=false, max-concurrent-downloads=1, max-concurrent-uploads=5, name=%s, runtimes=runc:{docker-runc []})", daemonID, daemonName))
c.Assert(out, checker.Contains, fmt.Sprintf("daemon reload %s (cluster-advertise=, cluster-store=, cluster-store-opts={}, debug=true, default-runtime=runc, labels=[\"bar=foo\"], live-restore=false, max-concurrent-downloads=1, max-concurrent-uploads=5, name=%s, runtimes=runc:{docker-runc []}, shutdown-timeout=10)", daemonID, daemonName))
}
func (s *DockerDaemonSuite) TestDaemonEventsWithFilters(c *check.C) {

View File

@ -56,6 +56,7 @@ dockerd - Enable daemon mode
[**--registry-mirror**[=*[]*]]
[**-s**|**--storage-driver**[=*STORAGE-DRIVER*]]
[**--selinux-enabled**]
[**--shutdown-timeout**[=*15*]]
[**--storage-opt**[=*[]*]]
[**--swarm-default-advertise-addr**[=*IP|INTERFACE*]]
[**--tls**]
@ -246,6 +247,9 @@ output otherwise.
**--selinux-enabled**=*true*|*false*
Enable selinux support. Default is false.
**--shutdown-timeout**=*15*
Set the shutdown timeout value in seconds. Default is `15`.
**--storage-opt**=[]
Set storage driver options. See STORAGE DRIVER OPTIONS.