From d705dab1b1bd0a946d647374325d61fac57736db Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Thu, 2 Jun 2016 11:10:55 -0700 Subject: [PATCH] Add --live-restore flag This flag enables full support of daemonless containers in docker. It ensures that docker does not stop containers on shutdown or restore and properly reconnects to the container when restarted. This is not the default because of backwards compat but should be the desired outcome for people running containers in prod. Signed-off-by: Michael Crosby --- cmd/dockerd/daemon_unix.go | 3 ++ daemon/config.go | 1 + daemon/config_unix.go | 1 + daemon/daemon.go | 15 +++++++ docs/admin/configuring.md | 13 ++++++ .../docker_cli_daemon_experimental_test.go | 12 +++--- libcontainerd/client_linux.go | 43 +++++++++++++++++-- libcontainerd/container_linux.go | 16 +++++++ libcontainerd/remote.go | 2 + libcontainerd/remote_linux.go | 29 +++++++++++++ libcontainerd/remote_solaris.go | 9 ++++ libcontainerd/remote_windows.go | 9 ++++ man/dockerd.8.md | 4 ++ 13 files changed, 148 insertions(+), 9 deletions(-) diff --git a/cmd/dockerd/daemon_unix.go b/cmd/dockerd/daemon_unix.go index cb1e6fca02..50588b9e24 100644 --- a/cmd/dockerd/daemon_unix.go +++ b/cmd/dockerd/daemon_unix.go @@ -71,6 +71,9 @@ func (cli *DaemonCli) getPlatformRemoteOptions() []libcontainerd.RemoteOption { args := []string{"--systemd-cgroup=true"} opts = append(opts, libcontainerd.WithRuntimeArgs(args)) } + if cli.Config.LiveRestore { + opts = append(opts, libcontainerd.WithLiveRestore(true)) + } return opts } diff --git a/daemon/config.go b/daemon/config.go index f16026d311..2331045fdb 100644 --- a/daemon/config.go +++ b/daemon/config.go @@ -90,6 +90,7 @@ type CommonConfig struct { TrustKeyPath string `json:"-"` CorsHeaders string `json:"api-cors-header,omitempty"` EnableCors bool `json:"api-enable-cors,omitempty"` + LiveRestore bool `json:"live-restore,omitempty"` // ClusterStore is the storage backend used for the cluster information. 
It is used by both // multihost networking (to store networks and endpoints information) and by the node discovery diff --git a/daemon/config_unix.go b/daemon/config_unix.go index f7a0ea599b..7e8d2bb6f4 100644 --- a/daemon/config_unix.go +++ b/daemon/config_unix.go @@ -82,6 +82,7 @@ func (config *Config) InstallFlags(cmd *flag.FlagSet, usageFn func(string) strin cmd.StringVar(&config.CgroupParent, []string{"-cgroup-parent"}, "", usageFn("Set parent cgroup for all containers")) cmd.StringVar(&config.RemappedRoot, []string{"-userns-remap"}, "", usageFn("User/Group setting for user namespaces")) cmd.StringVar(&config.ContainerdAddr, []string{"-containerd"}, "", usageFn("Path to containerd socket")) + cmd.BoolVar(&config.LiveRestore, []string{"-live-restore"}, false, usageFn("Enable live restore of docker when containers are still running")) config.attachExperimentalFlags(cmd, usageFn) } diff --git a/daemon/daemon.go b/daemon/daemon.go index 7d095930e1..ef4b45c50b 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -92,6 +92,7 @@ type Daemon struct { nameIndex *registrar.Registrar linkIndex *linkIndex containerd libcontainerd.Client + containerdRemote libcontainerd.Remote defaultIsolation containertypes.Isolation // Default isolation mode on Windows } @@ -542,6 +543,7 @@ func NewDaemon(config *Config, registryService registry.Service, containerdRemot d.nameIndex = registrar.NewRegistrar() d.linkIndex = newLinkIndex() + d.containerdRemote = containerdRemote go d.execCommandGC() @@ -599,6 +601,11 @@ func (daemon *Daemon) shutdownContainer(c *container.Container) error { // Shutdown stops the daemon. func (daemon *Daemon) Shutdown() error { daemon.shutdown = true + // Keep mounts and networking running on daemon shutdown if + // we are to keep containers running and restore them. 
+ if daemon.configStore.LiveRestore { + return nil + } if daemon.containers != nil { logrus.Debug("starting clean shutdown of all containers...") daemon.containers.ApplyAll(func(c *container.Container) { @@ -782,6 +789,7 @@ func (daemon *Daemon) initDiscovery(config *Config) error { // - Daemon max concurrent downloads // - Daemon max concurrent uploads // - Cluster discovery (reconfigure and restart). +// - Daemon live restore func (daemon *Daemon) Reload(config *Config) error { daemon.configStore.reloadLock.Lock() defer daemon.configStore.reloadLock.Unlock() @@ -796,6 +804,13 @@ func (daemon *Daemon) Reload(config *Config) error { if config.IsValueSet("debug") { daemon.configStore.Debug = config.Debug } + if config.IsValueSet("live-restore") { + daemon.configStore.LiveRestore = config.LiveRestore + if err := daemon.containerdRemote.UpdateOptions(libcontainerd.WithLiveRestore(config.LiveRestore)); err != nil { + return err + } + + } // If no value is set for max-concurrent-downloads we assume it is the default value // We always "reset" as the cost is lightweight and easy to maintain. diff --git a/docs/admin/configuring.md b/docs/admin/configuring.md index 3e10ab88b9..247525cf5b 100644 --- a/docs/admin/configuring.md +++ b/docs/admin/configuring.md @@ -278,3 +278,16 @@ be viewed using `journalctl -u docker` May 06 00:22:06 localhost.localdomain docker[2495]: time="2015-05-06T00:22:06Z" level="info" msg="-job acceptconnections() = OK (0)" _Note: Using and configuring journal is an advanced topic and is beyond the scope of this article._ + + +### Daemonless Containers + +Starting with Docker 1.12 containers can run without Docker or containerd running. This allows the +Docker daemon to exit, be upgraded, or recover from a crash without affecting running containers +on the system. To enable this functionality you need to add the `--live-restore` flag when +launching `dockerd`. 
This will ensure that Docker does not kill containers on graceful shutdown or +on restart, leaving the containers running. + +While the Docker daemon is down, logging is still captured; however, it is capped at the kernel's pipe buffer size, and once that buffer fills up the container process blocks on further writes. +Docker will need to be restarted to flush these buffers. +You can modify the kernel's buffer size by changing `/proc/sys/fs/pipe-max-size`. diff --git a/integration-cli/docker_cli_daemon_experimental_test.go b/integration-cli/docker_cli_daemon_experimental_test.go index b191f07222..e454c6813c 100644 --- a/integration-cli/docker_cli_daemon_experimental_test.go +++ b/integration-cli/docker_cli_daemon_experimental_test.go @@ -63,7 +63,7 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithKilledRunningContainer(t *check // them now, should remove the mounts. func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonCrash(c *check.C) { testRequires(c, DaemonIsLinux) - c.Assert(s.d.StartWithBusybox(), check.IsNil) + c.Assert(s.d.StartWithBusybox("--live-restore"), check.IsNil) out, err := s.d.Cmd("run", "-d", "busybox", "top") c.Assert(err, check.IsNil, check.Commentf("Output: %s", out)) @@ -78,7 +78,7 @@ func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonCrash(c *check.C) { c.Assert(strings.Contains(string(mountOut), id), check.Equals, true, comment) // restart daemon. 
- if err := s.d.Restart(); err != nil { + if err := s.d.Restart("--live-restore"); err != nil { c.Fatal(err) } @@ -103,7 +103,7 @@ func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonCrash(c *check.C) { // TestDaemonRestartWithPausedRunningContainer requires live restore of running containers func (s *DockerDaemonSuite) TestDaemonRestartWithPausedRunningContainer(t *check.C) { - if err := s.d.StartWithBusybox(); err != nil { + if err := s.d.StartWithBusybox("--live-restore"); err != nil { t.Fatal(err) } @@ -130,7 +130,7 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithPausedRunningContainer(t *check time.Sleep(3 * time.Second) // restart the daemon - if err := s.d.Start(); err != nil { + if err := s.d.Start("--live-restore"); err != nil { t.Fatal(err) } @@ -148,7 +148,7 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithPausedRunningContainer(t *check func (s *DockerDaemonSuite) TestDaemonRestartWithUnpausedRunningContainer(t *check.C) { // TODO(mlaventure): Not sure what would the exit code be on windows testRequires(t, DaemonIsLinux) - if err := s.d.StartWithBusybox(); err != nil { + if err := s.d.StartWithBusybox("--live-restore"); err != nil { t.Fatal(err) } @@ -180,7 +180,7 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithUnpausedRunningContainer(t *che time.Sleep(3 * time.Second) // restart the daemon - if err := s.d.Start(); err != nil { + if err := s.d.Start("--live-restore"); err != nil { t.Fatal(err) } diff --git a/libcontainerd/client_linux.go b/libcontainerd/client_linux.go index 165597b9a6..80e2ed4e30 100644 --- a/libcontainerd/client_linux.go +++ b/libcontainerd/client_linux.go @@ -8,6 +8,7 @@ import ( "strings" "sync" "syscall" + "time" "github.com/Sirupsen/logrus" containerd "github.com/docker/containerd/api/grpc/types" @@ -24,6 +25,7 @@ type client struct { remote *remote q queue exitNotifiers map[string]*exitNotifier + liveRestore bool } func (clnt *client) AddProcess(containerID, processFriendlyName string, specp Process) error { @@ 
-445,13 +447,48 @@ func (clnt *client) restore(cont *containerd.Container, options ...CreateOption) } func (clnt *client) Restore(containerID string, options ...CreateOption) error { + if clnt.liveRestore { + cont, err := clnt.getContainerdContainer(containerID) + if err == nil && cont.Status != "stopped" { + if err := clnt.restore(cont, options...); err != nil { + logrus.Errorf("error restoring %s: %v", containerID, err) + } + return nil + } + return clnt.setExited(containerID) + } + cont, err := clnt.getContainerdContainer(containerID) if err == nil && cont.Status != "stopped" { - if err := clnt.restore(cont, options...); err != nil { - logrus.Errorf("error restoring %s: %v", containerID, err) + w := clnt.getOrCreateExitNotifier(containerID) + clnt.lock(cont.Id) + container := clnt.newContainer(cont.BundlePath) + container.systemPid = systemPid(cont) + clnt.appendContainer(container) + clnt.unlock(cont.Id) + + container.discardFifos() + + if err := clnt.Signal(containerID, int(syscall.SIGTERM)); err != nil { + logrus.Errorf("error sending sigterm to %v: %v", containerID, err) + } + select { + case <-time.After(10 * time.Second): + if err := clnt.Signal(containerID, int(syscall.SIGKILL)); err != nil { + logrus.Errorf("error sending sigkill to %v: %v", containerID, err) + } + select { + case <-time.After(2 * time.Second): + case <-w.wait(): + return nil + } + case <-w.wait(): + return nil } - return nil } + + clnt.deleteContainer(containerID) + return clnt.setExited(containerID) } diff --git a/libcontainerd/container_linux.go b/libcontainerd/container_linux.go index adf6bff44f..00f86f9a81 100644 --- a/libcontainerd/container_linux.go +++ b/libcontainerd/container_linux.go @@ -2,6 +2,7 @@ package libcontainerd import ( "encoding/json" + "io" "io/ioutil" "os" "path/filepath" @@ -194,3 +195,18 @@ func (ctr *container) handleEvent(e *containerd.Event) error { } return nil } + +// discardFifos attempts to fully read the container fifos to unblock processes +// that may 
be blocked on the writer side. +func (ctr *container) discardFifos() { + for _, i := range []int{syscall.Stdout, syscall.Stderr} { + f := ctr.fifo(i) + c := make(chan struct{}) + go func() { + close(c) // this channel is used to not close the writer too early, before readonly open has been called. + io.Copy(ioutil.Discard, openReaderFromFifo(f)) + }() + <-c + closeReaderFifo(f) // avoid blocking permanently on open if there is no writer side + } +} diff --git a/libcontainerd/remote.go b/libcontainerd/remote.go index a89d4ddc56..9031e3ae7d 100644 --- a/libcontainerd/remote.go +++ b/libcontainerd/remote.go @@ -9,6 +9,8 @@ type Remote interface { // Cleanup stops containerd if it was started by libcontainerd. // Note this is not used on Windows as there is no remote containerd. Cleanup() + // UpdateOptions allows various remote options to be updated at runtime. + UpdateOptions(...RemoteOption) error } // RemoteOption allows to configure parameters of remotes. diff --git a/libcontainerd/remote_linux.go b/libcontainerd/remote_linux.go index 1a6ccf387c..6ed4a8c546 100644 --- a/libcontainerd/remote_linux.go +++ b/libcontainerd/remote_linux.go @@ -52,6 +52,7 @@ type remote struct { pastEvents map[string]*containerd.Event runtimeArgs []string daemonWaitCh chan struct{} + liveRestore bool } // New creates a fresh instance of libcontainerd remote. 
@@ -111,6 +112,15 @@ func New(stateDir string, options ...RemoteOption) (_ Remote, err error) { return r, nil } +func (r *remote) UpdateOptions(options ...RemoteOption) error { + for _, option := range options { + if err := option.Apply(r); err != nil { + return err + } + } + return nil +} + func (r *remote) handleConnectionChange() { var transientFailureCount = 0 state := grpc.Idle @@ -184,6 +194,7 @@ func (r *remote) Client(b Backend) (Client, error) { }, remote: r, exitNotifiers: make(map[string]*exitNotifier), + liveRestore: r.liveRestore, } r.Lock() @@ -460,3 +471,21 @@ func (d debugLog) Apply(r Remote) error { } return fmt.Errorf("WithDebugLog option not supported for this remote") } + +// WithLiveRestore defines if containers are stopped on shutdown or restored. +func WithLiveRestore(v bool) RemoteOption { + return liveRestore(v) +} + +type liveRestore bool + +func (l liveRestore) Apply(r Remote) error { + if remote, ok := r.(*remote); ok { + remote.liveRestore = bool(l) + for _, c := range remote.clients { + c.liveRestore = bool(l) + } + return nil + } + return fmt.Errorf("WithLiveRestore option not supported for this remote") +} diff --git a/libcontainerd/remote_solaris.go b/libcontainerd/remote_solaris.go index bd115485ad..e04f192882 100644 --- a/libcontainerd/remote_solaris.go +++ b/libcontainerd/remote_solaris.go @@ -19,7 +19,16 @@ func (r *remote) Client(b Backend) (Client, error) { func (r *remote) Cleanup() { } +func (r *remote) UpdateOptions(opts ...RemoteOption) error { + return nil +} + // New creates a fresh instance of libcontainerd remote. func New(_ string, _ ...RemoteOption) (Remote, error) { return &remote{}, nil } + +// WithLiveRestore is a noop on solaris. 
+func WithLiveRestore(v bool) RemoteOption { + return nil +} diff --git a/libcontainerd/remote_windows.go b/libcontainerd/remote_windows.go index ce01f74f25..74c10447bb 100644 --- a/libcontainerd/remote_windows.go +++ b/libcontainerd/remote_windows.go @@ -20,8 +20,17 @@ func (r *remote) Client(b Backend) (Client, error) { func (r *remote) Cleanup() { } +func (r *remote) UpdateOptions(opts ...RemoteOption) error { + return nil +} + // New creates a fresh instance of libcontainerd remote. On Windows, // this is not used as there is no remote containerd process. func New(_ string, _ ...RemoteOption) (Remote, error) { return &remote{}, nil } + +// WithLiveRestore is a noop on windows. +func WithLiveRestore(v bool) RemoteOption { + return nil +} diff --git a/man/dockerd.8.md b/man/dockerd.8.md index 9f980310e4..9cfd4b6339 100644 --- a/man/dockerd.8.md +++ b/man/dockerd.8.md @@ -42,6 +42,7 @@ dockerd - Enable daemon mode [**--isolation**[=*default*]] [**-l**|**--log-level**[=*info*]] [**--label**[=*[]*]] +[**--live-restore**[=*false*]] [**--log-driver**[=*json-file*]] [**--log-opt**[=*map[]*]] [**--mtu**[=*0*]] @@ -195,6 +196,9 @@ is `hyperv`. Linux only supports `default`. **--label**="[]" Set key=value labels to the daemon (displayed in `docker info`) +**--live-restore**=*false* + Enable live restore of running containers when the daemon starts so that they are not restarted. + **--log-driver**="*json-file*|*syslog*|*journald*|*gelf*|*fluentd*|*awslogs*|*splunk*|*etwlogs*|*gcplogs*|*none*" Default driver for container logs. Default is `json-file`. **Warning**: `docker logs` command works only for `json-file` logging driver.