From 1818ca9d75569f43bb1e3ec2a6f469d38e5b4742 Mon Sep 17 00:00:00 2001 From: Anusha Ragunathan Date: Fri, 4 Mar 2016 14:41:53 -0800 Subject: [PATCH] Update mount state of live containers after a daemon crash. Fix unmount issues in the daemon crash and restart lifecycle, w.r.t graph drivers. This change sets a live container RWLayer's activity count to 1, so that the RWLayer is aware of the mount. Note that containerd has experimental support for restore live containers. Added/updated corresponding tests. Signed-off-by: Anusha Ragunathan (cherry picked from commit 511a70583fbb901f57acb44d501cca8e6dcbce2c) --- daemon/daemon.go | 5 +++ distribution/xfer/download_test.go | 4 ++ .../docker_cli_daemon_experimental_test.go | 45 +++++++++++++++++++ ...docker_cli_daemon_not_experimental_test.go | 39 ++++++++++++++++ integration-cli/docker_cli_daemon_test.go | 41 ++++++++++++++--- layer/layer.go | 1 + layer/layer_store.go | 21 ++++++++- layer/mounted_layer.go | 21 +++++++++ 8 files changed, 170 insertions(+), 7 deletions(-) create mode 100644 integration-cli/docker_cli_daemon_not_experimental_test.go diff --git a/daemon/daemon.go b/daemon/daemon.go index 10d9bb70ea..5efa8137f6 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -293,6 +293,11 @@ func (daemon *Daemon) restore() error { go func(c *container.Container) { defer wg.Done() if c.IsRunning() || c.IsPaused() { + // Fix activityCount such that graph mounts can be unmounted later + if err := daemon.layerStore.ReinitRWLayer(c.RWLayer); err != nil { + logrus.Errorf("Failed to ReinitRWLayer for %s due to %s", c.ID, err) + return + } if err := daemon.containerd.Restore(c.ID, libcontainerd.WithRestartManager(c.RestartManager(true))); err != nil { logrus.Errorf("Failed to restore with containerd: %q", err) return diff --git a/distribution/xfer/download_test.go b/distribution/xfer/download_test.go index 2e4d724cd2..96f2db1707 100644 --- a/distribution/xfer/download_test.go +++ b/distribution/xfer/download_test.go @@ -121,6 +121,10 @@ func (ls *mockLayerStore) GetMountID(string) (string, error) { return "", errors.New("not implemented") } +func (ls *mockLayerStore) ReinitRWLayer(layer.RWLayer) error { + return errors.New("not implemented") +} + func (ls *mockLayerStore) Cleanup() error { return nil } diff --git a/integration-cli/docker_cli_daemon_experimental_test.go b/integration-cli/docker_cli_daemon_experimental_test.go index f6e9b824a4..fea887d720 100644 --- a/integration-cli/docker_cli_daemon_experimental_test.go +++ b/integration-cli/docker_cli_daemon_experimental_test.go @@ -3,6 +3,8 @@ package main import ( + "io/ioutil" + "os" "os/exec" "strings" "time" @@ -56,6 +58,49 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithKilledRunningContainer(t *check } +// os.Kill should kill daemon ungracefully, leaving behind live containers. +// The live containers should be known to the restarted daemon. Stopping +// them now, should remove the mounts. +func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonCrash(c *check.C) { + testRequires(c, DaemonIsLinux) + c.Assert(s.d.StartWithBusybox(), check.IsNil) + + out, err := s.d.Cmd("run", "-d", "busybox", "top") + c.Assert(err, check.IsNil, check.Commentf("Output: %s", out)) + id := strings.TrimSpace(out) + + c.Assert(s.d.cmd.Process.Signal(os.Kill), check.IsNil) + mountOut, err := ioutil.ReadFile("/proc/self/mountinfo") + c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut)) + + // container mounts should exist even after daemon has crashed. + comment := check.Commentf("%s should stay mounted from older daemon start:\nDaemon root repository %s\n%s", id, s.d.folder, mountOut) + c.Assert(strings.Contains(string(mountOut), id), check.Equals, true, comment) + + // restart daemon. + if err := s.d.Restart(); err != nil { + c.Fatal(err) + } + + // container should be running. + out, err = s.d.Cmd("inspect", "--format='{{.State.Running}}'", id) + c.Assert(err, check.IsNil, check.Commentf("Output: %s", out)) + out = strings.TrimSpace(out) + if out != "true" { + c.Fatalf("Container %s expected to stay alive after daemon restart", id) + } + + // 'docker stop' should work. + out, err = s.d.Cmd("stop", id) + c.Assert(err, check.IsNil, check.Commentf("Output: %s", out)) + + // Now, container mounts should be gone. + mountOut, err = ioutil.ReadFile("/proc/self/mountinfo") + c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut)) + comment = check.Commentf("%s is still mounted from older daemon start:\nDaemon root repository %s\n%s", id, s.d.folder, mountOut) + c.Assert(strings.Contains(string(mountOut), id), check.Equals, false, comment) +} + // TestDaemonRestartWithPausedRunningContainer requires live restore of running containers func (s *DockerDaemonSuite) TestDaemonRestartWithPausedRunningContainer(t *check.C) { if err := s.d.StartWithBusybox(); err != nil { diff --git a/integration-cli/docker_cli_daemon_not_experimental_test.go b/integration-cli/docker_cli_daemon_not_experimental_test.go new file mode 100644 index 0000000000..3c987741fb --- /dev/null +++ b/integration-cli/docker_cli_daemon_not_experimental_test.go @@ -0,0 +1,39 @@ +// +build daemon,!windows,!experimental + +package main + +import ( + "io/ioutil" + "os" + "strings" + + "github.com/go-check/check" +) + +// os.Kill should kill daemon ungracefully, leaving behind container mounts. +// A subsequent daemon restart shoud clean up said mounts. +func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonKill(c *check.C) { + c.Assert(s.d.StartWithBusybox(), check.IsNil) + + out, err := s.d.Cmd("run", "-d", "busybox", "top") + c.Assert(err, check.IsNil, check.Commentf("Output: %s", out)) + id := strings.TrimSpace(out) + c.Assert(s.d.cmd.Process.Signal(os.Kill), check.IsNil) + mountOut, err := ioutil.ReadFile("/proc/self/mountinfo") + c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut)) + + // container mounts should exist even after daemon has crashed. + comment := check.Commentf("%s should stay mounted from older daemon start:\nDaemon root repository %s\n%s", id, s.d.folder, mountOut) + c.Assert(strings.Contains(string(mountOut), id), check.Equals, true, comment) + + // restart daemon. + if err := s.d.Restart(); err != nil { + c.Fatal(err) + } + + // Now, container mounts should be gone. + mountOut, err = ioutil.ReadFile("/proc/self/mountinfo") + c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut)) + comment = check.Commentf("%s is still mounted from older daemon start:\nDaemon root repository %s\n%s", id, s.d.folder, mountOut) + c.Assert(strings.Contains(string(mountOut), id), check.Equals, false, comment) +} diff --git a/integration-cli/docker_cli_daemon_test.go b/integration-cli/docker_cli_daemon_test.go index 200f37cb52..661fbc4ca5 100644 --- a/integration-cli/docker_cli_daemon_test.go +++ b/integration-cli/docker_cli_daemon_test.go @@ -1501,25 +1501,54 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithSocketAsVolume(c *check.C) { c.Assert(s.d.Restart(), check.IsNil) } -func (s *DockerDaemonSuite) TestCleanupMountsAfterCrash(c *check.C) { +// os.Kill should kill daemon ungracefully, leaving behind container mounts. +// A subsequent daemon restart shoud clean up said mounts. +func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonAndContainerKill(c *check.C) { c.Assert(s.d.StartWithBusybox(), check.IsNil) out, err := s.d.Cmd("run", "-d", "busybox", "top") c.Assert(err, check.IsNil, check.Commentf("Output: %s", out)) id := strings.TrimSpace(out) - c.Assert(s.d.Kill(), check.IsNil) + c.Assert(s.d.cmd.Process.Signal(os.Kill), check.IsNil) + mountOut, err := ioutil.ReadFile("/proc/self/mountinfo") + c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut)) + + // container mounts should exist even after daemon has crashed. + comment := check.Commentf("%s should stay mounted from older daemon start:\nDaemon root repository %s\n%s", id, s.d.folder, mountOut) + c.Assert(strings.Contains(string(mountOut), id), check.Equals, true, comment) // kill the container runCmd := exec.Command(ctrBinary, "--address", "/var/run/docker/libcontainerd/docker-containerd.sock", "containers", "kill", id) if out, ec, err := runCommandWithOutput(runCmd); err != nil { - c.Fatalf("Failed to run ctr, ExitCode: %d, err: '%v' output: '%s' cid: '%s'\n", ec, err, out, id) + c.Fatalf("Failed to run ctr, ExitCode: %d, err: %v output: %s id: %s\n", ec, err, out, id) } - // Give time to containerd to process the command if we don't - // the exit event might be received after we do the inspect + // restart daemon. + if err := s.d.Restart(); err != nil { + c.Fatal(err) + } + + // Now, container mounts should be gone. + mountOut, err = ioutil.ReadFile("/proc/self/mountinfo") + c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut)) + comment = check.Commentf("%s is still mounted from older daemon start:\nDaemon root repository %s\n%s", id, s.d.folder, mountOut) + c.Assert(strings.Contains(string(mountOut), id), check.Equals, false, comment) +} + +// os.Interrupt should perform a graceful daemon shutdown and hence cleanup mounts. +func (s *DockerDaemonSuite) TestCleanupMountsAfterGracefulShutdown(c *check.C) { + c.Assert(s.d.StartWithBusybox(), check.IsNil) + + out, err := s.d.Cmd("run", "-d", "busybox", "top") + c.Assert(err, check.IsNil, check.Commentf("Output: %s", out)) + id := strings.TrimSpace(out) + + // Send SIGINT and daemon should clean up + c.Assert(s.d.cmd.Process.Signal(os.Interrupt), check.IsNil) + + // Wait a bit for the daemon to handle cleanups. time.Sleep(3 * time.Second) - c.Assert(s.d.Start(), check.IsNil) mountOut, err := ioutil.ReadFile("/proc/self/mountinfo") c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut)) diff --git a/layer/layer.go b/layer/layer.go index be3fd8329c..ad01e89a37 100644 --- a/layer/layer.go +++ b/layer/layer.go @@ -174,6 +174,7 @@ type Store interface { CreateRWLayer(id string, parent ChainID, mountLabel string, initFunc MountInit) (RWLayer, error) GetRWLayer(id string) (RWLayer, error) GetMountID(id string) (string, error) + ReinitRWLayer(l RWLayer) error ReleaseRWLayer(RWLayer) ([]Metadata, error) Cleanup() error diff --git a/layer/layer_store.go b/layer/layer_store.go index fa436f098b..a05b4a38b9 100644 --- a/layer/layer_store.go +++ b/layer/layer_store.go @@ -487,11 +487,30 @@ func (ls *layerStore) GetMountID(id string) (string, error) { if !ok { return "", ErrMountDoesNotExist } - logrus.Debugf("GetRWLayer id: %s -> mountID: %s", id, mount.mountID) + logrus.Debugf("GetMountID id: %s -> mountID: %s", id, mount.mountID) return mount.mountID, nil } +// ReinitRWLayer reinitializes a given mount to the layerstore, specifically +// initializing the usage count. It should strictly only be used in the +// daemon's restore path to restore state of live containers. +func (ls *layerStore) ReinitRWLayer(l RWLayer) error { + ls.mountL.Lock() + defer ls.mountL.Unlock() + + m, ok := ls.mounts[l.Name()] + if !ok { + return ErrMountDoesNotExist + } + + if err := m.incActivityCount(l); err != nil { + return err + } + + return nil +} + func (ls *layerStore) ReleaseRWLayer(l RWLayer) ([]Metadata, error) { ls.mountL.Lock() defer ls.mountL.Unlock() diff --git a/layer/mounted_layer.go b/layer/mounted_layer.go index 36a8eb44ce..5a07fd08ea 100644 --- a/layer/mounted_layer.go +++ b/layer/mounted_layer.go @@ -83,6 +83,18 @@ func (ml *mountedLayer) hasReferences() bool { return len(ml.references) > 0 } +func (ml *mountedLayer) incActivityCount(ref RWLayer) error { + rl, ok := ml.references[ref] + if !ok { + return ErrLayerNotRetained + } + + if err := rl.acquire(); err != nil { + return err + } + return nil +} + func (ml *mountedLayer) deleteReference(ref RWLayer) error { rl, ok := ml.references[ref] if !ok { @@ -111,6 +123,15 @@ type referencedRWLayer struct { activityCount int } +func (rl *referencedRWLayer) acquire() error { + rl.activityL.Lock() + defer rl.activityL.Unlock() + + rl.activityCount++ + + return nil +} + func (rl *referencedRWLayer) release() error { rl.activityL.Lock() defer rl.activityL.Unlock()