mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
Update mount state of live containers after a daemon crash.
Fix unmount issues in the daemon crash and restart lifecycle, w.r.t
graph drivers. This change sets a live container RWLayer's activity
count to 1, so that the RWLayer is aware of the mount. Note that
containerd has experimental support for restoring live containers.
Added/updated corresponding tests.
Signed-off-by: Anusha Ragunathan <anusha@docker.com>
(cherry picked from commit 511a70583f
)
This commit is contained in:
parent
9db0bd88f5
commit
1818ca9d75
8 changed files with 170 additions and 7 deletions
|
@ -293,6 +293,11 @@ func (daemon *Daemon) restore() error {
|
||||||
go func(c *container.Container) {
|
go func(c *container.Container) {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
if c.IsRunning() || c.IsPaused() {
|
if c.IsRunning() || c.IsPaused() {
|
||||||
|
// Fix activityCount such that graph mounts can be unmounted later
|
||||||
|
if err := daemon.layerStore.ReinitRWLayer(c.RWLayer); err != nil {
|
||||||
|
logrus.Errorf("Failed to ReinitRWLayer for %s due to %s", c.ID, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
if err := daemon.containerd.Restore(c.ID, libcontainerd.WithRestartManager(c.RestartManager(true))); err != nil {
|
if err := daemon.containerd.Restore(c.ID, libcontainerd.WithRestartManager(c.RestartManager(true))); err != nil {
|
||||||
logrus.Errorf("Failed to restore with containerd: %q", err)
|
logrus.Errorf("Failed to restore with containerd: %q", err)
|
||||||
return
|
return
|
||||||
|
|
|
@ -121,6 +121,10 @@ func (ls *mockLayerStore) GetMountID(string) (string, error) {
|
||||||
return "", errors.New("not implemented")
|
return "", errors.New("not implemented")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (ls *mockLayerStore) ReinitRWLayer(layer.RWLayer) error {
|
||||||
|
return errors.New("not implemented")
|
||||||
|
}
|
||||||
|
|
||||||
func (ls *mockLayerStore) Cleanup() error {
|
func (ls *mockLayerStore) Cleanup() error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,8 @@
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"io/ioutil"
|
||||||
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
@ -56,6 +58,49 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithKilledRunningContainer(t *check
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// os.Kill should kill daemon ungracefully, leaving behind live containers.
|
||||||
|
// The live containers should be known to the restarted daemon. Stopping
|
||||||
|
// them now, should remove the mounts.
|
||||||
|
func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonCrash(c *check.C) {
|
||||||
|
testRequires(c, DaemonIsLinux)
|
||||||
|
c.Assert(s.d.StartWithBusybox(), check.IsNil)
|
||||||
|
|
||||||
|
out, err := s.d.Cmd("run", "-d", "busybox", "top")
|
||||||
|
c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
|
||||||
|
id := strings.TrimSpace(out)
|
||||||
|
|
||||||
|
c.Assert(s.d.cmd.Process.Signal(os.Kill), check.IsNil)
|
||||||
|
mountOut, err := ioutil.ReadFile("/proc/self/mountinfo")
|
||||||
|
c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut))
|
||||||
|
|
||||||
|
// container mounts should exist even after daemon has crashed.
|
||||||
|
comment := check.Commentf("%s should stay mounted from older daemon start:\nDaemon root repository %s\n%s", id, s.d.folder, mountOut)
|
||||||
|
c.Assert(strings.Contains(string(mountOut), id), check.Equals, true, comment)
|
||||||
|
|
||||||
|
// restart daemon.
|
||||||
|
if err := s.d.Restart(); err != nil {
|
||||||
|
c.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// container should be running.
|
||||||
|
out, err = s.d.Cmd("inspect", "--format='{{.State.Running}}'", id)
|
||||||
|
c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
|
||||||
|
out = strings.TrimSpace(out)
|
||||||
|
if out != "true" {
|
||||||
|
c.Fatalf("Container %s expected to stay alive after daemon restart", id)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 'docker stop' should work.
|
||||||
|
out, err = s.d.Cmd("stop", id)
|
||||||
|
c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
|
||||||
|
|
||||||
|
// Now, container mounts should be gone.
|
||||||
|
mountOut, err = ioutil.ReadFile("/proc/self/mountinfo")
|
||||||
|
c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut))
|
||||||
|
comment = check.Commentf("%s is still mounted from older daemon start:\nDaemon root repository %s\n%s", id, s.d.folder, mountOut)
|
||||||
|
c.Assert(strings.Contains(string(mountOut), id), check.Equals, false, comment)
|
||||||
|
}
|
||||||
|
|
||||||
// TestDaemonRestartWithPausedRunningContainer requires live restore of running containers
|
// TestDaemonRestartWithPausedRunningContainer requires live restore of running containers
|
||||||
func (s *DockerDaemonSuite) TestDaemonRestartWithPausedRunningContainer(t *check.C) {
|
func (s *DockerDaemonSuite) TestDaemonRestartWithPausedRunningContainer(t *check.C) {
|
||||||
if err := s.d.StartWithBusybox(); err != nil {
|
if err := s.d.StartWithBusybox(); err != nil {
|
||||||
|
|
39
integration-cli/docker_cli_daemon_not_experimental_test.go
Normal file
39
integration-cli/docker_cli_daemon_not_experimental_test.go
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
// +build daemon,!windows,!experimental
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io/ioutil"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/go-check/check"
|
||||||
|
)
|
||||||
|
|
||||||
|
// os.Kill should kill daemon ungracefully, leaving behind container mounts.
|
||||||
|
// A subsequent daemon restart shoud clean up said mounts.
|
||||||
|
func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonKill(c *check.C) {
|
||||||
|
c.Assert(s.d.StartWithBusybox(), check.IsNil)
|
||||||
|
|
||||||
|
out, err := s.d.Cmd("run", "-d", "busybox", "top")
|
||||||
|
c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
|
||||||
|
id := strings.TrimSpace(out)
|
||||||
|
c.Assert(s.d.cmd.Process.Signal(os.Kill), check.IsNil)
|
||||||
|
mountOut, err := ioutil.ReadFile("/proc/self/mountinfo")
|
||||||
|
c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut))
|
||||||
|
|
||||||
|
// container mounts should exist even after daemon has crashed.
|
||||||
|
comment := check.Commentf("%s should stay mounted from older daemon start:\nDaemon root repository %s\n%s", id, s.d.folder, mountOut)
|
||||||
|
c.Assert(strings.Contains(string(mountOut), id), check.Equals, true, comment)
|
||||||
|
|
||||||
|
// restart daemon.
|
||||||
|
if err := s.d.Restart(); err != nil {
|
||||||
|
c.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now, container mounts should be gone.
|
||||||
|
mountOut, err = ioutil.ReadFile("/proc/self/mountinfo")
|
||||||
|
c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut))
|
||||||
|
comment = check.Commentf("%s is still mounted from older daemon start:\nDaemon root repository %s\n%s", id, s.d.folder, mountOut)
|
||||||
|
c.Assert(strings.Contains(string(mountOut), id), check.Equals, false, comment)
|
||||||
|
}
|
|
@ -1501,25 +1501,54 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithSocketAsVolume(c *check.C) {
|
||||||
c.Assert(s.d.Restart(), check.IsNil)
|
c.Assert(s.d.Restart(), check.IsNil)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *DockerDaemonSuite) TestCleanupMountsAfterCrash(c *check.C) {
|
// os.Kill should kill daemon ungracefully, leaving behind container mounts.
|
||||||
|
// A subsequent daemon restart should clean up said mounts.
|
||||||
|
func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonAndContainerKill(c *check.C) {
|
||||||
c.Assert(s.d.StartWithBusybox(), check.IsNil)
|
c.Assert(s.d.StartWithBusybox(), check.IsNil)
|
||||||
|
|
||||||
out, err := s.d.Cmd("run", "-d", "busybox", "top")
|
out, err := s.d.Cmd("run", "-d", "busybox", "top")
|
||||||
c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
|
c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
|
||||||
id := strings.TrimSpace(out)
|
id := strings.TrimSpace(out)
|
||||||
c.Assert(s.d.Kill(), check.IsNil)
|
c.Assert(s.d.cmd.Process.Signal(os.Kill), check.IsNil)
|
||||||
|
mountOut, err := ioutil.ReadFile("/proc/self/mountinfo")
|
||||||
|
c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut))
|
||||||
|
|
||||||
|
// container mounts should exist even after daemon has crashed.
|
||||||
|
comment := check.Commentf("%s should stay mounted from older daemon start:\nDaemon root repository %s\n%s", id, s.d.folder, mountOut)
|
||||||
|
c.Assert(strings.Contains(string(mountOut), id), check.Equals, true, comment)
|
||||||
|
|
||||||
// kill the container
|
// kill the container
|
||||||
runCmd := exec.Command(ctrBinary, "--address", "/var/run/docker/libcontainerd/docker-containerd.sock", "containers", "kill", id)
|
runCmd := exec.Command(ctrBinary, "--address", "/var/run/docker/libcontainerd/docker-containerd.sock", "containers", "kill", id)
|
||||||
if out, ec, err := runCommandWithOutput(runCmd); err != nil {
|
if out, ec, err := runCommandWithOutput(runCmd); err != nil {
|
||||||
c.Fatalf("Failed to run ctr, ExitCode: %d, err: '%v' output: '%s' cid: '%s'\n", ec, err, out, id)
|
c.Fatalf("Failed to run ctr, ExitCode: %d, err: %v output: %s id: %s\n", ec, err, out, id)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Give time to containerd to process the command if we don't
|
// restart daemon.
|
||||||
// the exit event might be received after we do the inspect
|
if err := s.d.Restart(); err != nil {
|
||||||
|
c.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now, container mounts should be gone.
|
||||||
|
mountOut, err = ioutil.ReadFile("/proc/self/mountinfo")
|
||||||
|
c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut))
|
||||||
|
comment = check.Commentf("%s is still mounted from older daemon start:\nDaemon root repository %s\n%s", id, s.d.folder, mountOut)
|
||||||
|
c.Assert(strings.Contains(string(mountOut), id), check.Equals, false, comment)
|
||||||
|
}
|
||||||
|
|
||||||
|
// os.Interrupt should perform a graceful daemon shutdown and hence cleanup mounts.
|
||||||
|
func (s *DockerDaemonSuite) TestCleanupMountsAfterGracefulShutdown(c *check.C) {
|
||||||
|
c.Assert(s.d.StartWithBusybox(), check.IsNil)
|
||||||
|
|
||||||
|
out, err := s.d.Cmd("run", "-d", "busybox", "top")
|
||||||
|
c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
|
||||||
|
id := strings.TrimSpace(out)
|
||||||
|
|
||||||
|
// Send SIGINT and daemon should clean up
|
||||||
|
c.Assert(s.d.cmd.Process.Signal(os.Interrupt), check.IsNil)
|
||||||
|
|
||||||
|
// Wait a bit for the daemon to handle cleanups.
|
||||||
time.Sleep(3 * time.Second)
|
time.Sleep(3 * time.Second)
|
||||||
|
|
||||||
c.Assert(s.d.Start(), check.IsNil)
|
|
||||||
mountOut, err := ioutil.ReadFile("/proc/self/mountinfo")
|
mountOut, err := ioutil.ReadFile("/proc/self/mountinfo")
|
||||||
c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut))
|
c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut))
|
||||||
|
|
||||||
|
|
|
@ -174,6 +174,7 @@ type Store interface {
|
||||||
CreateRWLayer(id string, parent ChainID, mountLabel string, initFunc MountInit) (RWLayer, error)
|
CreateRWLayer(id string, parent ChainID, mountLabel string, initFunc MountInit) (RWLayer, error)
|
||||||
GetRWLayer(id string) (RWLayer, error)
|
GetRWLayer(id string) (RWLayer, error)
|
||||||
GetMountID(id string) (string, error)
|
GetMountID(id string) (string, error)
|
||||||
|
ReinitRWLayer(l RWLayer) error
|
||||||
ReleaseRWLayer(RWLayer) ([]Metadata, error)
|
ReleaseRWLayer(RWLayer) ([]Metadata, error)
|
||||||
|
|
||||||
Cleanup() error
|
Cleanup() error
|
||||||
|
|
|
@ -487,11 +487,30 @@ func (ls *layerStore) GetMountID(id string) (string, error) {
|
||||||
if !ok {
|
if !ok {
|
||||||
return "", ErrMountDoesNotExist
|
return "", ErrMountDoesNotExist
|
||||||
}
|
}
|
||||||
logrus.Debugf("GetRWLayer id: %s -> mountID: %s", id, mount.mountID)
|
logrus.Debugf("GetMountID id: %s -> mountID: %s", id, mount.mountID)
|
||||||
|
|
||||||
return mount.mountID, nil
|
return mount.mountID, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ReinitRWLayer reinitializes a given mount to the layerstore, specifically
|
||||||
|
// initializing the usage count. It should strictly only be used in the
|
||||||
|
// daemon's restore path to restore state of live containers.
|
||||||
|
func (ls *layerStore) ReinitRWLayer(l RWLayer) error {
|
||||||
|
ls.mountL.Lock()
|
||||||
|
defer ls.mountL.Unlock()
|
||||||
|
|
||||||
|
m, ok := ls.mounts[l.Name()]
|
||||||
|
if !ok {
|
||||||
|
return ErrMountDoesNotExist
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := m.incActivityCount(l); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (ls *layerStore) ReleaseRWLayer(l RWLayer) ([]Metadata, error) {
|
func (ls *layerStore) ReleaseRWLayer(l RWLayer) ([]Metadata, error) {
|
||||||
ls.mountL.Lock()
|
ls.mountL.Lock()
|
||||||
defer ls.mountL.Unlock()
|
defer ls.mountL.Unlock()
|
||||||
|
|
|
@ -83,6 +83,18 @@ func (ml *mountedLayer) hasReferences() bool {
|
||||||
return len(ml.references) > 0
|
return len(ml.references) > 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (ml *mountedLayer) incActivityCount(ref RWLayer) error {
|
||||||
|
rl, ok := ml.references[ref]
|
||||||
|
if !ok {
|
||||||
|
return ErrLayerNotRetained
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := rl.acquire(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (ml *mountedLayer) deleteReference(ref RWLayer) error {
|
func (ml *mountedLayer) deleteReference(ref RWLayer) error {
|
||||||
rl, ok := ml.references[ref]
|
rl, ok := ml.references[ref]
|
||||||
if !ok {
|
if !ok {
|
||||||
|
@ -111,6 +123,15 @@ type referencedRWLayer struct {
|
||||||
activityCount int
|
activityCount int
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (rl *referencedRWLayer) acquire() error {
|
||||||
|
rl.activityL.Lock()
|
||||||
|
defer rl.activityL.Unlock()
|
||||||
|
|
||||||
|
rl.activityCount++
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (rl *referencedRWLayer) release() error {
|
func (rl *referencedRWLayer) release() error {
|
||||||
rl.activityL.Lock()
|
rl.activityL.Lock()
|
||||||
defer rl.activityL.Unlock()
|
defer rl.activityL.Unlock()
|
||||||
|
|
Loading…
Add table
Reference in a new issue