
Update mount state of live containers after a daemon crash.

Fix unmount issues in the daemon crash-and-restart lifecycle with respect
to graph drivers. This change sets a live container RWLayer's activity
count to 1, so that the RWLayer is aware of the mount. Note that
containerd has experimental support for restoring live containers.

Added/updated corresponding tests.

Signed-off-by: Anusha Ragunathan <anusha@docker.com>
(cherry picked from commit 511a70583f)
Anusha Ragunathan 2016-03-04 14:41:53 -08:00 committed by Tibor Vass
parent 9db0bd88f5
commit 1818ca9d75
8 changed files with 170 additions and 7 deletions
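
Before the per-file diffs, a note on the heart of the change: an RWLayer tracks its outstanding mounts through an in-memory activity count. A daemon crash wipes that count while the kernel mount survives, so a later unmount is refused and the mount leaks. The following is a minimal, self-contained Go sketch of that failure mode and the fix; these are toy types for illustration, not the actual moby code.

package main

import (
	"errors"
	"fmt"
	"sync"
)

// rwLayer is a toy stand-in for a container's read-write layer.
type rwLayer struct {
	mu            sync.Mutex
	activityCount int // number of outstanding mounts the layer knows about
}

// unmount refuses to run when the layer believes it is not mounted.
func (l *rwLayer) unmount() error {
	l.mu.Lock()
	defer l.mu.Unlock()
	if l.activityCount == 0 {
		return errors.New("layer not mounted")
	}
	l.activityCount--
	return nil
}

// reinit plays the role of ReinitRWLayer below: re-register a mount
// that survived the crash so a later unmount succeeds.
func (l *rwLayer) reinit() {
	l.mu.Lock()
	defer l.mu.Unlock()
	l.activityCount++
}

func main() {
	l := &rwLayer{}          // after a daemon crash the in-memory count is 0
	fmt.Println(l.unmount()) // "layer not mounted": the kernel mount would leak
	l.reinit()               // restore path bumps the count to 1
	fmt.Println(l.unmount()) // <nil>: the mount can now be cleaned up
}

ReinitRWLayer, added to the layer store in this commit, plays the role of reinit here: it bumps each live container's count back to 1 during daemon restore.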

View file

@@ -293,6 +293,11 @@ func (daemon *Daemon) restore() error {
 		go func(c *container.Container) {
 			defer wg.Done()
 			if c.IsRunning() || c.IsPaused() {
+				// Fix activityCount such that graph mounts can be unmounted later
+				if err := daemon.layerStore.ReinitRWLayer(c.RWLayer); err != nil {
+					logrus.Errorf("Failed to ReinitRWLayer for %s due to %s", c.ID, err)
+					return
+				}
 				if err := daemon.containerd.Restore(c.ID, libcontainerd.WithRestartManager(c.RestartManager(true))); err != nil {
 					logrus.Errorf("Failed to restore with containerd: %q", err)
 					return
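
Note the ordering in restore(): the RWLayer is re-registered before the containerd restore is attempted, and a ReinitRWLayer failure skips the container entirely, presumably because restoring a container whose mount bookkeeping is broken would only defer the unmount failure.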

View file

@@ -121,6 +121,10 @@ func (ls *mockLayerStore) GetMountID(string) (string, error) {
 	return "", errors.New("not implemented")
 }
 
+func (ls *mockLayerStore) ReinitRWLayer(layer.RWLayer) error {
+	return errors.New("not implemented")
+}
+
 func (ls *mockLayerStore) Cleanup() error {
 	return nil
 }

View file

@@ -3,6 +3,8 @@
 package main
 
 import (
+	"io/ioutil"
+	"os"
 	"os/exec"
 	"strings"
 	"time"
@@ -56,6 +58,49 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithKilledRunningContainer(t *check.C) {
 }
 
+// os.Kill should kill daemon ungracefully, leaving behind live containers.
+// The live containers should be known to the restarted daemon. Stopping
+// them now should remove the mounts.
+func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonCrash(c *check.C) {
+	testRequires(c, DaemonIsLinux)
+	c.Assert(s.d.StartWithBusybox(), check.IsNil)
+
+	out, err := s.d.Cmd("run", "-d", "busybox", "top")
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
+	id := strings.TrimSpace(out)
+
+	c.Assert(s.d.cmd.Process.Signal(os.Kill), check.IsNil)
+
+	mountOut, err := ioutil.ReadFile("/proc/self/mountinfo")
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut))
+
+	// container mounts should exist even after daemon has crashed.
+	comment := check.Commentf("%s should stay mounted from older daemon start:\nDaemon root repository %s\n%s", id, s.d.folder, mountOut)
+	c.Assert(strings.Contains(string(mountOut), id), check.Equals, true, comment)
+
+	// restart daemon.
+	if err := s.d.Restart(); err != nil {
+		c.Fatal(err)
+	}
+
+	// container should be running.
+	out, err = s.d.Cmd("inspect", "--format='{{.State.Running}}'", id)
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
+	out = strings.TrimSpace(out)
+	if out != "true" {
+		c.Fatalf("Container %s expected to stay alive after daemon restart", id)
+	}
+
+	// 'docker stop' should work.
+	out, err = s.d.Cmd("stop", id)
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
+
+	// Now, container mounts should be gone.
+	mountOut, err = ioutil.ReadFile("/proc/self/mountinfo")
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut))
+	comment = check.Commentf("%s is still mounted from older daemon start:\nDaemon root repository %s\n%s", id, s.d.folder, mountOut)
+	c.Assert(strings.Contains(string(mountOut), id), check.Equals, false, comment)
+}
+
 // TestDaemonRestartWithPausedRunningContainer requires live restore of running containers
 func (s *DockerDaemonSuite) TestDaemonRestartWithPausedRunningContainer(t *check.C) {
 	if err := s.d.StartWithBusybox(); err != nil {

View file

@@ -0,0 +1,39 @@
+// +build daemon,!windows,!experimental
+
+package main
+
+import (
+	"io/ioutil"
+	"os"
+	"strings"
+
+	"github.com/go-check/check"
+)
+
+// os.Kill should kill daemon ungracefully, leaving behind container mounts.
+// A subsequent daemon restart should clean up said mounts.
+func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonKill(c *check.C) {
+	c.Assert(s.d.StartWithBusybox(), check.IsNil)
+	out, err := s.d.Cmd("run", "-d", "busybox", "top")
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
+	id := strings.TrimSpace(out)
+	c.Assert(s.d.cmd.Process.Signal(os.Kill), check.IsNil)
+
+	mountOut, err := ioutil.ReadFile("/proc/self/mountinfo")
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut))
+
+	// container mounts should exist even after daemon has crashed.
+	comment := check.Commentf("%s should stay mounted from older daemon start:\nDaemon root repository %s\n%s", id, s.d.folder, mountOut)
+	c.Assert(strings.Contains(string(mountOut), id), check.Equals, true, comment)
+
+	// restart daemon.
+	if err := s.d.Restart(); err != nil {
+		c.Fatal(err)
+	}
+
+	// Now, container mounts should be gone.
+	mountOut, err = ioutil.ReadFile("/proc/self/mountinfo")
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut))
+	comment = check.Commentf("%s is still mounted from older daemon start:\nDaemon root repository %s\n%s", id, s.d.folder, mountOut)
+	c.Assert(strings.Contains(string(mountOut), id), check.Equals, false, comment)
+}

View file

@@ -1501,25 +1501,54 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithSocketAsVolume(c *check.C) {
 	c.Assert(s.d.Restart(), check.IsNil)
 }
 
-func (s *DockerDaemonSuite) TestCleanupMountsAfterCrash(c *check.C) {
+// os.Kill should kill daemon ungracefully, leaving behind container mounts.
+// A subsequent daemon restart should clean up said mounts.
+func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonAndContainerKill(c *check.C) {
 	c.Assert(s.d.StartWithBusybox(), check.IsNil)
 	out, err := s.d.Cmd("run", "-d", "busybox", "top")
 	c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
 	id := strings.TrimSpace(out)
-	c.Assert(s.d.Kill(), check.IsNil)
+	c.Assert(s.d.cmd.Process.Signal(os.Kill), check.IsNil)
+
+	mountOut, err := ioutil.ReadFile("/proc/self/mountinfo")
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut))
+
+	// container mounts should exist even after daemon has crashed.
+	comment := check.Commentf("%s should stay mounted from older daemon start:\nDaemon root repository %s\n%s", id, s.d.folder, mountOut)
+	c.Assert(strings.Contains(string(mountOut), id), check.Equals, true, comment)
 
 	// kill the container
 	runCmd := exec.Command(ctrBinary, "--address", "/var/run/docker/libcontainerd/docker-containerd.sock", "containers", "kill", id)
 	if out, ec, err := runCommandWithOutput(runCmd); err != nil {
-		c.Fatalf("Failed to run ctr, ExitCode: %d, err: '%v' output: '%s' cid: '%s'\n", ec, err, out, id)
+		c.Fatalf("Failed to run ctr, ExitCode: %d, err: %v output: %s id: %s\n", ec, err, out, id)
 	}
 
-	// Give time to containerd to process the command if we don't
-	// the exit event might be received after we do the inspect
+	// restart daemon.
+	if err := s.d.Restart(); err != nil {
+		c.Fatal(err)
+	}
+
+	// Now, container mounts should be gone.
+	mountOut, err = ioutil.ReadFile("/proc/self/mountinfo")
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut))
+	comment = check.Commentf("%s is still mounted from older daemon start:\nDaemon root repository %s\n%s", id, s.d.folder, mountOut)
+	c.Assert(strings.Contains(string(mountOut), id), check.Equals, false, comment)
+}
+
+// os.Interrupt should perform a graceful daemon shutdown and hence cleanup mounts.
+func (s *DockerDaemonSuite) TestCleanupMountsAfterGracefulShutdown(c *check.C) {
+	c.Assert(s.d.StartWithBusybox(), check.IsNil)
+
+	out, err := s.d.Cmd("run", "-d", "busybox", "top")
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
+	id := strings.TrimSpace(out)
+
+	// Send SIGINT and daemon should clean up
+	c.Assert(s.d.cmd.Process.Signal(os.Interrupt), check.IsNil)
+
+	// Wait a bit for the daemon to handle cleanups.
 	time.Sleep(3 * time.Second)
+
+	c.Assert(s.d.Start(), check.IsNil)
+
 	mountOut, err := ioutil.ReadFile("/proc/self/mountinfo")
 	c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut))

View file

@@ -174,6 +174,7 @@ type Store interface {
 	CreateRWLayer(id string, parent ChainID, mountLabel string, initFunc MountInit) (RWLayer, error)
 	GetRWLayer(id string) (RWLayer, error)
 	GetMountID(id string) (string, error)
+	ReinitRWLayer(l RWLayer) error
 	ReleaseRWLayer(RWLayer) ([]Metadata, error)
 	Cleanup() error
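
Adding ReinitRWLayer to the exported Store interface is a breaking change for every implementer, which is why the mockLayerStore in the second file above gains a "not implemented" stub.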

View file

@@ -487,11 +487,30 @@ func (ls *layerStore) GetMountID(id string) (string, error) {
 	if !ok {
 		return "", ErrMountDoesNotExist
 	}
-	logrus.Debugf("GetRWLayer id: %s -> mountID: %s", id, mount.mountID)
+	logrus.Debugf("GetMountID id: %s -> mountID: %s", id, mount.mountID)
 
 	return mount.mountID, nil
 }
 
+// ReinitRWLayer reinitializes a given mount in the layer store, specifically
+// its usage count. It should only be used in the daemon's restore path, to
+// restore the state of live containers.
+func (ls *layerStore) ReinitRWLayer(l RWLayer) error {
+	ls.mountL.Lock()
+	defer ls.mountL.Unlock()
+
+	m, ok := ls.mounts[l.Name()]
+	if !ok {
+		return ErrMountDoesNotExist
+	}
+
+	if err := m.incActivityCount(l); err != nil {
+		return err
+	}
+	return nil
+}
+
 func (ls *layerStore) ReleaseRWLayer(l RWLayer) ([]Metadata, error) {
 	ls.mountL.Lock()
 	defer ls.mountL.Unlock()
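
A note on the locking above: ReinitRWLayer takes ls.mountL, the same lock ReleaseRWLayer holds, so restore-time reinitialization cannot race a concurrent release of the same mount.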

View file

@@ -83,6 +83,18 @@ func (ml *mountedLayer) hasReferences() bool {
 	return len(ml.references) > 0
 }
 
+func (ml *mountedLayer) incActivityCount(ref RWLayer) error {
+	rl, ok := ml.references[ref]
+	if !ok {
+		return ErrLayerNotRetained
+	}
+
+	if err := rl.acquire(); err != nil {
+		return err
+	}
+	return nil
+}
+
 func (ml *mountedLayer) deleteReference(ref RWLayer) error {
 	rl, ok := ml.references[ref]
 	if !ok {

@@ -111,6 +123,15 @@ type referencedRWLayer struct {
 	activityCount int
 }
 
+func (rl *referencedRWLayer) acquire() error {
+	rl.activityL.Lock()
+	defer rl.activityL.Unlock()
+
+	rl.activityCount++
+	return nil
+}
+
 func (rl *referencedRWLayer) release() error {
 	rl.activityL.Lock()
 	defer rl.activityL.Unlock()
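
Putting the pieces together: restore() calls ReinitRWLayer, which resolves the container's mountedLayer and calls incActivityCount, which in turn calls acquire() on the container's own RWLayer reference. The net effect is exactly the activity count of 1 promised in the commit message, so a later docker stop can decrement the count and actually unmount the layer, which is the behavior the integration tests above verify against /proc/self/mountinfo.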