Do not remove containers from memory on error

Before this, if `forceRemove` is set the container data will be removed
no matter what, including if there are issues with removing container
on-disk state (rw layer, container root).

In practice this causes a lot of issues with leaked data sitting on
disk that users are not able to clean up themselves.
This is particularly a problem while the `EBUSY` errors on remove are so
prevalent. So for now let's not keep this behavior.

Signed-off-by: Brian Goff <cpuguy83@gmail.com>
This commit is contained in:
Brian Goff 2017-02-14 13:35:20 -05:00
parent 669f4ba37e
commit 54dcbab25e
12 changed files with 261 additions and 45 deletions

View File

@ -12,6 +12,7 @@ import (
"github.com/docker/docker/api/types"
"github.com/docker/docker/container"
"github.com/docker/docker/layer"
"github.com/docker/docker/pkg/system"
volumestore "github.com/docker/docker/volume/store"
"github.com/pkg/errors"
)
@ -111,37 +112,30 @@ func (daemon *Daemon) cleanupContainer(container *container.Container, forceRemo
logrus.Errorf("Error saving dying container to disk: %v", err)
}
// If force removal is required, delete container from various
// indexes even if removal failed.
defer func() {
if err == nil || forceRemove {
daemon.nameIndex.Delete(container.ID)
daemon.linkIndex.delete(container)
selinuxFreeLxcContexts(container.ProcessLabel)
daemon.idIndex.Delete(container.ID)
daemon.containers.Delete(container.ID)
if e := daemon.removeMountPoints(container, removeVolume); e != nil {
logrus.Error(e)
}
daemon.LogContainerEvent(container, "destroy")
stateCtr.del(container.ID)
}
}()
if err = os.RemoveAll(container.Root); err != nil {
return fmt.Errorf("Unable to remove filesystem for %v: %v", container.ID, err)
}
// When container creation fails and `RWLayer` has not been created yet, we
// do not call `ReleaseRWLayer`
if container.RWLayer != nil {
metadata, err := daemon.layerStore.ReleaseRWLayer(container.RWLayer)
layer.LogReleaseMetadata(metadata)
if err != nil && err != layer.ErrMountDoesNotExist {
return fmt.Errorf("Driver %s failed to remove root filesystem %s: %s", daemon.GraphDriverName(), container.ID, err)
return errors.Wrapf(err, "driver %q failed to remove root filesystem for %s", daemon.GraphDriverName(), container.ID)
}
}
if err := system.EnsureRemoveAll(container.Root); err != nil {
return errors.Wrapf(err, "unable to remove filesystem for %s", container.ID)
}
daemon.nameIndex.Delete(container.ID)
daemon.linkIndex.delete(container)
selinuxFreeLxcContexts(container.ProcessLabel)
daemon.idIndex.Delete(container.ID)
daemon.containers.Delete(container.ID)
if e := daemon.removeMountPoints(container, removeVolume); e != nil {
logrus.Error(e)
}
stateCtr.del(container.ID)
daemon.LogContainerEvent(container, "destroy")
return nil
}

View File

@ -46,6 +46,7 @@ import (
"github.com/docker/docker/pkg/idtools"
"github.com/docker/docker/pkg/locker"
mountpk "github.com/docker/docker/pkg/mount"
"github.com/docker/docker/pkg/system"
rsystem "github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/selinux/go-selinux/label"
@ -319,13 +320,13 @@ func (a *Driver) Remove(id string) error {
}
return err
}
defer os.RemoveAll(tmpMntPath)
defer system.EnsureRemoveAll(tmpMntPath)
tmpDiffpath := path.Join(a.diffPath(), fmt.Sprintf("%s-removing", id))
if err := os.Rename(a.getDiffPath(id), tmpDiffpath); err != nil && !os.IsNotExist(err) {
return err
}
defer os.RemoveAll(tmpDiffpath)
defer system.EnsureRemoveAll(tmpDiffpath)
// Remove the layers file for the id
if err := os.Remove(path.Join(a.rootPath(), "layers", id)); err != nil && !os.IsNotExist(err) {

View File

@ -27,6 +27,7 @@ import (
"github.com/docker/docker/pkg/idtools"
"github.com/docker/docker/pkg/mount"
"github.com/docker/docker/pkg/parsers"
"github.com/docker/docker/pkg/system"
"github.com/docker/go-units"
"github.com/opencontainers/selinux/go-selinux/label"
)
@ -535,7 +536,7 @@ func (d *Driver) Remove(id string) error {
if err := subvolDelete(d.subvolumesDir(), id); err != nil {
return err
}
if err := os.RemoveAll(dir); err != nil && !os.IsNotExist(err) {
if err := system.EnsureRemoveAll(dir); err != nil {
return err
}
if err := d.subvolRescanQuota(); err != nil {

View File

@ -16,6 +16,7 @@ import (
"github.com/docker/docker/pkg/idtools"
"github.com/docker/docker/pkg/locker"
"github.com/docker/docker/pkg/mount"
"github.com/docker/docker/pkg/system"
units "github.com/docker/go-units"
)
@ -160,7 +161,7 @@ func (d *Driver) Remove(id string) error {
}
mp := path.Join(d.home, "mnt", id)
if err := os.RemoveAll(mp); err != nil && !os.IsNotExist(err) {
if err := system.EnsureRemoveAll(mp); err != nil {
return err
}

View File

@ -21,6 +21,7 @@ import (
"github.com/docker/docker/pkg/idtools"
"github.com/docker/docker/pkg/locker"
"github.com/docker/docker/pkg/mount"
"github.com/docker/docker/pkg/system"
"github.com/opencontainers/selinux/go-selinux/label"
)
@ -339,10 +340,7 @@ func (d *Driver) dir(id string) string {
func (d *Driver) Remove(id string) error {
d.locker.Lock(id)
defer d.locker.Unlock(id)
if err := os.RemoveAll(d.dir(id)); err != nil && !os.IsNotExist(err) {
return err
}
return nil
return system.EnsureRemoveAll(d.dir(id))
}
// Get creates and mounts the required file system for the given id and returns the mount path.

View File

@ -31,6 +31,7 @@ import (
"github.com/docker/docker/pkg/mount"
"github.com/docker/docker/pkg/parsers"
"github.com/docker/docker/pkg/parsers/kernel"
"github.com/docker/docker/pkg/system"
units "github.com/docker/go-units"
"github.com/opencontainers/selinux/go-selinux/label"
@ -464,7 +465,7 @@ func (d *Driver) Remove(id string) error {
}
}
if err := os.RemoveAll(dir); err != nil && !os.IsNotExist(err) {
if err := system.EnsureRemoveAll(dir); err != nil && !os.IsNotExist(err) {
return err
}
return nil

View File

@ -8,6 +8,7 @@ import (
"github.com/docker/docker/daemon/graphdriver"
"github.com/docker/docker/pkg/chrootarchive"
"github.com/docker/docker/pkg/idtools"
"github.com/docker/docker/pkg/system"
"github.com/opencontainers/selinux/go-selinux/label"
)
@ -114,7 +115,7 @@ func (d *Driver) dir(id string) string {
// Remove deletes the content from the directory for a given id.
func (d *Driver) Remove(id string) error {
if err := os.RemoveAll(d.dir(id)); err != nil && !os.IsNotExist(err) {
if err := system.EnsureRemoveAll(d.dir(id)); err != nil {
return err
}
return nil

View File

@ -41,17 +41,6 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
daemon.updateHealthMonitor(c)
daemon.LogContainerEvent(c, "oom")
case libcontainerd.StateExit:
// if container's AutoRemove flag is set, remove it after clean up
autoRemove := func() {
c.Lock()
ar := c.HostConfig.AutoRemove
c.Unlock()
if ar {
if err := daemon.ContainerRm(c.ID, &types.ContainerRmConfig{ForceRemove: true, RemoveVolume: true}); err != nil {
logrus.Errorf("can't remove container %s: %v", c.ID, err)
}
}
}
c.Lock()
c.StreamConfig.Wait()
@ -63,7 +52,7 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
c.SetRestarting(platformConstructExitStatus(e))
} else {
c.SetStopped(platformConstructExitStatus(e))
defer autoRemove()
defer daemon.autoRemove(c)
}
// cancel healthcheck here, they will be automatically
@ -85,7 +74,7 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
}
if err != nil {
c.SetStopped(platformConstructExitStatus(e))
defer autoRemove()
defer daemon.autoRemove(c)
if err != restartmanager.ErrRestartCanceled {
logrus.Errorf("restartmanger wait error: %+v", err)
}
@ -153,3 +142,24 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
}
return nil
}
func (daemon *Daemon) autoRemove(c *container.Container) {
c.Lock()
ar := c.HostConfig.AutoRemove
c.Unlock()
if !ar {
return
}
var err error
if err = daemon.ContainerRm(c.ID, &types.ContainerRmConfig{ForceRemove: true, RemoveVolume: true}); err == nil {
return
}
if c := daemon.containers.Get(c.ID); c == nil {
return
}
if err != nil {
logrus.WithError(err).WithField("container", c.ID).Error("error removing container")
}
}

View File

@ -1,5 +1,10 @@
package mount
import (
"sort"
"strings"
)
// GetMounts retrieves a list of mounts for the current running process.
func GetMounts() ([]*Info, error) {
return parseMountTable()
@ -53,3 +58,29 @@ func Unmount(target string) error {
}
return unmount(target, mntDetach)
}
// RecursiveUnmount unmounts the target and all mounts underneath, starting with
// the deepsest mount first.
func RecursiveUnmount(target string) error {
mounts, err := GetMounts()
if err != nil {
return err
}
// Make the deepest mount be first
sort.Sort(sort.Reverse(byMountpoint(mounts)))
for i, m := range mounts {
if !strings.HasPrefix(m.Mountpoint, target) {
continue
}
if err := Unmount(m.Mountpoint); err != nil && i == len(mounts)-1 {
if mounted, err := Mounted(m.Mountpoint); err != nil || mounted {
return err
}
// Ignore errors for submounts and continue trying to unmount others
// The final unmount should fail if there ane any submounts remaining
}
}
return nil
}

View File

@ -38,3 +38,17 @@ type Info struct {
// VfsOpts represents per super block options.
VfsOpts string
}
type byMountpoint []*Info
func (by byMountpoint) Len() int {
return len(by)
}
func (by byMountpoint) Less(i, j int) bool {
return by[i].Mountpoint < by[j].Mountpoint
}
func (by byMountpoint) Swap(i, j int) {
by[i], by[j] = by[j], by[i]
}

80
pkg/system/rm.go Normal file
View File

@ -0,0 +1,80 @@
package system
import (
"os"
"syscall"
"time"
"github.com/docker/docker/pkg/mount"
"github.com/pkg/errors"
)
// EnsureRemoveAll wraps `os.RemoveAll` to check for specific errors that can
// often be remedied.
// Only use `EnsureRemoveAll` if you really want to make every effort to remove
// a directory.
//
// Because of the way `os.Remove` (and by extension `os.RemoveAll`) works, there
// can be a race between reading directory entries and then actually attempting
// to remove everything in the directory.
// These types of errors do not need to be returned since it's ok for the dir to
// be gone we can just retry the remove operation.
//
// This should not return a `os.ErrNotExist` kind of error under any cirucmstances
func EnsureRemoveAll(dir string) error {
notExistErr := make(map[string]bool)
// track retries
exitOnErr := make(map[string]int)
maxRetry := 5
// Attempt to unmount anything beneath this dir first
mount.RecursiveUnmount(dir)
for {
err := os.RemoveAll(dir)
if err == nil {
return err
}
pe, ok := err.(*os.PathError)
if !ok {
return err
}
if os.IsNotExist(err) {
if notExistErr[pe.Path] {
return err
}
notExistErr[pe.Path] = true
// There is a race where some subdir can be removed but after the parent
// dir entries have been read.
// So the path could be from `os.Remove(subdir)`
// If the reported non-existent path is not the passed in `dir` we
// should just retry, but otherwise return with no error.
if pe.Path == dir {
return nil
}
continue
}
if pe.Err != syscall.EBUSY {
return err
}
if mounted, _ := mount.Mounted(pe.Path); mounted {
if e := mount.Unmount(pe.Path); e != nil {
if mounted, _ := mount.Mounted(pe.Path); mounted {
return errors.Wrapf(e, "error while removing %s", dir)
}
}
}
if exitOnErr[pe.Path] == maxRetry {
return err
}
exitOnErr[pe.Path]++
time.Sleep(100 * time.Millisecond)
}
}

84
pkg/system/rm_test.go Normal file
View File

@ -0,0 +1,84 @@
package system
import (
"io/ioutil"
"os"
"path/filepath"
"runtime"
"testing"
"time"
"github.com/docker/docker/pkg/mount"
)
func TestEnsureRemoveAllNotExist(t *testing.T) {
// should never return an error for a non-existent path
if err := EnsureRemoveAll("/non/existent/path"); err != nil {
t.Fatal(err)
}
}
func TestEnsureRemoveAllWithDir(t *testing.T) {
dir, err := ioutil.TempDir("", "test-ensure-removeall-with-dir")
if err != nil {
t.Fatal(err)
}
if err := EnsureRemoveAll(dir); err != nil {
t.Fatal(err)
}
}
func TestEnsureRemoveAllWithFile(t *testing.T) {
tmp, err := ioutil.TempFile("", "test-ensure-removeall-with-dir")
if err != nil {
t.Fatal(err)
}
tmp.Close()
if err := EnsureRemoveAll(tmp.Name()); err != nil {
t.Fatal(err)
}
}
func TestEnsureRemoveAllWithMount(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("mount not supported on Windows")
}
dir1, err := ioutil.TempDir("", "test-ensure-removeall-with-dir1")
if err != nil {
t.Fatal(err)
}
dir2, err := ioutil.TempDir("", "test-ensure-removeall-with-dir2")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir2)
bindDir := filepath.Join(dir1, "bind")
if err := os.MkdirAll(bindDir, 0755); err != nil {
t.Fatal(err)
}
if err := mount.Mount(dir2, bindDir, "none", "bind"); err != nil {
t.Fatal(err)
}
done := make(chan struct{})
go func() {
err = EnsureRemoveAll(dir1)
close(done)
}()
select {
case <-done:
if err != nil {
t.Fatal(err)
}
case <-time.After(5 * time.Second):
t.Fatal("timeout waiting for EnsureRemoveAll to finish")
}
if _, err := os.Stat(dir1); !os.IsNotExist(err) {
t.Fatalf("expected %q to not exist", dir1)
}
}