mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
Do not remove containers from memory on error
Before this, if `forceRemove` is set the container data will be removed no matter what, including if there are issues with removing container on-disk state (rw layer, container root). In practice this causes a lot of issues with leaked data sitting on disk that users are not able to clean up themselves. This is particularly a problem while the `EBUSY` errors on remove are so prevalent. So for now let's not keep this behavior. Signed-off-by: Brian Goff <cpuguy83@gmail.com>
This commit is contained in:
parent
669f4ba37e
commit
54dcbab25e
12 changed files with 261 additions and 45 deletions
|
@ -12,6 +12,7 @@ import (
|
|||
"github.com/docker/docker/api/types"
|
||||
"github.com/docker/docker/container"
|
||||
"github.com/docker/docker/layer"
|
||||
"github.com/docker/docker/pkg/system"
|
||||
volumestore "github.com/docker/docker/volume/store"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
@ -111,37 +112,30 @@ func (daemon *Daemon) cleanupContainer(container *container.Container, forceRemo
|
|||
logrus.Errorf("Error saving dying container to disk: %v", err)
|
||||
}
|
||||
|
||||
// If force removal is required, delete container from various
|
||||
// indexes even if removal failed.
|
||||
defer func() {
|
||||
if err == nil || forceRemove {
|
||||
daemon.nameIndex.Delete(container.ID)
|
||||
daemon.linkIndex.delete(container)
|
||||
selinuxFreeLxcContexts(container.ProcessLabel)
|
||||
daemon.idIndex.Delete(container.ID)
|
||||
daemon.containers.Delete(container.ID)
|
||||
if e := daemon.removeMountPoints(container, removeVolume); e != nil {
|
||||
logrus.Error(e)
|
||||
}
|
||||
daemon.LogContainerEvent(container, "destroy")
|
||||
stateCtr.del(container.ID)
|
||||
}
|
||||
}()
|
||||
|
||||
if err = os.RemoveAll(container.Root); err != nil {
|
||||
return fmt.Errorf("Unable to remove filesystem for %v: %v", container.ID, err)
|
||||
}
|
||||
|
||||
// When container creation fails and `RWLayer` has not been created yet, we
|
||||
// do not call `ReleaseRWLayer`
|
||||
if container.RWLayer != nil {
|
||||
metadata, err := daemon.layerStore.ReleaseRWLayer(container.RWLayer)
|
||||
layer.LogReleaseMetadata(metadata)
|
||||
if err != nil && err != layer.ErrMountDoesNotExist {
|
||||
return fmt.Errorf("Driver %s failed to remove root filesystem %s: %s", daemon.GraphDriverName(), container.ID, err)
|
||||
return errors.Wrapf(err, "driver %q failed to remove root filesystem for %s", daemon.GraphDriverName(), container.ID)
|
||||
}
|
||||
}
|
||||
|
||||
if err := system.EnsureRemoveAll(container.Root); err != nil {
|
||||
return errors.Wrapf(err, "unable to remove filesystem for %s", container.ID)
|
||||
}
|
||||
|
||||
daemon.nameIndex.Delete(container.ID)
|
||||
daemon.linkIndex.delete(container)
|
||||
selinuxFreeLxcContexts(container.ProcessLabel)
|
||||
daemon.idIndex.Delete(container.ID)
|
||||
daemon.containers.Delete(container.ID)
|
||||
if e := daemon.removeMountPoints(container, removeVolume); e != nil {
|
||||
logrus.Error(e)
|
||||
}
|
||||
stateCtr.del(container.ID)
|
||||
daemon.LogContainerEvent(container, "destroy")
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -46,6 +46,7 @@ import (
|
|||
"github.com/docker/docker/pkg/idtools"
|
||||
"github.com/docker/docker/pkg/locker"
|
||||
mountpk "github.com/docker/docker/pkg/mount"
|
||||
"github.com/docker/docker/pkg/system"
|
||||
|
||||
rsystem "github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/selinux/go-selinux/label"
|
||||
|
@ -319,13 +320,13 @@ func (a *Driver) Remove(id string) error {
|
|||
}
|
||||
return err
|
||||
}
|
||||
defer os.RemoveAll(tmpMntPath)
|
||||
defer system.EnsureRemoveAll(tmpMntPath)
|
||||
|
||||
tmpDiffpath := path.Join(a.diffPath(), fmt.Sprintf("%s-removing", id))
|
||||
if err := os.Rename(a.getDiffPath(id), tmpDiffpath); err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
defer os.RemoveAll(tmpDiffpath)
|
||||
defer system.EnsureRemoveAll(tmpDiffpath)
|
||||
|
||||
// Remove the layers file for the id
|
||||
if err := os.Remove(path.Join(a.rootPath(), "layers", id)); err != nil && !os.IsNotExist(err) {
|
||||
|
|
|
@ -27,6 +27,7 @@ import (
|
|||
"github.com/docker/docker/pkg/idtools"
|
||||
"github.com/docker/docker/pkg/mount"
|
||||
"github.com/docker/docker/pkg/parsers"
|
||||
"github.com/docker/docker/pkg/system"
|
||||
"github.com/docker/go-units"
|
||||
"github.com/opencontainers/selinux/go-selinux/label"
|
||||
)
|
||||
|
@ -535,7 +536,7 @@ func (d *Driver) Remove(id string) error {
|
|||
if err := subvolDelete(d.subvolumesDir(), id); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.RemoveAll(dir); err != nil && !os.IsNotExist(err) {
|
||||
if err := system.EnsureRemoveAll(dir); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := d.subvolRescanQuota(); err != nil {
|
||||
|
|
|
@ -16,6 +16,7 @@ import (
|
|||
"github.com/docker/docker/pkg/idtools"
|
||||
"github.com/docker/docker/pkg/locker"
|
||||
"github.com/docker/docker/pkg/mount"
|
||||
"github.com/docker/docker/pkg/system"
|
||||
units "github.com/docker/go-units"
|
||||
)
|
||||
|
||||
|
@ -160,7 +161,7 @@ func (d *Driver) Remove(id string) error {
|
|||
}
|
||||
|
||||
mp := path.Join(d.home, "mnt", id)
|
||||
if err := os.RemoveAll(mp); err != nil && !os.IsNotExist(err) {
|
||||
if err := system.EnsureRemoveAll(mp); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
|
|
@ -21,6 +21,7 @@ import (
|
|||
"github.com/docker/docker/pkg/idtools"
|
||||
"github.com/docker/docker/pkg/locker"
|
||||
"github.com/docker/docker/pkg/mount"
|
||||
"github.com/docker/docker/pkg/system"
|
||||
"github.com/opencontainers/selinux/go-selinux/label"
|
||||
)
|
||||
|
||||
|
@ -339,10 +340,7 @@ func (d *Driver) dir(id string) string {
|
|||
func (d *Driver) Remove(id string) error {
|
||||
d.locker.Lock(id)
|
||||
defer d.locker.Unlock(id)
|
||||
if err := os.RemoveAll(d.dir(id)); err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
return system.EnsureRemoveAll(d.dir(id))
|
||||
}
|
||||
|
||||
// Get creates and mounts the required file system for the given id and returns the mount path.
|
||||
|
|
|
@ -31,6 +31,7 @@ import (
|
|||
"github.com/docker/docker/pkg/mount"
|
||||
"github.com/docker/docker/pkg/parsers"
|
||||
"github.com/docker/docker/pkg/parsers/kernel"
|
||||
"github.com/docker/docker/pkg/system"
|
||||
units "github.com/docker/go-units"
|
||||
|
||||
"github.com/opencontainers/selinux/go-selinux/label"
|
||||
|
@ -464,7 +465,7 @@ func (d *Driver) Remove(id string) error {
|
|||
}
|
||||
}
|
||||
|
||||
if err := os.RemoveAll(dir); err != nil && !os.IsNotExist(err) {
|
||||
if err := system.EnsureRemoveAll(dir); err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
|
|
|
@ -8,6 +8,7 @@ import (
|
|||
"github.com/docker/docker/daemon/graphdriver"
|
||||
"github.com/docker/docker/pkg/chrootarchive"
|
||||
"github.com/docker/docker/pkg/idtools"
|
||||
"github.com/docker/docker/pkg/system"
|
||||
|
||||
"github.com/opencontainers/selinux/go-selinux/label"
|
||||
)
|
||||
|
@ -114,7 +115,7 @@ func (d *Driver) dir(id string) string {
|
|||
|
||||
// Remove deletes the content from the directory for a given id.
|
||||
func (d *Driver) Remove(id string) error {
|
||||
if err := os.RemoveAll(d.dir(id)); err != nil && !os.IsNotExist(err) {
|
||||
if err := system.EnsureRemoveAll(d.dir(id)); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
|
|
|
@ -41,17 +41,6 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
|
|||
daemon.updateHealthMonitor(c)
|
||||
daemon.LogContainerEvent(c, "oom")
|
||||
case libcontainerd.StateExit:
|
||||
// if container's AutoRemove flag is set, remove it after clean up
|
||||
autoRemove := func() {
|
||||
c.Lock()
|
||||
ar := c.HostConfig.AutoRemove
|
||||
c.Unlock()
|
||||
if ar {
|
||||
if err := daemon.ContainerRm(c.ID, &types.ContainerRmConfig{ForceRemove: true, RemoveVolume: true}); err != nil {
|
||||
logrus.Errorf("can't remove container %s: %v", c.ID, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
c.Lock()
|
||||
c.StreamConfig.Wait()
|
||||
|
@ -63,7 +52,7 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
|
|||
c.SetRestarting(platformConstructExitStatus(e))
|
||||
} else {
|
||||
c.SetStopped(platformConstructExitStatus(e))
|
||||
defer autoRemove()
|
||||
defer daemon.autoRemove(c)
|
||||
}
|
||||
|
||||
// cancel healthcheck here, they will be automatically
|
||||
|
@ -85,7 +74,7 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
|
|||
}
|
||||
if err != nil {
|
||||
c.SetStopped(platformConstructExitStatus(e))
|
||||
defer autoRemove()
|
||||
defer daemon.autoRemove(c)
|
||||
if err != restartmanager.ErrRestartCanceled {
|
||||
logrus.Errorf("restartmanger wait error: %+v", err)
|
||||
}
|
||||
|
@ -153,3 +142,24 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
|
|||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (daemon *Daemon) autoRemove(c *container.Container) {
|
||||
c.Lock()
|
||||
ar := c.HostConfig.AutoRemove
|
||||
c.Unlock()
|
||||
if !ar {
|
||||
return
|
||||
}
|
||||
|
||||
var err error
|
||||
if err = daemon.ContainerRm(c.ID, &types.ContainerRmConfig{ForceRemove: true, RemoveVolume: true}); err == nil {
|
||||
return
|
||||
}
|
||||
if c := daemon.containers.Get(c.ID); c == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
logrus.WithError(err).WithField("container", c.ID).Error("error removing container")
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,10 @@
|
|||
package mount
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// GetMounts retrieves a list of mounts for the current running process.
|
||||
func GetMounts() ([]*Info, error) {
|
||||
return parseMountTable()
|
||||
|
@ -53,3 +58,29 @@ func Unmount(target string) error {
|
|||
}
|
||||
return unmount(target, mntDetach)
|
||||
}
|
||||
|
||||
// RecursiveUnmount unmounts the target and all mounts underneath, starting with
|
||||
// the deepsest mount first.
|
||||
func RecursiveUnmount(target string) error {
|
||||
mounts, err := GetMounts()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Make the deepest mount be first
|
||||
sort.Sort(sort.Reverse(byMountpoint(mounts)))
|
||||
|
||||
for i, m := range mounts {
|
||||
if !strings.HasPrefix(m.Mountpoint, target) {
|
||||
continue
|
||||
}
|
||||
if err := Unmount(m.Mountpoint); err != nil && i == len(mounts)-1 {
|
||||
if mounted, err := Mounted(m.Mountpoint); err != nil || mounted {
|
||||
return err
|
||||
}
|
||||
// Ignore errors for submounts and continue trying to unmount others
|
||||
// The final unmount should fail if there ane any submounts remaining
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -38,3 +38,17 @@ type Info struct {
|
|||
// VfsOpts represents per super block options.
|
||||
VfsOpts string
|
||||
}
|
||||
|
||||
type byMountpoint []*Info
|
||||
|
||||
func (by byMountpoint) Len() int {
|
||||
return len(by)
|
||||
}
|
||||
|
||||
func (by byMountpoint) Less(i, j int) bool {
|
||||
return by[i].Mountpoint < by[j].Mountpoint
|
||||
}
|
||||
|
||||
func (by byMountpoint) Swap(i, j int) {
|
||||
by[i], by[j] = by[j], by[i]
|
||||
}
|
||||
|
|
80
pkg/system/rm.go
Normal file
80
pkg/system/rm.go
Normal file
|
@ -0,0 +1,80 @@
|
|||
package system
|
||||
|
||||
import (
|
||||
"os"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/docker/docker/pkg/mount"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// EnsureRemoveAll wraps `os.RemoveAll` to check for specific errors that can
|
||||
// often be remedied.
|
||||
// Only use `EnsureRemoveAll` if you really want to make every effort to remove
|
||||
// a directory.
|
||||
//
|
||||
// Because of the way `os.Remove` (and by extension `os.RemoveAll`) works, there
|
||||
// can be a race between reading directory entries and then actually attempting
|
||||
// to remove everything in the directory.
|
||||
// These types of errors do not need to be returned since it's ok for the dir to
|
||||
// be gone we can just retry the remove operation.
|
||||
//
|
||||
// This should not return a `os.ErrNotExist` kind of error under any cirucmstances
|
||||
func EnsureRemoveAll(dir string) error {
|
||||
notExistErr := make(map[string]bool)
|
||||
|
||||
// track retries
|
||||
exitOnErr := make(map[string]int)
|
||||
maxRetry := 5
|
||||
|
||||
// Attempt to unmount anything beneath this dir first
|
||||
mount.RecursiveUnmount(dir)
|
||||
|
||||
for {
|
||||
err := os.RemoveAll(dir)
|
||||
if err == nil {
|
||||
return err
|
||||
}
|
||||
|
||||
pe, ok := err.(*os.PathError)
|
||||
if !ok {
|
||||
return err
|
||||
}
|
||||
|
||||
if os.IsNotExist(err) {
|
||||
if notExistErr[pe.Path] {
|
||||
return err
|
||||
}
|
||||
notExistErr[pe.Path] = true
|
||||
|
||||
// There is a race where some subdir can be removed but after the parent
|
||||
// dir entries have been read.
|
||||
// So the path could be from `os.Remove(subdir)`
|
||||
// If the reported non-existent path is not the passed in `dir` we
|
||||
// should just retry, but otherwise return with no error.
|
||||
if pe.Path == dir {
|
||||
return nil
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if pe.Err != syscall.EBUSY {
|
||||
return err
|
||||
}
|
||||
|
||||
if mounted, _ := mount.Mounted(pe.Path); mounted {
|
||||
if e := mount.Unmount(pe.Path); e != nil {
|
||||
if mounted, _ := mount.Mounted(pe.Path); mounted {
|
||||
return errors.Wrapf(e, "error while removing %s", dir)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if exitOnErr[pe.Path] == maxRetry {
|
||||
return err
|
||||
}
|
||||
exitOnErr[pe.Path]++
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
}
|
||||
}
|
84
pkg/system/rm_test.go
Normal file
84
pkg/system/rm_test.go
Normal file
|
@ -0,0 +1,84 @@
|
|||
package system
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/docker/docker/pkg/mount"
|
||||
)
|
||||
|
||||
func TestEnsureRemoveAllNotExist(t *testing.T) {
|
||||
// should never return an error for a non-existent path
|
||||
if err := EnsureRemoveAll("/non/existent/path"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnsureRemoveAllWithDir(t *testing.T) {
|
||||
dir, err := ioutil.TempDir("", "test-ensure-removeall-with-dir")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := EnsureRemoveAll(dir); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnsureRemoveAllWithFile(t *testing.T) {
|
||||
tmp, err := ioutil.TempFile("", "test-ensure-removeall-with-dir")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
tmp.Close()
|
||||
if err := EnsureRemoveAll(tmp.Name()); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnsureRemoveAllWithMount(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("mount not supported on Windows")
|
||||
}
|
||||
|
||||
dir1, err := ioutil.TempDir("", "test-ensure-removeall-with-dir1")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
dir2, err := ioutil.TempDir("", "test-ensure-removeall-with-dir2")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer os.RemoveAll(dir2)
|
||||
|
||||
bindDir := filepath.Join(dir1, "bind")
|
||||
if err := os.MkdirAll(bindDir, 0755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := mount.Mount(dir2, bindDir, "none", "bind"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
err = EnsureRemoveAll(dir1)
|
||||
close(done)
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-done:
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
case <-time.After(5 * time.Second):
|
||||
t.Fatal("timeout waiting for EnsureRemoveAll to finish")
|
||||
}
|
||||
|
||||
if _, err := os.Stat(dir1); !os.IsNotExist(err) {
|
||||
t.Fatalf("expected %q to not exist", dir1)
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue