From 213681b66a41d7728445e30125176834ce349c2a Mon Sep 17 00:00:00 2001 From: Olli Janatuinen Date: Wed, 8 May 2019 04:27:15 +0300 Subject: [PATCH] First step to implement full garbage collector for image layers Refactored exiting logic on way that layers are first marked to be under removal so if actual removal fails they can be found from disk and cleaned up. Full garbage collector will be implemented as part of containerd migration. Signed-off-by: Olli Janatuinen --- integration/image/remove_unix_test.go | 88 +++++++++++++++++++++++++++ internal/test/daemon/daemon.go | 7 +++ internal/test/daemon/ops.go | 7 +++ internal/test/daemon/ops_unix.go | 34 +++++++++++ layer/filestore.go | 76 ++++++++++++++++++++++- layer/layer_store.go | 40 +++++++++++- 6 files changed, 247 insertions(+), 5 deletions(-) create mode 100644 integration/image/remove_unix_test.go create mode 100644 internal/test/daemon/ops_unix.go diff --git a/integration/image/remove_unix_test.go b/integration/image/remove_unix_test.go new file mode 100644 index 0000000000..984ce5c9d7 --- /dev/null +++ b/integration/image/remove_unix_test.go @@ -0,0 +1,88 @@ +// +build !windows + +package image // import "github.com/docker/docker/integration/image" + +import ( + "context" + "io" + "io/ioutil" + "os" + "strconv" + "syscall" + "testing" + "unsafe" + + "github.com/docker/docker/api/types" + "github.com/docker/docker/internal/test/daemon" + "github.com/docker/docker/internal/test/fakecontext" + "gotest.tools/assert" + "gotest.tools/skip" +) + +// This is a regression test for #38488 +// It ensures that orphan layers can be found and cleaned up +// after unsuccessful image removal +func TestRemoveImageGarbageCollector(t *testing.T) { + // This test uses very platform specific way to prevent + // daemon for remove image layer. + skip.If(t, testEnv.DaemonInfo.OSType != "linux") + skip.If(t, os.Getenv("DOCKER_ENGINE_GOARCH") != "amd64") + + // Create daemon with overlay2 graphdriver because vfs uses disk differently + // and this test case would not work with it. + d := daemon.New(t, daemon.WithStorageDriver("overlay2"), daemon.WithImageService) + d.Start(t) + defer d.Stop(t) + + ctx := context.Background() + client := d.NewClientT(t) + i := d.ImageService() + + img := "test-garbage-collector" + + // Build a image with multiple layers + dockerfile := `FROM busybox + RUN echo echo Running... > /run.sh` + source := fakecontext.New(t, "", fakecontext.WithDockerfile(dockerfile)) + defer source.Close() + resp, err := client.ImageBuild(ctx, + source.AsTarReader(t), + types.ImageBuildOptions{ + Remove: true, + ForceRemove: true, + Tags: []string{img}, + }) + assert.NilError(t, err) + _, err = io.Copy(ioutil.Discard, resp.Body) + resp.Body.Close() + assert.NilError(t, err) + image, _, err := client.ImageInspectWithRaw(ctx, img) + assert.NilError(t, err) + + // Mark latest image layer to immutable + data := image.GraphDriver.Data + file, _ := os.Open(data["UpperDir"]) + attr := 0x00000010 + fsflags := uintptr(0x40086602) + argp := uintptr(unsafe.Pointer(&attr)) + _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), fsflags, argp) + assert.Equal(t, "errno 0", errno.Error()) + + // Try to remove the image, it should generate error + // but marking layer back to mutable before checking errors (so we don't break CI server) + _, err = client.ImageRemove(ctx, img, types.ImageRemoveOptions{}) + attr = 0x00000000 + argp = uintptr(unsafe.Pointer(&attr)) + _, _, errno = syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), fsflags, argp) + assert.Equal(t, "errno 0", errno.Error()) + assert.ErrorContains(t, err, "permission denied") + + // Verify that layer remaining on disk + dir, _ := os.Stat(data["UpperDir"]) + assert.Equal(t, "true", strconv.FormatBool(dir.IsDir())) + + // Run imageService.Cleanup() and make sure that layer was removed from disk + i.Cleanup() + dir, err = os.Stat(data["UpperDir"]) + assert.ErrorContains(t, err, "no such file or directory") +} diff --git a/internal/test/daemon/daemon.go b/internal/test/daemon/daemon.go index f835011d04..4705d2f4ee 100644 --- a/internal/test/daemon/daemon.go +++ b/internal/test/daemon/daemon.go @@ -16,6 +16,7 @@ import ( "github.com/docker/docker/api/types" "github.com/docker/docker/api/types/events" "github.com/docker/docker/client" + "github.com/docker/docker/daemon/images" "github.com/docker/docker/internal/test" "github.com/docker/docker/internal/test/request" "github.com/docker/docker/opts" @@ -71,6 +72,7 @@ type Daemon struct { init bool dockerdBinary string log logT + imageService *images.ImageService // swarm related field swarmListenAddr string @@ -700,3 +702,8 @@ func cleanupRaftDir(t testingT, rootPath string) { t.Logf("error removing %v: %v", walDir, err) } } + +// ImageService returns the Daemon's ImageService +func (d *Daemon) ImageService() *images.ImageService { + return d.imageService +} diff --git a/internal/test/daemon/ops.go b/internal/test/daemon/ops.go index da5f44a7e5..9f869dee0f 100644 --- a/internal/test/daemon/ops.go +++ b/internal/test/daemon/ops.go @@ -70,3 +70,10 @@ func WithEnvironment(e environment.Execution) func(*Daemon) { } } } + +// WithStorageDriver sets store driver option +func WithStorageDriver(driver string) func(d *Daemon) { + return func(d *Daemon) { + d.storageDriver = driver + } +} diff --git a/internal/test/daemon/ops_unix.go b/internal/test/daemon/ops_unix.go new file mode 100644 index 0000000000..41011e87c5 --- /dev/null +++ b/internal/test/daemon/ops_unix.go @@ -0,0 +1,34 @@ +// +build !windows + +package daemon + +import ( + "path/filepath" + "runtime" + + "github.com/docker/docker/daemon/images" + "github.com/docker/docker/layer" + + // register graph drivers + _ "github.com/docker/docker/daemon/graphdriver/register" + "github.com/docker/docker/pkg/idtools" +) + +// WithImageService sets imageService options +func WithImageService(d *Daemon) { + layerStores := make(map[string]layer.Store) + os := runtime.GOOS + layerStores[os], _ = layer.NewStoreFromOptions(layer.StoreOptions{ + Root: d.Root, + MetadataStorePathTemplate: filepath.Join(d.RootDir(), "image", "%s", "layerdb"), + GraphDriver: d.storageDriver, + GraphDriverOptions: nil, + IDMapping: &idtools.IdentityMapping{}, + PluginGetter: nil, + ExperimentalEnabled: false, + OS: os, + }) + d.imageService = images.NewImageService(images.ImageServiceConfig{ + LayerStores: layerStores, + }) +} diff --git a/layer/filestore.go b/layer/filestore.go index 208a0c3a85..37a927022a 100644 --- a/layer/filestore.go +++ b/layer/filestore.go @@ -305,6 +305,47 @@ func (fms *fileMetadataStore) GetMountParent(mount string) (ChainID, error) { return ChainID(dgst), nil } +func (fms *fileMetadataStore) getOrphan() ([]roLayer, error) { + var orphanLayers []roLayer + for _, algorithm := range supportedAlgorithms { + fileInfos, err := ioutil.ReadDir(filepath.Join(fms.root, string(algorithm))) + if err != nil { + if os.IsNotExist(err) { + continue + } + return nil, err + } + + for _, fi := range fileInfos { + if fi.IsDir() && strings.Contains(fi.Name(), "-removing") { + nameSplit := strings.Split(fi.Name(), "-") + dgst := digest.NewDigestFromEncoded(algorithm, nameSplit[0]) + if err := dgst.Validate(); err != nil { + logrus.Debugf("Ignoring invalid digest %s:%s", algorithm, nameSplit[0]) + } else { + chainID := ChainID(dgst) + chainFile := filepath.Join(fms.root, string(algorithm), fi.Name(), "cache-id") + contentBytes, err := ioutil.ReadFile(chainFile) + if err != nil { + logrus.WithError(err).WithField("digest", dgst).Error("cannot get cache ID") + } + cacheID := strings.TrimSpace(string(contentBytes)) + if cacheID == "" { + logrus.Errorf("invalid cache id value") + } + + l := &roLayer{ + chainID: chainID, + cacheID: cacheID, + } + orphanLayers = append(orphanLayers, *l) + } + } + } + } + return orphanLayers, nil +} + func (fms *fileMetadataStore) List() ([]ChainID, []string, error) { var ids []ChainID for _, algorithm := range supportedAlgorithms { @@ -346,8 +387,39 @@ func (fms *fileMetadataStore) List() ([]ChainID, []string, error) { return ids, mounts, nil } -func (fms *fileMetadataStore) Remove(layer ChainID) error { - return os.RemoveAll(fms.getLayerDirectory(layer)) +// Remove layerdb folder if that is marked for removal +func (fms *fileMetadataStore) Remove(layer ChainID, cache string) error { + dgst := digest.Digest(layer) + files, err := ioutil.ReadDir(filepath.Join(fms.root, string(dgst.Algorithm()))) + if err != nil { + return err + } + for _, f := range files { + if !strings.HasSuffix(f.Name(), "-removing") || !strings.HasPrefix(f.Name(), dgst.String()) { + continue + } + + // Make sure that we only remove layerdb folder which points to + // requested cacheID + dir := filepath.Join(fms.root, string(dgst.Algorithm()), f.Name()) + chainFile := filepath.Join(dir, "cache-id") + contentBytes, err := ioutil.ReadFile(chainFile) + if err != nil { + logrus.WithError(err).WithField("file", chainFile).Error("cannot get cache ID") + continue + } + cacheID := strings.TrimSpace(string(contentBytes)) + if cacheID != cache { + continue + } + logrus.Debugf("Removing folder: %s", dir) + err = os.RemoveAll(dir) + if err != nil && !os.IsNotExist(err) { + logrus.WithError(err).WithField("name", f.Name()).Error("cannot remove layer") + continue + } + } + return nil } func (fms *fileMetadataStore) RemoveMount(mount string) error { diff --git a/layer/layer_store.go b/layer/layer_store.go index 81730e9d92..7b8c011f4c 100644 --- a/layer/layer_store.go +++ b/layer/layer_store.go @@ -5,6 +5,8 @@ import ( "fmt" "io" "io/ioutil" + "os" + "path/filepath" "sync" "github.com/docker/distribution" @@ -415,11 +417,24 @@ func (ls *layerStore) Map() map[ChainID]Layer { } func (ls *layerStore) deleteLayer(layer *roLayer, metadata *Metadata) error { + // Rename layer digest folder first so we detect orphan layer(s) + // if ls.driver.Remove fails + dir := ls.store.getLayerDirectory(layer.chainID) + for { + dgst := digest.Digest(layer.chainID) + tmpID := fmt.Sprintf("%s-%s-removing", dgst.Hex(), stringid.GenerateRandomID()) + dir := filepath.Join(ls.store.root, string(dgst.Algorithm()), tmpID) + err := os.Rename(ls.store.getLayerDirectory(layer.chainID), dir) + if os.IsExist(err) { + continue + } + break + } err := ls.driver.Remove(layer.cacheID) if err != nil { return err } - err = ls.store.Remove(layer.chainID) + err = os.RemoveAll(dir) if err != nil { return err } @@ -452,12 +467,14 @@ func (ls *layerStore) releaseLayer(l *roLayer) ([]Metadata, error) { if l.hasReferences() { panic("cannot delete referenced layer") } + // Remove layer from layer map first so it is not considered to exist + // when if ls.deleteLayer fails. + delete(ls.layerMap, l.chainID) + var metadata Metadata if err := ls.deleteLayer(l, &metadata); err != nil { return nil, err } - - delete(ls.layerMap, l.chainID) removed = append(removed, metadata) if l.parent == nil { @@ -743,6 +760,23 @@ func (ls *layerStore) assembleTarTo(graphID string, metadata io.ReadCloser, size } func (ls *layerStore) Cleanup() error { + orphanLayers, err := ls.store.getOrphan() + if err != nil { + logrus.Errorf("Cannot get orphan layers: %v", err) + } + logrus.Debugf("found %v orphan layers", len(orphanLayers)) + for _, orphan := range orphanLayers { + logrus.Debugf("removing orphan layer, chain ID: %v , cache ID: %v", orphan.chainID, orphan.cacheID) + err = ls.driver.Remove(orphan.cacheID) + if err != nil && !os.IsNotExist(err) { + logrus.WithError(err).WithField("cache-id", orphan.cacheID).Error("cannot remove orphan layer") + continue + } + err = ls.store.Remove(orphan.chainID, orphan.cacheID) + if err != nil { + logrus.WithError(err).WithField("chain-id", orphan.chainID).Error("cannot remove orphan layer metadata") + } + } return ls.driver.Cleanup() }