First step to implement full garbage collector for image layers

Refactored the existing logic so that layers are first marked as being
under removal; if the actual removal then fails, they can be found on
disk and cleaned up.

A full garbage collector will be implemented as part of the containerd
migration.
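
In other words, removal becomes a two-phase operation: the layer's
metadata directory is renamed with a "-removing" suffix before anything
is deleted, and a later cleanup pass sweeps whatever still carries that
suffix. A minimal sketch of the idea in Go, with hypothetical names
rather than the real layer-store API:

// Package gcsketch illustrates the mark-then-remove convention; the
// names and layout are simplified, not the actual layer store.
package gcsketch

import (
	"fmt"
	"os"
	"path/filepath"
	"time"
)

// markAndRemove renames the layer's metadata directory before deleting
// anything, so a failed delete leaves a visible marker instead of
// silently orphaned data.
func markAndRemove(metadataDir, driverDataDir string) error {
	marked := fmt.Sprintf("%s-%d-removing", metadataDir, time.Now().UnixNano())
	if err := os.Rename(metadataDir, marked); err != nil {
		return err
	}
	// If either removal below fails, "marked" stays on disk as an orphan.
	if err := os.RemoveAll(driverDataDir); err != nil {
		return err
	}
	return os.RemoveAll(marked)
}

// cleanup is the garbage-collection pass: anything still carrying the
// "-removing" suffix is an orphan from an earlier failed removal.
func cleanup(metadataRoot string) error {
	orphans, err := filepath.Glob(filepath.Join(metadataRoot, "*-removing"))
	if err != nil {
		return err
	}
	for _, dir := range orphans {
		if err := os.RemoveAll(dir); err != nil {
			return err
		}
	}
	return nil
}

In the actual change below, the cleanup pass also reads the cache-id
file inside each marked directory so that the graph driver's data can
be removed together with the metadata.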

Signed-off-by: Olli Janatuinen <olli.janatuinen@gmail.com>
Olli Janatuinen committed 2019-05-08 04:27:15 +03:00
parent 71e00574c7
commit 213681b66a
6 changed files with 247 additions and 5 deletions


@@ -0,0 +1,88 @@
// +build !windows

package image // import "github.com/docker/docker/integration/image"

import (
"context"
"io"
"io/ioutil"
"os"
"strconv"
"syscall"
"testing"
"unsafe"
"github.com/docker/docker/api/types"
"github.com/docker/docker/internal/test/daemon"
"github.com/docker/docker/internal/test/fakecontext"
"gotest.tools/assert"
"gotest.tools/skip"
)
// This is a regression test for #38488
// It ensures that orphan layers can be found and cleaned up
// after unsuccessful image removal
func TestRemoveImageGarbageCollector(t *testing.T) {
// This test uses a very platform-specific way to prevent the
// daemon from removing the image layer.
skip.If(t, testEnv.DaemonInfo.OSType != "linux")
skip.If(t, os.Getenv("DOCKER_ENGINE_GOARCH") != "amd64")
// Create a daemon with the overlay2 graphdriver because vfs uses the
// disk differently and this test case would not work with it.
d := daemon.New(t, daemon.WithStorageDriver("overlay2"), daemon.WithImageService)
d.Start(t)
defer d.Stop(t)
ctx := context.Background()
client := d.NewClientT(t)
i := d.ImageService()
img := "test-garbage-collector"
// Build an image with multiple layers
dockerfile := `FROM busybox
RUN echo echo Running... > /run.sh`
source := fakecontext.New(t, "", fakecontext.WithDockerfile(dockerfile))
defer source.Close()
resp, err := client.ImageBuild(ctx,
source.AsTarReader(t),
types.ImageBuildOptions{
Remove: true,
ForceRemove: true,
Tags: []string{img},
})
assert.NilError(t, err)
_, err = io.Copy(ioutil.Discard, resp.Body)
resp.Body.Close()
assert.NilError(t, err)
image, _, err := client.ImageInspectWithRaw(ctx, img)
assert.NilError(t, err)
// Mark the latest image layer as immutable
data := image.GraphDriver.Data
file, _ := os.Open(data["UpperDir"])
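// 0x00000010 is FS_IMMUTABLE_FL and 0x40086602 is FS_IOC_SETFLAGS on
// 64-bit Linux; setting the immutable flag on the layer's upper directory
// prevents the graphdriver from deleting it, so the image removal below fails.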
attr := 0x00000010
fsflags := uintptr(0x40086602)
argp := uintptr(unsafe.Pointer(&attr))
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), fsflags, argp)
assert.Equal(t, "errno 0", errno.Error())
// Try to remove the image; it should fail, but mark the layer back to
// mutable before checking the error (so we don't break the CI server)
_, err = client.ImageRemove(ctx, img, types.ImageRemoveOptions{})
attr = 0x00000000
argp = uintptr(unsafe.Pointer(&attr))
_, _, errno = syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), fsflags, argp)
assert.Equal(t, "errno 0", errno.Error())
assert.ErrorContains(t, err, "permission denied")
// Verify that the layer remains on disk
dir, _ := os.Stat(data["UpperDir"])
assert.Equal(t, "true", strconv.FormatBool(dir.IsDir()))
// Run imageService.Cleanup() and make sure that the layer was removed from disk
i.Cleanup()
dir, err = os.Stat(data["UpperDir"])
assert.ErrorContains(t, err, "no such file or directory")
}


@@ -16,6 +16,7 @@ import (
"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/events"
"github.com/docker/docker/client"
"github.com/docker/docker/daemon/images"
"github.com/docker/docker/internal/test"
"github.com/docker/docker/internal/test/request"
"github.com/docker/docker/opts"
@@ -71,6 +72,7 @@ type Daemon struct {
init bool
dockerdBinary string
log logT
imageService *images.ImageService
// swarm related field
swarmListenAddr string
@@ -700,3 +702,8 @@ func cleanupRaftDir(t testingT, rootPath string) {
t.Logf("error removing %v: %v", walDir, err)
}
}
// ImageService returns the Daemon's ImageService
func (d *Daemon) ImageService() *images.ImageService {
return d.imageService
}


@@ -70,3 +70,10 @@ func WithEnvironment(e environment.Execution) func(*Daemon) {
}
}
}
// WithStorageDriver sets the storage driver to use for the daemon
func WithStorageDriver(driver string) func(d *Daemon) {
return func(d *Daemon) {
d.storageDriver = driver
}
}


@@ -0,0 +1,34 @@
// +build !windows

package daemon

import (
"path/filepath"
"runtime"
"github.com/docker/docker/daemon/images"
"github.com/docker/docker/layer"
// register graph drivers
_ "github.com/docker/docker/daemon/graphdriver/register"
"github.com/docker/docker/pkg/idtools"
)
// WithImageService attaches an ImageService, backed by a layer store that
// uses the daemon's configured storage driver, to the test Daemon
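// It is meant to be combined with WithStorageDriver, e.g.
// daemon.New(t, daemon.WithStorageDriver("overlay2"), daemon.WithImageService),
// as in the integration test above.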
func WithImageService(d *Daemon) {
layerStores := make(map[string]layer.Store)
os := runtime.GOOS
layerStores[os], _ = layer.NewStoreFromOptions(layer.StoreOptions{
Root: d.Root,
MetadataStorePathTemplate: filepath.Join(d.RootDir(), "image", "%s", "layerdb"),
GraphDriver: d.storageDriver,
GraphDriverOptions: nil,
IDMapping: &idtools.IdentityMapping{},
PluginGetter: nil,
ExperimentalEnabled: false,
OS: os,
})
d.imageService = images.NewImageService(images.ImageServiceConfig{
LayerStores: layerStores,
})
}


@@ -305,6 +305,47 @@ func (fms *fileMetadataStore) GetMountParent(mount string) (ChainID, error) {
return ChainID(dgst), nil
}
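// getOrphan returns the layers whose metadata directories were renamed with
// the "-removing" suffix by a removal that did not complete, for example
// (assuming the default docker root):
//   /var/lib/docker/image/<driver>/layerdb/sha256/<digest>-<random>-removing
// The cache-id file inside each directory tells the caller which graphdriver
// data still needs to be deleted.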
func (fms *fileMetadataStore) getOrphan() ([]roLayer, error) {
var orphanLayers []roLayer
for _, algorithm := range supportedAlgorithms {
fileInfos, err := ioutil.ReadDir(filepath.Join(fms.root, string(algorithm)))
if err != nil {
if os.IsNotExist(err) {
continue
}
return nil, err
}
for _, fi := range fileInfos {
if fi.IsDir() && strings.Contains(fi.Name(), "-removing") {
nameSplit := strings.Split(fi.Name(), "-")
dgst := digest.NewDigestFromEncoded(algorithm, nameSplit[0])
if err := dgst.Validate(); err != nil {
logrus.Debugf("Ignoring invalid digest %s:%s", algorithm, nameSplit[0])
} else {
chainID := ChainID(dgst)
chainFile := filepath.Join(fms.root, string(algorithm), fi.Name(), "cache-id")
contentBytes, err := ioutil.ReadFile(chainFile)
if err != nil {
logrus.WithError(err).WithField("digest", dgst).Error("cannot get cache ID")
}
cacheID := strings.TrimSpace(string(contentBytes))
if cacheID == "" {
logrus.Errorf("invalid cache id value")
}
l := &roLayer{
chainID: chainID,
cacheID: cacheID,
}
orphanLayers = append(orphanLayers, *l)
}
}
}
}
return orphanLayers, nil
}
func (fms *fileMetadataStore) List() ([]ChainID, []string, error) {
var ids []ChainID
for _, algorithm := range supportedAlgorithms {
@@ -346,8 +387,39 @@ func (fms *fileMetadataStore) List() ([]ChainID, []string, error) {
return ids, mounts, nil
}
func (fms *fileMetadataStore) Remove(layer ChainID) error {
return os.RemoveAll(fms.getLayerDirectory(layer))
// Remove deletes the layerdb folder for the given layer if it has been
// marked for removal and its cache-id file matches the given cache ID
func (fms *fileMetadataStore) Remove(layer ChainID, cache string) error {
dgst := digest.Digest(layer)
files, err := ioutil.ReadDir(filepath.Join(fms.root, string(dgst.Algorithm())))
if err != nil {
return err
}
for _, f := range files {
if !strings.HasSuffix(f.Name(), "-removing") || !strings.HasPrefix(f.Name(), dgst.String()) {
continue
}
// Make sure that we only remove the layerdb folder which points to
// the requested cache ID
dir := filepath.Join(fms.root, string(dgst.Algorithm()), f.Name())
chainFile := filepath.Join(dir, "cache-id")
contentBytes, err := ioutil.ReadFile(chainFile)
if err != nil {
logrus.WithError(err).WithField("file", chainFile).Error("cannot get cache ID")
continue
}
cacheID := strings.TrimSpace(string(contentBytes))
if cacheID != cache {
continue
}
logrus.Debugf("Removing folder: %s", dir)
err = os.RemoveAll(dir)
if err != nil && !os.IsNotExist(err) {
logrus.WithError(err).WithField("name", f.Name()).Error("cannot remove layer")
continue
}
}
return nil
}
func (fms *fileMetadataStore) RemoveMount(mount string) error {


@@ -5,6 +5,8 @@ import (
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"sync"
"github.com/docker/distribution"
@@ -415,11 +417,24 @@ func (ls *layerStore) Map() map[ChainID]Layer {
}
func (ls *layerStore) deleteLayer(layer *roLayer, metadata *Metadata) error {
// Rename the layer digest folder first so we can detect orphan layers
// if ls.driver.Remove fails
dir := ls.store.getLayerDirectory(layer.chainID)
for {
dgst := digest.Digest(layer.chainID)
tmpID := fmt.Sprintf("%s-%s-removing", dgst.Hex(), stringid.GenerateRandomID())
dir = filepath.Join(ls.store.root, string(dgst.Algorithm()), tmpID)
err := os.Rename(ls.store.getLayerDirectory(layer.chainID), dir)
if os.IsExist(err) {
continue
}
break
}
err := ls.driver.Remove(layer.cacheID)
if err != nil {
return err
}
err = ls.store.Remove(layer.chainID)
err = os.RemoveAll(dir)
if err != nil {
return err
}
@@ -452,12 +467,14 @@ func (ls *layerStore) releaseLayer(l *roLayer) ([]Metadata, error) {
if l.hasReferences() {
panic("cannot delete referenced layer")
}
// Remove the layer from the layer map first so it is not considered
// to exist even if ls.deleteLayer fails.
delete(ls.layerMap, l.chainID)
var metadata Metadata
if err := ls.deleteLayer(l, &metadata); err != nil {
return nil, err
}
delete(ls.layerMap, l.chainID)
removed = append(removed, metadata)
if l.parent == nil {
@@ -743,6 +760,23 @@ func (ls *layerStore) assembleTarTo(graphID string, metadata io.ReadCloser, size
}
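// Cleanup removes the data and metadata of orphan layers left behind by
// removals that failed part-way, then lets the graph driver run its own
// cleanup.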
func (ls *layerStore) Cleanup() error {
orphanLayers, err := ls.store.getOrphan()
if err != nil {
logrus.Errorf("Cannot get orphan layers: %v", err)
}
logrus.Debugf("found %v orphan layers", len(orphanLayers))
for _, orphan := range orphanLayers {
logrus.Debugf("removing orphan layer, chain ID: %v , cache ID: %v", orphan.chainID, orphan.cacheID)
err = ls.driver.Remove(orphan.cacheID)
if err != nil && !os.IsNotExist(err) {
logrus.WithError(err).WithField("cache-id", orphan.cacheID).Error("cannot remove orphan layer")
continue
}
err = ls.store.Remove(orphan.chainID, orphan.cacheID)
if err != nil {
logrus.WithError(err).WithField("chain-id", orphan.chainID).Error("cannot remove orphan layer metadata")
}
}
return ls.driver.Cleanup()
}