diff --git a/daemon/daemon_overlayfs.go b/daemon/daemon_overlayfs.go new file mode 100644 index 0000000000..e134b297a9 --- /dev/null +++ b/daemon/daemon_overlayfs.go @@ -0,0 +1,7 @@ +// +build !exclude_graphdriver_overlayfs + +package daemon + +import ( + _ "github.com/docker/docker/daemon/graphdriver/overlayfs" +) diff --git a/daemon/graphdriver/driver.go b/daemon/graphdriver/driver.go index 91040db97a..3eacd428cc 100644 --- a/daemon/graphdriver/driver.go +++ b/daemon/graphdriver/driver.go @@ -81,6 +81,8 @@ var ( "btrfs", "devicemapper", "vfs", + // experimental, has to be enabled manually for now + "overlayfs", } ErrNotSupported = errors.New("driver not supported") diff --git a/daemon/graphdriver/overlayfs/copy.go b/daemon/graphdriver/overlayfs/copy.go new file mode 100644 index 0000000000..4c8c6239ac --- /dev/null +++ b/daemon/graphdriver/overlayfs/copy.go @@ -0,0 +1,157 @@ +// +build linux + +package overlayfs + +import ( + "fmt" + "io" + "os" + "path/filepath" + "syscall" + + "github.com/docker/docker/pkg/system" +) + +type CopyFlags int + +const ( + CopyHardlink CopyFlags = 1 << iota +) + +func copyRegular(srcPath, dstPath string, mode os.FileMode) error { + srcFile, err := os.Open(srcPath) + if err != nil { + return err + } + defer srcFile.Close() + + dstFile, err := os.OpenFile(dstPath, os.O_WRONLY|os.O_CREATE, mode) + if err != nil { + return err + } + defer dstFile.Close() + + _, err = io.Copy(dstFile, srcFile) + + return err +} + +func copyXattr(srcPath, dstPath, attr string) error { + data, err := system.Lgetxattr(srcPath, attr) + if err != nil { + return err + } + if data != nil { + if err := system.Lsetxattr(dstPath, attr, data, 0); err != nil { + return err + } + } + return nil +} + +func copyDir(srcDir, dstDir string, flags CopyFlags) error { + err := filepath.Walk(srcDir, func(srcPath string, f os.FileInfo, err error) error { + if err != nil { + return err + } + + // Rebase path + relPath, err := filepath.Rel(srcDir, srcPath) + if err != nil { + 
return err + } + + dstPath := filepath.Join(dstDir, relPath) + if err != nil { + return err + } + + stat, ok := f.Sys().(*syscall.Stat_t) + if !ok { + return fmt.Errorf("Unable to get raw syscall.Stat_t data for %s", srcPath) + } + + switch f.Mode() & os.ModeType { + case 0: // Regular file + if flags&CopyHardlink != 0 { + if err := os.Link(srcPath, dstPath); err != nil { + return err + } + } else { + if err := copyRegular(srcPath, dstPath, f.Mode()); err != nil { + return err + } + } + + case os.ModeDir: + if err := os.Mkdir(dstPath, f.Mode()); err != nil && !os.IsExist(err) { + return err + } + + case os.ModeSymlink: + link, err := os.Readlink(srcPath) + if err != nil { + return err + } + + if err := os.Symlink(link, dstPath); err != nil { + return err + } + + case os.ModeNamedPipe: + fallthrough + case os.ModeSocket: + if err := syscall.Mkfifo(dstPath, stat.Mode); err != nil { + return err + } + + case os.ModeDevice: + if err := syscall.Mknod(dstPath, stat.Mode, int(stat.Rdev)); err != nil { + return err + } + + default: + return fmt.Errorf("Unknown file type for %s\n", srcPath) + } + + if err := os.Lchown(dstPath, int(stat.Uid), int(stat.Gid)); err != nil { + return err + } + + if err := copyXattr(srcPath, dstPath, "security.capability"); err != nil { + return err + } + + // We need to copy this attribute if it appears in an overlayfs upper layer, as + // this function is used to copy those. It is set by overlayfs if a directory + // is removed and then re-created and should not inherit anything from the + // same dir in the lower dir. + if err := copyXattr(srcPath, dstPath, "trusted.overlay.opaque"); err != nil { + return err + } + + isSymlink := f.Mode()&os.ModeSymlink != 0 + + // There is no LChmod, so ignore mode for symlink. 
Also, this + // must happen after chown, as that can modify the file mode + if !isSymlink { + if err := os.Chmod(dstPath, f.Mode()); err != nil { + return err + } + } + + ts := []syscall.Timespec{stat.Atim, stat.Mtim} + // syscall.UtimesNano doesn't support a NOFOLLOW flag atm, and + if !isSymlink { + if err := system.UtimesNano(dstPath, ts); err != nil { + return err + } + } else { + if err := system.LUtimesNano(dstPath, ts); err != nil { + return err + } + } + return nil + }) + return err +} diff --git a/daemon/graphdriver/overlayfs/overlayfs.go b/daemon/graphdriver/overlayfs/overlayfs.go new file mode 100644 index 0000000000..f2f478dc4a --- /dev/null +++ b/daemon/graphdriver/overlayfs/overlayfs.go @@ -0,0 +1,369 @@ +// +build linux + +package overlayfs + +import ( + "bufio" + "fmt" + "io/ioutil" + "os" + "os/exec" + "path" + "strings" + "sync" + "syscall" + + log "github.com/Sirupsen/logrus" + "github.com/docker/docker/daemon/graphdriver" + "github.com/docker/docker/pkg/archive" + "github.com/docker/libcontainer/label" +) + +// This is a small wrapper over the NaiveDiffWriter that lets us have a custom +// implementation of ApplyDiff() + +var ( + ErrApplyDiffFallback = fmt.Errorf("Fall back to normal ApplyDiff") +) + +type ApplyDiffProtoDriver interface { + graphdriver.ProtoDriver + ApplyDiff(id, parent string, diff archive.ArchiveReader) (bytes int64, err error) +} + +type naiveDiffDriverWithApply struct { + graphdriver.Driver + applyDiff ApplyDiffProtoDriver +} + +func NaiveDiffDriverWithApply(driver ApplyDiffProtoDriver) graphdriver.Driver { + return &naiveDiffDriverWithApply{ + Driver: graphdriver.NaiveDiffDriver(driver), + applyDiff: driver, + } +} + +func (d *naiveDiffDriverWithApply) ApplyDiff(id, parent string, diff archive.ArchiveReader) (int64, error) { + b, err := d.applyDiff.ApplyDiff(id, parent, diff) + if err == ErrApplyDiffFallback { + return d.Driver.ApplyDiff(id, parent, diff) + } + return b, err +} + +// This backend uses the overlayfs union 
filesystem for containers +// plus hard link file sharing for images. + +// Each container/image can have a "root" subdirectory which is a plain +// filesystem hierarchy, or they can use overlayfs. + +// If they use overlayfs there is an "upper" directory and a "lower-id" +// file, as well as "merged" and "work" directories. The "upper" +// directory has the upper layer of the overlay, and "lower-id" contains +// the id of the parent whose "root" directory shall be used as the lower +// layer in the overlay. The overlay itself is mounted in the "merged" +// directory, and the "work" dir is needed for overlayfs to work. + +// When an overlay layer is created there are two cases, either the +// parent has a "root" dir, then we start out with an empty "upper" +// directory overlaid on the parent's root. This is typically the +// case with the init layer of a container which is based on an image. +// If there is no "root" in the parent, we inherit the lower-id from +// the parent and start by making a copy of the parent's "upper" dir. +// This is typically the case for a container layer which copies +// its parent -init upper layer. + +// Additionally we also have a custom implementation of ApplyDiff +// which makes a recursive copy of the parent "root" layer using +// hardlinks to share file data, and then applies the layer on top +// of that. This means all child images share file (but not directory) +// data with the parent. 
+ +type ActiveMount struct { + count int + path string + mounted bool +} +type Driver struct { + home string + sync.Mutex // Protects concurrent modification to active + active map[string]*ActiveMount +} + +func init() { + graphdriver.Register("overlayfs", Init) +} + +func Init(home string, options []string) (graphdriver.Driver, error) { + if err := supportsOverlayfs(); err != nil { + return nil, graphdriver.ErrNotSupported + } + + // Create the driver home dir + if err := os.MkdirAll(home, 0755); err != nil && !os.IsExist(err) { + return nil, err + } + + d := &Driver{ + home: home, + active: make(map[string]*ActiveMount), + } + + return NaiveDiffDriverWithApply(d), nil +} + +func supportsOverlayfs() error { + // We can try to modprobe overlayfs first before looking at + // proc/filesystems for when overlayfs is supported + exec.Command("modprobe", "overlayfs").Run() + + f, err := os.Open("/proc/filesystems") + if err != nil { + return err + } + defer f.Close() + + s := bufio.NewScanner(f) + for s.Scan() { + if strings.Contains(s.Text(), "overlayfs") { + return nil + } + } + return graphdriver.ErrNotSupported +} + +func (d *Driver) String() string { + return "overlayfs" +} + +func (d *Driver) Status() [][2]string { + return nil +} + +func (d *Driver) Cleanup() error { + return nil +} + +func (d *Driver) Create(id string, parent string) (retErr error) { + dir := d.dir(id) + if err := os.MkdirAll(path.Dir(dir), 0700); err != nil { + return err + } + if err := os.Mkdir(dir, 0700); err != nil { + return err + } + + defer func() { + // Clean up on failure + if retErr != nil { + os.RemoveAll(dir) + } + }() + + // Toplevel images are just a "root" dir + if parent == "" { + if err := os.Mkdir(path.Join(dir, "root"), 0755); err != nil { + return err + } + return nil + } + + parentDir := d.dir(parent) + + // Ensure parent exists + if _, err := os.Lstat(parentDir); err != nil { + return err + } + + // If parent has a root, just do a overlayfs to it + parentRoot := 
path.Join(parentDir, "root") + + if s, err := os.Lstat(parentRoot); err == nil { + if err := os.Mkdir(path.Join(dir, "upper"), s.Mode()); err != nil { + return err + } + if err := os.Mkdir(path.Join(dir, "work"), 0700); err != nil { + return err + } + if err := os.Mkdir(path.Join(dir, "merged"), 0700); err != nil { + return err + } + if err := ioutil.WriteFile(path.Join(dir, "lower-id"), []byte(parent), 0666); err != nil { + return err + } + return nil + } + + // Otherwise, copy the upper and the lower-id from the parent + + lowerId, err := ioutil.ReadFile(path.Join(parentDir, "lower-id")) + if err != nil { + return err + } + + if err := ioutil.WriteFile(path.Join(dir, "lower-id"), lowerId, 0666); err != nil { + return err + } + + parentUpperDir := path.Join(parentDir, "upper") + s, err := os.Lstat(parentUpperDir) + if err != nil { + return err + } + + upperDir := path.Join(dir, "upper") + if err := os.Mkdir(upperDir, s.Mode()); err != nil { + return err + } + if err := os.Mkdir(path.Join(dir, "work"), 0700); err != nil { + return err + } + if err := os.Mkdir(path.Join(dir, "merged"), 0700); err != nil { + return err + } + + return copyDir(parentUpperDir, upperDir, 0) +} + +func (d *Driver) dir(id string) string { + return path.Join(d.home, id) +} + +func (d *Driver) Remove(id string) error { + dir := d.dir(id) + if _, err := os.Stat(dir); err != nil { + return err + } + return os.RemoveAll(dir) +} + +func (d *Driver) Get(id string, mountLabel string) (string, error) { + // Protect the d.active from concurrent access + d.Lock() + defer d.Unlock() + + mount := d.active[id] + if mount != nil { + mount.count++ + return mount.path, nil + } else { + mount = &ActiveMount{count: 1} + } + + dir := d.dir(id) + if _, err := os.Stat(dir); err != nil { + return "", err + } + + // If id has a root, just return it + rootDir := path.Join(dir, "root") + if _, err := os.Stat(rootDir); err == nil { + mount.path = rootDir + d.active[id] = mount + return mount.path, nil + } + + 
lowerId, err := ioutil.ReadFile(path.Join(dir, "lower-id")) + if err != nil { + return "", err + } + lowerDir := path.Join(d.dir(string(lowerId)), "root") + upperDir := path.Join(dir, "upper") + workDir := path.Join(dir, "work") + mergedDir := path.Join(dir, "merged") + + opts := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", lowerDir, upperDir, workDir) + if err := syscall.Mount("overlayfs", mergedDir, "overlayfs", 0, label.FormatMountLabel(opts, mountLabel)); err != nil { + return "", err + } + mount.path = mergedDir + mount.mounted = true + d.active[id] = mount + + return mount.path, nil +} + +func (d *Driver) Put(id string) { + // Protect the d.active from concurrent access + d.Lock() + defer d.Unlock() + + mount := d.active[id] + if mount == nil { + log.Debugf("Put on a non-mounted device %s", id) + return + } + + mount.count-- + if mount.count > 0 { + return + } + + if mount.mounted { + if err := syscall.Unmount(mount.path, 0); err != nil { + log.Debugf("Failed to unmount %s overlayfs: %v", id, err) + } + } + + delete(d.active, id) +} + +func (d *Driver) ApplyDiff(id string, parent string, diff archive.ArchiveReader) (bytes int64, err error) { + dir := d.dir(id) + + if parent == "" { + return 0, ErrApplyDiffFallback + } + + parentRootDir := path.Join(d.dir(parent), "root") + if _, err := os.Stat(parentRootDir); err != nil { + return 0, ErrApplyDiffFallback + } + + // We now know there is a parent, and it has a "root" directory containing + // the full root filesystem. We can just hardlink it and apply the + // layer. 
This relies on two things: + // 1) ApplyDiff is only run once on a clean (no writes to upper layer) container + // 2) ApplyDiff doesn't do any in-place writes to files (would break hardlinks) + // These are all currently true and are not expected to break + + tmpRootDir, err := ioutil.TempDir(dir, "tmproot") + if err != nil { + return 0, err + } + defer func() { + if err != nil { + os.RemoveAll(tmpRootDir) + } else { + os.RemoveAll(path.Join(dir, "upper")) + os.RemoveAll(path.Join(dir, "work")) + os.RemoveAll(path.Join(dir, "merged")) + os.RemoveAll(path.Join(dir, "lower-id")) + } + }() + + if err = copyDir(parentRootDir, tmpRootDir, CopyHardlink); err != nil { + return 0, err + } + + if err := archive.ApplyLayer(tmpRootDir, diff); err != nil { + return 0, err + } + + rootDir := path.Join(dir, "root") + if err := os.Rename(tmpRootDir, rootDir); err != nil { + return 0, err + } + + changes, err := archive.ChangesDirs(rootDir, parentRootDir) + if err != nil { + return 0, err + } + + return archive.ChangesSize(rootDir, changes), nil +} + +func (d *Driver) Exists(id string) bool { + _, err := os.Stat(d.dir(id)) + return err == nil +} diff --git a/daemon/graphdriver/overlayfs/overlayfs_test.go b/daemon/graphdriver/overlayfs/overlayfs_test.go new file mode 100644 index 0000000000..7ab71d0e64 --- /dev/null +++ b/daemon/graphdriver/overlayfs/overlayfs_test.go @@ -0,0 +1,28 @@ +package overlayfs + +import ( + "github.com/docker/docker/daemon/graphdriver/graphtest" + "testing" +) + +// This avoids creating a new driver for each test if all tests are run +// Make sure to put new tests between TestOverlayfsSetup and TestOverlayfsTeardown +func TestOverlayfsSetup(t *testing.T) { + graphtest.GetDriver(t, "overlayfs") +} + +func TestOverlayfsCreateEmpty(t *testing.T) { + graphtest.DriverTestCreateEmpty(t, "overlayfs") +} + +func TestOverlayfsCreateBase(t *testing.T) { + graphtest.DriverTestCreateBase(t, "overlayfs") +} + +func TestOverlayfsCreateSnap(t *testing.T) { + 
graphtest.DriverTestCreateSnap(t, "overlayfs") +} + +func TestOverlayfsTeardown(t *testing.T) { + graphtest.PutDriver(t) +}