From f8ed19c8b4dc17dc5b0147c12ce8c66d7c8898a6 Mon Sep 17 00:00:00 2001 From: Akihiro Suda Date: Mon, 15 Oct 2018 16:47:02 +0900 Subject: [PATCH 1/3] pkg/archive: support overlayfs in userns (Ubuntu kernel only) Ubuntu kernel supports overlayfs in user namespaces. However, Docker had previously crafting overlay opaques directly using mknod(2) and setxattr(2), which are not supported in userns. Tested with LXD, Ubuntu 18.04, kernel 4.15.0-36-generic #39-Ubuntu. Signed-off-by: Akihiro Suda --- pkg/archive/archive.go | 4 +- pkg/archive/archive_linux.go | 179 ++++++++++++++++++++++++++++++++++- pkg/archive/archive_other.go | 2 +- 3 files changed, 177 insertions(+), 8 deletions(-) diff --git a/pkg/archive/archive.go b/pkg/archive/archive.go index 32132fcfed..bb623fa856 100644 --- a/pkg/archive/archive.go +++ b/pkg/archive/archive.go @@ -745,7 +745,7 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error) compressWriter, options.ChownOpts, ) - ta.WhiteoutConverter = getWhiteoutConverter(options.WhiteoutFormat) + ta.WhiteoutConverter = getWhiteoutConverter(options.WhiteoutFormat, options.InUserNS) defer func() { // Make sure to check the error on Close. @@ -903,7 +903,7 @@ func Unpack(decompressedArchive io.Reader, dest string, options *TarOptions) err var dirs []*tar.Header idMapping := idtools.NewIDMappingsFromMaps(options.UIDMaps, options.GIDMaps) rootIDs := idMapping.RootPair() - whiteoutConverter := getWhiteoutConverter(options.WhiteoutFormat) + whiteoutConverter := getWhiteoutConverter(options.WhiteoutFormat, options.InUserNS) // Iterate through the files in the archive. loop: diff --git a/pkg/archive/archive_linux.go b/pkg/archive/archive_linux.go index 970d4d0680..0601f7b0d1 100644 --- a/pkg/archive/archive_linux.go +++ b/pkg/archive/archive_linux.go @@ -2,22 +2,29 @@ package archive // import "github.com/docker/docker/pkg/archive" import ( "archive/tar" + "fmt" + "io/ioutil" "os" "path/filepath" "strings" + "syscall" + "github.com/containerd/continuity/fs" "github.com/docker/docker/pkg/system" + "github.com/pkg/errors" "golang.org/x/sys/unix" ) -func getWhiteoutConverter(format WhiteoutFormat) tarWhiteoutConverter { +func getWhiteoutConverter(format WhiteoutFormat, inUserNS bool) tarWhiteoutConverter { if format == OverlayWhiteoutFormat { - return overlayWhiteoutConverter{} + return overlayWhiteoutConverter{inUserNS: inUserNS} } return nil } -type overlayWhiteoutConverter struct{} +type overlayWhiteoutConverter struct { + inUserNS bool +} func (overlayWhiteoutConverter) ConvertWrite(hdr *tar.Header, path string, fi os.FileInfo) (wo *tar.Header, err error) { // convert whiteouts to AUFS format @@ -61,13 +68,22 @@ func (overlayWhiteoutConverter) ConvertWrite(hdr *tar.Header, path string, fi os return } -func (overlayWhiteoutConverter) ConvertRead(hdr *tar.Header, path string) (bool, error) { +func (c overlayWhiteoutConverter) ConvertRead(hdr *tar.Header, path string) (bool, error) { base := filepath.Base(path) dir := filepath.Dir(path) // if a directory is marked as opaque by the AUFS special file, we need to translate that to overlay if base == WhiteoutOpaqueDir { err := unix.Setxattr(dir, "trusted.overlay.opaque", []byte{'y'}, 0) + if err != nil { + if c.inUserNS { + if err = replaceDirWithOverlayOpaque(dir); err != nil { + return false, errors.Wrapf(err, "replaceDirWithOverlayOpaque(%q) failed", dir) + } + } else { + return false, errors.Wrapf(err, "setxattr(%q, trusted.overlay.opaque=y)", dir) + } + } // don't write the file itself return false, err } @@ -78,7 +94,19 @@ func (overlayWhiteoutConverter) ConvertRead(hdr *tar.Header, path string) (bool, originalPath := filepath.Join(dir, originalBase) if err := unix.Mknod(originalPath, unix.S_IFCHR, 0); err != nil { - return false, err + if c.inUserNS { + // Ubuntu and a few distros support overlayfs in userns. + // + // Although we can't call mknod directly in userns (at least on bionic kernel 4.15), + // we can still create 0,0 char device using mknodChar0Overlay(). + // + // NOTE: we don't need this hack for the containerd snapshotter+unpack model. + if err := mknodChar0Overlay(originalPath); err != nil { + return false, errors.Wrapf(err, "failed to mknodChar0UserNS(%q)", originalPath) + } + } else { + return false, errors.Wrapf(err, "failed to mknod(%q, S_IFCHR, 0)", originalPath) + } } if err := os.Chown(originalPath, hdr.Uid, hdr.Gid); err != nil { return false, err @@ -90,3 +118,144 @@ func (overlayWhiteoutConverter) ConvertRead(hdr *tar.Header, path string) (bool, return true, nil } + +// mknodChar0Overlay creates 0,0 char device by mounting overlayfs and unlinking. +// This function can be used for creating 0,0 char device in userns on Ubuntu. +// +// Steps: +// * Mkdir lower,upper,merged,work +// * Create lower/dummy +// * Mount overlayfs +// * Unlink merged/dummy +// * Unmount overlayfs +// * Make sure a 0,0 char device is created as upper/dummy +// * Rename upper/dummy to cleansedOriginalPath +func mknodChar0Overlay(cleansedOriginalPath string) error { + dir := filepath.Dir(cleansedOriginalPath) + tmp, err := ioutil.TempDir(dir, "mc0o") + if err != nil { + return errors.Wrapf(err, "failed to create a tmp directory under %s", dir) + } + defer os.RemoveAll(tmp) + lower := filepath.Join(tmp, "l") + upper := filepath.Join(tmp, "u") + work := filepath.Join(tmp, "w") + merged := filepath.Join(tmp, "m") + for _, s := range []string{lower, upper, work, merged} { + if err := os.MkdirAll(s, 0700); err != nil { + return errors.Wrapf(err, "failed to mkdir %s", s) + } + } + dummyBase := "d" + lowerDummy := filepath.Join(lower, dummyBase) + if err := ioutil.WriteFile(lowerDummy, []byte{}, 0600); err != nil { + return errors.Wrapf(err, "failed to create a dummy lower file %s", lowerDummy) + } + mOpts := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", lower, upper, work) + // docker/pkg/mount.Mount() requires procfs to be mounted. So we use syscall.Mount() directly instead. + if err := syscall.Mount("overlay", merged, "overlay", uintptr(0), mOpts); err != nil { + return errors.Wrapf(err, "failed to mount overlay (%s) on %s", mOpts, merged) + } + mergedDummy := filepath.Join(merged, dummyBase) + if err := os.Remove(mergedDummy); err != nil { + syscall.Unmount(merged, 0) + return errors.Wrapf(err, "failed to unlink %s", mergedDummy) + } + if err := syscall.Unmount(merged, 0); err != nil { + return errors.Wrapf(err, "failed to unmount %s", merged) + } + upperDummy := filepath.Join(upper, dummyBase) + if err := isChar0(upperDummy); err != nil { + return err + } + if err := os.Rename(upperDummy, cleansedOriginalPath); err != nil { + return errors.Wrapf(err, "failed to rename %s to %s", upperDummy, cleansedOriginalPath) + } + return nil +} + +func isChar0(path string) error { + osStat, err := os.Stat(path) + if err != nil { + return errors.Wrapf(err, "failed to stat %s", path) + } + st, ok := osStat.Sys().(*syscall.Stat_t) + if !ok { + return errors.Errorf("got unsupported stat for %s", path) + } + if os.FileMode(st.Mode)&syscall.S_IFMT != syscall.S_IFCHR { + return errors.Errorf("%s is not a character device, got mode=%d", path, st.Mode) + } + if st.Rdev != 0 { + return errors.Errorf("%s is not a 0,0 character device, got Rdev=%d", path, st.Rdev) + } + return nil +} + +// replaceDirWithOverlayOpaque replaces path with a new directory with trusted.overlay.opaque +// xattr. The contents of the directory are preserved. +func replaceDirWithOverlayOpaque(path string) error { + if path == "/" { + return errors.New("replaceDirWithOverlayOpaque: path must not be \"/\"") + } + dir := filepath.Dir(path) + tmp, err := ioutil.TempDir(dir, "rdwoo") + if err != nil { + return errors.Wrapf(err, "failed to create a tmp directory under %s", dir) + } + defer os.RemoveAll(tmp) + // newPath is a new empty directory crafted with trusted.overlay.opaque xattr. + // we copy the content of path into newPath, remove path, and rename newPath to path. + newPath, err := createDirWithOverlayOpaque(tmp) + if err != nil { + return errors.Wrapf(err, "createDirWithOverlayOpaque(%q) failed", tmp) + } + if err := fs.CopyDir(newPath, path); err != nil { + return errors.Wrapf(err, "CopyDir(%q, %q) failed", newPath, path) + } + if err := os.RemoveAll(path); err != nil { + return err + } + return os.Rename(newPath, path) +} + +// createDirWithOverlayOpaque creates a directory with trusted.overlay.opaque xattr, +// without calling setxattr, so as to allow creating opaque dir in userns on Ubuntu. +func createDirWithOverlayOpaque(tmp string) (string, error) { + lower := filepath.Join(tmp, "l") + upper := filepath.Join(tmp, "u") + work := filepath.Join(tmp, "w") + merged := filepath.Join(tmp, "m") + for _, s := range []string{lower, upper, work, merged} { + if err := os.MkdirAll(s, 0700); err != nil { + return "", errors.Wrapf(err, "failed to mkdir %s", s) + } + } + dummyBase := "d" + lowerDummy := filepath.Join(lower, dummyBase) + if err := os.MkdirAll(lowerDummy, 0700); err != nil { + return "", errors.Wrapf(err, "failed to create a dummy lower directory %s", lowerDummy) + } + mOpts := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", lower, upper, work) + // docker/pkg/mount.Mount() requires procfs to be mounted. So we use syscall.Mount() directly instead. + if err := syscall.Mount("overlay", merged, "overlay", uintptr(0), mOpts); err != nil { + return "", errors.Wrapf(err, "failed to mount overlay (%s) on %s", mOpts, merged) + } + mergedDummy := filepath.Join(merged, dummyBase) + if err := os.Remove(mergedDummy); err != nil { + syscall.Unmount(merged, 0) + return "", errors.Wrapf(err, "failed to rmdir %s", mergedDummy) + } + // upperDummy becomes a 0,0-char device file here + if err := os.Mkdir(mergedDummy, 0700); err != nil { + syscall.Unmount(merged, 0) + return "", errors.Wrapf(err, "failed to mkdir %s", mergedDummy) + } + // upperDummy becomes a directory with trusted.overlay.opaque xattr + // (but can't be verified in userns) + if err := syscall.Unmount(merged, 0); err != nil { + return "", errors.Wrapf(err, "failed to unmount %s", merged) + } + upperDummy := filepath.Join(upper, dummyBase) + return upperDummy, nil +} diff --git a/pkg/archive/archive_other.go b/pkg/archive/archive_other.go index 462dfc6323..65a73354c4 100644 --- a/pkg/archive/archive_other.go +++ b/pkg/archive/archive_other.go @@ -2,6 +2,6 @@ package archive // import "github.com/docker/docker/pkg/archive" -func getWhiteoutConverter(format WhiteoutFormat) tarWhiteoutConverter { +func getWhiteoutConverter(format WhiteoutFormat, inUserNS bool) tarWhiteoutConverter { return nil } From 037e370de12830c867bf1be0955247ed06d33e17 Mon Sep 17 00:00:00 2001 From: Akihiro Suda Date: Thu, 29 Nov 2018 16:38:55 +0900 Subject: [PATCH 2/3] pkg/archive: annotate tests that requires initial userns `rootlesskit go test ./pkg/archive` now succeeds Signed-off-by: Akihiro Suda --- pkg/archive/archive_linux_test.go | 2 ++ pkg/archive/archive_test.go | 2 ++ pkg/archive/archive_unix_test.go | 2 ++ 3 files changed, 6 insertions(+) diff --git a/pkg/archive/archive_linux_test.go b/pkg/archive/archive_linux_test.go index 9422269dff..92b8f2d7d7 100644 --- a/pkg/archive/archive_linux_test.go +++ b/pkg/archive/archive_linux_test.go @@ -8,6 +8,7 @@ import ( "testing" "github.com/docker/docker/pkg/system" + rsystem "github.com/opencontainers/runc/libcontainer/system" "golang.org/x/sys/unix" "gotest.tools/assert" "gotest.tools/skip" @@ -24,6 +25,7 @@ import ( // └── f1 # whiteout, 0644 func setupOverlayTestDir(t *testing.T, src string) { skip.If(t, os.Getuid() != 0, "skipping test that requires root") + skip.If(t, rsystem.RunningInUserNS(), "skipping test that requires initial userns (trusted.overlay.opaque xattr cannot be set in userns, even with Ubuntu kernel)") // Create opaque directory containing single file and permission 0700 err := os.Mkdir(filepath.Join(src, "d1"), 0700) assert.NilError(t, err) diff --git a/pkg/archive/archive_test.go b/pkg/archive/archive_test.go index affc7fa83b..b49ad6723e 100644 --- a/pkg/archive/archive_test.go +++ b/pkg/archive/archive_test.go @@ -18,6 +18,7 @@ import ( "github.com/docker/docker/pkg/idtools" "github.com/docker/docker/pkg/ioutils" + rsystem "github.com/opencontainers/runc/libcontainer/system" "gotest.tools/assert" is "gotest.tools/assert/cmp" "gotest.tools/skip" @@ -1229,6 +1230,7 @@ func TestReplaceFileTarWrapper(t *testing.T) { // version of this package that was built with <=go17 are still readable. func TestPrefixHeaderReadable(t *testing.T) { skip.If(t, runtime.GOOS != "windows" && os.Getuid() != 0, "skipping test that requires root") + skip.If(t, rsystem.RunningInUserNS(), "skipping test that requires more than 010000000 UIDs, which is unlikely to be satisfied when running in userns") // https://gist.github.com/stevvooe/e2a790ad4e97425896206c0816e1a882#file-out-go var testFile = []byte("\x1f\x8b\x08\x08\x44\x21\x68\x59\x00\x03\x74\x2e\x74\x61\x72\x00\x4b\xcb\xcf\x67\xa0\x35\x30\x80\x00\x86\x06\x10\x47\x01\xc1\x37\x40\x00\x54\xb6\xb1\xa1\xa9\x99\x09\x48\x25\x1d\x40\x69\x71\x49\x62\x91\x02\xe5\x76\xa1\x79\x84\x21\x91\xd6\x80\x72\xaf\x8f\x82\x51\x30\x0a\x46\x36\x00\x00\xf0\x1c\x1e\x95\x00\x06\x00\x00") diff --git a/pkg/archive/archive_unix_test.go b/pkg/archive/archive_unix_test.go index dc4e1fdae6..6119133f8e 100644 --- a/pkg/archive/archive_unix_test.go +++ b/pkg/archive/archive_unix_test.go @@ -14,6 +14,7 @@ import ( "testing" "github.com/docker/docker/pkg/system" + rsystem "github.com/opencontainers/runc/libcontainer/system" "golang.org/x/sys/unix" "gotest.tools/assert" is "gotest.tools/assert/cmp" @@ -182,6 +183,7 @@ func getInode(path string) (uint64, error) { func TestTarWithBlockCharFifo(t *testing.T) { skip.If(t, os.Getuid() != 0, "skipping test that requires root") + skip.If(t, rsystem.RunningInUserNS(), "skipping test that requires initial userns") origin, err := ioutil.TempDir("", "docker-test-tar-hardlink") assert.NilError(t, err) From ec153ccfc8a66ec7efd032206026461bc5224c04 Mon Sep 17 00:00:00 2001 From: Akihiro Suda Date: Thu, 29 Nov 2018 18:25:17 +0900 Subject: [PATCH 3/3] pkg/archive: add TestReexecUserNSOverlayWhiteoutConverter Signed-off-by: Akihiro Suda --- pkg/archive/archive_linux_test.go | 130 ++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) diff --git a/pkg/archive/archive_linux_test.go b/pkg/archive/archive_linux_test.go index 92b8f2d7d7..8fdcb34b53 100644 --- a/pkg/archive/archive_linux_test.go +++ b/pkg/archive/archive_linux_test.go @@ -1,14 +1,18 @@ package archive // import "github.com/docker/docker/pkg/archive" import ( + "fmt" "io/ioutil" "os" + "os/exec" "path/filepath" "syscall" "testing" + "github.com/docker/docker/pkg/reexec" "github.com/docker/docker/pkg/system" rsystem "github.com/opencontainers/runc/libcontainer/system" + "github.com/pkg/errors" "golang.org/x/sys/unix" "gotest.tools/assert" "gotest.tools/skip" @@ -162,3 +166,129 @@ func TestOverlayTarAUFSUntar(t *testing.T) { checkFileMode(t, filepath.Join(dst, "d2", "f1"), 0660) checkFileMode(t, filepath.Join(dst, "d3", WhiteoutPrefix+"f1"), 0600) } + +func unshareCmd(cmd *exec.Cmd) { + cmd.SysProcAttr = &syscall.SysProcAttr{ + Cloneflags: syscall.CLONE_NEWUSER | syscall.CLONE_NEWNS, + UidMappings: []syscall.SysProcIDMap{ + { + ContainerID: 0, + HostID: os.Geteuid(), + Size: 1, + }, + }, + GidMappings: []syscall.SysProcIDMap{ + { + ContainerID: 0, + HostID: os.Getegid(), + Size: 1, + }, + }, + } +} + +const ( + reexecSupportsUserNSOverlay = "docker-test-supports-userns-overlay" + reexecMknodChar0 = "docker-test-userns-mknod-char0" + reexecSetOpaque = "docker-test-userns-set-opaque" +) + +func supportsOverlay(dir string) error { + lower := filepath.Join(dir, "l") + upper := filepath.Join(dir, "u") + work := filepath.Join(dir, "w") + merged := filepath.Join(dir, "m") + for _, s := range []string{lower, upper, work, merged} { + if err := os.MkdirAll(s, 0700); err != nil { + return err + } + } + mOpts := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", lower, upper, work) + if err := syscall.Mount("overlay", merged, "overlay", uintptr(0), mOpts); err != nil { + return errors.Wrapf(err, "failed to mount overlay (%s) on %s", mOpts, merged) + } + if err := syscall.Unmount(merged, 0); err != nil { + return errors.Wrapf(err, "failed to unmount %s", merged) + } + return nil +} + +// supportsUserNSOverlay returns nil error if overlay is supported in userns. +// Only Ubuntu and a few distros support overlay in userns (by patching the kernel). +// https://lists.ubuntu.com/archives/kernel-team/2014-February/038091.html +// As of kernel 4.19, the patch is not merged to the upstream. +func supportsUserNSOverlay() error { + tmp, err := ioutil.TempDir("", "docker-test-supports-userns-overlay") + if err != nil { + return err + } + defer os.RemoveAll(tmp) + cmd := reexec.Command(reexecSupportsUserNSOverlay, tmp) + unshareCmd(cmd) + out, err := cmd.CombinedOutput() + if err != nil { + return errors.Wrapf(err, "output: %q", string(out)) + } + return nil +} + +// isOpaque returns nil error if the dir has trusted.overlay.opaque=y. +// isOpaque needs to be called in the initial userns. +func isOpaque(dir string) error { + xattrOpaque, err := system.Lgetxattr(dir, "trusted.overlay.opaque") + if err != nil { + return errors.Wrapf(err, "failed to read opaque flag of %s", dir) + } + if string(xattrOpaque) != "y" { + return errors.Errorf("expected \"y\", got %q", string(xattrOpaque)) + } + return nil +} + +func TestReexecUserNSOverlayWhiteoutConverter(t *testing.T) { + skip.If(t, os.Getuid() != 0, "skipping test that requires root") + skip.If(t, rsystem.RunningInUserNS(), "skipping test that requires initial userns") + if err := supportsUserNSOverlay(); err != nil { + t.Skipf("skipping test that requires kernel support for overlay-in-userns: %v", err) + } + tmp, err := ioutil.TempDir("", "docker-test-userns-overlay") + assert.NilError(t, err) + defer os.RemoveAll(tmp) + + char0 := filepath.Join(tmp, "char0") + cmd := reexec.Command(reexecMknodChar0, char0) + unshareCmd(cmd) + out, err := cmd.CombinedOutput() + assert.NilError(t, err, string(out)) + assert.NilError(t, isChar0(char0)) + + opaqueDir := filepath.Join(tmp, "opaquedir") + err = os.MkdirAll(opaqueDir, 0755) + assert.NilError(t, err, string(out)) + cmd = reexec.Command(reexecSetOpaque, opaqueDir) + unshareCmd(cmd) + out, err = cmd.CombinedOutput() + assert.NilError(t, err, string(out)) + assert.NilError(t, isOpaque(opaqueDir)) +} + +func init() { + reexec.Register(reexecSupportsUserNSOverlay, func() { + if err := supportsOverlay(os.Args[1]); err != nil { + panic(err) + } + }) + reexec.Register(reexecMknodChar0, func() { + if err := mknodChar0Overlay(os.Args[1]); err != nil { + panic(err) + } + }) + reexec.Register(reexecSetOpaque, func() { + if err := replaceDirWithOverlayOpaque(os.Args[1]); err != nil { + panic(err) + } + }) + if reexec.Init() { + os.Exit(0) + } +}