diff --git a/pkg/archive/archive.go b/pkg/archive/archive.go index 32132fcfed..bb623fa856 100644 --- a/pkg/archive/archive.go +++ b/pkg/archive/archive.go @@ -745,7 +745,7 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error) compressWriter, options.ChownOpts, ) - ta.WhiteoutConverter = getWhiteoutConverter(options.WhiteoutFormat) + ta.WhiteoutConverter = getWhiteoutConverter(options.WhiteoutFormat, options.InUserNS) defer func() { // Make sure to check the error on Close. @@ -903,7 +903,7 @@ func Unpack(decompressedArchive io.Reader, dest string, options *TarOptions) err var dirs []*tar.Header idMapping := idtools.NewIDMappingsFromMaps(options.UIDMaps, options.GIDMaps) rootIDs := idMapping.RootPair() - whiteoutConverter := getWhiteoutConverter(options.WhiteoutFormat) + whiteoutConverter := getWhiteoutConverter(options.WhiteoutFormat, options.InUserNS) // Iterate through the files in the archive. loop: diff --git a/pkg/archive/archive_linux.go b/pkg/archive/archive_linux.go index 970d4d0680..0601f7b0d1 100644 --- a/pkg/archive/archive_linux.go +++ b/pkg/archive/archive_linux.go @@ -2,22 +2,29 @@ package archive // import "github.com/docker/docker/pkg/archive" import ( "archive/tar" + "fmt" + "io/ioutil" "os" "path/filepath" "strings" + "syscall" + "github.com/containerd/continuity/fs" "github.com/docker/docker/pkg/system" + "github.com/pkg/errors" "golang.org/x/sys/unix" ) -func getWhiteoutConverter(format WhiteoutFormat) tarWhiteoutConverter { +func getWhiteoutConverter(format WhiteoutFormat, inUserNS bool) tarWhiteoutConverter { if format == OverlayWhiteoutFormat { - return overlayWhiteoutConverter{} + return overlayWhiteoutConverter{inUserNS: inUserNS} } return nil } -type overlayWhiteoutConverter struct{} +type overlayWhiteoutConverter struct { + inUserNS bool +} func (overlayWhiteoutConverter) ConvertWrite(hdr *tar.Header, path string, fi os.FileInfo) (wo *tar.Header, err error) { // convert whiteouts to AUFS format @@ -61,13 +68,22 @@ func (overlayWhiteoutConverter) ConvertWrite(hdr *tar.Header, path string, fi os return } -func (overlayWhiteoutConverter) ConvertRead(hdr *tar.Header, path string) (bool, error) { +func (c overlayWhiteoutConverter) ConvertRead(hdr *tar.Header, path string) (bool, error) { base := filepath.Base(path) dir := filepath.Dir(path) // if a directory is marked as opaque by the AUFS special file, we need to translate that to overlay if base == WhiteoutOpaqueDir { err := unix.Setxattr(dir, "trusted.overlay.opaque", []byte{'y'}, 0) + if err != nil { + if c.inUserNS { + if err = replaceDirWithOverlayOpaque(dir); err != nil { + return false, errors.Wrapf(err, "replaceDirWithOverlayOpaque(%q) failed", dir) + } + } else { + return false, errors.Wrapf(err, "setxattr(%q, trusted.overlay.opaque=y)", dir) + } + } // don't write the file itself return false, err } @@ -78,7 +94,19 @@ func (overlayWhiteoutConverter) ConvertRead(hdr *tar.Header, path string) (bool, originalPath := filepath.Join(dir, originalBase) if err := unix.Mknod(originalPath, unix.S_IFCHR, 0); err != nil { - return false, err + if c.inUserNS { + // Ubuntu and a few distros support overlayfs in userns. + // + // Although we can't call mknod directly in userns (at least on bionic kernel 4.15), + // we can still create 0,0 char device using mknodChar0Overlay(). + // + // NOTE: we don't need this hack for the containerd snapshotter+unpack model. + if err := mknodChar0Overlay(originalPath); err != nil { + return false, errors.Wrapf(err, "failed to mknodChar0UserNS(%q)", originalPath) + } + } else { + return false, errors.Wrapf(err, "failed to mknod(%q, S_IFCHR, 0)", originalPath) + } } if err := os.Chown(originalPath, hdr.Uid, hdr.Gid); err != nil { return false, err @@ -90,3 +118,144 @@ func (overlayWhiteoutConverter) ConvertRead(hdr *tar.Header, path string) (bool, return true, nil } + +// mknodChar0Overlay creates 0,0 char device by mounting overlayfs and unlinking. +// This function can be used for creating 0,0 char device in userns on Ubuntu. +// +// Steps: +// * Mkdir lower,upper,merged,work +// * Create lower/dummy +// * Mount overlayfs +// * Unlink merged/dummy +// * Unmount overlayfs +// * Make sure a 0,0 char device is created as upper/dummy +// * Rename upper/dummy to cleansedOriginalPath +func mknodChar0Overlay(cleansedOriginalPath string) error { + dir := filepath.Dir(cleansedOriginalPath) + tmp, err := ioutil.TempDir(dir, "mc0o") + if err != nil { + return errors.Wrapf(err, "failed to create a tmp directory under %s", dir) + } + defer os.RemoveAll(tmp) + lower := filepath.Join(tmp, "l") + upper := filepath.Join(tmp, "u") + work := filepath.Join(tmp, "w") + merged := filepath.Join(tmp, "m") + for _, s := range []string{lower, upper, work, merged} { + if err := os.MkdirAll(s, 0700); err != nil { + return errors.Wrapf(err, "failed to mkdir %s", s) + } + } + dummyBase := "d" + lowerDummy := filepath.Join(lower, dummyBase) + if err := ioutil.WriteFile(lowerDummy, []byte{}, 0600); err != nil { + return errors.Wrapf(err, "failed to create a dummy lower file %s", lowerDummy) + } + mOpts := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", lower, upper, work) + // docker/pkg/mount.Mount() requires procfs to be mounted. So we use syscall.Mount() directly instead. + if err := syscall.Mount("overlay", merged, "overlay", uintptr(0), mOpts); err != nil { + return errors.Wrapf(err, "failed to mount overlay (%s) on %s", mOpts, merged) + } + mergedDummy := filepath.Join(merged, dummyBase) + if err := os.Remove(mergedDummy); err != nil { + syscall.Unmount(merged, 0) + return errors.Wrapf(err, "failed to unlink %s", mergedDummy) + } + if err := syscall.Unmount(merged, 0); err != nil { + return errors.Wrapf(err, "failed to unmount %s", merged) + } + upperDummy := filepath.Join(upper, dummyBase) + if err := isChar0(upperDummy); err != nil { + return err + } + if err := os.Rename(upperDummy, cleansedOriginalPath); err != nil { + return errors.Wrapf(err, "failed to rename %s to %s", upperDummy, cleansedOriginalPath) + } + return nil +} + +func isChar0(path string) error { + osStat, err := os.Stat(path) + if err != nil { + return errors.Wrapf(err, "failed to stat %s", path) + } + st, ok := osStat.Sys().(*syscall.Stat_t) + if !ok { + return errors.Errorf("got unsupported stat for %s", path) + } + if os.FileMode(st.Mode)&syscall.S_IFMT != syscall.S_IFCHR { + return errors.Errorf("%s is not a character device, got mode=%d", path, st.Mode) + } + if st.Rdev != 0 { + return errors.Errorf("%s is not a 0,0 character device, got Rdev=%d", path, st.Rdev) + } + return nil +} + +// replaceDirWithOverlayOpaque replaces path with a new directory with trusted.overlay.opaque +// xattr. The contents of the directory are preserved. +func replaceDirWithOverlayOpaque(path string) error { + if path == "/" { + return errors.New("replaceDirWithOverlayOpaque: path must not be \"/\"") + } + dir := filepath.Dir(path) + tmp, err := ioutil.TempDir(dir, "rdwoo") + if err != nil { + return errors.Wrapf(err, "failed to create a tmp directory under %s", dir) + } + defer os.RemoveAll(tmp) + // newPath is a new empty directory crafted with trusted.overlay.opaque xattr. + // we copy the content of path into newPath, remove path, and rename newPath to path. + newPath, err := createDirWithOverlayOpaque(tmp) + if err != nil { + return errors.Wrapf(err, "createDirWithOverlayOpaque(%q) failed", tmp) + } + if err := fs.CopyDir(newPath, path); err != nil { + return errors.Wrapf(err, "CopyDir(%q, %q) failed", newPath, path) + } + if err := os.RemoveAll(path); err != nil { + return err + } + return os.Rename(newPath, path) +} + +// createDirWithOverlayOpaque creates a directory with trusted.overlay.opaque xattr, +// without calling setxattr, so as to allow creating opaque dir in userns on Ubuntu. +func createDirWithOverlayOpaque(tmp string) (string, error) { + lower := filepath.Join(tmp, "l") + upper := filepath.Join(tmp, "u") + work := filepath.Join(tmp, "w") + merged := filepath.Join(tmp, "m") + for _, s := range []string{lower, upper, work, merged} { + if err := os.MkdirAll(s, 0700); err != nil { + return "", errors.Wrapf(err, "failed to mkdir %s", s) + } + } + dummyBase := "d" + lowerDummy := filepath.Join(lower, dummyBase) + if err := os.MkdirAll(lowerDummy, 0700); err != nil { + return "", errors.Wrapf(err, "failed to create a dummy lower directory %s", lowerDummy) + } + mOpts := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", lower, upper, work) + // docker/pkg/mount.Mount() requires procfs to be mounted. So we use syscall.Mount() directly instead. + if err := syscall.Mount("overlay", merged, "overlay", uintptr(0), mOpts); err != nil { + return "", errors.Wrapf(err, "failed to mount overlay (%s) on %s", mOpts, merged) + } + mergedDummy := filepath.Join(merged, dummyBase) + if err := os.Remove(mergedDummy); err != nil { + syscall.Unmount(merged, 0) + return "", errors.Wrapf(err, "failed to rmdir %s", mergedDummy) + } + // upperDummy becomes a 0,0-char device file here + if err := os.Mkdir(mergedDummy, 0700); err != nil { + syscall.Unmount(merged, 0) + return "", errors.Wrapf(err, "failed to mkdir %s", mergedDummy) + } + // upperDummy becomes a directory with trusted.overlay.opaque xattr + // (but can't be verified in userns) + if err := syscall.Unmount(merged, 0); err != nil { + return "", errors.Wrapf(err, "failed to unmount %s", merged) + } + upperDummy := filepath.Join(upper, dummyBase) + return upperDummy, nil +} diff --git a/pkg/archive/archive_linux_test.go b/pkg/archive/archive_linux_test.go index 9422269dff..8fdcb34b53 100644 --- a/pkg/archive/archive_linux_test.go +++ b/pkg/archive/archive_linux_test.go @@ -1,13 +1,18 @@ package archive // import "github.com/docker/docker/pkg/archive" import ( + "fmt" "io/ioutil" "os" + "os/exec" "path/filepath" "syscall" "testing" + "github.com/docker/docker/pkg/reexec" "github.com/docker/docker/pkg/system" + rsystem "github.com/opencontainers/runc/libcontainer/system" + "github.com/pkg/errors" "golang.org/x/sys/unix" "gotest.tools/assert" "gotest.tools/skip" @@ -24,6 +29,7 @@ import ( // └── f1 # whiteout, 0644 func setupOverlayTestDir(t *testing.T, src string) { skip.If(t, os.Getuid() != 0, "skipping test that requires root") + skip.If(t, rsystem.RunningInUserNS(), "skipping test that requires initial userns (trusted.overlay.opaque xattr cannot be set in userns, even with Ubuntu kernel)") // Create opaque directory containing single file and permission 0700 err := os.Mkdir(filepath.Join(src, "d1"), 0700) assert.NilError(t, err) @@ -160,3 +166,129 @@ func TestOverlayTarAUFSUntar(t *testing.T) { checkFileMode(t, filepath.Join(dst, "d2", "f1"), 0660) checkFileMode(t, filepath.Join(dst, "d3", WhiteoutPrefix+"f1"), 0600) } + +func unshareCmd(cmd *exec.Cmd) { + cmd.SysProcAttr = &syscall.SysProcAttr{ + Cloneflags: syscall.CLONE_NEWUSER | syscall.CLONE_NEWNS, + UidMappings: []syscall.SysProcIDMap{ + { + ContainerID: 0, + HostID: os.Geteuid(), + Size: 1, + }, + }, + GidMappings: []syscall.SysProcIDMap{ + { + ContainerID: 0, + HostID: os.Getegid(), + Size: 1, + }, + }, + } +} + +const ( + reexecSupportsUserNSOverlay = "docker-test-supports-userns-overlay" + reexecMknodChar0 = "docker-test-userns-mknod-char0" + reexecSetOpaque = "docker-test-userns-set-opaque" +) + +func supportsOverlay(dir string) error { + lower := filepath.Join(dir, "l") + upper := filepath.Join(dir, "u") + work := filepath.Join(dir, "w") + merged := filepath.Join(dir, "m") + for _, s := range []string{lower, upper, work, merged} { + if err := os.MkdirAll(s, 0700); err != nil { + return err + } + } + mOpts := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", lower, upper, work) + if err := syscall.Mount("overlay", merged, "overlay", uintptr(0), mOpts); err != nil { + return errors.Wrapf(err, "failed to mount overlay (%s) on %s", mOpts, merged) + } + if err := syscall.Unmount(merged, 0); err != nil { + return errors.Wrapf(err, "failed to unmount %s", merged) + } + return nil +} + +// supportsUserNSOverlay returns nil error if overlay is supported in userns. +// Only Ubuntu and a few distros support overlay in userns (by patching the kernel). +// https://lists.ubuntu.com/archives/kernel-team/2014-February/038091.html +// As of kernel 4.19, the patch is not merged to the upstream. +func supportsUserNSOverlay() error { + tmp, err := ioutil.TempDir("", "docker-test-supports-userns-overlay") + if err != nil { + return err + } + defer os.RemoveAll(tmp) + cmd := reexec.Command(reexecSupportsUserNSOverlay, tmp) + unshareCmd(cmd) + out, err := cmd.CombinedOutput() + if err != nil { + return errors.Wrapf(err, "output: %q", string(out)) + } + return nil +} + +// isOpaque returns nil error if the dir has trusted.overlay.opaque=y. +// isOpaque needs to be called in the initial userns. +func isOpaque(dir string) error { + xattrOpaque, err := system.Lgetxattr(dir, "trusted.overlay.opaque") + if err != nil { + return errors.Wrapf(err, "failed to read opaque flag of %s", dir) + } + if string(xattrOpaque) != "y" { + return errors.Errorf("expected \"y\", got %q", string(xattrOpaque)) + } + return nil +} + +func TestReexecUserNSOverlayWhiteoutConverter(t *testing.T) { + skip.If(t, os.Getuid() != 0, "skipping test that requires root") + skip.If(t, rsystem.RunningInUserNS(), "skipping test that requires initial userns") + if err := supportsUserNSOverlay(); err != nil { + t.Skipf("skipping test that requires kernel support for overlay-in-userns: %v", err) + } + tmp, err := ioutil.TempDir("", "docker-test-userns-overlay") + assert.NilError(t, err) + defer os.RemoveAll(tmp) + + char0 := filepath.Join(tmp, "char0") + cmd := reexec.Command(reexecMknodChar0, char0) + unshareCmd(cmd) + out, err := cmd.CombinedOutput() + assert.NilError(t, err, string(out)) + assert.NilError(t, isChar0(char0)) + + opaqueDir := filepath.Join(tmp, "opaquedir") + err = os.MkdirAll(opaqueDir, 0755) + assert.NilError(t, err, string(out)) + cmd = reexec.Command(reexecSetOpaque, opaqueDir) + unshareCmd(cmd) + out, err = cmd.CombinedOutput() + assert.NilError(t, err, string(out)) + assert.NilError(t, isOpaque(opaqueDir)) +} + +func init() { + reexec.Register(reexecSupportsUserNSOverlay, func() { + if err := supportsOverlay(os.Args[1]); err != nil { + panic(err) + } + }) + reexec.Register(reexecMknodChar0, func() { + if err := mknodChar0Overlay(os.Args[1]); err != nil { + panic(err) + } + }) + reexec.Register(reexecSetOpaque, func() { + if err := replaceDirWithOverlayOpaque(os.Args[1]); err != nil { + panic(err) + } + }) + if reexec.Init() { + os.Exit(0) + } +} diff --git a/pkg/archive/archive_other.go b/pkg/archive/archive_other.go index 462dfc6323..65a73354c4 100644 --- a/pkg/archive/archive_other.go +++ b/pkg/archive/archive_other.go @@ -2,6 +2,6 @@ package archive // import "github.com/docker/docker/pkg/archive" -func getWhiteoutConverter(format WhiteoutFormat) tarWhiteoutConverter { +func getWhiteoutConverter(format WhiteoutFormat, inUserNS bool) tarWhiteoutConverter { return nil } diff --git a/pkg/archive/archive_test.go b/pkg/archive/archive_test.go index affc7fa83b..b49ad6723e 100644 --- a/pkg/archive/archive_test.go +++ b/pkg/archive/archive_test.go @@ -18,6 +18,7 @@ import ( "github.com/docker/docker/pkg/idtools" "github.com/docker/docker/pkg/ioutils" + rsystem "github.com/opencontainers/runc/libcontainer/system" "gotest.tools/assert" is "gotest.tools/assert/cmp" "gotest.tools/skip" @@ -1229,6 +1230,7 @@ func TestReplaceFileTarWrapper(t *testing.T) { // version of this package that was built with <=go17 are still readable. func TestPrefixHeaderReadable(t *testing.T) { skip.If(t, runtime.GOOS != "windows" && os.Getuid() != 0, "skipping test that requires root") + skip.If(t, rsystem.RunningInUserNS(), "skipping test that requires more than 010000000 UIDs, which is unlikely to be satisfied when running in userns") // https://gist.github.com/stevvooe/e2a790ad4e97425896206c0816e1a882#file-out-go var testFile = []byte("\x1f\x8b\x08\x08\x44\x21\x68\x59\x00\x03\x74\x2e\x74\x61\x72\x00\x4b\xcb\xcf\x67\xa0\x35\x30\x80\x00\x86\x06\x10\x47\x01\xc1\x37\x40\x00\x54\xb6\xb1\xa1\xa9\x99\x09\x48\x25\x1d\x40\x69\x71\x49\x62\x91\x02\xe5\x76\xa1\x79\x84\x21\x91\xd6\x80\x72\xaf\x8f\x82\x51\x30\x0a\x46\x36\x00\x00\xf0\x1c\x1e\x95\x00\x06\x00\x00") diff --git a/pkg/archive/archive_unix_test.go b/pkg/archive/archive_unix_test.go index dc4e1fdae6..6119133f8e 100644 --- a/pkg/archive/archive_unix_test.go +++ b/pkg/archive/archive_unix_test.go @@ -14,6 +14,7 @@ import ( "testing" "github.com/docker/docker/pkg/system" + rsystem "github.com/opencontainers/runc/libcontainer/system" "golang.org/x/sys/unix" "gotest.tools/assert" is "gotest.tools/assert/cmp" @@ -182,6 +183,7 @@ func getInode(path string) (uint64, error) { func TestTarWithBlockCharFifo(t *testing.T) { skip.If(t, os.Getuid() != 0, "skipping test that requires root") + skip.If(t, rsystem.RunningInUserNS(), "skipping test that requires initial userns") origin, err := ioutil.TempDir("", "docker-test-tar-hardlink") assert.NilError(t, err)