Merge pull request #38038 from AkihiroSuda/ubuntu-overlayfs

pkg/archive: support overlayfs in userns (Ubuntu kernel only)
This commit is contained in:
Vincent Demeester 2019-01-04 09:50:15 +01:00 committed by GitHub
commit de640c9f49
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 313 additions and 8 deletions

View File

@ -745,7 +745,7 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error)
compressWriter,
options.ChownOpts,
)
ta.WhiteoutConverter = getWhiteoutConverter(options.WhiteoutFormat)
ta.WhiteoutConverter = getWhiteoutConverter(options.WhiteoutFormat, options.InUserNS)
defer func() {
// Make sure to check the error on Close.
@ -903,7 +903,7 @@ func Unpack(decompressedArchive io.Reader, dest string, options *TarOptions) err
var dirs []*tar.Header
idMapping := idtools.NewIDMappingsFromMaps(options.UIDMaps, options.GIDMaps)
rootIDs := idMapping.RootPair()
whiteoutConverter := getWhiteoutConverter(options.WhiteoutFormat)
whiteoutConverter := getWhiteoutConverter(options.WhiteoutFormat, options.InUserNS)
// Iterate through the files in the archive.
loop:

View File

@ -2,22 +2,29 @@ package archive // import "github.com/docker/docker/pkg/archive"
import (
"archive/tar"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
"syscall"
"github.com/containerd/continuity/fs"
"github.com/docker/docker/pkg/system"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
func getWhiteoutConverter(format WhiteoutFormat) tarWhiteoutConverter {
func getWhiteoutConverter(format WhiteoutFormat, inUserNS bool) tarWhiteoutConverter {
if format == OverlayWhiteoutFormat {
return overlayWhiteoutConverter{}
return overlayWhiteoutConverter{inUserNS: inUserNS}
}
return nil
}
type overlayWhiteoutConverter struct{}
type overlayWhiteoutConverter struct {
inUserNS bool
}
func (overlayWhiteoutConverter) ConvertWrite(hdr *tar.Header, path string, fi os.FileInfo) (wo *tar.Header, err error) {
// convert whiteouts to AUFS format
@ -61,13 +68,22 @@ func (overlayWhiteoutConverter) ConvertWrite(hdr *tar.Header, path string, fi os
return
}
func (overlayWhiteoutConverter) ConvertRead(hdr *tar.Header, path string) (bool, error) {
func (c overlayWhiteoutConverter) ConvertRead(hdr *tar.Header, path string) (bool, error) {
base := filepath.Base(path)
dir := filepath.Dir(path)
// if a directory is marked as opaque by the AUFS special file, we need to translate that to overlay
if base == WhiteoutOpaqueDir {
err := unix.Setxattr(dir, "trusted.overlay.opaque", []byte{'y'}, 0)
if err != nil {
if c.inUserNS {
if err = replaceDirWithOverlayOpaque(dir); err != nil {
return false, errors.Wrapf(err, "replaceDirWithOverlayOpaque(%q) failed", dir)
}
} else {
return false, errors.Wrapf(err, "setxattr(%q, trusted.overlay.opaque=y)", dir)
}
}
// don't write the file itself
return false, err
}
@ -78,7 +94,19 @@ func (overlayWhiteoutConverter) ConvertRead(hdr *tar.Header, path string) (bool,
originalPath := filepath.Join(dir, originalBase)
if err := unix.Mknod(originalPath, unix.S_IFCHR, 0); err != nil {
return false, err
if c.inUserNS {
// Ubuntu and a few distros support overlayfs in userns.
//
// Although we can't call mknod directly in userns (at least on bionic kernel 4.15),
// we can still create 0,0 char device using mknodChar0Overlay().
//
// NOTE: we don't need this hack for the containerd snapshotter+unpack model.
if err := mknodChar0Overlay(originalPath); err != nil {
return false, errors.Wrapf(err, "failed to mknodChar0UserNS(%q)", originalPath)
}
} else {
return false, errors.Wrapf(err, "failed to mknod(%q, S_IFCHR, 0)", originalPath)
}
}
if err := os.Chown(originalPath, hdr.Uid, hdr.Gid); err != nil {
return false, err
@ -90,3 +118,144 @@ func (overlayWhiteoutConverter) ConvertRead(hdr *tar.Header, path string) (bool,
return true, nil
}
// mknodChar0Overlay creates 0,0 char device by mounting overlayfs and unlinking.
// This function can be used for creating 0,0 char device in userns on Ubuntu.
//
// Steps:
// * Mkdir lower,upper,merged,work
// * Create lower/dummy
// * Mount overlayfs
// * Unlink merged/dummy
// * Unmount overlayfs
// * Make sure a 0,0 char device is created as upper/dummy
// * Rename upper/dummy to cleansedOriginalPath
func mknodChar0Overlay(cleansedOriginalPath string) error {
dir := filepath.Dir(cleansedOriginalPath)
tmp, err := ioutil.TempDir(dir, "mc0o")
if err != nil {
return errors.Wrapf(err, "failed to create a tmp directory under %s", dir)
}
defer os.RemoveAll(tmp)
lower := filepath.Join(tmp, "l")
upper := filepath.Join(tmp, "u")
work := filepath.Join(tmp, "w")
merged := filepath.Join(tmp, "m")
for _, s := range []string{lower, upper, work, merged} {
if err := os.MkdirAll(s, 0700); err != nil {
return errors.Wrapf(err, "failed to mkdir %s", s)
}
}
dummyBase := "d"
lowerDummy := filepath.Join(lower, dummyBase)
if err := ioutil.WriteFile(lowerDummy, []byte{}, 0600); err != nil {
return errors.Wrapf(err, "failed to create a dummy lower file %s", lowerDummy)
}
mOpts := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", lower, upper, work)
// docker/pkg/mount.Mount() requires procfs to be mounted. So we use syscall.Mount() directly instead.
if err := syscall.Mount("overlay", merged, "overlay", uintptr(0), mOpts); err != nil {
return errors.Wrapf(err, "failed to mount overlay (%s) on %s", mOpts, merged)
}
mergedDummy := filepath.Join(merged, dummyBase)
if err := os.Remove(mergedDummy); err != nil {
syscall.Unmount(merged, 0)
return errors.Wrapf(err, "failed to unlink %s", mergedDummy)
}
if err := syscall.Unmount(merged, 0); err != nil {
return errors.Wrapf(err, "failed to unmount %s", merged)
}
upperDummy := filepath.Join(upper, dummyBase)
if err := isChar0(upperDummy); err != nil {
return err
}
if err := os.Rename(upperDummy, cleansedOriginalPath); err != nil {
return errors.Wrapf(err, "failed to rename %s to %s", upperDummy, cleansedOriginalPath)
}
return nil
}
func isChar0(path string) error {
osStat, err := os.Stat(path)
if err != nil {
return errors.Wrapf(err, "failed to stat %s", path)
}
st, ok := osStat.Sys().(*syscall.Stat_t)
if !ok {
return errors.Errorf("got unsupported stat for %s", path)
}
if os.FileMode(st.Mode)&syscall.S_IFMT != syscall.S_IFCHR {
return errors.Errorf("%s is not a character device, got mode=%d", path, st.Mode)
}
if st.Rdev != 0 {
return errors.Errorf("%s is not a 0,0 character device, got Rdev=%d", path, st.Rdev)
}
return nil
}
// replaceDirWithOverlayOpaque replaces path with a new directory with trusted.overlay.opaque
// xattr. The contents of the directory are preserved.
func replaceDirWithOverlayOpaque(path string) error {
if path == "/" {
return errors.New("replaceDirWithOverlayOpaque: path must not be \"/\"")
}
dir := filepath.Dir(path)
tmp, err := ioutil.TempDir(dir, "rdwoo")
if err != nil {
return errors.Wrapf(err, "failed to create a tmp directory under %s", dir)
}
defer os.RemoveAll(tmp)
// newPath is a new empty directory crafted with trusted.overlay.opaque xattr.
// we copy the content of path into newPath, remove path, and rename newPath to path.
newPath, err := createDirWithOverlayOpaque(tmp)
if err != nil {
return errors.Wrapf(err, "createDirWithOverlayOpaque(%q) failed", tmp)
}
if err := fs.CopyDir(newPath, path); err != nil {
return errors.Wrapf(err, "CopyDir(%q, %q) failed", newPath, path)
}
if err := os.RemoveAll(path); err != nil {
return err
}
return os.Rename(newPath, path)
}
// createDirWithOverlayOpaque creates a directory with trusted.overlay.opaque xattr,
// without calling setxattr, so as to allow creating opaque dir in userns on Ubuntu.
func createDirWithOverlayOpaque(tmp string) (string, error) {
lower := filepath.Join(tmp, "l")
upper := filepath.Join(tmp, "u")
work := filepath.Join(tmp, "w")
merged := filepath.Join(tmp, "m")
for _, s := range []string{lower, upper, work, merged} {
if err := os.MkdirAll(s, 0700); err != nil {
return "", errors.Wrapf(err, "failed to mkdir %s", s)
}
}
dummyBase := "d"
lowerDummy := filepath.Join(lower, dummyBase)
if err := os.MkdirAll(lowerDummy, 0700); err != nil {
return "", errors.Wrapf(err, "failed to create a dummy lower directory %s", lowerDummy)
}
mOpts := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", lower, upper, work)
// docker/pkg/mount.Mount() requires procfs to be mounted. So we use syscall.Mount() directly instead.
if err := syscall.Mount("overlay", merged, "overlay", uintptr(0), mOpts); err != nil {
return "", errors.Wrapf(err, "failed to mount overlay (%s) on %s", mOpts, merged)
}
mergedDummy := filepath.Join(merged, dummyBase)
if err := os.Remove(mergedDummy); err != nil {
syscall.Unmount(merged, 0)
return "", errors.Wrapf(err, "failed to rmdir %s", mergedDummy)
}
// upperDummy becomes a 0,0-char device file here
if err := os.Mkdir(mergedDummy, 0700); err != nil {
syscall.Unmount(merged, 0)
return "", errors.Wrapf(err, "failed to mkdir %s", mergedDummy)
}
// upperDummy becomes a directory with trusted.overlay.opaque xattr
// (but can't be verified in userns)
if err := syscall.Unmount(merged, 0); err != nil {
return "", errors.Wrapf(err, "failed to unmount %s", merged)
}
upperDummy := filepath.Join(upper, dummyBase)
return upperDummy, nil
}

View File

@ -1,13 +1,18 @@
package archive // import "github.com/docker/docker/pkg/archive"
import (
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"syscall"
"testing"
"github.com/docker/docker/pkg/reexec"
"github.com/docker/docker/pkg/system"
rsystem "github.com/opencontainers/runc/libcontainer/system"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
"gotest.tools/assert"
"gotest.tools/skip"
@ -24,6 +29,7 @@ import (
// └── f1 # whiteout, 0644
func setupOverlayTestDir(t *testing.T, src string) {
skip.If(t, os.Getuid() != 0, "skipping test that requires root")
skip.If(t, rsystem.RunningInUserNS(), "skipping test that requires initial userns (trusted.overlay.opaque xattr cannot be set in userns, even with Ubuntu kernel)")
// Create opaque directory containing single file and permission 0700
err := os.Mkdir(filepath.Join(src, "d1"), 0700)
assert.NilError(t, err)
@ -160,3 +166,129 @@ func TestOverlayTarAUFSUntar(t *testing.T) {
checkFileMode(t, filepath.Join(dst, "d2", "f1"), 0660)
checkFileMode(t, filepath.Join(dst, "d3", WhiteoutPrefix+"f1"), 0600)
}
func unshareCmd(cmd *exec.Cmd) {
cmd.SysProcAttr = &syscall.SysProcAttr{
Cloneflags: syscall.CLONE_NEWUSER | syscall.CLONE_NEWNS,
UidMappings: []syscall.SysProcIDMap{
{
ContainerID: 0,
HostID: os.Geteuid(),
Size: 1,
},
},
GidMappings: []syscall.SysProcIDMap{
{
ContainerID: 0,
HostID: os.Getegid(),
Size: 1,
},
},
}
}
const (
reexecSupportsUserNSOverlay = "docker-test-supports-userns-overlay"
reexecMknodChar0 = "docker-test-userns-mknod-char0"
reexecSetOpaque = "docker-test-userns-set-opaque"
)
func supportsOverlay(dir string) error {
lower := filepath.Join(dir, "l")
upper := filepath.Join(dir, "u")
work := filepath.Join(dir, "w")
merged := filepath.Join(dir, "m")
for _, s := range []string{lower, upper, work, merged} {
if err := os.MkdirAll(s, 0700); err != nil {
return err
}
}
mOpts := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", lower, upper, work)
if err := syscall.Mount("overlay", merged, "overlay", uintptr(0), mOpts); err != nil {
return errors.Wrapf(err, "failed to mount overlay (%s) on %s", mOpts, merged)
}
if err := syscall.Unmount(merged, 0); err != nil {
return errors.Wrapf(err, "failed to unmount %s", merged)
}
return nil
}
// supportsUserNSOverlay returns nil error if overlay is supported in userns.
// Only Ubuntu and a few distros support overlay in userns (by patching the kernel).
// https://lists.ubuntu.com/archives/kernel-team/2014-February/038091.html
// As of kernel 4.19, the patch is not merged to the upstream.
func supportsUserNSOverlay() error {
tmp, err := ioutil.TempDir("", "docker-test-supports-userns-overlay")
if err != nil {
return err
}
defer os.RemoveAll(tmp)
cmd := reexec.Command(reexecSupportsUserNSOverlay, tmp)
unshareCmd(cmd)
out, err := cmd.CombinedOutput()
if err != nil {
return errors.Wrapf(err, "output: %q", string(out))
}
return nil
}
// isOpaque returns nil error if the dir has trusted.overlay.opaque=y.
// isOpaque needs to be called in the initial userns.
func isOpaque(dir string) error {
xattrOpaque, err := system.Lgetxattr(dir, "trusted.overlay.opaque")
if err != nil {
return errors.Wrapf(err, "failed to read opaque flag of %s", dir)
}
if string(xattrOpaque) != "y" {
return errors.Errorf("expected \"y\", got %q", string(xattrOpaque))
}
return nil
}
func TestReexecUserNSOverlayWhiteoutConverter(t *testing.T) {
skip.If(t, os.Getuid() != 0, "skipping test that requires root")
skip.If(t, rsystem.RunningInUserNS(), "skipping test that requires initial userns")
if err := supportsUserNSOverlay(); err != nil {
t.Skipf("skipping test that requires kernel support for overlay-in-userns: %v", err)
}
tmp, err := ioutil.TempDir("", "docker-test-userns-overlay")
assert.NilError(t, err)
defer os.RemoveAll(tmp)
char0 := filepath.Join(tmp, "char0")
cmd := reexec.Command(reexecMknodChar0, char0)
unshareCmd(cmd)
out, err := cmd.CombinedOutput()
assert.NilError(t, err, string(out))
assert.NilError(t, isChar0(char0))
opaqueDir := filepath.Join(tmp, "opaquedir")
err = os.MkdirAll(opaqueDir, 0755)
assert.NilError(t, err, string(out))
cmd = reexec.Command(reexecSetOpaque, opaqueDir)
unshareCmd(cmd)
out, err = cmd.CombinedOutput()
assert.NilError(t, err, string(out))
assert.NilError(t, isOpaque(opaqueDir))
}
func init() {
reexec.Register(reexecSupportsUserNSOverlay, func() {
if err := supportsOverlay(os.Args[1]); err != nil {
panic(err)
}
})
reexec.Register(reexecMknodChar0, func() {
if err := mknodChar0Overlay(os.Args[1]); err != nil {
panic(err)
}
})
reexec.Register(reexecSetOpaque, func() {
if err := replaceDirWithOverlayOpaque(os.Args[1]); err != nil {
panic(err)
}
})
if reexec.Init() {
os.Exit(0)
}
}

View File

@ -2,6 +2,6 @@
package archive // import "github.com/docker/docker/pkg/archive"
func getWhiteoutConverter(format WhiteoutFormat) tarWhiteoutConverter {
func getWhiteoutConverter(format WhiteoutFormat, inUserNS bool) tarWhiteoutConverter {
return nil
}

View File

@ -18,6 +18,7 @@ import (
"github.com/docker/docker/pkg/idtools"
"github.com/docker/docker/pkg/ioutils"
rsystem "github.com/opencontainers/runc/libcontainer/system"
"gotest.tools/assert"
is "gotest.tools/assert/cmp"
"gotest.tools/skip"
@ -1229,6 +1230,7 @@ func TestReplaceFileTarWrapper(t *testing.T) {
// version of this package that was built with <=go17 are still readable.
func TestPrefixHeaderReadable(t *testing.T) {
skip.If(t, runtime.GOOS != "windows" && os.Getuid() != 0, "skipping test that requires root")
skip.If(t, rsystem.RunningInUserNS(), "skipping test that requires more than 010000000 UIDs, which is unlikely to be satisfied when running in userns")
// https://gist.github.com/stevvooe/e2a790ad4e97425896206c0816e1a882#file-out-go
var testFile = []byte("\x1f\x8b\x08\x08\x44\x21\x68\x59\x00\x03\x74\x2e\x74\x61\x72\x00\x4b\xcb\xcf\x67\xa0\x35\x30\x80\x00\x86\x06\x10\x47\x01\xc1\x37\x40\x00\x54\xb6\xb1\xa1\xa9\x99\x09\x48\x25\x1d\x40\x69\x71\x49\x62\x91\x02\xe5\x76\xa1\x79\x84\x21\x91\xd6\x80\x72\xaf\x8f\x82\x51\x30\x0a\x46\x36\x00\x00\xf0\x1c\x1e\x95\x00\x06\x00\x00")

View File

@ -14,6 +14,7 @@ import (
"testing"
"github.com/docker/docker/pkg/system"
rsystem "github.com/opencontainers/runc/libcontainer/system"
"golang.org/x/sys/unix"
"gotest.tools/assert"
is "gotest.tools/assert/cmp"
@ -182,6 +183,7 @@ func getInode(path string) (uint64, error) {
func TestTarWithBlockCharFifo(t *testing.T) {
skip.If(t, os.Getuid() != 0, "skipping test that requires root")
skip.If(t, rsystem.RunningInUserNS(), "skipping test that requires initial userns")
origin, err := ioutil.TempDir("", "docker-test-tar-hardlink")
assert.NilError(t, err)