1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00

pkg/archive: support overlayfs in userns (Ubuntu kernel only)

Ubuntu kernel supports overlayfs in user namespaces.

However, Docker had previously crafting overlay opaques directly
using mknod(2) and setxattr(2), which are not supported in userns.

Tested with LXD, Ubuntu 18.04, kernel 4.15.0-36-generic #39-Ubuntu.

Signed-off-by: Akihiro Suda <suda.akihiro@lab.ntt.co.jp>
This commit is contained in:
Akihiro Suda 2018-10-15 16:47:02 +09:00
parent 6e3113f700
commit f8ed19c8b4
3 changed files with 177 additions and 8 deletions

View file

@ -745,7 +745,7 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error)
compressWriter, compressWriter,
options.ChownOpts, options.ChownOpts,
) )
ta.WhiteoutConverter = getWhiteoutConverter(options.WhiteoutFormat) ta.WhiteoutConverter = getWhiteoutConverter(options.WhiteoutFormat, options.InUserNS)
defer func() { defer func() {
// Make sure to check the error on Close. // Make sure to check the error on Close.
@ -903,7 +903,7 @@ func Unpack(decompressedArchive io.Reader, dest string, options *TarOptions) err
var dirs []*tar.Header var dirs []*tar.Header
idMapping := idtools.NewIDMappingsFromMaps(options.UIDMaps, options.GIDMaps) idMapping := idtools.NewIDMappingsFromMaps(options.UIDMaps, options.GIDMaps)
rootIDs := idMapping.RootPair() rootIDs := idMapping.RootPair()
whiteoutConverter := getWhiteoutConverter(options.WhiteoutFormat) whiteoutConverter := getWhiteoutConverter(options.WhiteoutFormat, options.InUserNS)
// Iterate through the files in the archive. // Iterate through the files in the archive.
loop: loop:

View file

@ -2,22 +2,29 @@ package archive // import "github.com/docker/docker/pkg/archive"
import ( import (
"archive/tar" "archive/tar"
"fmt"
"io/ioutil"
"os" "os"
"path/filepath" "path/filepath"
"strings" "strings"
"syscall"
"github.com/containerd/continuity/fs"
"github.com/docker/docker/pkg/system" "github.com/docker/docker/pkg/system"
"github.com/pkg/errors"
"golang.org/x/sys/unix" "golang.org/x/sys/unix"
) )
func getWhiteoutConverter(format WhiteoutFormat) tarWhiteoutConverter { func getWhiteoutConverter(format WhiteoutFormat, inUserNS bool) tarWhiteoutConverter {
if format == OverlayWhiteoutFormat { if format == OverlayWhiteoutFormat {
return overlayWhiteoutConverter{} return overlayWhiteoutConverter{inUserNS: inUserNS}
} }
return nil return nil
} }
type overlayWhiteoutConverter struct{} type overlayWhiteoutConverter struct {
inUserNS bool
}
func (overlayWhiteoutConverter) ConvertWrite(hdr *tar.Header, path string, fi os.FileInfo) (wo *tar.Header, err error) { func (overlayWhiteoutConverter) ConvertWrite(hdr *tar.Header, path string, fi os.FileInfo) (wo *tar.Header, err error) {
// convert whiteouts to AUFS format // convert whiteouts to AUFS format
@ -61,13 +68,22 @@ func (overlayWhiteoutConverter) ConvertWrite(hdr *tar.Header, path string, fi os
return return
} }
func (overlayWhiteoutConverter) ConvertRead(hdr *tar.Header, path string) (bool, error) { func (c overlayWhiteoutConverter) ConvertRead(hdr *tar.Header, path string) (bool, error) {
base := filepath.Base(path) base := filepath.Base(path)
dir := filepath.Dir(path) dir := filepath.Dir(path)
// if a directory is marked as opaque by the AUFS special file, we need to translate that to overlay // if a directory is marked as opaque by the AUFS special file, we need to translate that to overlay
if base == WhiteoutOpaqueDir { if base == WhiteoutOpaqueDir {
err := unix.Setxattr(dir, "trusted.overlay.opaque", []byte{'y'}, 0) err := unix.Setxattr(dir, "trusted.overlay.opaque", []byte{'y'}, 0)
if err != nil {
if c.inUserNS {
if err = replaceDirWithOverlayOpaque(dir); err != nil {
return false, errors.Wrapf(err, "replaceDirWithOverlayOpaque(%q) failed", dir)
}
} else {
return false, errors.Wrapf(err, "setxattr(%q, trusted.overlay.opaque=y)", dir)
}
}
// don't write the file itself // don't write the file itself
return false, err return false, err
} }
@ -78,7 +94,19 @@ func (overlayWhiteoutConverter) ConvertRead(hdr *tar.Header, path string) (bool,
originalPath := filepath.Join(dir, originalBase) originalPath := filepath.Join(dir, originalBase)
if err := unix.Mknod(originalPath, unix.S_IFCHR, 0); err != nil { if err := unix.Mknod(originalPath, unix.S_IFCHR, 0); err != nil {
return false, err if c.inUserNS {
// Ubuntu and a few distros support overlayfs in userns.
//
// Although we can't call mknod directly in userns (at least on bionic kernel 4.15),
// we can still create 0,0 char device using mknodChar0Overlay().
//
// NOTE: we don't need this hack for the containerd snapshotter+unpack model.
if err := mknodChar0Overlay(originalPath); err != nil {
return false, errors.Wrapf(err, "failed to mknodChar0UserNS(%q)", originalPath)
}
} else {
return false, errors.Wrapf(err, "failed to mknod(%q, S_IFCHR, 0)", originalPath)
}
} }
if err := os.Chown(originalPath, hdr.Uid, hdr.Gid); err != nil { if err := os.Chown(originalPath, hdr.Uid, hdr.Gid); err != nil {
return false, err return false, err
@ -90,3 +118,144 @@ func (overlayWhiteoutConverter) ConvertRead(hdr *tar.Header, path string) (bool,
return true, nil return true, nil
} }
// mknodChar0Overlay creates 0,0 char device by mounting overlayfs and unlinking.
// This function can be used for creating 0,0 char device in userns on Ubuntu.
//
// Steps:
// * Mkdir lower,upper,merged,work
// * Create lower/dummy
// * Mount overlayfs
// * Unlink merged/dummy
// * Unmount overlayfs
// * Make sure a 0,0 char device is created as upper/dummy
// * Rename upper/dummy to cleansedOriginalPath
func mknodChar0Overlay(cleansedOriginalPath string) error {
dir := filepath.Dir(cleansedOriginalPath)
tmp, err := ioutil.TempDir(dir, "mc0o")
if err != nil {
return errors.Wrapf(err, "failed to create a tmp directory under %s", dir)
}
defer os.RemoveAll(tmp)
lower := filepath.Join(tmp, "l")
upper := filepath.Join(tmp, "u")
work := filepath.Join(tmp, "w")
merged := filepath.Join(tmp, "m")
for _, s := range []string{lower, upper, work, merged} {
if err := os.MkdirAll(s, 0700); err != nil {
return errors.Wrapf(err, "failed to mkdir %s", s)
}
}
dummyBase := "d"
lowerDummy := filepath.Join(lower, dummyBase)
if err := ioutil.WriteFile(lowerDummy, []byte{}, 0600); err != nil {
return errors.Wrapf(err, "failed to create a dummy lower file %s", lowerDummy)
}
mOpts := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", lower, upper, work)
// docker/pkg/mount.Mount() requires procfs to be mounted. So we use syscall.Mount() directly instead.
if err := syscall.Mount("overlay", merged, "overlay", uintptr(0), mOpts); err != nil {
return errors.Wrapf(err, "failed to mount overlay (%s) on %s", mOpts, merged)
}
mergedDummy := filepath.Join(merged, dummyBase)
if err := os.Remove(mergedDummy); err != nil {
syscall.Unmount(merged, 0)
return errors.Wrapf(err, "failed to unlink %s", mergedDummy)
}
if err := syscall.Unmount(merged, 0); err != nil {
return errors.Wrapf(err, "failed to unmount %s", merged)
}
upperDummy := filepath.Join(upper, dummyBase)
if err := isChar0(upperDummy); err != nil {
return err
}
if err := os.Rename(upperDummy, cleansedOriginalPath); err != nil {
return errors.Wrapf(err, "failed to rename %s to %s", upperDummy, cleansedOriginalPath)
}
return nil
}
func isChar0(path string) error {
osStat, err := os.Stat(path)
if err != nil {
return errors.Wrapf(err, "failed to stat %s", path)
}
st, ok := osStat.Sys().(*syscall.Stat_t)
if !ok {
return errors.Errorf("got unsupported stat for %s", path)
}
if os.FileMode(st.Mode)&syscall.S_IFMT != syscall.S_IFCHR {
return errors.Errorf("%s is not a character device, got mode=%d", path, st.Mode)
}
if st.Rdev != 0 {
return errors.Errorf("%s is not a 0,0 character device, got Rdev=%d", path, st.Rdev)
}
return nil
}
// replaceDirWithOverlayOpaque replaces path with a new directory with trusted.overlay.opaque
// xattr. The contents of the directory are preserved.
func replaceDirWithOverlayOpaque(path string) error {
if path == "/" {
return errors.New("replaceDirWithOverlayOpaque: path must not be \"/\"")
}
dir := filepath.Dir(path)
tmp, err := ioutil.TempDir(dir, "rdwoo")
if err != nil {
return errors.Wrapf(err, "failed to create a tmp directory under %s", dir)
}
defer os.RemoveAll(tmp)
// newPath is a new empty directory crafted with trusted.overlay.opaque xattr.
// we copy the content of path into newPath, remove path, and rename newPath to path.
newPath, err := createDirWithOverlayOpaque(tmp)
if err != nil {
return errors.Wrapf(err, "createDirWithOverlayOpaque(%q) failed", tmp)
}
if err := fs.CopyDir(newPath, path); err != nil {
return errors.Wrapf(err, "CopyDir(%q, %q) failed", newPath, path)
}
if err := os.RemoveAll(path); err != nil {
return err
}
return os.Rename(newPath, path)
}
// createDirWithOverlayOpaque creates a directory with trusted.overlay.opaque xattr,
// without calling setxattr, so as to allow creating opaque dir in userns on Ubuntu.
func createDirWithOverlayOpaque(tmp string) (string, error) {
lower := filepath.Join(tmp, "l")
upper := filepath.Join(tmp, "u")
work := filepath.Join(tmp, "w")
merged := filepath.Join(tmp, "m")
for _, s := range []string{lower, upper, work, merged} {
if err := os.MkdirAll(s, 0700); err != nil {
return "", errors.Wrapf(err, "failed to mkdir %s", s)
}
}
dummyBase := "d"
lowerDummy := filepath.Join(lower, dummyBase)
if err := os.MkdirAll(lowerDummy, 0700); err != nil {
return "", errors.Wrapf(err, "failed to create a dummy lower directory %s", lowerDummy)
}
mOpts := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", lower, upper, work)
// docker/pkg/mount.Mount() requires procfs to be mounted. So we use syscall.Mount() directly instead.
if err := syscall.Mount("overlay", merged, "overlay", uintptr(0), mOpts); err != nil {
return "", errors.Wrapf(err, "failed to mount overlay (%s) on %s", mOpts, merged)
}
mergedDummy := filepath.Join(merged, dummyBase)
if err := os.Remove(mergedDummy); err != nil {
syscall.Unmount(merged, 0)
return "", errors.Wrapf(err, "failed to rmdir %s", mergedDummy)
}
// upperDummy becomes a 0,0-char device file here
if err := os.Mkdir(mergedDummy, 0700); err != nil {
syscall.Unmount(merged, 0)
return "", errors.Wrapf(err, "failed to mkdir %s", mergedDummy)
}
// upperDummy becomes a directory with trusted.overlay.opaque xattr
// (but can't be verified in userns)
if err := syscall.Unmount(merged, 0); err != nil {
return "", errors.Wrapf(err, "failed to unmount %s", merged)
}
upperDummy := filepath.Join(upper, dummyBase)
return upperDummy, nil
}

View file

@ -2,6 +2,6 @@
package archive // import "github.com/docker/docker/pkg/archive" package archive // import "github.com/docker/docker/pkg/archive"
func getWhiteoutConverter(format WhiteoutFormat) tarWhiteoutConverter { func getWhiteoutConverter(format WhiteoutFormat, inUserNS bool) tarWhiteoutConverter {
return nil return nil
} }