mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
b36e613d9f
This fix tries to address the issue raised in 29810 where btrfs subvolume removal failed when docker is in an unprivileged lxc container. The failure was caused by `Failed to rescan btrfs quota` with `operation not permitted`. However, if disk quota is not enabled, there is no need to run a btrfs rescan in the first place. This fix checks `quotaEnabled` and only runs the btrfs rescan if `quotaEnabled` is true. This fix fixes 29810. Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
574 lines
14 KiB
Go
574 lines
14 KiB
Go
// +build linux
|
|
|
|
package btrfs
|
|
|
|
/*
|
|
#include <stdlib.h>
|
|
#include <dirent.h>
|
|
#include <btrfs/ioctl.h>
|
|
#include <btrfs/ctree.h>
|
|
|
|
static void set_name_btrfs_ioctl_vol_args_v2(struct btrfs_ioctl_vol_args_v2* btrfs_struct, const char* value) {
|
|
snprintf(btrfs_struct->name, BTRFS_SUBVOL_NAME_MAX, "%s", value);
|
|
}
|
|
*/
|
|
import "C"
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"path"
|
|
"path/filepath"
|
|
"strings"
|
|
"syscall"
|
|
"unsafe"
|
|
|
|
"github.com/docker/docker/daemon/graphdriver"
|
|
"github.com/docker/docker/pkg/idtools"
|
|
"github.com/docker/docker/pkg/mount"
|
|
"github.com/docker/docker/pkg/parsers"
|
|
"github.com/docker/go-units"
|
|
"github.com/opencontainers/runc/libcontainer/label"
|
|
)
|
|
|
|
func init() {
|
|
graphdriver.Register("btrfs", Init)
|
|
}
|
|
|
|
// btrfsOptions carries the numeric settings parsed from driver and storage
// options.
type btrfsOptions struct {
	// minSpace is the parsed value of the "btrfs.min_space" driver option;
	// zero means no minimum was configured.
	minSpace uint64
	// size is the parsed value of the per-layer "size" storage option.
	size uint64
}
|
|
|
|
// Init returns a new BTRFS driver.
|
|
// An error is returned if BTRFS is not supported.
|
|
func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
|
|
|
|
fsMagic, err := graphdriver.GetFSMagic(home)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if fsMagic != graphdriver.FsMagicBtrfs {
|
|
return nil, graphdriver.ErrPrerequisites
|
|
}
|
|
|
|
rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if err := idtools.MkdirAllAs(home, 0700, rootUID, rootGID); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := mount.MakePrivate(home); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
opt, userDiskQuota, err := parseOptions(options)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
driver := &Driver{
|
|
home: home,
|
|
uidMaps: uidMaps,
|
|
gidMaps: gidMaps,
|
|
options: opt,
|
|
}
|
|
|
|
if userDiskQuota {
|
|
if err := driver.subvolEnableQuota(); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
return graphdriver.NewNaiveDiffDriver(driver, uidMaps, gidMaps), nil
|
|
}
|
|
|
|
func parseOptions(opt []string) (btrfsOptions, bool, error) {
|
|
var options btrfsOptions
|
|
userDiskQuota := false
|
|
for _, option := range opt {
|
|
key, val, err := parsers.ParseKeyValueOpt(option)
|
|
if err != nil {
|
|
return options, userDiskQuota, err
|
|
}
|
|
key = strings.ToLower(key)
|
|
switch key {
|
|
case "btrfs.min_space":
|
|
minSpace, err := units.RAMInBytes(val)
|
|
if err != nil {
|
|
return options, userDiskQuota, err
|
|
}
|
|
userDiskQuota = true
|
|
options.minSpace = uint64(minSpace)
|
|
default:
|
|
return options, userDiskQuota, fmt.Errorf("Unknown option %s", key)
|
|
}
|
|
}
|
|
return options, userDiskQuota, nil
|
|
}
|
|
|
|
// Driver contains information about the filesystem mounted.
|
|
type Driver struct {
|
|
//root of the file system
|
|
home string
|
|
uidMaps []idtools.IDMap
|
|
gidMaps []idtools.IDMap
|
|
options btrfsOptions
|
|
quotaEnabled bool
|
|
}
|
|
|
|
// String prints the name of the driver (btrfs).
|
|
func (d *Driver) String() string {
|
|
return "btrfs"
|
|
}
|
|
|
|
// Status returns current driver information in a two dimensional string array.
|
|
// Output contains "Build Version" and "Library Version" of the btrfs libraries used.
|
|
// Version information can be used to check compatibility with your kernel.
|
|
func (d *Driver) Status() [][2]string {
|
|
status := [][2]string{}
|
|
if bv := btrfsBuildVersion(); bv != "-" {
|
|
status = append(status, [2]string{"Build Version", bv})
|
|
}
|
|
if lv := btrfsLibVersion(); lv != -1 {
|
|
status = append(status, [2]string{"Library Version", fmt.Sprintf("%d", lv)})
|
|
}
|
|
return status
|
|
}
|
|
|
|
// GetMetadata returns empty metadata for this driver.
|
|
func (d *Driver) GetMetadata(id string) (map[string]string, error) {
|
|
return nil, nil
|
|
}
|
|
|
|
// Cleanup unmounts the home directory.
|
|
func (d *Driver) Cleanup() error {
|
|
if err := d.subvolDisableQuota(); err != nil {
|
|
return err
|
|
}
|
|
|
|
return mount.Unmount(d.home)
|
|
}
|
|
|
|
func free(p *C.char) {
|
|
C.free(unsafe.Pointer(p))
|
|
}
|
|
|
|
func openDir(path string) (*C.DIR, error) {
|
|
Cpath := C.CString(path)
|
|
defer free(Cpath)
|
|
|
|
dir := C.opendir(Cpath)
|
|
if dir == nil {
|
|
return nil, fmt.Errorf("Can't open dir")
|
|
}
|
|
return dir, nil
|
|
}
|
|
|
|
func closeDir(dir *C.DIR) {
|
|
if dir != nil {
|
|
C.closedir(dir)
|
|
}
|
|
}
|
|
|
|
func getDirFd(dir *C.DIR) uintptr {
|
|
return uintptr(C.dirfd(dir))
|
|
}
|
|
|
|
func subvolCreate(path, name string) error {
|
|
dir, err := openDir(path)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer closeDir(dir)
|
|
|
|
var args C.struct_btrfs_ioctl_vol_args
|
|
for i, c := range []byte(name) {
|
|
args.name[i] = C.char(c)
|
|
}
|
|
|
|
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_SUBVOL_CREATE,
|
|
uintptr(unsafe.Pointer(&args)))
|
|
if errno != 0 {
|
|
return fmt.Errorf("Failed to create btrfs subvolume: %v", errno.Error())
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func subvolSnapshot(src, dest, name string) error {
|
|
srcDir, err := openDir(src)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer closeDir(srcDir)
|
|
|
|
destDir, err := openDir(dest)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer closeDir(destDir)
|
|
|
|
var args C.struct_btrfs_ioctl_vol_args_v2
|
|
args.fd = C.__s64(getDirFd(srcDir))
|
|
|
|
var cs = C.CString(name)
|
|
C.set_name_btrfs_ioctl_vol_args_v2(&args, cs)
|
|
C.free(unsafe.Pointer(cs))
|
|
|
|
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(destDir), C.BTRFS_IOC_SNAP_CREATE_V2,
|
|
uintptr(unsafe.Pointer(&args)))
|
|
if errno != 0 {
|
|
return fmt.Errorf("Failed to create btrfs snapshot: %v", errno.Error())
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func isSubvolume(p string) (bool, error) {
|
|
var bufStat syscall.Stat_t
|
|
if err := syscall.Lstat(p, &bufStat); err != nil {
|
|
return false, err
|
|
}
|
|
|
|
// return true if it is a btrfs subvolume
|
|
return bufStat.Ino == C.BTRFS_FIRST_FREE_OBJECTID, nil
|
|
}
|
|
|
|
func subvolDelete(dirpath, name string) error {
|
|
dir, err := openDir(dirpath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer closeDir(dir)
|
|
fullPath := path.Join(dirpath, name)
|
|
|
|
var args C.struct_btrfs_ioctl_vol_args
|
|
|
|
// walk the btrfs subvolumes
|
|
walkSubvolumes := func(p string, f os.FileInfo, err error) error {
|
|
if err != nil {
|
|
if os.IsNotExist(err) && p != fullPath {
|
|
// missing most likely because the path was a subvolume that got removed in the previous iteration
|
|
// since it's gone anyway, we don't care
|
|
return nil
|
|
}
|
|
return fmt.Errorf("error walking subvolumes: %v", err)
|
|
}
|
|
// we want to check children only so skip itself
|
|
// it will be removed after the filepath walk anyways
|
|
if f.IsDir() && p != fullPath {
|
|
sv, err := isSubvolume(p)
|
|
if err != nil {
|
|
return fmt.Errorf("Failed to test if %s is a btrfs subvolume: %v", p, err)
|
|
}
|
|
if sv {
|
|
if err := subvolDelete(path.Dir(p), f.Name()); err != nil {
|
|
return fmt.Errorf("Failed to destroy btrfs child subvolume (%s) of parent (%s): %v", p, dirpath, err)
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
if err := filepath.Walk(path.Join(dirpath, name), walkSubvolumes); err != nil {
|
|
return fmt.Errorf("Recursively walking subvolumes for %s failed: %v", dirpath, err)
|
|
}
|
|
|
|
// all subvolumes have been removed
|
|
// now remove the one originally passed in
|
|
for i, c := range []byte(name) {
|
|
args.name[i] = C.char(c)
|
|
}
|
|
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_SNAP_DESTROY,
|
|
uintptr(unsafe.Pointer(&args)))
|
|
if errno != 0 {
|
|
return fmt.Errorf("Failed to destroy btrfs snapshot %s for %s: %v", dirpath, name, errno.Error())
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (d *Driver) subvolEnableQuota() error {
|
|
if d.quotaEnabled {
|
|
return nil
|
|
}
|
|
// In case quotaEnabled is not set, check qgroup and update quotaEnabled as needed
|
|
if _, err := subvolLookupQgroup(d.home); err == nil {
|
|
d.quotaEnabled = true
|
|
return nil
|
|
}
|
|
|
|
dir, err := openDir(d.home)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer closeDir(dir)
|
|
|
|
var args C.struct_btrfs_ioctl_quota_ctl_args
|
|
args.cmd = C.BTRFS_QUOTA_CTL_ENABLE
|
|
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_CTL,
|
|
uintptr(unsafe.Pointer(&args)))
|
|
if errno != 0 {
|
|
return fmt.Errorf("Failed to enable btrfs quota for %s: %v", dir, errno.Error())
|
|
}
|
|
|
|
d.quotaEnabled = true
|
|
|
|
return nil
|
|
}
|
|
|
|
func (d *Driver) subvolDisableQuota() error {
|
|
if !d.quotaEnabled {
|
|
// In case quotaEnabled is not set, check qgroup and update quotaEnabled as needed
|
|
if _, err := subvolLookupQgroup(d.home); err != nil {
|
|
// quota is still not enabled
|
|
return nil
|
|
}
|
|
d.quotaEnabled = true
|
|
}
|
|
|
|
dir, err := openDir(d.home)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer closeDir(dir)
|
|
|
|
var args C.struct_btrfs_ioctl_quota_ctl_args
|
|
args.cmd = C.BTRFS_QUOTA_CTL_DISABLE
|
|
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_CTL,
|
|
uintptr(unsafe.Pointer(&args)))
|
|
if errno != 0 {
|
|
return fmt.Errorf("Failed to disable btrfs quota for %s: %v", dir, errno.Error())
|
|
}
|
|
|
|
d.quotaEnabled = false
|
|
|
|
return nil
|
|
}
|
|
|
|
func (d *Driver) subvolRescanQuota() error {
|
|
if !d.quotaEnabled {
|
|
// In case quotaEnabled is not set, check qgroup and update quotaEnabled as needed
|
|
if _, err := subvolLookupQgroup(d.home); err != nil {
|
|
// quota is still not enabled
|
|
return nil
|
|
}
|
|
d.quotaEnabled = true
|
|
}
|
|
|
|
dir, err := openDir(d.home)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer closeDir(dir)
|
|
|
|
var args C.struct_btrfs_ioctl_quota_rescan_args
|
|
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_RESCAN_WAIT,
|
|
uintptr(unsafe.Pointer(&args)))
|
|
if errno != 0 {
|
|
return fmt.Errorf("Failed to rescan btrfs quota for %s: %v", dir, errno.Error())
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func subvolLimitQgroup(path string, size uint64) error {
|
|
dir, err := openDir(path)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer closeDir(dir)
|
|
|
|
var args C.struct_btrfs_ioctl_qgroup_limit_args
|
|
args.lim.max_referenced = C.__u64(size)
|
|
args.lim.flags = C.BTRFS_QGROUP_LIMIT_MAX_RFER
|
|
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QGROUP_LIMIT,
|
|
uintptr(unsafe.Pointer(&args)))
|
|
if errno != 0 {
|
|
return fmt.Errorf("Failed to limit qgroup for %s: %v", dir, errno.Error())
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func subvolLookupQgroup(path string) (uint64, error) {
|
|
dir, err := openDir(path)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
defer closeDir(dir)
|
|
|
|
var args C.struct_btrfs_ioctl_ino_lookup_args
|
|
args.objectid = C.BTRFS_FIRST_FREE_OBJECTID
|
|
|
|
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_INO_LOOKUP,
|
|
uintptr(unsafe.Pointer(&args)))
|
|
if errno != 0 {
|
|
return 0, fmt.Errorf("Failed to lookup qgroup for %s: %v", dir, errno.Error())
|
|
}
|
|
if args.treeid == 0 {
|
|
return 0, fmt.Errorf("Invalid qgroup id for %s: 0", dir)
|
|
}
|
|
|
|
return uint64(args.treeid), nil
|
|
}
|
|
|
|
func (d *Driver) subvolumesDir() string {
|
|
return path.Join(d.home, "subvolumes")
|
|
}
|
|
|
|
func (d *Driver) subvolumesDirID(id string) string {
|
|
return path.Join(d.subvolumesDir(), id)
|
|
}
|
|
|
|
// CreateReadWrite creates a layer that is writable for use as a container
|
|
// file system.
|
|
func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts) error {
|
|
return d.Create(id, parent, opts)
|
|
}
|
|
|
|
// Create the filesystem with given id.
|
|
func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error {
|
|
subvolumes := path.Join(d.home, "subvolumes")
|
|
rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := idtools.MkdirAllAs(subvolumes, 0700, rootUID, rootGID); err != nil {
|
|
return err
|
|
}
|
|
if parent == "" {
|
|
if err := subvolCreate(subvolumes, id); err != nil {
|
|
return err
|
|
}
|
|
} else {
|
|
parentDir := d.subvolumesDirID(parent)
|
|
st, err := os.Stat(parentDir)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if !st.IsDir() {
|
|
return fmt.Errorf("%s: not a directory", parentDir)
|
|
}
|
|
if err := subvolSnapshot(parentDir, subvolumes, id); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
var storageOpt map[string]string
|
|
if opts != nil {
|
|
storageOpt = opts.StorageOpt
|
|
}
|
|
|
|
if _, ok := storageOpt["size"]; ok {
|
|
driver := &Driver{}
|
|
if err := d.parseStorageOpt(storageOpt, driver); err != nil {
|
|
return err
|
|
}
|
|
if err := d.setStorageSize(path.Join(subvolumes, id), driver); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
// if we have a remapped root (user namespaces enabled), change the created snapshot
|
|
// dir ownership to match
|
|
if rootUID != 0 || rootGID != 0 {
|
|
if err := os.Chown(path.Join(subvolumes, id), rootUID, rootGID); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
mountLabel := ""
|
|
if opts != nil {
|
|
mountLabel = opts.MountLabel
|
|
}
|
|
|
|
return label.Relabel(path.Join(subvolumes, id), mountLabel, false)
|
|
}
|
|
|
|
// Parse btrfs storage options
|
|
func (d *Driver) parseStorageOpt(storageOpt map[string]string, driver *Driver) error {
|
|
// Read size to change the subvolume disk quota per container
|
|
for key, val := range storageOpt {
|
|
key := strings.ToLower(key)
|
|
switch key {
|
|
case "size":
|
|
size, err := units.RAMInBytes(val)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
driver.options.size = uint64(size)
|
|
default:
|
|
return fmt.Errorf("Unknown option %s", key)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Set btrfs storage size
|
|
func (d *Driver) setStorageSize(dir string, driver *Driver) error {
|
|
if driver.options.size <= 0 {
|
|
return fmt.Errorf("btrfs: invalid storage size: %s", units.HumanSize(float64(driver.options.size)))
|
|
}
|
|
if d.options.minSpace > 0 && driver.options.size < d.options.minSpace {
|
|
return fmt.Errorf("btrfs: storage size cannot be less than %s", units.HumanSize(float64(d.options.minSpace)))
|
|
}
|
|
|
|
if err := d.subvolEnableQuota(); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := subvolLimitQgroup(dir, driver.options.size); err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Remove the filesystem with given id.
|
|
func (d *Driver) Remove(id string) error {
|
|
dir := d.subvolumesDirID(id)
|
|
if _, err := os.Stat(dir); err != nil {
|
|
return err
|
|
}
|
|
if err := subvolDelete(d.subvolumesDir(), id); err != nil {
|
|
return err
|
|
}
|
|
if err := os.RemoveAll(dir); err != nil && !os.IsNotExist(err) {
|
|
return err
|
|
}
|
|
if err := d.subvolRescanQuota(); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Get the requested filesystem id.
|
|
func (d *Driver) Get(id, mountLabel string) (string, error) {
|
|
dir := d.subvolumesDirID(id)
|
|
st, err := os.Stat(dir)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
if !st.IsDir() {
|
|
return "", fmt.Errorf("%s: not a directory", dir)
|
|
}
|
|
|
|
return dir, nil
|
|
}
|
|
|
|
// Put is not implemented for BTRFS as there is no cleanup required for the id.
|
|
func (d *Driver) Put(id string) error {
|
|
// Get() creates no runtime resources (like e.g. mounts)
|
|
// so this doesn't need to do anything.
|
|
return nil
|
|
}
|
|
|
|
// Exists checks if the id exists in the filesystem.
|
|
func (d *Driver) Exists(id string) bool {
|
|
dir := d.subvolumesDirID(id)
|
|
_, err := os.Stat(dir)
|
|
return err == nil
|
|
}
|