diff --git a/daemon/graphdriver/btrfs/btrfs.go b/daemon/graphdriver/btrfs/btrfs.go index 7358bf1002..1bc85fc23e 100644 --- a/daemon/graphdriver/btrfs/btrfs.go +++ b/daemon/graphdriver/btrfs/btrfs.go @@ -19,12 +19,15 @@ import ( "os" "path" "path/filepath" + "strings" "syscall" "unsafe" "github.com/docker/docker/daemon/graphdriver" "github.com/docker/docker/pkg/idtools" "github.com/docker/docker/pkg/mount" + "github.com/docker/docker/pkg/parsers" + "github.com/docker/go-units" "github.com/opencontainers/runc/libcontainer/label" ) @@ -32,6 +35,16 @@ func init() { graphdriver.Register("btrfs", Init) } +var ( + quotaEnabled = false + userDiskQuota = false +) + +type btrfsOptions struct { + minSpace uint64 + size uint64 +} + // Init returns a new BTRFS driver. // An error is returned if BTRFS is not supported. func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) { @@ -57,21 +70,58 @@ func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (grap return nil, err } + opt, err := parseOptions(options) + if err != nil { + return nil, err + } + + if userDiskQuota { + if err := subvolEnableQuota(home); err != nil { + return nil, err + } + quotaEnabled = true + } + driver := &Driver{ home: home, uidMaps: uidMaps, gidMaps: gidMaps, + options: opt, } return graphdriver.NewNaiveDiffDriver(driver, uidMaps, gidMaps), nil } +func parseOptions(opt []string) (btrfsOptions, error) { + var options btrfsOptions + for _, option := range opt { + key, val, err := parsers.ParseKeyValueOpt(option) + if err != nil { + return options, err + } + key = strings.ToLower(key) + switch key { + case "btrfs.min_space": + minSpace, err := units.RAMInBytes(val) + if err != nil { + return options, err + } + userDiskQuota = true + options.minSpace = uint64(minSpace) + default: + return options, fmt.Errorf("Unknown option %s", key) + } + } + return options, nil +} + // Driver contains information about the filesystem mounted. 
type Driver struct { //root of the file system home string uidMaps []idtools.IDMap gidMaps []idtools.IDMap + options btrfsOptions } // String prints the name of the driver (btrfs). @@ -100,6 +150,12 @@ func (d *Driver) GetMetadata(id string) (map[string]string, error) { // Cleanup unmounts the home directory. func (d *Driver) Cleanup() error { + if quotaEnabled { + if err := subvolDisableQuota(d.home); err != nil { + return err + } + } + return mount.Unmount(d.home) } @@ -238,6 +294,78 @@ func subvolDelete(dirpath, name string) error { return nil } +func subvolEnableQuota(path string) error { + dir, err := openDir(path) + if err != nil { + return err + } + defer closeDir(dir) + + var args C.struct_btrfs_ioctl_quota_ctl_args + args.cmd = C.BTRFS_QUOTA_CTL_ENABLE + _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_CTL, + uintptr(unsafe.Pointer(&args))) + if errno != 0 { + return fmt.Errorf("Failed to enable btrfs quota for %s: %v", dir, errno.Error()) + } + + return nil +} + +func subvolDisableQuota(path string) error { + dir, err := openDir(path) + if err != nil { + return err + } + defer closeDir(dir) + + var args C.struct_btrfs_ioctl_quota_ctl_args + args.cmd = C.BTRFS_QUOTA_CTL_DISABLE + _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_CTL, + uintptr(unsafe.Pointer(&args))) + if errno != 0 { + return fmt.Errorf("Failed to disable btrfs quota for %s: %v", dir, errno.Error()) + } + + return nil +} + +func subvolRescanQuota(path string) error { + dir, err := openDir(path) + if err != nil { + return err + } + defer closeDir(dir) + + var args C.struct_btrfs_ioctl_quota_rescan_args + _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_RESCAN_WAIT, + uintptr(unsafe.Pointer(&args))) + if errno != 0 { + return fmt.Errorf("Failed to rescan btrfs quota for %s: %v", dir, errno.Error()) + } + + return nil +} + +func subvolLimitQgroup(path string, size uint64) error { + dir, err 
:= openDir(path) + if err != nil { + return err + } + defer closeDir(dir) + + var args C.struct_btrfs_ioctl_qgroup_limit_args + args.lim.max_referenced = C.__u64(size) + args.lim.flags = C.BTRFS_QGROUP_LIMIT_MAX_RFER + _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QGROUP_LIMIT, + uintptr(unsafe.Pointer(&args))) + if errno != 0 { + return fmt.Errorf("Failed to limit qgroup for %s: %v", dir, errno.Error()) + } + + return nil +} + func (d *Driver) subvolumesDir() string { return path.Join(d.home, "subvolumes") } @@ -254,11 +382,6 @@ func (d *Driver) CreateReadWrite(id, parent, mountLabel string, storageOpt map[s // Create the filesystem with given id. func (d *Driver) Create(id, parent, mountLabel string, storageOpt map[string]string) error { - - if len(storageOpt) != 0 { - return fmt.Errorf("--storage-opt is not supported for btrfs") - } - subvolumes := path.Join(d.home, "subvolumes") rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps) if err != nil { @@ -285,6 +408,16 @@ func (d *Driver) Create(id, parent, mountLabel string, storageOpt map[string]str } } + if _, ok := storageOpt["size"]; ok { + driver := &Driver{} + if err := d.parseStorageOpt(storageOpt, driver); err != nil { + return err + } + if err := d.setStorageSize(path.Join(subvolumes, id), driver); err != nil { + return err + } + } + // if we have a remapped root (user namespaces enabled), change the created snapshot // dir ownership to match if rootUID != 0 || rootGID != 0 { @@ -296,6 +429,49 @@ func (d *Driver) Create(id, parent, mountLabel string, storageOpt map[string]str return label.Relabel(path.Join(subvolumes, id), mountLabel, false) } +// Parse btrfs storage options +func (d *Driver) parseStorageOpt(storageOpt map[string]string, driver *Driver) error { + // Read size to change the subvolume disk quota per container + for key, val := range storageOpt { + key := strings.ToLower(key) + switch key { + case "size": + size, err := units.RAMInBytes(val) + if 
err != nil { + return err + } + driver.options.size = uint64(size) + default: + return fmt.Errorf("Unknown option %s", key) + } + } + + return nil +} + +// Set btrfs storage size +func (d *Driver) setStorageSize(dir string, driver *Driver) error { + if driver.options.size <= 0 { + return fmt.Errorf("btrfs: invalid storage size: %s", units.HumanSize(float64(driver.options.size))) + } + if d.options.minSpace > 0 && driver.options.size < d.options.minSpace { + return fmt.Errorf("btrfs: storage size cannot be less than %s", units.HumanSize(float64(d.options.minSpace))) + } + + if !quotaEnabled { + if err := subvolEnableQuota(d.home); err != nil { + return err + } + quotaEnabled = true + } + + if err := subvolLimitQgroup(dir, driver.options.size); err != nil { + return err + } + + return nil +} + // Remove the filesystem with given id. func (d *Driver) Remove(id string) error { dir := d.subvolumesDirID(id) @@ -308,6 +484,9 @@ func (d *Driver) Remove(id string) error { if err := os.RemoveAll(dir); err != nil && !os.IsNotExist(err) { return err } + if err := subvolRescanQuota(d.home); err != nil { + return err + } return nil } diff --git a/docs/reference/commandline/dockerd.md b/docs/reference/commandline/dockerd.md index 842e051664..2ecadbed5c 100644 --- a/docs/reference/commandline/dockerd.md +++ b/docs/reference/commandline/dockerd.md @@ -185,8 +185,10 @@ Linux kernel as of [3.18.0](https://lkml.org/lkml/2014/10/26/137). Call ### Storage driver options Particular storage-driver can be configured with options specified with -`--storage-opt` flags. Options for `devicemapper` are prefixed with `dm` and -options for `zfs` start with `zfs`. +`--storage-opt` flags. Options for `devicemapper` are prefixed with `dm`, +options for `zfs` start with `zfs` and options for `btrfs` start with `btrfs`. + +#### Devicemapper options * `dm.thinpooldev` @@ -470,7 +472,7 @@ options for `zfs` start with `zfs`. 
$ dockerd --storage-opt dm.min_free_space=10% ``` -Currently supported options of `zfs`: +#### ZFS options * `zfs.fsname` @@ -482,6 +484,18 @@ Currently supported options of `zfs`: $ dockerd -s zfs --storage-opt zfs.fsname=zroot/docker +#### Btrfs options + +* `btrfs.min_space` + + Specifies the minimum size to use when creating the subvolume which is used + for containers. If user uses disk quota for btrfs when creating or running + a container with **--storage-opt size** option, docker should ensure the + **size** cannot be smaller than **btrfs.min_space**. + + Example use: + $ dockerd -s btrfs --storage-opt btrfs.min_space=10G + ## Docker runtime execution options The Docker daemon relies on a diff --git a/man/docker-create.1.md b/man/docker-create.1.md index 3f90a3a1d8..f8a80cd347 100644 --- a/man/docker-create.1.md +++ b/man/docker-create.1.md @@ -333,6 +333,7 @@ unit, `b` is used. Set LIMIT to `-1` to enable unlimited swap. $ docker create -it --storage-opt size=120G fedora /bin/bash This (size) will allow to set the container rootfs size to 120G at creation time. User cannot pass a size less than the Default BaseFS Size. + This option is only available for the `devicemapper`, `btrfs` and `zfs` graphdrivers. **--stop-signal**=*SIGTERM* Signal to stop a container. Default is SIGTERM. diff --git a/man/docker-run.1.md b/man/docker-run.1.md index 921ff9a07b..8fb856b5dc 100644 --- a/man/docker-run.1.md +++ b/man/docker-run.1.md @@ -484,7 +484,8 @@ its root filesystem mounted as read only prohibiting any writes. $ docker run -it --storage-opt size=120G fedora /bin/bash This (size) will allow to set the container rootfs size to 120G at creation time. User cannot pass a size less than the Default BaseFS Size. - + This option is only available for the `devicemapper`, `btrfs` and `zfs` graphdrivers. + **--stop-signal**=*SIGTERM* Signal to stop a container. Default is SIGTERM. 
diff --git a/man/dockerd.8.md b/man/dockerd.8.md index 74d7ce6aa1..2c381fb0f7 100644 --- a/man/dockerd.8.md +++ b/man/dockerd.8.md @@ -247,9 +247,9 @@ backends use operating system level technologies and can be configured. Specify options to the storage backend with **--storage-opt** flags. The -backends that currently take options are *devicemapper* and *zfs*. -Options for *devicemapper* are prefixed with *dm* and options for *zfs* -start with *zfs*. +backends that currently take options are *devicemapper*, *zfs* and *btrfs*. +Options for *devicemapper* are prefixed with *dm*, options for *zfs* +start with *zfs* and options for *btrfs* start with *btrfs*. Specifically for devicemapper, the default is a "loopback" model which requires no pre-configuration, but is extremely inefficient. Do not @@ -511,6 +511,17 @@ By default docker will pick up the zfs filesystem where docker graph Example use: `dockerd -s zfs --storage-opt zfs.fsname=zroot/docker` +## Btrfs options + +#### btrfs.min_space + +Specifies the minimum size to use when creating the subvolume which is used +for containers. If user uses disk quota for btrfs when creating or running +a container with **--storage-opt size** option, docker should ensure the +**size** cannot be smaller than **btrfs.min_space**. + +Example use: `dockerd -s btrfs --storage-opt btrfs.min_space=10G` + # CLUSTER STORE OPTIONS The daemon uses libkv to advertise