mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
![Dan Walsh](/assets/img/avatar_default.png)
This change will allow us to run SELinux in a container with BTRFS back end. We continue to work on fixing the kernel/BTRFS but this change will allow SELinux Security separation on BTRFS. It basically relabels the content on container creation. Just relabling -init directory in BTRFS use case. Everything looks like it works. I don't believe tar/achive stores the SELinux labels, so we are good as far as docker commit. Tested Speed on startup with BTRFS on top of loopback directory. BTRFS not on loopback should get even better perfomance on startup time. The more inodes inside of the container image will increase the relabel time. This patch will give people who care more about security the option of runnin BTRFS with SELinux. Those who don't want to take the slow down can disable SELinux either in individual containers or for all containers by continuing to disable SELinux in the daemon. Without relabel: > time docker run --security-opt label:disable fedora echo test test real 0m0.918s user 0m0.009s sys 0m0.026s With Relabel test real 0m1.942s user 0m0.007s sys 0m0.030s Signed-off-by: Dan Walsh <dwalsh@redhat.com> Signed-off-by: Dan Walsh <dwalsh@redhat.com>
302 lines
7.1 KiB
Go
302 lines
7.1 KiB
Go
// +build linux
|
|
|
|
package btrfs
|
|
|
|
/*
|
|
#include <stdlib.h>
|
|
#include <dirent.h>
|
|
#include <btrfs/ioctl.h>
|
|
#include <btrfs/ctree.h>
|
|
*/
|
|
import "C"
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"path"
|
|
"path/filepath"
|
|
"syscall"
|
|
"unsafe"
|
|
|
|
"github.com/docker/docker/daemon/graphdriver"
|
|
"github.com/docker/docker/pkg/idtools"
|
|
"github.com/docker/docker/pkg/mount"
|
|
"github.com/opencontainers/runc/libcontainer/label"
|
|
)
|
|
|
|
func init() {
|
|
graphdriver.Register("btrfs", Init)
|
|
}
|
|
|
|
// Init returns a new BTRFS driver.
|
|
// An error is returned if BTRFS is not supported.
|
|
func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
|
|
rootdir := path.Dir(home)
|
|
|
|
var buf syscall.Statfs_t
|
|
if err := syscall.Statfs(rootdir, &buf); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if graphdriver.FsMagic(buf.Type) != graphdriver.FsMagicBtrfs {
|
|
return nil, graphdriver.ErrPrerequisites
|
|
}
|
|
|
|
rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if err := idtools.MkdirAllAs(home, 0700, rootUID, rootGID); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := mount.MakePrivate(home); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
driver := &Driver{
|
|
home: home,
|
|
uidMaps: uidMaps,
|
|
gidMaps: gidMaps,
|
|
}
|
|
|
|
return graphdriver.NewNaiveDiffDriver(driver, uidMaps, gidMaps), nil
|
|
}
|
|
|
|
// Driver contains information about the filesystem mounted.
|
|
type Driver struct {
|
|
//root of the file system
|
|
home string
|
|
uidMaps []idtools.IDMap
|
|
gidMaps []idtools.IDMap
|
|
}
|
|
|
|
// String prints the name of the driver (btrfs).
|
|
func (d *Driver) String() string {
|
|
return "btrfs"
|
|
}
|
|
|
|
// Status returns current driver information in a two dimensional string array.
|
|
// Output contains "Build Version" and "Library Version" of the btrfs libraries used.
|
|
// Version information can be used to check compatibility with your kernel.
|
|
func (d *Driver) Status() [][2]string {
|
|
status := [][2]string{}
|
|
if bv := btrfsBuildVersion(); bv != "-" {
|
|
status = append(status, [2]string{"Build Version", bv})
|
|
}
|
|
if lv := btrfsLibVersion(); lv != -1 {
|
|
status = append(status, [2]string{"Library Version", fmt.Sprintf("%d", lv)})
|
|
}
|
|
return status
|
|
}
|
|
|
|
// GetMetadata returns empty metadata for this driver.
|
|
func (d *Driver) GetMetadata(id string) (map[string]string, error) {
|
|
return nil, nil
|
|
}
|
|
|
|
// Cleanup unmounts the home directory.
|
|
func (d *Driver) Cleanup() error {
|
|
return mount.Unmount(d.home)
|
|
}
|
|
|
|
func free(p *C.char) {
|
|
C.free(unsafe.Pointer(p))
|
|
}
|
|
|
|
func openDir(path string) (*C.DIR, error) {
|
|
Cpath := C.CString(path)
|
|
defer free(Cpath)
|
|
|
|
dir := C.opendir(Cpath)
|
|
if dir == nil {
|
|
return nil, fmt.Errorf("Can't open dir")
|
|
}
|
|
return dir, nil
|
|
}
|
|
|
|
func closeDir(dir *C.DIR) {
|
|
if dir != nil {
|
|
C.closedir(dir)
|
|
}
|
|
}
|
|
|
|
func getDirFd(dir *C.DIR) uintptr {
|
|
return uintptr(C.dirfd(dir))
|
|
}
|
|
|
|
func subvolCreate(path, name string) error {
|
|
dir, err := openDir(path)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer closeDir(dir)
|
|
|
|
var args C.struct_btrfs_ioctl_vol_args
|
|
for i, c := range []byte(name) {
|
|
args.name[i] = C.char(c)
|
|
}
|
|
|
|
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_SUBVOL_CREATE,
|
|
uintptr(unsafe.Pointer(&args)))
|
|
if errno != 0 {
|
|
return fmt.Errorf("Failed to create btrfs subvolume: %v", errno.Error())
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func subvolSnapshot(src, dest, name string) error {
|
|
srcDir, err := openDir(src)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer closeDir(srcDir)
|
|
|
|
destDir, err := openDir(dest)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer closeDir(destDir)
|
|
|
|
var args C.struct_btrfs_ioctl_vol_args_v2
|
|
args.fd = C.__s64(getDirFd(srcDir))
|
|
for i, c := range []byte(name) {
|
|
args.name[i] = C.char(c)
|
|
}
|
|
|
|
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(destDir), C.BTRFS_IOC_SNAP_CREATE_V2,
|
|
uintptr(unsafe.Pointer(&args)))
|
|
if errno != 0 {
|
|
return fmt.Errorf("Failed to create btrfs snapshot: %v", errno.Error())
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func isSubvolume(p string) (bool, error) {
|
|
var bufStat syscall.Stat_t
|
|
if err := syscall.Lstat(p, &bufStat); err != nil {
|
|
return false, err
|
|
}
|
|
|
|
// return true if it is a btrfs subvolume
|
|
return bufStat.Ino == C.BTRFS_FIRST_FREE_OBJECTID, nil
|
|
}
|
|
|
|
func subvolDelete(dirpath, name string) error {
|
|
dir, err := openDir(dirpath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer closeDir(dir)
|
|
|
|
var args C.struct_btrfs_ioctl_vol_args
|
|
|
|
// walk the btrfs subvolumes
|
|
walkSubvolumes := func(p string, f os.FileInfo, err error) error {
|
|
// we want to check children only so skip itself
|
|
// it will be removed after the filepath walk anyways
|
|
if f.IsDir() && p != path.Join(dirpath, name) {
|
|
sv, err := isSubvolume(p)
|
|
if err != nil {
|
|
return fmt.Errorf("Failed to test if %s is a btrfs subvolume: %v", p, err)
|
|
}
|
|
if sv {
|
|
if err := subvolDelete(p, f.Name()); err != nil {
|
|
return fmt.Errorf("Failed to destroy btrfs child subvolume (%s) of parent (%s): %v", p, dirpath, err)
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
if err := filepath.Walk(path.Join(dirpath, name), walkSubvolumes); err != nil {
|
|
return fmt.Errorf("Recursively walking subvolumes for %s failed: %v", dirpath, err)
|
|
}
|
|
|
|
// all subvolumes have been removed
|
|
// now remove the one originally passed in
|
|
for i, c := range []byte(name) {
|
|
args.name[i] = C.char(c)
|
|
}
|
|
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_SNAP_DESTROY,
|
|
uintptr(unsafe.Pointer(&args)))
|
|
if errno != 0 {
|
|
return fmt.Errorf("Failed to destroy btrfs snapshot %s for %s: %v", dirpath, name, errno.Error())
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (d *Driver) subvolumesDir() string {
|
|
return path.Join(d.home, "subvolumes")
|
|
}
|
|
|
|
func (d *Driver) subvolumesDirID(id string) string {
|
|
return path.Join(d.subvolumesDir(), id)
|
|
}
|
|
|
|
// Create the filesystem with given id.
|
|
func (d *Driver) Create(id, parent, mountLabel string) error {
|
|
subvolumes := path.Join(d.home, "subvolumes")
|
|
rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := idtools.MkdirAllAs(subvolumes, 0700, rootUID, rootGID); err != nil {
|
|
return err
|
|
}
|
|
if parent == "" {
|
|
if err := subvolCreate(subvolumes, id); err != nil {
|
|
return err
|
|
}
|
|
} else {
|
|
parentDir, err := d.Get(parent, "")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := subvolSnapshot(parentDir, subvolumes, id); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
return label.Relabel(path.Join(subvolumes, id), mountLabel, false)
|
|
}
|
|
|
|
// Remove the filesystem with given id.
|
|
func (d *Driver) Remove(id string) error {
|
|
dir := d.subvolumesDirID(id)
|
|
if _, err := os.Stat(dir); err != nil {
|
|
return err
|
|
}
|
|
if err := subvolDelete(d.subvolumesDir(), id); err != nil {
|
|
return err
|
|
}
|
|
return os.RemoveAll(dir)
|
|
}
|
|
|
|
// Get the requested filesystem id.
|
|
func (d *Driver) Get(id, mountLabel string) (string, error) {
|
|
dir := d.subvolumesDirID(id)
|
|
st, err := os.Stat(dir)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
if !st.IsDir() {
|
|
return "", fmt.Errorf("%s: not a directory", dir)
|
|
}
|
|
|
|
return dir, nil
|
|
}
|
|
|
|
// Put is not implemented for BTRFS as there is no cleanup required for the id.
|
|
func (d *Driver) Put(id string) error {
|
|
// Get() creates no runtime resources (like e.g. mounts)
|
|
// so this doesn't need to do anything.
|
|
return nil
|
|
}
|
|
|
|
// Exists checks if the id exists in the filesystem.
|
|
func (d *Driver) Exists(id string) bool {
|
|
dir := d.subvolumesDirID(id)
|
|
_, err := os.Stat(dir)
|
|
return err == nil
|
|
}
|