From f5139233b930e436707a65cc032aa2952edd6e4a Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Thu, 1 May 2014 10:08:18 -0700 Subject: [PATCH] Update restrictions for better handling of mounts This also cleans up some of the left over restriction paths code from before. Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- daemon/execdriver/lxc/driver.go | 60 +++++++---------- daemon/execdriver/lxc/lxc_template.go | 12 +--- daemon/execdriver/native/create.go | 4 +- daemon/execdriver/native/driver.go | 7 -- pkg/libcontainer/mount/init.go | 7 +- pkg/libcontainer/nsinit/init.go | 4 +- .../security/restrict/restrict.go | 65 ++++++------------- .../security/restrict/unsupported.go | 2 +- 8 files changed, 54 insertions(+), 107 deletions(-) diff --git a/daemon/execdriver/lxc/driver.go b/daemon/execdriver/lxc/driver.go index 3fe44202ac..92a79ff5a5 100644 --- a/daemon/execdriver/lxc/driver.go +++ b/daemon/execdriver/lxc/driver.go @@ -2,12 +2,6 @@ package lxc import ( "fmt" - "github.com/dotcloud/docker/daemon/execdriver" - "github.com/dotcloud/docker/pkg/cgroups" - "github.com/dotcloud/docker/pkg/label" - "github.com/dotcloud/docker/pkg/libcontainer/security/restrict" - "github.com/dotcloud/docker/pkg/system" - "github.com/dotcloud/docker/utils" "io/ioutil" "log" "os" @@ -18,6 +12,13 @@ import ( "strings" "syscall" "time" + + "github.com/dotcloud/docker/daemon/execdriver" + "github.com/dotcloud/docker/pkg/cgroups" + "github.com/dotcloud/docker/pkg/label" + "github.com/dotcloud/docker/pkg/libcontainer/security/restrict" + "github.com/dotcloud/docker/pkg/system" + "github.com/dotcloud/docker/utils" ) const DriverName = "lxc" @@ -27,31 +28,26 @@ func init() { if err := setupEnv(args); err != nil { return err } - if err := setupHostname(args); err != nil { return err } - if err := setupNetworking(args); err != nil { return err } - - if err := restrict.Restrict("/", "/empty"); err != nil { - return err + if !args.Privileged { + if err := 
restrict.Restrict(); err != nil { + return err + } } - if err := setupCapabilities(args); err != nil { return err } - if err := setupWorkingDirectory(args); err != nil { return err } - if err := system.CloseFdsFrom(3); err != nil { return err } - if err := changeUser(args); err != nil { return err } @@ -69,10 +65,9 @@ func init() { } type driver struct { - root string // root path for the driver to use - apparmor bool - sharedRoot bool - restrictionPath string + root string // root path for the driver to use + apparmor bool + sharedRoot bool } func NewDriver(root string, apparmor bool) (*driver, error) { @@ -80,15 +75,10 @@ func NewDriver(root string, apparmor bool) (*driver, error) { if err := linkLxcStart(root); err != nil { return nil, err } - restrictionPath := filepath.Join(root, "empty") - if err := os.MkdirAll(restrictionPath, 0700); err != nil { - return nil, err - } return &driver{ - apparmor: apparmor, - root: root, - sharedRoot: rootIsShared(), - restrictionPath: restrictionPath, + apparmor: apparmor, + root: root, + sharedRoot: rootIsShared(), }, nil } @@ -419,16 +409,14 @@ func (d *driver) generateLXCConfig(c *execdriver.Command) (string, error) { if err := LxcTemplateCompiled.Execute(fo, struct { *execdriver.Command - AppArmor bool - ProcessLabel string - MountLabel string - RestrictionSource string + AppArmor bool + ProcessLabel string + MountLabel string }{ - Command: c, - AppArmor: d.apparmor, - ProcessLabel: process, - MountLabel: mount, - RestrictionSource: d.restrictionPath, + Command: c, + AppArmor: d.apparmor, + ProcessLabel: process, + MountLabel: mount, }); err != nil { return "", err } diff --git a/daemon/execdriver/lxc/lxc_template.go b/daemon/execdriver/lxc/lxc_template.go index 03d32e72b5..19fa43c4c2 100644 --- a/daemon/execdriver/lxc/lxc_template.go +++ b/daemon/execdriver/lxc/lxc_template.go @@ -1,10 +1,11 @@ package lxc import ( - "github.com/dotcloud/docker/daemon/execdriver" - "github.com/dotcloud/docker/pkg/label" "strings" 
"text/template" + + "github.com/dotcloud/docker/daemon/execdriver" + "github.com/dotcloud/docker/pkg/label" ) const LxcTemplate = ` @@ -110,13 +111,6 @@ lxc.aa_profile = unconfined {{else}} # Let AppArmor normal confinement take place (i.e., not unconfined) {{end}} -{{else}} -# Restrict access to some stuff in /proc. Note that /proc is already mounted -# read-only, so we don't need to bother about things that are just dangerous -# to write to (like sysrq-trigger). Also, recent kernels won't let a container -# peek into /proc/kcore, but let's cater for people who might run Docker on -# older kernels. Just in case. -lxc.mount.entry = {{escapeFstabSpaces $ROOTFS}}/dev/null {{escapeFstabSpaces $ROOTFS}}/proc/kcore none bind,ro 0 0 {{end}} # limits diff --git a/daemon/execdriver/native/create.go b/daemon/execdriver/native/create.go index 6f663f916e..5562d08986 100644 --- a/daemon/execdriver/native/create.go +++ b/daemon/execdriver/native/create.go @@ -24,7 +24,7 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Container container.Cgroups.Name = c.ID // check to see if we are running in ramdisk to disable pivot root container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != "" - container.Context["restriction_path"] = d.restrictionPath + container.Context["restrictions"] = "true" if err := d.createNetwork(container, c); err != nil { return nil, err @@ -84,7 +84,7 @@ func (d *driver) setPrivileged(container *libcontainer.Container) error { } container.Cgroups.DeviceAccess = true - delete(container.Context, "restriction_path") + delete(container.Context, "restrictions") if apparmor.IsEnabled() { container.Context["apparmor_profile"] = "unconfined" diff --git a/daemon/execdriver/native/driver.go b/daemon/execdriver/native/driver.go index a397387f11..e674d57333 100644 --- a/daemon/execdriver/native/driver.go +++ b/daemon/execdriver/native/driver.go @@ -57,7 +57,6 @@ type driver struct { root string initPath string activeContainers map[string]*exec.Cmd - 
restrictionPath string } func NewDriver(root, initPath string) (*driver, error) { @@ -68,14 +67,8 @@ func NewDriver(root, initPath string) (*driver, error) { if err := apparmor.InstallDefaultProfile(filepath.Join(root, "../..", BackupApparmorProfilePath)); err != nil { return nil, err } - restrictionPath := filepath.Join(root, "empty") - if err := os.MkdirAll(restrictionPath, 0700); err != nil { - return nil, err - } - return &driver{ root: root, - restrictionPath: restrictionPath, initPath: initPath, activeContainers: make(map[string]*exec.Cmd), }, nil diff --git a/pkg/libcontainer/mount/init.go b/pkg/libcontainer/mount/init.go index cc3ce2158e..6a54f2444e 100644 --- a/pkg/libcontainer/mount/init.go +++ b/pkg/libcontainer/mount/init.go @@ -123,15 +123,12 @@ func newSystemMounts(rootfs, mountLabel string, mounts libcontainer.Mounts) []mo systemMounts := []mount{ {source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags}, {source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags}, + {source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)}, + {source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)}, } if len(mounts.OfType("devtmpfs")) == 1 { systemMounts = append(systemMounts, mount{source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: label.FormatMountLabel("mode=755", mountLabel)}) } - systemMounts = append(systemMounts, - mount{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)}, - mount{source: "devpts", path: filepath.Join(rootfs, "dev", 
"pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)}, - ) - return systemMounts } diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index 90b97a9f99..755847948e 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -72,8 +72,8 @@ func Init(container *libcontainer.Container, uncleanRootfs, consolePath string, runtime.LockOSThread() - if restrictionPath := container.Context["restriction_path"]; restrictionPath != "" { - if err := restrict.Restrict("/", restrictionPath); err != nil { + if container.Context["restrictions"] != "" { + if err := restrict.Restrict(); err != nil { return err } } diff --git a/pkg/libcontainer/security/restrict/restrict.go b/pkg/libcontainer/security/restrict/restrict.go index a9bdc4bacb..2b7cea5a48 100644 --- a/pkg/libcontainer/security/restrict/restrict.go +++ b/pkg/libcontainer/security/restrict/restrict.go @@ -11,67 +11,42 @@ import ( "github.com/dotcloud/docker/pkg/system" ) -// "restrictions" are container paths (files, directories, whatever) that have to be masked. -// maskPath is a "safe" path to be mounted over maskedPath. It can take two special values: -// - if it is "", then nothing is mounted; -// - if it is "EMPTY", then an empty directory is mounted instead. -// If remountRO is true then the maskedPath is remounted read-only (regardless of whether a maskPath was used). -type restriction struct { - maskedPath string - maskPath string - remountRO bool -} - -var restrictions = []restriction{ - {"/proc", "", true}, - {"/sys", "", true}, - {"/proc/kcore", "/dev/null", false}, -} - // This has to be called while the container still has CAP_SYS_ADMIN (to be able to perform mounts). // However, afterwards, CAP_SYS_ADMIN should be dropped (otherwise the user will be able to revert those changes). -// "empty" should be the path to an empty directory. 
-func Restrict(rootfs, empty string) error { - for _, restriction := range restrictions { - dest := filepath.Join(rootfs, restriction.maskedPath) - if restriction.maskPath != "" { - var source string - if restriction.maskPath == "EMPTY" { - source = empty - } else { - source = filepath.Join(rootfs, restriction.maskPath) - } - if err := system.Mount(source, dest, "", syscall.MS_BIND, ""); err != nil { - return fmt.Errorf("unable to bind-mount %s over %s: %s", source, dest, err) - } - } - if restriction.remountRO { - if err := system.Mount("", dest, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil { - return fmt.Errorf("unable to remount %s readonly: %s", dest, err) - } +func Restrict() error { + // remount proc and sys as readonly + for _, dest := range []string{"proc", "sys"} { + if err := system.Mount("", dest, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil { + return fmt.Errorf("unable to remount %s readonly: %s", dest, err) } } + if err := system.Mount("/dev/null", "/proc/kcore", "", syscall.MS_BIND, ""); err != nil { + return fmt.Errorf("unable to bind-mount /dev/null over /proc/kcore: %s", err) + } + // This weird trick will allow us to mount /proc read-only, while being able to use AppArmor. // This is because apparently, loading an AppArmor profile requires write access to /proc/1/attr. // So we do another mount of procfs, ensure it's write-able, and bind-mount a subset of it. 
- tmpProcPath := filepath.Join(rootfs, ".proc") - if err := os.Mkdir(tmpProcPath, 0700); err != nil { - return fmt.Errorf("unable to create temporary proc mountpoint %s: %s", tmpProcPath, err) + var ( + rwAttrPath = filepath.Join(".proc", "1", "attr") + roAttrPath = filepath.Join("proc", "1", "attr") + ) + + if err := os.Mkdir(".proc", 0700); err != nil { + return fmt.Errorf("unable to create temporary proc mountpoint .proc: %s", err) } - if err := system.Mount("proc", tmpProcPath, "proc", 0, ""); err != nil { + if err := system.Mount("proc", ".proc", "proc", 0, ""); err != nil { return fmt.Errorf("unable to mount proc on temporary proc mountpoint: %s", err) } - if err := system.Mount("proc", tmpProcPath, "", syscall.MS_REMOUNT, ""); err != nil { + if err := system.Mount("proc", ".proc", "", syscall.MS_REMOUNT, ""); err != nil { return fmt.Errorf("unable to remount proc read-write: %s", err) } - rwAttrPath := filepath.Join(rootfs, ".proc", "1", "attr") - roAttrPath := filepath.Join(rootfs, "proc", "1", "attr") if err := system.Mount(rwAttrPath, roAttrPath, "", syscall.MS_BIND, ""); err != nil { return fmt.Errorf("unable to bind-mount %s on %s: %s", rwAttrPath, roAttrPath, err) } - if err := system.Unmount(tmpProcPath, 0); err != nil { + if err := system.Unmount(".proc", 0); err != nil { return fmt.Errorf("unable to unmount temporary proc filesystem: %s", err) } - return nil + return os.RemoveAll(".proc") } diff --git a/pkg/libcontainer/security/restrict/unsupported.go b/pkg/libcontainer/security/restrict/unsupported.go index 6898baab3d..464e8d498d 100644 --- a/pkg/libcontainer/security/restrict/unsupported.go +++ b/pkg/libcontainer/security/restrict/unsupported.go @@ -4,6 +4,6 @@ package restrict import "fmt" -func Restrict(rootfs, empty string) error { +func Restrict() error { return fmt.Errorf("not supported") }