From 81e5026a6afb282589704fd5f6bcac9ed50108ea Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 11 Apr 2014 11:45:39 +0000 Subject: [PATCH] No not mount sysfs by default for non privilged containers Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- daemon/execdriver/lxc/lxc_template.go | 2 + daemon/execdriver/native/create.go | 11 +- integration-cli/docker_cli_run_test.go | 275 +------------------------ pkg/libcontainer/container.go | 27 ++- pkg/libcontainer/nsinit/mount.go | 42 ++-- 5 files changed, 67 insertions(+), 290 deletions(-) diff --git a/daemon/execdriver/lxc/lxc_template.go b/daemon/execdriver/lxc/lxc_template.go index 25c227ef15..bc94e7a19d 100644 --- a/daemon/execdriver/lxc/lxc_template.go +++ b/daemon/execdriver/lxc/lxc_template.go @@ -88,7 +88,9 @@ lxc.mount.entry = proc {{escapeFstabSpaces $ROOTFS}}/proc proc nosuid,nodev,noex # WARNING: sysfs is a known attack vector and should probably be disabled # if your userspace allows it. eg. see http://bit.ly/T9CkqJ +{{if .Privileged}} lxc.mount.entry = sysfs {{escapeFstabSpaces $ROOTFS}}/sys sysfs nosuid,nodev,noexec 0 0 +{{end}} {{if .Tty}} lxc.mount.entry = {{.Console}} {{escapeFstabSpaces $ROOTFS}}/dev/console none bind,rw 0 0 diff --git a/daemon/execdriver/native/create.go b/daemon/execdriver/native/create.go index 1edbd17ad3..e26ff8d2b8 100644 --- a/daemon/execdriver/native/create.go +++ b/daemon/execdriver/native/create.go @@ -82,6 +82,9 @@ func (d *driver) setPrivileged(container *libcontainer.Container) error { c.Enabled = true } container.Cgroups.DeviceAccess = true + + // add sysfs as a mount for privileged containers + container.Mounts = append(container.Mounts, libcontainer.Mount{Type: "sysfs"}) delete(container.Context, "restriction_path") if apparmor.IsEnabled() { @@ -101,7 +104,13 @@ func (d *driver) setupCgroups(container *libcontainer.Container, c *execdriver.C func (d *driver) setupMounts(container *libcontainer.Container, c *execdriver.Command) error { for _, m := range c.Mounts { - container.Mounts = append(container.Mounts, libcontainer.Mount{m.Source, m.Destination, m.Writable, m.Private}) + container.Mounts = append(container.Mounts, libcontainer.Mount{ + Type: "bind", + Source: m.Source, + Destination: m.Destination, + Writable: m.Writable, + Private: m.Private, + }) } return nil } diff --git a/integration-cli/docker_cli_run_test.go b/integration-cli/docker_cli_run_test.go index d356f5f4de..40781294ae 100644 --- a/integration-cli/docker_cli_run_test.go +++ b/integration-cli/docker_cli_run_test.go @@ -389,279 +389,24 @@ func TestMultipleVolumesFrom(t *testing.T) { logDone("run - multiple volumes from") } -// this tests verifies the ID format for the container -func TestVerifyContainerID(t *testing.T) { - cmd := exec.Command(dockerBinary, "run", "-d", "busybox", "true") - out, exit, err := runCommandWithOutput(cmd) - if err != nil { - t.Fatal(err) - } - if exit != 0 { - t.Fatalf("expected exit code 0 received %d", exit) - } - match, err := regexp.MatchString("^[0-9a-f]{64}$", strings.TrimSuffix(out, "\n")) - if err != nil { - t.Fatal(err) - } - if !match { - t.Fatalf("Invalid container ID: %s", out) +func TestSysNotAvaliableInNonPrivilegedContainers(t *testing.T) { + cmd := exec.Command(dockerBinary, "run", "busybox", "ls", "/sys/kernel") + if code, err := runCommand(cmd); err == nil || code == 0 { + t.Fatal("sys should not be available in a non privileged container") } deleteAllContainers() - logDone("run - verify container ID") + logDone("run - sys not avaliable in non privileged container") } -// Test that creating a container with a volume doesn't crash. Regression test for #995. -func TestCreateVolume(t *testing.T) { - cmd := exec.Command(dockerBinary, "run", "-v", "/var/lib/data", "busybox", "true") - if _, err := runCommand(cmd); err != nil { - t.Fatal(err) +func TestSysAvaliableInPrivilegedContainers(t *testing.T) { + cmd := exec.Command(dockerBinary, "run", "--privileged", "busybox", "ls", "/sys/kernel") + if code, err := runCommand(cmd); err != nil || code != 0 { + t.Fatalf("sys should be available in privileged container") } deleteAllContainers() - logDone("run - create docker mangaed volume") -} - -func TestExitCode(t *testing.T) { - cmd := exec.Command(dockerBinary, "run", "busybox", "/bin/sh", "-c", "exit 72") - - exit, err := runCommand(cmd) - if err == nil { - t.Fatal("should not have a non nil error") - } - if exit != 72 { - t.Fatalf("expected exit code 72 received %d", exit) - } - - deleteAllContainers() - - logDone("run - correct exit code") -} - -func TestUserDefaultsToRoot(t *testing.T) { - cmd := exec.Command(dockerBinary, "run", "busybox", "id") - - out, _, err := runCommandWithOutput(cmd) - if err != nil { - t.Fatal(err, out) - } - if !strings.Contains(out, "uid=0(root) gid=0(root)") { - t.Fatalf("expected root user got %s", out) - } - deleteAllContainers() - - logDone("run - default user") -} - -func TestUserByName(t *testing.T) { - cmd := exec.Command(dockerBinary, "run", "-u", "root", "busybox", "id") - - out, _, err := runCommandWithOutput(cmd) - if err != nil { - t.Fatal(err, out) - } - if !strings.Contains(out, "uid=0(root) gid=0(root)") { - t.Fatalf("expected root user got %s", out) - } - deleteAllContainers() - - logDone("run - user by name") -} - -func TestUserByID(t *testing.T) { - cmd := exec.Command(dockerBinary, "run", "-u", "1", "busybox", "id") - - out, _, err := runCommandWithOutput(cmd) - if err != nil { - t.Fatal(err, out) - } - if !strings.Contains(out, "uid=1(daemon) gid=1(daemon)") { - t.Fatalf("expected daemon user got %s", out) - } - deleteAllContainers() - - logDone("run - user by id") -} - -func TestUserNotFound(t *testing.T) { - cmd := exec.Command(dockerBinary, "run", "-u", "notme", "busybox", "id") - - _, err := runCommand(cmd) - if err == nil { - t.Fatal("unknown user should cause container to fail") - } - deleteAllContainers() - - logDone("run - user not found") -} - -func TestRunTwoConcurrentContainers(t *testing.T) { - group := sync.WaitGroup{} - group.Add(2) - - for i := 0; i < 2; i++ { - go func() { - defer group.Done() - cmd := exec.Command(dockerBinary, "run", "busybox", "sleep", "2") - if _, err := runCommand(cmd); err != nil { - t.Fatal(err) - } - }() - } - - group.Wait() - - deleteAllContainers() - - logDone("run - two concurrent containers") -} - -func TestEnvironment(t *testing.T) { - cmd := exec.Command(dockerBinary, "run", "-h", "testing", "-e=FALSE=true", "-e=TRUE", "-e=TRICKY", "busybox", "env") - cmd.Env = append(os.Environ(), - "TRUE=false", - "TRICKY=tri\ncky\n", - ) - - out, _, err := runCommandWithOutput(cmd) - if err != nil { - t.Fatal(err, out) - } - - actualEnv := strings.Split(out, "\n") - if actualEnv[len(actualEnv)-1] == "" { - actualEnv = actualEnv[:len(actualEnv)-1] - } - sort.Strings(actualEnv) - - goodEnv := []string{ - "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", - "HOME=/", - "HOSTNAME=testing", - "FALSE=true", - "TRUE=false", - "TRICKY=tri", - "cky", - "", - } - sort.Strings(goodEnv) - if len(goodEnv) != len(actualEnv) { - t.Fatalf("Wrong environment: should be %d variables, not: '%s'\n", len(goodEnv), strings.Join(actualEnv, ", ")) - } - for i := range goodEnv { - if actualEnv[i] != goodEnv[i] { - t.Fatalf("Wrong environment variable: should be %s, not %s", goodEnv[i], actualEnv[i]) - } - } - - deleteAllContainers() - - logDone("run - verify environment") -} - -func TestContainerNetwork(t *testing.T) { - cmd := exec.Command(dockerBinary, "run", "busybox", "ping", "-c", "1", "127.0.0.1") - if _, err := runCommand(cmd); err != nil { - t.Fatal(err) - } - - deleteAllContainers() - - logDone("run - test container network via ping") -} - -// Issue #4681 -func TestLoopbackWhenNetworkDisabled(t *testing.T) { - cmd := exec.Command(dockerBinary, "run", "--networking=false", "busybox", "ping", "-c", "1", "127.0.0.1") - if _, err := runCommand(cmd); err != nil { - t.Fatal(err) - } - - deleteAllContainers() - - logDone("run - test container loopback when networking disabled") -} - -func TestLoopbackOnlyExistsWhenNetworkingDisabled(t *testing.T) { - cmd := exec.Command(dockerBinary, "run", "--networking=false", "busybox", "ip", "a", "show", "up") - out, _, err := runCommandWithOutput(cmd) - if err != nil { - t.Fatal(err, out) - } - - interfaces := regexp.MustCompile(`(?m)^[0-9]+: [a-zA-Z0-9]+`).FindAllString(out, -1) - if len(interfaces) != 1 { - t.Fatalf("Wrong interface count in test container: expected [*: lo], got %s", interfaces) - } - if !strings.HasSuffix(interfaces[0], ": lo") { - t.Fatalf("Wrong interface in test container: expected [*: lo], got %s", interfaces) - } - - deleteAllContainers() - - logDone("run - test loopback only exists when networking disabled") -} - -func TestPrivilegedCanMknod(t *testing.T) { - cmd := exec.Command(dockerBinary, "run", "--privileged", "busybox", "sh", "-c", "mknod /tmp/sda b 8 0 && echo ok") - out, _, err := runCommandWithOutput(cmd) - if err != nil { - t.Fatal(err) - } - - if actual := strings.Trim(out, "\r\n"); actual != "ok" { - t.Fatalf("expected output ok received %s", actual) - } - deleteAllContainers() - - logDone("run - test privileged can mknod") -} - -func TestUnPrivilegedCanMknod(t *testing.T) { - cmd := exec.Command(dockerBinary, "run", "busybox", "sh", "-c", "mknod /tmp/sda b 8 0 && echo ok") - out, _, err := runCommandWithOutput(cmd) - if err != nil { - t.Fatal(err) - } - - if actual := strings.Trim(out, "\r\n"); actual != "ok" { - t.Fatalf("expected output ok received %s", actual) - } - deleteAllContainers() - - logDone("run - test un-privileged can mknod") -} - -func TestPrivilegedCanMount(t *testing.T) { - cmd := exec.Command(dockerBinary, "run", "--privileged", "busybox", "sh", "-c", "mount -t tmpfs none /tmp && echo ok") - - out, _, err := runCommandWithOutput(cmd) - if err != nil { - t.Fatal(err) - } - - if actual := strings.Trim(out, "\r\n"); actual != "ok" { - t.Fatalf("expected output ok received %s", actual) - } - deleteAllContainers() - - logDone("run - test privileged can mount") -} - -func TestUnPrivilegedCannotMount(t *testing.T) { - cmd := exec.Command(dockerBinary, "run", "busybox", "sh", "-c", "mount -t tmpfs none /tmp && echo ok") - - out, _, err := runCommandWithOutput(cmd) - if err == nil { - t.Fatal(err, out) - } - - if actual := strings.Trim(out, "\r\n"); actual == "ok" { - t.Fatalf("expected output not ok received %s", actual) - } - deleteAllContainers() - - logDone("run - test un-privileged cannot mount") + logDone("run - sys avaliable in privileged container") } diff --git a/pkg/libcontainer/container.go b/pkg/libcontainer/container.go index c7cac35428..1e032c0642 100644 --- a/pkg/libcontainer/container.go +++ b/pkg/libcontainer/container.go @@ -23,7 +23,7 @@ type Container struct { Networks []*Network `json:"networks,omitempty"` // nil for host's network stack Cgroups *cgroups.Cgroup `json:"cgroups,omitempty"` // cgroups Context Context `json:"context,omitempty"` // generic context for specific options (apparmor, selinux) - Mounts []Mount `json:"mounts,omitempty"` + Mounts Mounts `json:"mounts,omitempty"` } // Network defines configuration for a container's networking stack @@ -38,11 +38,22 @@ type Network struct { Mtu int `json:"mtu,omitempty"` } -// Bind mounts from the host system to the container -// -type Mount struct { - Source string `json:"source"` // Source path, in the host namespace - Destination string `json:"destination"` // Destination path, in the container - Writable bool `json:"writable"` - Private bool `json:"private"` +type Mounts []Mount + +func (s Mounts) OfType(t string) Mounts { + out := Mounts{} + for _, m := range s { + if m.Type == t { + out = append(out, m) + } + } + return out +} + +type Mount struct { + Type string `json:"type,omitempty"` + Source string `json:"source,omitempty"` // Source path, in the host namespace + Destination string `json:"destination,omitempty"` // Destination path, in the container + Writable bool `json:"writable,omitempty"` + Private bool `json:"private,omitempty"` } diff --git a/pkg/libcontainer/nsinit/mount.go b/pkg/libcontainer/nsinit/mount.go index e4869a0ecb..ee480328c0 100644 --- a/pkg/libcontainer/nsinit/mount.go +++ b/pkg/libcontainer/nsinit/mount.go @@ -17,6 +17,14 @@ import ( // default mount point flags const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV +type mount struct { + source string + path string + device string + flags int + data string +} + // setupNewMountNamespace is used to initialize a new mount namespace for an new // container in the rootfs that is specified. // @@ -33,7 +41,7 @@ func setupNewMountNamespace(rootfs, console string, container *libcontainer.Cont if err := system.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil { return fmt.Errorf("mouting %s as bind %s", rootfs, err) } - if err := mountSystem(rootfs, container.Context["mount_label"]); err != nil { + if err := mountSystem(rootfs, container); err != nil { return fmt.Errorf("mount system %s", err) } if err := setupBindmounts(rootfs, container.Mounts); err != nil { @@ -183,19 +191,8 @@ func setupConsole(rootfs, console string, mountLabel string) error { // mountSystem sets up linux specific system mounts like sys, proc, shm, and devpts // inside the mount namespace -func mountSystem(rootfs string, mountLabel string) error { - for _, m := range []struct { - source string - path string - device string - flags int - data string - }{ - {source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags}, - {source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags}, - {source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)}, - {source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)}, - } { +func mountSystem(rootfs string, container *libcontainer.Container) error { + for _, m := range newSystemMounts(rootfs, container.Context["mount_label"], container.Mounts) { if err := os.MkdirAll(m.path, 0755); err != nil && !os.IsExist(err) { return fmt.Errorf("mkdirall %s %s", m.path, err) } @@ -249,8 +246,8 @@ func remountSys() error { return nil } -func setupBindmounts(rootfs string, bindMounts []libcontainer.Mount) error { - for _, m := range bindMounts { +func setupBindmounts(rootfs string, bindMounts libcontainer.Mounts) error { + for _, m := range bindMounts.OfType("bind") { var ( flags = syscall.MS_BIND | syscall.MS_REC dest = filepath.Join(rootfs, m.Destination) @@ -274,3 +271,16 @@ func setupBindmounts(rootfs string, bindMounts []libcontainer.Mount) error { } return nil } + +func newSystemMounts(rootfs, mountLabel string, mounts libcontainer.Mounts) []mount { + systemMounts := []mount{ + {source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags}, + {source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)}, + {source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)}, + } + + if len(mounts.OfType("sysfs")) == 1 { + systemMounts = append(systemMounts, mount{source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags}) + } + return systemMounts +}