diff --git a/daemon/container.go b/daemon/container.go index aacd90a449..1efe511c85 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -254,6 +254,8 @@ func populateCommand(c *Container, env []string) error { Resources: resources, AllowedDevices: allowedDevices, AutoCreatedDevices: autoCreatedDevices, + CapAdd: c.hostConfig.CapAdd, + CapDrop: c.hostConfig.CapDrop, } c.command.SysProcAttr = &syscall.SysProcAttr{Setsid: true} c.command.Env = env diff --git a/daemon/execdriver/driver.go b/daemon/execdriver/driver.go index a3d3bc260a..d52a2ac96c 100644 --- a/daemon/execdriver/driver.go +++ b/daemon/execdriver/driver.go @@ -60,6 +60,8 @@ type InitArgs struct { Console string Pipe int Root string + CapAdd string + CapDrop string } // Driver specific information based on @@ -140,6 +142,8 @@ type Command struct { Mounts []Mount `json:"mounts"` AllowedDevices []*devices.Device `json:"allowed_devices"` AutoCreatedDevices []*devices.Device `json:"autocreated_devices"` + CapAdd []string `json:"cap_add"` + CapDrop []string `json:"cap_drop"` Terminal Terminal `json:"-"` // standard or tty terminal Console string `json:"-"` // dev/console path diff --git a/daemon/execdriver/lxc/driver.go b/daemon/execdriver/lxc/driver.go index 59daf1afe1..2faada2350 100644 --- a/daemon/execdriver/lxc/driver.go +++ b/daemon/execdriver/lxc/driver.go @@ -122,6 +122,14 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba params = append(params, "-w", c.WorkingDir) } + if len(c.CapAdd) > 0 { + params = append(params, "-cap-add", strings.Join(c.CapAdd, " ")) + } + + if len(c.CapDrop) > 0 { + params = append(params, "-cap-drop", strings.Join(c.CapDrop, " ")) + } + params = append(params, "--", c.Entrypoint) params = append(params, c.Arguments...) 
diff --git a/daemon/execdriver/lxc/lxc_init_linux.go b/daemon/execdriver/lxc/lxc_init_linux.go index 1fd497e9aa..40956e442b 100644 --- a/daemon/execdriver/lxc/lxc_init_linux.go +++ b/daemon/execdriver/lxc/lxc_init_linux.go @@ -4,6 +4,7 @@ package lxc import ( "fmt" + "strings" "syscall" "github.com/docker/libcontainer/namespaces" @@ -48,8 +49,15 @@ func finalizeNamespace(args *execdriver.InitArgs) error { return fmt.Errorf("clear keep caps %s", err) } + // strings.Fields, unlike strings.Split, yields no elements for an empty flag value, + // so an unset -cap-add/-cap-drop is not rejected as the unknown capability "". + caps, err := execdriver.TweakCapabilities(container.Capabilities, strings.Fields(args.CapAdd), strings.Fields(args.CapDrop)) + if err != nil { + return err + } + // drop all other capabilities - if err := capabilities.DropCapabilities(container.Capabilities); err != nil { + if err := capabilities.DropCapabilities(caps); err != nil { return fmt.Errorf("drop capabilities %s", err) } } diff --git a/daemon/execdriver/native/create.go b/daemon/execdriver/native/create.go index f28507b046..13f81c7180 100644 --- a/daemon/execdriver/native/create.go +++ b/daemon/execdriver/native/create.go @@ -42,6 +42,10 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, e if err := d.setPrivileged(container); err != nil { return nil, err } + } else { + if err := d.setCapabilities(container, c); err != nil { + return nil, err + } } if err := d.setupCgroups(container, c); err != nil { @@ -136,6 +140,13 @@ func (d *driver) setPrivileged(container *libcontainer.Config) (err error) { return nil } +// setCapabilities replaces container.Capabilities with the result of applying +// c.CapAdd and c.CapDrop to it through execdriver.TweakCapabilities. +func (d *driver) setCapabilities(container *libcontainer.Config, c *execdriver.Command) (err error) { + container.Capabilities, err = execdriver.TweakCapabilities(container.Capabilities, c.CapAdd, c.CapDrop) + return err +} + func (d *driver) setupCgroups(container *libcontainer.Config, c *execdriver.Command) error { if c.Resources != nil { container.Cgroups.CpuShares = c.Resources.CpuShares diff --git a/daemon/execdriver/utils.go b/daemon/execdriver/utils.go new file mode 100644 index 0000000000..90c5177421 ---
/dev/null +++ b/daemon/execdriver/utils.go @@ -0,0 +1,69 @@ +package execdriver + +import ( + "fmt" + "strings" + + "github.com/docker/libcontainer/security/capabilities" + "github.com/dotcloud/docker/utils" +) + +// TweakCapabilities computes the capability list that results from applying +// the adds and drops lists to basics. Names are matched case-insensitively. +// "all" in adds replaces basics with every known capability; "all" in drops +// discards basics entirely, so only explicitly added capabilities remain. +// An error is returned for any name in adds or drops that is neither "all" +// nor a known capability. +func TweakCapabilities(basics, adds, drops []string) ([]string, error) { + var ( + newCaps []string + allCaps = capabilities.GetAllCapabilities() + ) + + // look for invalid cap in the drop list + for _, cap := range drops { + if strings.ToLower(cap) == "all" { + continue + } + if !utils.StringsContainsNoCase(allCaps, cap) { + return nil, fmt.Errorf("Unknown capability: %s", cap) + } + } + + // handle --cap-add=all + if utils.StringsContainsNoCase(adds, "all") { + basics = capabilities.GetAllCapabilities() + } + + if !utils.StringsContainsNoCase(drops, "all") { + for _, cap := range basics { + // skip `all`, already handled above + if strings.ToLower(cap) == "all" { + continue + } + + // if we don't drop `all`, add back all the non-dropped caps + if !utils.StringsContainsNoCase(drops, cap) { + newCaps = append(newCaps, cap) + } + } + } + + for _, cap := range adds { + // skip `all`, already handled above + if strings.ToLower(cap) == "all" { + continue + } + + // look for invalid cap in the add list + if !utils.StringsContainsNoCase(allCaps, cap) { + return nil, fmt.Errorf("Unknown capability: %s", cap) + } + + // add cap if not already in the list + if !utils.StringsContainsNoCase(newCaps, cap) { + newCaps = append(newCaps, cap) + } + } + return newCaps, nil +} diff --git a/docs/sources/reference/api/docker_remote_api.md b/docs/sources/reference/api/docker_remote_api.md index 04b750f4c5..c2916e5de0 100644 --- a/docs/sources/reference/api/docker_remote_api.md +++ b/docs/sources/reference/api/docker_remote_api.md @@ -43,6 +43,12 @@ You can now use the `stop` parameter to stop running containers before removal **New!** You can now use the `kill` parameter to kill running containers before removal. 
+`POST /containers/(id)/start` + +**New!** +The `hostConfig` option now accepts the field `CapAdd`, which specifies a list of capabilities +to add, and the field `CapDrop`, which specifies a list of capabilities to drop. + ## v1.13 ### Full Documentation diff --git a/docs/sources/reference/api/docker_remote_api_v1.14.md b/docs/sources/reference/api/docker_remote_api_v1.14.md index 1d75b88588..ce876594c5 100644 --- a/docs/sources/reference/api/docker_remote_api_v1.14.md +++ b/docs/sources/reference/api/docker_remote_api_v1.14.md @@ -241,7 +241,9 @@ Return low-level information on the container `id` ] }, "Links": ["/name:alias"], - "PublishAllPorts": false + "PublishAllPorts": false, + "CapAdd": ["NET_ADMIN"], + "CapDrop": ["MKNOD"] } } @@ -410,7 +412,9 @@ Start the container `id` "PublishAllPorts":false, "Privileged":false, "Dns": ["8.8.8.8"], - "VolumesFrom": ["parent", "other:ro"] + "VolumesFrom": ["parent", "other:ro"], + "CapAdd": ["NET_ADMIN"], + "CapDrop": ["MKNOD"] } **Example response**: diff --git a/docs/sources/reference/run.md b/docs/sources/reference/run.md index 1bd70e83f0..f8ced8d734 100644 --- a/docs/sources/reference/run.md +++ b/docs/sources/reference/run.md @@ -55,7 +55,7 @@ following options. - [Network Settings](#network-settings) - [Clean Up (--rm)](#clean-up-rm) - [Runtime Constraints on CPU and Memory](#runtime-constraints-on-cpu-and-memory) - - [Runtime Privilege and LXC Configuration](#runtime-privilege-and-lxc-configuration) + - [Runtime Privilege, Linux Capabilities, and LXC Configuration](#runtime-privilege-linux-capabilities-and-lxc-configuration) ## Detached vs Foreground @@ -222,8 +222,10 @@ get the same proportion of CPU cycles, but you can tell the kernel to give more shares of CPU time to one or more containers when you start them via Docker. 
-## Runtime Privilege and LXC Configuration +## Runtime Privilege, Linux Capabilities, and LXC Configuration + --cap-add: Add Linux capabilities + --cap-drop: Drop Linux capabilities --privileged=false: Give extended privileges to this container --lxc-conf=[]: (lxc exec-driver only) Add custom lxc options --lxc-conf="lxc.cgroup.cpuset.cpus = 0,1" @@ -242,6 +244,16 @@ host as processes running outside containers on the host. Additional information about running with `--privileged` is available on the [Docker Blog](http://blog.docker.com/2013/09/docker-can-now-run-within-docker/). +In addition to `--privileged`, the operator can have fine-grained control over the +capabilities using `--cap-add` and `--cap-drop`. Docker keeps a default list of +capabilities for every container. Both flags support the value `all`, so if the +operator wants to have all capabilities except `MKNOD` they could use: + + $ docker run --cap-add=ALL --cap-drop=MKNOD ... + +To interact with the network stack, instead of using `--privileged` they +should use `--cap-add=NET_ADMIN` to modify the network interfaces. + If the Docker daemon was started using the `lxc` exec-driver (`docker -d --exec-driver=lxc`) then the operator can also specify LXC options using one or more `--lxc-conf` parameters. 
These can be new parameters or diff --git a/integration-cli/docker_cli_run_test.go b/integration-cli/docker_cli_run_test.go index cf0f4b7e3d..dba8e7fe28 100644 --- a/integration-cli/docker_cli_run_test.go +++ b/integration-cli/docker_cli_run_test.go @@ -783,6 +783,124 @@ func TestUnPrivilegedCanMknod(t *testing.T) { logDone("run - test un-privileged can mknod") } +// TestCapDropInvalid checks that docker run fails when --cap-drop is given the invalid name CHPASS. +func TestCapDropInvalid(t *testing.T) { + cmd := exec.Command(dockerBinary, "run", "--cap-drop=CHPASS", "busybox", "ls") + out, _, err := runCommandWithOutput(cmd) + if err == nil { + t.Fatalf("expected the command to fail, got output: %s", out) + } + + logDone("run - test --cap-drop=CHPASS invalid") +} + +// TestCapDropCannotMknod checks that mknod fails in a container run with --cap-drop=MKNOD. +func TestCapDropCannotMknod(t *testing.T) { + cmd := exec.Command(dockerBinary, "run", "--cap-drop=MKNOD", "busybox", "sh", "-c", "mknod /tmp/sda b 8 0 && echo ok") + out, _, err := runCommandWithOutput(cmd) + if err == nil { + t.Fatalf("expected the command to fail, got output: %s", out) + } + + if actual := strings.Trim(out, "\r\n"); actual == "ok" { + t.Fatalf("expected output not ok received %s", actual) + } + deleteAllContainers() + + logDone("run - test --cap-drop=MKNOD cannot mknod") +} + +// TestCapDropALLCannotMknod checks that mknod fails in a container run with --cap-drop=ALL. +func TestCapDropALLCannotMknod(t *testing.T) { + cmd := exec.Command(dockerBinary, "run", "--cap-drop=ALL", "busybox", "sh", "-c", "mknod /tmp/sda b 8 0 && echo ok") + out, _, err := runCommandWithOutput(cmd) + if err == nil { + t.Fatalf("expected the command to fail, got output: %s", out) + } + + if actual := strings.Trim(out, "\r\n"); actual == "ok" { + t.Fatalf("expected output not ok received %s", actual) + } + deleteAllContainers() + + logDone("run - test --cap-drop=ALL cannot mknod") +} + +// TestCapDropALLAddMknodCannotMknod checks that, despite its name, mknod succeeds with --cap-drop=ALL --cap-add=MKNOD. +func TestCapDropALLAddMknodCannotMknod(t *testing.T) { + cmd := exec.Command(dockerBinary, "run", "--cap-drop=ALL", "--cap-add=MKNOD", "busybox", "sh", "-c", "mknod /tmp/sda b 8 0 && echo ok") + out, _, err := runCommandWithOutput(cmd) + if err != nil { + t.Fatal(err, out) + } + + if actual := strings.Trim(out, "\r\n"); actual != "ok" { + t.Fatalf("expected output ok received %s", actual) + } + deleteAllContainers() + + logDone("run - test 
--cap-drop=ALL --cap-add=MKNOD can mknod") +} + +// TestCapAddInvalid checks that docker run fails when --cap-add is given the invalid name CHPASS. +func TestCapAddInvalid(t *testing.T) { + cmd := exec.Command(dockerBinary, "run", "--cap-add=CHPASS", "busybox", "ls") + out, _, err := runCommandWithOutput(cmd) + if err == nil { + t.Fatalf("expected the command to fail, got output: %s", out) + } + + logDone("run - test --cap-add=CHPASS invalid") +} + +// TestCapAddCanDownInterface checks that eth0 can be set down in a container run with --cap-add=NET_ADMIN. +func TestCapAddCanDownInterface(t *testing.T) { + cmd := exec.Command(dockerBinary, "run", "--cap-add=NET_ADMIN", "busybox", "sh", "-c", "ip link set eth0 down && echo ok") + out, _, err := runCommandWithOutput(cmd) + if err != nil { + t.Fatal(err, out) + } + + if actual := strings.Trim(out, "\r\n"); actual != "ok" { + t.Fatalf("expected output ok received %s", actual) + } + deleteAllContainers() + + logDone("run - test --cap-add=NET_ADMIN can set eth0 down") +} + +// TestCapAddALLCanDownInterface checks that eth0 can be set down in a container run with --cap-add=ALL. +func TestCapAddALLCanDownInterface(t *testing.T) { + cmd := exec.Command(dockerBinary, "run", "--cap-add=ALL", "busybox", "sh", "-c", "ip link set eth0 down && echo ok") + out, _, err := runCommandWithOutput(cmd) + if err != nil { + t.Fatal(err, out) + } + + if actual := strings.Trim(out, "\r\n"); actual != "ok" { + t.Fatalf("expected output ok received %s", actual) + } + deleteAllContainers() + + logDone("run - test --cap-add=ALL can set eth0 down") +} + +// TestCapAddALLDropNetAdminCanDownInterface checks that, despite its name, eth0 cannot be set down with --cap-add=ALL --cap-drop=NET_ADMIN. +func TestCapAddALLDropNetAdminCanDownInterface(t *testing.T) { + cmd := exec.Command(dockerBinary, "run", "--cap-add=ALL", "--cap-drop=NET_ADMIN", "busybox", "sh", "-c", "ip link set eth0 down && echo ok") + out, _, err := runCommandWithOutput(cmd) + if err == nil { + t.Fatalf("expected the command to fail, got output: %s", out) + } + + if actual := strings.Trim(out, "\r\n"); actual == "ok" { + t.Fatalf("expected output not ok received %s", actual) + } + deleteAllContainers() + + logDone("run - test --cap-add=ALL --cap-drop=NET_ADMIN cannot set eth0 down") +} + func TestPrivilegedCanMount(t *testing.T) { cmd := exec.Command(dockerBinary, "run", "--privileged", "busybox", "sh", "-c", "mount -t tmpfs none /tmp && echo ok") diff --git a/runconfig/hostconfig.go b/runconfig/hostconfig.go index 
f4aa69fe97..c68f764588 100644 --- a/runconfig/hostconfig.go +++ b/runconfig/hostconfig.go @@ -38,6 +38,8 @@ type HostConfig struct { VolumesFrom []string Devices []DeviceMapping NetworkMode NetworkMode + CapAdd []string + CapDrop []string } func ContainerHostConfigFromJob(job *engine.Job) *HostConfig { @@ -65,5 +67,11 @@ func ContainerHostConfigFromJob(job *engine.Job) *HostConfig { if VolumesFrom := job.GetenvList("VolumesFrom"); VolumesFrom != nil { hostConfig.VolumesFrom = VolumesFrom } + if CapAdd := job.GetenvList("CapAdd"); CapAdd != nil { + hostConfig.CapAdd = CapAdd + } + if CapDrop := job.GetenvList("CapDrop"); CapDrop != nil { + hostConfig.CapDrop = CapDrop + } return hostConfig } diff --git a/runconfig/parse.go b/runconfig/parse.go index f7d1d5963f..3e52007544 100644 --- a/runconfig/parse.go +++ b/runconfig/parse.go @@ -50,6 +50,8 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf flVolumesFrom opts.ListOpts flLxcOpts opts.ListOpts flEnvFile opts.ListOpts + flCapAdd opts.ListOpts + flCapDrop opts.ListOpts flAutoRemove = cmd.Bool([]string{"#rm", "-rm"}, false, "Automatically remove the container when it exits (incompatible with -d)") flDetach = cmd.Bool([]string{"d", "-detach"}, false, "Detached mode: run container in the background and print new container ID") @@ -86,6 +88,9 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf cmd.Var(&flVolumesFrom, []string{"#volumes-from", "-volumes-from"}, "Mount volumes from the specified container(s)") cmd.Var(&flLxcOpts, []string{"#lxc-conf", "-lxc-conf"}, "(lxc exec-driver only) Add custom lxc options --lxc-conf=\"lxc.cgroup.cpuset.cpus = 0,1\"") + cmd.Var(&flCapAdd, []string{"-cap-add"}, "Add Linux capabilities") + cmd.Var(&flCapDrop, []string{"-cap-drop"}, "Drop Linux capabilities") + if err := cmd.Parse(args); err != nil { return nil, nil, cmd, err } @@ -258,6 +263,8 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) 
(*Conf VolumesFrom: flVolumesFrom.GetAll(), NetworkMode: netMode, Devices: deviceMappings, + CapAdd: flCapAdd.GetAll(), + CapDrop: flCapDrop.GetAll(), } if sysInfo != nil && flMemory > 0 && !sysInfo.SwapLimit { diff --git a/sysinit/sysinit.go b/sysinit/sysinit.go index 62e89ce9e7..1b8746f02a 100644 --- a/sysinit/sysinit.go +++ b/sysinit/sysinit.go @@ -3,11 +3,12 @@ package sysinit import ( "flag" "fmt" + "log" + "os" + "github.com/dotcloud/docker/daemon/execdriver" _ "github.com/dotcloud/docker/daemon/execdriver/lxc" _ "github.com/dotcloud/docker/daemon/execdriver/native" - "log" - "os" ) func executeProgram(args *execdriver.InitArgs) error { @@ -39,6 +40,8 @@ func SysInit() { pipe = flag.Int("pipe", 0, "sync pipe fd") console = flag.String("console", "", "console (pty slave) path") root = flag.String("root", ".", "root path for configuration files") + capAdd = flag.String("cap-add", "", "capabilities to add") + capDrop = flag.String("cap-drop", "", "capabilities to drop") ) flag.Parse() @@ -54,6 +57,8 @@ func SysInit() { Console: *console, Pipe: *pipe, Root: *root, + CapAdd: *capAdd, + CapDrop: *capDrop, } if err := executeProgram(args); err != nil { diff --git a/utils/utils.go b/utils/utils.go index ef28aceca7..0d44ec0f72 100644 --- a/utils/utils.go +++ b/utils/utils.go @@ -907,3 +907,16 @@ func ValidateContextDirectory(srcPath string) error { }) return finalError } + +// StringsContainsNoCase reports whether slice contains s, comparing the +// lower-cased form of s against the lower-cased form of each element. +func StringsContainsNoCase(slice []string, s string) bool { + // Lower-case the needle once rather than on every iteration. + needle := strings.ToLower(s) + for _, ss := range slice { + if needle == strings.ToLower(ss) { + return true + } + } + return false +}