diff --git a/container.go b/container.go index 8721d45a55..c20ee11f55 100644 --- a/container.go +++ b/container.go @@ -78,6 +78,7 @@ type Config struct { VolumesFrom string Entrypoint []string NetworkDisabled bool + Privileged bool } type HostConfig struct { @@ -108,6 +109,7 @@ func ParseRun(args []string, capabilities *Capabilities) (*Config, *HostConfig, flMemory := cmd.Int64("m", 0, "Memory limit (in bytes)") flContainerIDFile := cmd.String("cidfile", "", "Write the container ID to the file") flNetwork := cmd.Bool("n", true, "Enable networking for this container") + flPrivileged := cmd.Bool("privileged", false, "Give extended privileges to this container") if capabilities != nil && *flMemory > 0 && !capabilities.MemoryLimit { //fmt.Fprintf(stdout, "WARNING: Your kernel does not support memory limit capabilities. Limitation discarded.\n") @@ -194,6 +196,7 @@ func ParseRun(args []string, capabilities *Capabilities) (*Config, *HostConfig, Volumes: flVolumes, VolumesFrom: *flVolumesFrom, Entrypoint: entrypoint, + Privileged: *flPrivileged, } hostConfig := &HostConfig{ Binds: binds, diff --git a/container_test.go b/container_test.go index 1050bfb2ad..0bab8ea8f0 100644 --- a/container_test.go +++ b/container_test.go @@ -1313,3 +1313,35 @@ func TestOnlyLoopbackExistsWhenUsingDisableNetworkOption(t *testing.T) { } } + +func TestPrivilegedCanMknod(t *testing.T) { + runtime := mkRuntime(t) + defer nuke(runtime) + if output, _ := runContainer(runtime, []string{"-privileged", "_", "sh", "-c", "mknod /tmp/sda b 8 0 && echo ok"}, t); output != "ok\n" { + t.Fatal("Could not mknod into privileged container") + } +} + +func TestPrivilegedCanMount(t *testing.T) { + runtime := mkRuntime(t) + defer nuke(runtime) + if output, _ := runContainer(runtime, []string{"-privileged", "_", "sh", "-c", "mount -t tmpfs none /tmp && echo ok"}, t); output != "ok\n" { + t.Fatal("Could not mount into privileged container") + } +} + +func TestPrivilegedCannotMknod(t *testing.T) { + runtime := 
mkRuntime(t) + defer nuke(runtime) + if output, _ := runContainer(runtime, []string{"_", "sh", "-c", "mknod /tmp/sda b 8 0 || echo ok"}, t); output != "ok\n" { + t.Fatal("Could mknod into secure container") + } +} + +func TestPrivilegedCannotMount(t *testing.T) { + runtime := mkRuntime(t) + defer nuke(runtime) + if output, _ := runContainer(runtime, []string{"_", "sh", "-c", "mount -t tmpfs none /tmp || echo ok"}, t); output != "ok\n" { + t.Fatal("Could mount into secure container") + } +} diff --git a/docs/sources/commandline/command/run.rst b/docs/sources/commandline/command/run.rst index c5b363d787..2e3e96e46a 100644 --- a/docs/sources/commandline/command/run.rst +++ b/docs/sources/commandline/command/run.rst @@ -19,6 +19,7 @@ -e=[]: Set environment variables -h="": Container host name -i=false: Keep stdin open even if not attached + -privileged=false: Give extended privileges to this container -m=0: Memory limit (in bytes) -n=true: Enable networking for this container -p=[]: Map a network port to the container @@ -38,3 +39,15 @@ Examples docker run -cidfile /tmp/docker_test.cid ubuntu echo "test" | This will create a container and print "test" to the console. The cidfile flag makes docker attempt to create a new file and write the container ID to it. If the file exists already, docker will return an error. Docker will close this file when docker run exits. + +.. code-block:: bash + + docker run ubuntu mount -t tmpfs none /var/spool/squid + +| This will *not* work, because by default, most potentially dangerous kernel capabilities are dropped, including ``cap_sys_admin`` (which is required to mount filesystems). However, the ``-privileged`` flag will allow it to run: + +.. code-block:: bash + + docker run -privileged ubuntu mount -t tmpfs none /var/spool/squid + +| The ``-privileged`` flag gives *all* capabilities to the container, and it also lifts all the limitations enforced by the ``device`` cgroup controller.
In other words, the container can then do almost everything that the host can do. This flag exists to allow special use-cases, like running Docker within Docker. diff --git a/lxc_template.go b/lxc_template.go index d49d66b4d9..3ab2d2c86e 100644 --- a/lxc_template.go +++ b/lxc_template.go @@ -40,6 +40,9 @@ lxc.console = none # no controlling tty at all lxc.tty = 1 +{{if .Config.Privileged}} +lxc.cgroup.devices.allow = a +{{else}} # no implicit access to devices lxc.cgroup.devices.deny = a @@ -69,7 +72,7 @@ lxc.cgroup.devices.allow = c 10:200 rwm # rtc #lxc.cgroup.devices.allow = c 254:0 rwm - +{{end}} # standard mount point # WARNING: procfs is a known attack vector and should probably be disabled @@ -95,11 +98,15 @@ lxc.mount.entry = {{$realPath}} {{$ROOTFS}}/{{$virtualPath}} none bind,{{ if ind {{end}} {{end}} +{{if .Config.Privileged}} +# retain all capabilities; no lxc.cap.drop line +{{else}} # drop linux capabilities (apply mainly to the user root in the container) # (Note: 'lxc.cap.keep' is coming soon and should replace this under the # security principle 'deny all unless explicitly permitted', see # http://sourceforge.net/mailarchive/message.php?msg_id=31054627 ) lxc.cap.drop = audit_control audit_write mac_admin mac_override mknod setfcap setpcap sys_admin sys_boot sys_module sys_nice sys_pacct sys_rawio sys_resource sys_time sys_tty_config +{{end}} # limits {{if .Config.Memory}}