diff --git a/daemon/container_operations_unix.go b/daemon/container_operations_unix.go index 16679180b9..44454462c2 100644 --- a/daemon/container_operations_unix.go +++ b/daemon/container_operations_unix.go @@ -218,11 +218,14 @@ func (daemon *Daemon) populateCommand(c *container.Container, env []string) erro processConfig.Env = env remappedRoot := &execdriver.User{} - rootUID, rootGID := daemon.GetRemappedUIDGID() - if rootUID != 0 { - remappedRoot.UID = rootUID - remappedRoot.GID = rootGID + if c.HostConfig.UsernsMode.IsPrivate() { + rootUID, rootGID := daemon.GetRemappedUIDGID() + if rootUID != 0 { + remappedRoot.UID = rootUID + remappedRoot.GID = rootGID + } } + uidMap, gidMap := daemon.GetUIDGIDMaps() if !daemon.seccompEnabled { diff --git a/daemon/daemon_unix.go b/daemon/daemon_unix.go index 6c1ae734ad..c857108875 100644 --- a/daemon/daemon_unix.go +++ b/daemon/daemon_unix.go @@ -429,7 +429,7 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes. logrus.Warnf("IPv4 forwarding is disabled. Networking will not work") } // check for various conflicting options with user namespaces - if daemon.configStore.RemappedRoot != "" { + if daemon.configStore.RemappedRoot != "" && hostConfig.UsernsMode.IsPrivate() { if hostConfig.Privileged { return warnings, fmt.Errorf("Privileged mode is incompatible with user namespaces") } diff --git a/docs/reference/api/docker_remote_api.md b/docs/reference/api/docker_remote_api.md index 5aee96c53a..50f281da6a 100644 --- a/docs/reference/api/docker_remote_api.md +++ b/docs/reference/api/docker_remote_api.md @@ -125,6 +125,7 @@ This section lists each version from latest to oldest. Each listing includes a * `GET /info` now returns `KernelMemory` field, showing if "kernel memory limit" is supported. * `POST /containers/create` now takes `PidsLimit` field, if the kernel is >= 4.3 and the pids cgroup is supported. 
* `GET /containers/(id or name)/stats` now returns `pids_stats`, if the kernel is >= 4.3 and the pids cgroup is supported. +* `POST /containers/create` now allows you to override user namespace remapping and use privileged options for the container. * `POST /auth` now returns an `IdentityToken` when supported by a registry. ### v1.22 API changes diff --git a/docs/reference/api/docker_remote_api_v1.23.md b/docs/reference/api/docker_remote_api_v1.23.md index fc456d088a..90895fc4e3 100644 --- a/docs/reference/api/docker_remote_api_v1.23.md +++ b/docs/reference/api/docker_remote_api_v1.23.md @@ -431,6 +431,8 @@ Json Parameters: The default is not to restart. (optional) An ever increasing delay (double the previous delay, starting at 100mS) is added before each restart to prevent flooding the server. + - **UsernsMode** - Sets the user namespace mode for the container when the user namespace remapping option is enabled. + Supported values are: `host`. - **NetworkMode** - Sets the networking mode for the container. Supported standard values are: `bridge`, `host`, `none`, and `container:`. Any other value is taken as a custom network's name to which this container should connect to. diff --git a/docs/reference/commandline/create.md b/docs/reference/commandline/create.md index fa68b0feb1..6128411c5f 100644 --- a/docs/reference/commandline/create.md +++ b/docs/reference/commandline/create.md @@ -83,6 +83,9 @@ Creates a new container. --shm-size=[] Size of `/dev/shm`. The format is ``. `number` must be greater than `0`. Unit is optional and can be `b` (bytes), `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). If you omit the unit, the system uses bytes. If you omit the size entirely, the system uses `64m`. -t, --tty Allocate a pseudo-TTY -u, --user="" Username or UID + --userns="" Container user namespace + 'host': Use the Docker host user namespace + '': Use the Docker daemon user namespace specified by the `--userns-remap` option. 
--ulimit=[] Ulimit options --uts="" UTS namespace to use -v, --volume=[host-src:]container-dest[:] diff --git a/docs/reference/commandline/daemon.md b/docs/reference/commandline/daemon.md index e8071e92ec..f94163aefb 100644 --- a/docs/reference/commandline/daemon.md +++ b/docs/reference/commandline/daemon.md @@ -750,6 +750,16 @@ following algorithm to create the mapping ranges: 2. Map segments will be created from each range in increasing value with a length matching the length of each segment. Therefore the range segment with the lowest numeric starting value will be equal to the remapped root, and continue up through host uid/gid equal to the range segment length. As an example, if the lowest segment starts at ID 1000 and has a length of 100, then a map of 1000 -> 0 (the remapped root) up through 1100 -> 100 will be created from this segment. If the next segment starts at ID 10000, then the next map will start with mapping 10000 -> 101 up to the length of this second segment. This will continue until no more segments are found in the subordinate files for this user. 3. If more than five range segments exist for a single user, only the first five will be utilized, matching the kernel's limitation of only five entries in `/proc/self/uid_map` and `proc/self/gid_map`. +### Disable user namespace for a container + +If you enable user namespaces on the daemon, all containers are started +with user namespaces enabled. In some situations you might want to disable +this feature for a container, for example, to start a privileged container (see +[user namespace known restrictions](#user-namespace-known-restrictions)). +To enable those advanced features for a specific container, use `--userns=host` +in the `run` or `create` command. +This option will completely disable user namespace mapping for the container's user. 
+ ### User namespace known restrictions The following standard Docker features are currently incompatible when diff --git a/docs/reference/commandline/run.md b/docs/reference/commandline/run.md index 496ff4865d..a3ef21f79b 100644 --- a/docs/reference/commandline/run.md +++ b/docs/reference/commandline/run.md @@ -85,6 +85,9 @@ parent = "smn_cli" --stop-signal="SIGTERM" Signal to stop a container -t, --tty Allocate a pseudo-TTY -u, --user="" Username or UID (format: [:]) + --userns="" Container user namespace + 'host': Use the Docker host user namespace + '': Use the Docker daemon user namespace specified by `--userns-remap` option. --ulimit=[] Ulimit options --uts="" UTS namespace to use -v, --volume=[host-src:]container-dest[:] diff --git a/integration-cli/docker_cli_userns_test.go b/integration-cli/docker_cli_userns_test.go index 967debd581..f8b3f77b61 100644 --- a/integration-cli/docker_cli_userns_test.go +++ b/integration-cli/docker_cli_userns_test.go @@ -37,11 +37,13 @@ func (s *DockerDaemonSuite) TestDaemonUserNamespaceRootSetting(c *check.C) { gid, err := strconv.Atoi(uidgid[1]) c.Assert(err, checker.IsNil, check.Commentf("Can't parse gid")) - //writeable by the remapped root UID/GID pair + // writable by the remapped root UID/GID pair c.Assert(os.Chown(tmpDir, uid, gid), checker.IsNil) out, err := s.d.Cmd("run", "-d", "--name", "userns", "-v", tmpDir+":/goofy", "busybox", "sh", "-c", "touch /goofy/testfile; top") c.Assert(err, checker.IsNil, check.Commentf("Output: %s", out)) + user := s.findUser(c, "userns") + c.Assert(uidgid[0], checker.Equals, user) pid, err := s.d.Cmd("inspect", "--format='{{.State.Pid}}'", "userns") c.Assert(err, checker.IsNil, check.Commentf("Could not inspect running container: out: %q", pid)) @@ -62,4 +64,23 @@ func (s *DockerDaemonSuite) TestDaemonUserNamespaceRootSetting(c *check.C) { c.Assert(err, checker.IsNil) c.Assert(stat.UID(), checker.Equals, uint32(uid), check.Commentf("Touched file not owned by remapped root UID")) 
c.Assert(stat.GID(), checker.Equals, uint32(gid), check.Commentf("Touched file not owned by remapped root GID")) + + // use the host user namespace + out, err = s.d.Cmd("run", "-d", "--name", "userns_skip", "--userns", "host", "busybox", "sh", "-c", "touch /goofy/testfile; top") + c.Assert(err, checker.IsNil, check.Commentf("Output: %s", out)) + user = s.findUser(c, "userns_skip") + // user namespaces are skipped, so the user is root + c.Assert(user, checker.Equals, "root") +} + +// findUser finds the uid or name of the user of the first process that runs in a container +func (s *DockerDaemonSuite) findUser(c *check.C, container string) string { + out, err := s.d.Cmd("top", container) + c.Assert(err, checker.IsNil, check.Commentf("Output: %s", out)) + rows := strings.Split(out, "\n") + if len(rows) < 2 { + // No process rows found + c.FailNow() + } + return strings.Fields(rows[1])[0] } diff --git a/man/docker-create.1.md b/man/docker-create.1.md index 16f70a958d..95d171fec3 100644 --- a/man/docker-create.1.md +++ b/man/docker-create.1.md @@ -58,6 +58,7 @@ docker-create - Create a new container [**-P**|**--publish-all**] [**-p**|**--publish**[=*[]*]] [**--pid**[=*[]*]] +[**--userns**[=*[]*]] [**--pids-limit**[=*PIDS_LIMIT*]] [**--privileged**] [**--read-only**] @@ -291,6 +292,10 @@ unit, `b` is used. Set LIMIT to `-1` to enable unlimited swap. **host**: use the host's PID namespace inside the container. Note: the host mode gives the container full access to local PID and is therefore considered insecure. +**--userns**="" + Set the user namespace mode for the container when the `--userns-remap` option is enabled. + **host**: use the host user namespace and enable all privileged options (e.g., `--pid=host` or `--privileged`). + **--pids-limit**="" Tune the container's pids limit. Set `-1` to have unlimited pids for the container. 
diff --git a/man/docker-run.1.md b/man/docker-run.1.md index a22d43762d..c44dd603d0 100644 --- a/man/docker-run.1.md +++ b/man/docker-run.1.md @@ -60,6 +60,7 @@ docker-run - Run a command in a new container [**-P**|**--publish-all**] [**-p**|**--publish**[=*[]*]] [**--pid**[=*[]*]] +[**--userns**[=*[]*]] [**--pids-limit**[=*PIDS_LIMIT*]] [**--privileged**] [**--read-only**] @@ -421,6 +422,10 @@ Use `docker port` to see the actual mapping: `docker port CONTAINER $CONTAINERPO **host**: use the host's PID namespace inside the container. Note: the host mode gives the container full access to local PID and is therefore considered insecure. +**--userns**="" + Set the user namespace mode for the container when the `--userns-remap` option is enabled. + **host**: use the host user namespace and enable all privileged options (e.g., `--pid=host` or `--privileged`). + **--pids-limit**="" Tune the container's pids limit. Set `-1` to have unlimited pids for the container. diff --git a/runconfig/hostconfig_test.go b/runconfig/hostconfig_test.go index f8d266cf0a..e14443ba98 100644 --- a/runconfig/hostconfig_test.go +++ b/runconfig/hostconfig_test.go @@ -121,6 +121,27 @@ func TestUTSModeTest(t *testing.T) { } } +func TestUsernsModeTest(t *testing.T) { + usrensMode := map[container.UsernsMode][]bool{ + // private, host, valid + "": {true, false, true}, + "something:weird": {true, false, false}, + "host": {false, true, true}, + "host:name": {true, false, true}, + } + for usernsMode, state := range usrensMode { + if usernsMode.IsPrivate() != state[0] { + t.Fatalf("UsernsMode.IsPrivate for %v should have been %v but was %v", usernsMode, state[0], usernsMode.IsPrivate()) + } + if usernsMode.IsHost() != state[1] { + t.Fatalf("UsernsMode.IsHost for %v should have been %v but was %v", usernsMode, state[1], usernsMode.IsHost()) + } + if usernsMode.Valid() != state[2] { + t.Fatalf("UsernsMode.Valid for %v should have been %v but was %v", usernsMode, state[2], usernsMode.Valid()) + } + } +} + func 
TestPidModeTest(t *testing.T) { pidModes := map[container.PidMode][]bool{ // private, host, valid diff --git a/runconfig/opts/parse.go b/runconfig/opts/parse.go index 74d6ce993f..1490137a25 100644 --- a/runconfig/opts/parse.go +++ b/runconfig/opts/parse.go @@ -59,6 +59,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host flPrivileged = cmd.Bool([]string{"-privileged"}, false, "Give extended privileges to this container") flPidMode = cmd.String([]string{"-pid"}, "", "PID namespace to use") flUTSMode = cmd.String([]string{"-uts"}, "", "UTS namespace to use") + flUsernsMode = cmd.String([]string{"-userns"}, "", "User namespace to use") flPublishAll = cmd.Bool([]string{"P", "-publish-all"}, false, "Publish all exposed ports to random ports") flStdin = cmd.Bool([]string{"i", "-interactive"}, false, "Keep STDIN open even if not attached") flTty = cmd.Bool([]string{"t", "-tty"}, false, "Allocate a pseudo-TTY") @@ -316,6 +317,11 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host return nil, nil, nil, cmd, fmt.Errorf("--uts: invalid UTS mode") } + usernsMode := container.UsernsMode(*flUsernsMode) + if !usernsMode.Valid() { + return nil, nil, nil, cmd, fmt.Errorf("--userns: invalid USER mode") + } + restartPolicy, err := ParseRestartPolicy(*flRestartPolicy) if err != nil { return nil, nil, nil, cmd, err @@ -404,6 +410,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host IpcMode: ipcMode, PidMode: pidMode, UTSMode: utsMode, + UsernsMode: usernsMode, CapAdd: strslice.StrSlice(flCapAdd.GetAll()), CapDrop: strslice.StrSlice(flCapDrop.GetAll()), GroupAdd: flGroupAdd.GetAll(),