mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
Merge pull request #20111 from twistlock/19995_skip_user_ns
Run privileged containers when userns are specified - feature proposal
This commit is contained in:
commit
d8539347bf
12 changed files with 87 additions and 6 deletions
|
@ -218,11 +218,14 @@ func (daemon *Daemon) populateCommand(c *container.Container, env []string) erro
|
||||||
processConfig.Env = env
|
processConfig.Env = env
|
||||||
|
|
||||||
remappedRoot := &execdriver.User{}
|
remappedRoot := &execdriver.User{}
|
||||||
rootUID, rootGID := daemon.GetRemappedUIDGID()
|
if c.HostConfig.UsernsMode.IsPrivate() {
|
||||||
if rootUID != 0 {
|
rootUID, rootGID := daemon.GetRemappedUIDGID()
|
||||||
remappedRoot.UID = rootUID
|
if rootUID != 0 {
|
||||||
remappedRoot.GID = rootGID
|
remappedRoot.UID = rootUID
|
||||||
|
remappedRoot.GID = rootGID
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
uidMap, gidMap := daemon.GetUIDGIDMaps()
|
uidMap, gidMap := daemon.GetUIDGIDMaps()
|
||||||
|
|
||||||
if !daemon.seccompEnabled {
|
if !daemon.seccompEnabled {
|
||||||
|
|
|
@ -429,7 +429,7 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.
|
||||||
logrus.Warnf("IPv4 forwarding is disabled. Networking will not work")
|
logrus.Warnf("IPv4 forwarding is disabled. Networking will not work")
|
||||||
}
|
}
|
||||||
// check for various conflicting options with user namespaces
|
// check for various conflicting options with user namespaces
|
||||||
if daemon.configStore.RemappedRoot != "" {
|
if daemon.configStore.RemappedRoot != "" && hostConfig.UsernsMode.IsPrivate() {
|
||||||
if hostConfig.Privileged {
|
if hostConfig.Privileged {
|
||||||
return warnings, fmt.Errorf("Privileged mode is incompatible with user namespaces")
|
return warnings, fmt.Errorf("Privileged mode is incompatible with user namespaces")
|
||||||
}
|
}
|
||||||
|
|
|
@ -125,6 +125,7 @@ This section lists each version from latest to oldest. Each listing includes a
|
||||||
* `GET /info` now returns `KernelMemory` field, showing if "kernel memory limit" is supported.
|
* `GET /info` now returns `KernelMemory` field, showing if "kernel memory limit" is supported.
|
||||||
* `POST /containers/create` now takes `PidsLimit` field, if the kernel is >= 4.3 and the pids cgroup is supported.
|
* `POST /containers/create` now takes `PidsLimit` field, if the kernel is >= 4.3 and the pids cgroup is supported.
|
||||||
* `GET /containers/(id or name)/stats` now returns `pids_stats`, if the kernel is >= 4.3 and the pids cgroup is supported.
|
* `GET /containers/(id or name)/stats` now returns `pids_stats`, if the kernel is >= 4.3 and the pids cgroup is supported.
|
||||||
|
* `POST /containers/create` now allows you to override user namespace remapping and use privileged options for the container.
|
||||||
* `POST /auth` now returns an `IdentityToken` when supported by a registry.
|
* `POST /auth` now returns an `IdentityToken` when supported by a registry.
|
||||||
|
|
||||||
### v1.22 API changes
|
### v1.22 API changes
|
||||||
|
|
|
@ -431,6 +431,8 @@ Json Parameters:
|
||||||
The default is not to restart. (optional)
|
The default is not to restart. (optional)
|
||||||
An ever increasing delay (double the previous delay, starting at 100mS)
|
An ever increasing delay (double the previous delay, starting at 100mS)
|
||||||
is added before each restart to prevent flooding the server.
|
is added before each restart to prevent flooding the server.
|
||||||
|
- **UsernsMode** - Sets the user namespace mode for the container when the user namespace remapping option is enabled.
|
||||||
|
Supported values are: `host`.
|
||||||
- **NetworkMode** - Sets the networking mode for the container. Supported
|
- **NetworkMode** - Sets the networking mode for the container. Supported
|
||||||
standard values are: `bridge`, `host`, `none`, and `container:<name|id>`. Any other value is taken
|
standard values are: `bridge`, `host`, `none`, and `container:<name|id>`. Any other value is taken
|
||||||
as a custom network's name to which this container should connect to.
|
as a custom network's name to which this container should connect to.
|
||||||
|
|
|
@ -83,6 +83,9 @@ Creates a new container.
|
||||||
--shm-size=[] Size of `/dev/shm`. The format is `<number><unit>`. `number` must be greater than `0`. Unit is optional and can be `b` (bytes), `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). If you omit the unit, the system uses bytes. If you omit the size entirely, the system uses `64m`.
|
--shm-size=[] Size of `/dev/shm`. The format is `<number><unit>`. `number` must be greater than `0`. Unit is optional and can be `b` (bytes), `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). If you omit the unit, the system uses bytes. If you omit the size entirely, the system uses `64m`.
|
||||||
-t, --tty Allocate a pseudo-TTY
|
-t, --tty Allocate a pseudo-TTY
|
||||||
-u, --user="" Username or UID
|
-u, --user="" Username or UID
|
||||||
|
--userns="" Container user namespace
|
||||||
|
'host': Use the Docker host user namespace
|
||||||
|
'': Use the Docker daemon user namespace specified by `--userns-remap` option.
|
||||||
--ulimit=[] Ulimit options
|
--ulimit=[] Ulimit options
|
||||||
--uts="" UTS namespace to use
|
--uts="" UTS namespace to use
|
||||||
-v, --volume=[host-src:]container-dest[:<options>]
|
-v, --volume=[host-src:]container-dest[:<options>]
|
||||||
|
|
|
@ -750,6 +750,16 @@ following algorithm to create the mapping ranges:
|
||||||
2. Map segments will be created from each range in increasing value with a length matching the length of each segment. Therefore the range segment with the lowest numeric starting value will be equal to the remapped root, and continue up through host uid/gid equal to the range segment length. As an example, if the lowest segment starts at ID 1000 and has a length of 100, then a map of 1000 -> 0 (the remapped root) up through 1100 -> 100 will be created from this segment. If the next segment starts at ID 10000, then the next map will start with mapping 10000 -> 101 up to the length of this second segment. This will continue until no more segments are found in the subordinate files for this user.
|
2. Map segments will be created from each range in increasing value with a length matching the length of each segment. Therefore the range segment with the lowest numeric starting value will be equal to the remapped root, and continue up through host uid/gid equal to the range segment length. As an example, if the lowest segment starts at ID 1000 and has a length of 100, then a map of 1000 -> 0 (the remapped root) up through 1100 -> 100 will be created from this segment. If the next segment starts at ID 10000, then the next map will start with mapping 10000 -> 101 up to the length of this second segment. This will continue until no more segments are found in the subordinate files for this user.
|
||||||
3. If more than five range segments exist for a single user, only the first five will be utilized, matching the kernel's limitation of only five entries in `/proc/self/uid_map` and `/proc/self/gid_map`.
|
3. If more than five range segments exist for a single user, only the first five will be utilized, matching the kernel's limitation of only five entries in `/proc/self/uid_map` and `/proc/self/gid_map`.
|
||||||
|
|
||||||
|
### Disable user namespace for a container
|
||||||
|
|
||||||
|
If you enable user namespaces on the daemon, all containers are started
|
||||||
|
with user namespaces enabled. In some situations you might want to disable
|
||||||
|
this feature for a container, for example, to start a privileged container (see
|
||||||
|
[user namespace known restrictions](#user-namespace-known-restrictions)).
|
||||||
|
To enable those advanced features for a specific container use `--userns=host`
|
||||||
|
in the `run/exec/create` command.
|
||||||
|
This option will completely disable user namespace mapping for the container's user.
|
||||||
|
|
||||||
### User namespace known restrictions
|
### User namespace known restrictions
|
||||||
|
|
||||||
The following standard Docker features are currently incompatible when
|
The following standard Docker features are currently incompatible when
|
||||||
|
|
|
@ -85,6 +85,9 @@ parent = "smn_cli"
|
||||||
--stop-signal="SIGTERM" Signal to stop a container
|
--stop-signal="SIGTERM" Signal to stop a container
|
||||||
-t, --tty Allocate a pseudo-TTY
|
-t, --tty Allocate a pseudo-TTY
|
||||||
-u, --user="" Username or UID (format: <name|uid>[:<group|gid>])
|
-u, --user="" Username or UID (format: <name|uid>[:<group|gid>])
|
||||||
|
--userns="" Container user namespace
|
||||||
|
'host': Use the Docker host user namespace
|
||||||
|
'': Use the Docker daemon user namespace specified by `--userns-remap` option.
|
||||||
--ulimit=[] Ulimit options
|
--ulimit=[] Ulimit options
|
||||||
--uts="" UTS namespace to use
|
--uts="" UTS namespace to use
|
||||||
-v, --volume=[host-src:]container-dest[:<options>]
|
-v, --volume=[host-src:]container-dest[:<options>]
|
||||||
|
|
|
@ -37,11 +37,13 @@ func (s *DockerDaemonSuite) TestDaemonUserNamespaceRootSetting(c *check.C) {
|
||||||
gid, err := strconv.Atoi(uidgid[1])
|
gid, err := strconv.Atoi(uidgid[1])
|
||||||
c.Assert(err, checker.IsNil, check.Commentf("Can't parse gid"))
|
c.Assert(err, checker.IsNil, check.Commentf("Can't parse gid"))
|
||||||
|
|
||||||
//writeable by the remapped root UID/GID pair
|
// writable by the remapped root UID/GID pair
|
||||||
c.Assert(os.Chown(tmpDir, uid, gid), checker.IsNil)
|
c.Assert(os.Chown(tmpDir, uid, gid), checker.IsNil)
|
||||||
|
|
||||||
out, err := s.d.Cmd("run", "-d", "--name", "userns", "-v", tmpDir+":/goofy", "busybox", "sh", "-c", "touch /goofy/testfile; top")
|
out, err := s.d.Cmd("run", "-d", "--name", "userns", "-v", tmpDir+":/goofy", "busybox", "sh", "-c", "touch /goofy/testfile; top")
|
||||||
c.Assert(err, checker.IsNil, check.Commentf("Output: %s", out))
|
c.Assert(err, checker.IsNil, check.Commentf("Output: %s", out))
|
||||||
|
user := s.findUser(c, "userns")
|
||||||
|
c.Assert(uidgid[0], checker.Equals, user)
|
||||||
|
|
||||||
pid, err := s.d.Cmd("inspect", "--format='{{.State.Pid}}'", "userns")
|
pid, err := s.d.Cmd("inspect", "--format='{{.State.Pid}}'", "userns")
|
||||||
c.Assert(err, checker.IsNil, check.Commentf("Could not inspect running container: out: %q", pid))
|
c.Assert(err, checker.IsNil, check.Commentf("Could not inspect running container: out: %q", pid))
|
||||||
|
@ -62,4 +64,23 @@ func (s *DockerDaemonSuite) TestDaemonUserNamespaceRootSetting(c *check.C) {
|
||||||
c.Assert(err, checker.IsNil)
|
c.Assert(err, checker.IsNil)
|
||||||
c.Assert(stat.UID(), checker.Equals, uint32(uid), check.Commentf("Touched file not owned by remapped root UID"))
|
c.Assert(stat.UID(), checker.Equals, uint32(uid), check.Commentf("Touched file not owned by remapped root UID"))
|
||||||
c.Assert(stat.GID(), checker.Equals, uint32(gid), check.Commentf("Touched file not owned by remapped root GID"))
|
c.Assert(stat.GID(), checker.Equals, uint32(gid), check.Commentf("Touched file not owned by remapped root GID"))
|
||||||
|
|
||||||
|
// use host usernamespace
|
||||||
|
out, err = s.d.Cmd("run", "-d", "--name", "userns_skip", "--userns", "host", "busybox", "sh", "-c", "touch /goofy/testfile; top")
|
||||||
|
c.Assert(err, checker.IsNil, check.Commentf("Output: %s", out))
|
||||||
|
user = s.findUser(c, "userns_skip")
|
||||||
|
// userns are skipped, user is root
|
||||||
|
c.Assert(user, checker.Equals, "root")
|
||||||
|
}
|
||||||
|
|
||||||
|
// findUser finds the uid or name of the user of the first process that runs in a container
|
||||||
|
func (s *DockerDaemonSuite) findUser(c *check.C, container string) string {
|
||||||
|
out, err := s.d.Cmd("top", container)
|
||||||
|
c.Assert(err, checker.IsNil, check.Commentf("Output: %s", out))
|
||||||
|
rows := strings.Split(out, "\n")
|
||||||
|
if len(rows) < 2 {
|
||||||
|
// No process rows found
|
||||||
|
c.FailNow()
|
||||||
|
}
|
||||||
|
return strings.Fields(rows[1])[0]
|
||||||
}
|
}
|
||||||
|
|
|
@ -58,6 +58,7 @@ docker-create - Create a new container
|
||||||
[**-P**|**--publish-all**]
|
[**-P**|**--publish-all**]
|
||||||
[**-p**|**--publish**[=*[]*]]
|
[**-p**|**--publish**[=*[]*]]
|
||||||
[**--pid**[=*[]*]]
|
[**--pid**[=*[]*]]
|
||||||
|
[**--userns**[=*[]*]]
|
||||||
[**--pids-limit**[=*PIDS_LIMIT*]]
|
[**--pids-limit**[=*PIDS_LIMIT*]]
|
||||||
[**--privileged**]
|
[**--privileged**]
|
||||||
[**--read-only**]
|
[**--read-only**]
|
||||||
|
@ -291,6 +292,10 @@ unit, `b` is used. Set LIMIT to `-1` to enable unlimited swap.
|
||||||
**host**: use the host's PID namespace inside the container.
|
**host**: use the host's PID namespace inside the container.
|
||||||
Note: the host mode gives the container full access to local PID and is therefore considered insecure.
|
Note: the host mode gives the container full access to local PID and is therefore considered insecure.
|
||||||
|
|
||||||
|
**--userns**=""
|
||||||
|
Set the user namespace mode for the container when the `userns-remap` option is enabled.
|
||||||
|
**host**: use the host user namespace and enable all privileged options (e.g., `--pid=host` or `--privileged`).
|
||||||
|
|
||||||
**--pids-limit**=""
|
**--pids-limit**=""
|
||||||
Tune the container's pids limit. Set `-1` to have unlimited pids for the container.
|
Tune the container's pids limit. Set `-1` to have unlimited pids for the container.
|
||||||
|
|
||||||
|
|
|
@ -60,6 +60,7 @@ docker-run - Run a command in a new container
|
||||||
[**-P**|**--publish-all**]
|
[**-P**|**--publish-all**]
|
||||||
[**-p**|**--publish**[=*[]*]]
|
[**-p**|**--publish**[=*[]*]]
|
||||||
[**--pid**[=*[]*]]
|
[**--pid**[=*[]*]]
|
||||||
|
[**--userns**[=*[]*]]
|
||||||
[**--pids-limit**[=*PIDS_LIMIT*]]
|
[**--pids-limit**[=*PIDS_LIMIT*]]
|
||||||
[**--privileged**]
|
[**--privileged**]
|
||||||
[**--read-only**]
|
[**--read-only**]
|
||||||
|
@ -421,6 +422,10 @@ Use `docker port` to see the actual mapping: `docker port CONTAINER $CONTAINERPO
|
||||||
**host**: use the host's PID namespace inside the container.
|
**host**: use the host's PID namespace inside the container.
|
||||||
Note: the host mode gives the container full access to local PID and is therefore considered insecure.
|
Note: the host mode gives the container full access to local PID and is therefore considered insecure.
|
||||||
|
|
||||||
|
**--userns**=""
|
||||||
|
Set the user namespace mode for the container when the `userns-remap` option is enabled.
|
||||||
|
**host**: use the host user namespace and enable all privileged options (e.g., `--pid=host` or `--privileged`).
|
||||||
|
|
||||||
**--pids-limit**=""
|
**--pids-limit**=""
|
||||||
Tune the container's pids limit. Set `-1` to have unlimited pids for the container.
|
Tune the container's pids limit. Set `-1` to have unlimited pids for the container.
|
||||||
|
|
||||||
|
|
|
@ -121,6 +121,27 @@ func TestUTSModeTest(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestUsernsModeTest(t *testing.T) {
|
||||||
|
usrensMode := map[container.UsernsMode][]bool{
|
||||||
|
// private, host, valid
|
||||||
|
"": {true, false, true},
|
||||||
|
"something:weird": {true, false, false},
|
||||||
|
"host": {false, true, true},
|
||||||
|
"host:name": {true, false, true},
|
||||||
|
}
|
||||||
|
for usernsMode, state := range usrensMode {
|
||||||
|
if usernsMode.IsPrivate() != state[0] {
|
||||||
|
t.Fatalf("UsernsMode.IsPrivate for %v should have been %v but was %v", usernsMode, state[0], usernsMode.IsPrivate())
|
||||||
|
}
|
||||||
|
if usernsMode.IsHost() != state[1] {
|
||||||
|
t.Fatalf("UsernsMode.IsHost for %v should have been %v but was %v", usernsMode, state[1], usernsMode.IsHost())
|
||||||
|
}
|
||||||
|
if usernsMode.Valid() != state[2] {
|
||||||
|
t.Fatalf("UsernsMode.Valid for %v should have been %v but was %v", usernsMode, state[2], usernsMode.Valid())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestPidModeTest(t *testing.T) {
|
func TestPidModeTest(t *testing.T) {
|
||||||
pidModes := map[container.PidMode][]bool{
|
pidModes := map[container.PidMode][]bool{
|
||||||
// private, host, valid
|
// private, host, valid
|
||||||
|
|
|
@ -59,6 +59,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
|
||||||
flPrivileged = cmd.Bool([]string{"-privileged"}, false, "Give extended privileges to this container")
|
flPrivileged = cmd.Bool([]string{"-privileged"}, false, "Give extended privileges to this container")
|
||||||
flPidMode = cmd.String([]string{"-pid"}, "", "PID namespace to use")
|
flPidMode = cmd.String([]string{"-pid"}, "", "PID namespace to use")
|
||||||
flUTSMode = cmd.String([]string{"-uts"}, "", "UTS namespace to use")
|
flUTSMode = cmd.String([]string{"-uts"}, "", "UTS namespace to use")
|
||||||
|
flUsernsMode = cmd.String([]string{"-userns"}, "", "User namespace to use")
|
||||||
flPublishAll = cmd.Bool([]string{"P", "-publish-all"}, false, "Publish all exposed ports to random ports")
|
flPublishAll = cmd.Bool([]string{"P", "-publish-all"}, false, "Publish all exposed ports to random ports")
|
||||||
flStdin = cmd.Bool([]string{"i", "-interactive"}, false, "Keep STDIN open even if not attached")
|
flStdin = cmd.Bool([]string{"i", "-interactive"}, false, "Keep STDIN open even if not attached")
|
||||||
flTty = cmd.Bool([]string{"t", "-tty"}, false, "Allocate a pseudo-TTY")
|
flTty = cmd.Bool([]string{"t", "-tty"}, false, "Allocate a pseudo-TTY")
|
||||||
|
@ -316,6 +317,11 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
|
||||||
return nil, nil, nil, cmd, fmt.Errorf("--uts: invalid UTS mode")
|
return nil, nil, nil, cmd, fmt.Errorf("--uts: invalid UTS mode")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
usernsMode := container.UsernsMode(*flUsernsMode)
|
||||||
|
if !usernsMode.Valid() {
|
||||||
|
return nil, nil, nil, cmd, fmt.Errorf("--userns: invalid USER mode")
|
||||||
|
}
|
||||||
|
|
||||||
restartPolicy, err := ParseRestartPolicy(*flRestartPolicy)
|
restartPolicy, err := ParseRestartPolicy(*flRestartPolicy)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, nil, cmd, err
|
return nil, nil, nil, cmd, err
|
||||||
|
@ -404,6 +410,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
|
||||||
IpcMode: ipcMode,
|
IpcMode: ipcMode,
|
||||||
PidMode: pidMode,
|
PidMode: pidMode,
|
||||||
UTSMode: utsMode,
|
UTSMode: utsMode,
|
||||||
|
UsernsMode: usernsMode,
|
||||||
CapAdd: strslice.StrSlice(flCapAdd.GetAll()),
|
CapAdd: strslice.StrSlice(flCapAdd.GetAll()),
|
||||||
CapDrop: strslice.StrSlice(flCapDrop.GetAll()),
|
CapDrop: strslice.StrSlice(flCapDrop.GetAll()),
|
||||||
GroupAdd: flGroupAdd.GetAll(),
|
GroupAdd: flGroupAdd.GetAll(),
|
||||||
|
|
Loading…
Reference in a new issue