1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00

Merge pull request #19187 from estesp/lets-do-this

User namespaces: graduate from experimental
This commit is contained in:
Sebastiaan van Stijn 2016-01-12 09:34:19 -08:00
commit c72be040bb
12 changed files with 347 additions and 336 deletions

View file

@ -2,118 +2,7 @@
package daemon
import (
"fmt"
"strconv"
"strings"
"github.com/docker/docker/pkg/idtools"
flag "github.com/docker/docker/pkg/mflag"
"github.com/opencontainers/runc/libcontainer/user"
)
import flag "github.com/docker/docker/pkg/mflag"
func (config *Config) attachExperimentalFlags(cmd *flag.FlagSet, usageFn func(string) string) {
cmd.StringVar(&config.RemappedRoot, []string{"-userns-remap"}, "", usageFn("User/Group setting for user namespaces"))
}
const (
defaultIDSpecifier string = "default"
defaultRemappedID string = "dockremap"
)
// Parse the remapped root (user namespace) option, which can be one of:
// username - valid username from /etc/passwd
// username:groupname - valid username; valid groupname from /etc/group
// uid - 32-bit unsigned int valid Linux UID value
// uid:gid - uid value; 32-bit unsigned int Linux GID value
//
// If no groupname is specified, and a username is specified, an attempt
// will be made to lookup a gid for that username as a groupname
//
// If names are used, they are verified to exist in passwd/group
func parseRemappedRoot(usergrp string) (string, string, error) {
var (
userID, groupID int
username, groupname string
)
idparts := strings.Split(usergrp, ":")
if len(idparts) > 2 {
return "", "", fmt.Errorf("Invalid user/group specification in --userns-remap: %q", usergrp)
}
if uid, err := strconv.ParseInt(idparts[0], 10, 32); err == nil {
// must be a uid; take it as valid
userID = int(uid)
luser, err := user.LookupUid(userID)
if err != nil {
return "", "", fmt.Errorf("Uid %d has no entry in /etc/passwd: %v", userID, err)
}
username = luser.Name
if len(idparts) == 1 {
// if the uid was numeric and no gid was specified, take the uid as the gid
groupID = userID
lgrp, err := user.LookupGid(groupID)
if err != nil {
return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err)
}
groupname = lgrp.Name
}
} else {
lookupName := idparts[0]
// special case: if the user specified "default", they want Docker to create or
// use (after creation) the "dockremap" user/group for root remapping
if lookupName == defaultIDSpecifier {
lookupName = defaultRemappedID
}
luser, err := user.LookupUser(lookupName)
if err != nil && idparts[0] != defaultIDSpecifier {
// error if the name requested isn't the special "dockremap" ID
return "", "", fmt.Errorf("Error during uid lookup for %q: %v", lookupName, err)
} else if err != nil {
// special case-- if the username == "default", then we have been asked
// to create a new entry pair in /etc/{passwd,group} for which the /etc/sub{uid,gid}
// ranges will be used for the user and group mappings in user namespaced containers
_, _, err := idtools.AddNamespaceRangesUser(defaultRemappedID)
if err == nil {
return defaultRemappedID, defaultRemappedID, nil
}
return "", "", fmt.Errorf("Error during %q user creation: %v", defaultRemappedID, err)
}
userID = luser.Uid
username = luser.Name
if len(idparts) == 1 {
// we only have a string username, and no group specified; look up gid from username as group
group, err := user.LookupGroup(lookupName)
if err != nil {
return "", "", fmt.Errorf("Error during gid lookup for %q: %v", lookupName, err)
}
groupID = group.Gid
groupname = group.Name
}
}
if len(idparts) == 2 {
// groupname or gid is separately specified and must be resolved
// to a unsigned 32-bit gid
if gid, err := strconv.ParseInt(idparts[1], 10, 32); err == nil {
// must be a gid, take it as valid
groupID = int(gid)
lgrp, err := user.LookupGid(groupID)
if err != nil {
return "", "", fmt.Errorf("Gid %d has no entry in /etc/passwd: %v", groupID, err)
}
groupname = lgrp.Name
} else {
// not a number; attempt a lookup
group, err := user.LookupGroup(idparts[1])
if err != nil {
return "", "", fmt.Errorf("Error during gid lookup for %q: %v", idparts[1], err)
}
groupID = group.Gid
groupname = idparts[1]
}
}
return username, groupname, nil
}

View file

@ -79,6 +79,7 @@ func (config *Config) InstallFlags(cmd *flag.FlagSet, usageFn func(string) strin
cmd.BoolVar(&config.EnableCors, []string{"#api-enable-cors", "#-api-enable-cors"}, false, usageFn("Enable CORS headers in the remote API, this is deprecated by --api-cors-header"))
cmd.StringVar(&config.CorsHeaders, []string{"-api-cors-header"}, "", usageFn("Set CORS headers in the remote API"))
cmd.StringVar(&config.CgroupParent, []string{"-cgroup-parent"}, "", usageFn("Set parent cgroup for all containers"))
cmd.StringVar(&config.RemappedRoot, []string{"-userns-remap"}, "", usageFn("User/Group setting for user namespaces"))
config.attachExperimentalFlags(cmd, usageFn)
}

View file

@ -2,88 +2,8 @@
package daemon
import (
"fmt"
"os"
"path/filepath"
"runtime"
"github.com/Sirupsen/logrus"
"github.com/docker/docker/pkg/idtools"
"github.com/docker/engine-api/types/container"
)
func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) {
if runtime.GOOS != "linux" && config.RemappedRoot != "" {
return nil, nil, fmt.Errorf("User namespaces are only supported on Linux")
}
// if the daemon was started with remapped root option, parse
// the config option to the int uid,gid values
var (
uidMaps, gidMaps []idtools.IDMap
)
if config.RemappedRoot != "" {
username, groupname, err := parseRemappedRoot(config.RemappedRoot)
if err != nil {
return nil, nil, err
}
if username == "root" {
// Cannot setup user namespaces with a 1-to-1 mapping; "--root=0:0" is a no-op
// effectively
logrus.Warnf("User namespaces: root cannot be remapped with itself; user namespaces are OFF")
return uidMaps, gidMaps, nil
}
logrus.Infof("User namespaces: ID ranges will be mapped to subuid/subgid ranges of: %s:%s", username, groupname)
// update remapped root setting now that we have resolved them to actual names
config.RemappedRoot = fmt.Sprintf("%s:%s", username, groupname)
uidMaps, gidMaps, err = idtools.CreateIDMappings(username, groupname)
if err != nil {
return nil, nil, fmt.Errorf("Can't create ID mappings: %v", err)
}
}
return uidMaps, gidMaps, nil
}
func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error {
config.Root = rootDir
// the docker root metadata directory needs to have execute permissions for all users (o+x)
// so that syscalls executing as non-root, operating on subdirectories of the graph root
// (e.g. mounted layers of a container) can traverse this path.
// The user namespace support will create subdirectories for the remapped root host uid:gid
// pair owned by that same uid:gid pair for proper write access to those needed metadata and
// layer content subtrees.
if _, err := os.Stat(rootDir); err == nil {
// root current exists; verify the access bits are correct by setting them
if err = os.Chmod(rootDir, 0701); err != nil {
return err
}
} else if os.IsNotExist(err) {
// no root exists yet, create it 0701 with root:root ownership
if err := os.MkdirAll(rootDir, 0701); err != nil {
return err
}
}
// if user namespaces are enabled we will create a subtree underneath the specified root
// with any/all specified remapped root uid/gid options on the daemon creating
// a new subdirectory with ownership set to the remapped uid/gid (so as to allow
// `chdir()` to work for containers namespaced to that uid/gid)
if config.RemappedRoot != "" {
config.Root = filepath.Join(rootDir, fmt.Sprintf("%d.%d", rootUID, rootGID))
logrus.Debugf("Creating user namespaced daemon root: %s", config.Root)
// Create the root directory if it doesn't exists
if err := idtools.MkdirAllAs(config.Root, 0700, rootUID, rootGID); err != nil {
return fmt.Errorf("Cannot create daemon root: %s: %v", config.Root, err)
}
}
return nil
}
import "github.com/docker/engine-api/types/container"
func (daemon *Daemon) verifyExperimentalContainerSettings(hostConfig *container.HostConfig, config *container.Config) ([]string, error) {
if hostConfig.Privileged && daemon.configStore.RemappedRoot != "" {
return nil, fmt.Errorf("Privileged mode is incompatible with user namespace mappings")
}
return nil, nil
}

View file

@ -2,26 +2,7 @@
package daemon
import (
"os"
"github.com/docker/docker/pkg/idtools"
"github.com/docker/docker/pkg/system"
"github.com/docker/engine-api/types/container"
)
func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) {
return nil, nil, nil
}
func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error {
config.Root = rootDir
// Create the root directory if it doesn't exists
if err := system.MkdirAll(config.Root, 0700); err != nil && !os.IsExist(err) {
return err
}
return nil
}
import "github.com/docker/engine-api/types/container"
func (daemon *Daemon) verifyExperimentalContainerSettings(hostConfig *container.HostConfig, config *container.Config) ([]string, error) {
return nil, nil

View file

@ -7,6 +7,7 @@ import (
"net"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"syscall"
@ -33,6 +34,7 @@ import (
"github.com/docker/libnetwork/types"
blkiodev "github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/label"
"github.com/opencontainers/runc/libcontainer/user"
)
const (
@ -42,6 +44,9 @@ const (
platformSupported = true
// It's not kernel limit, we want this 4M limit to supply a reasonable functional container
linuxMinMemory = 4194304
// constants for remapped root settings
defaultIDSpecifier string = "default"
defaultRemappedID string = "dockremap"
)
func getBlkioWeightDevices(config *containertypes.HostConfig) ([]*blkiodev.WeightDevice, error) {
@ -375,6 +380,24 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.
warnings = append(warnings, "IPv4 forwarding is disabled. Networking will not work.")
logrus.Warnf("IPv4 forwarding is disabled. Networking will not work")
}
// check for various conflicting options with user namespaces
if daemon.configStore.RemappedRoot != "" {
if hostConfig.Privileged {
return warnings, fmt.Errorf("Privileged mode is incompatible with user namespaces.")
}
if hostConfig.NetworkMode.IsHost() || hostConfig.NetworkMode.IsContainer() {
return warnings, fmt.Errorf("Cannot share the host or a container's network namespace when user namespaces are enabled.")
}
if hostConfig.PidMode.IsHost() {
return warnings, fmt.Errorf("Cannot share the host PID namespace when user namespaces are enabled.")
}
if hostConfig.IpcMode.IsContainer() {
return warnings, fmt.Errorf("Cannot share a container's IPC namespace when user namespaces are enabled.")
}
if hostConfig.ReadonlyRootfs {
return warnings, fmt.Errorf("Cannot use the --read-only option when user namespaces are enabled.")
}
}
return warnings, nil
}
@ -674,6 +697,171 @@ func setupInitLayer(initLayer string, rootUID, rootGID int) error {
return nil
}
// Parse the remapped root (user namespace) option, which can be one of:
// username - valid username from /etc/passwd
// username:groupname - valid username; valid groupname from /etc/group
// uid - 32-bit unsigned int valid Linux UID value
// uid:gid - uid value; 32-bit unsigned int Linux GID value
//
// If no groupname is specified, and a username is specified, an attempt
// will be made to lookup a gid for that username as a groupname
//
// If names are used, they are verified to exist in passwd/group
func parseRemappedRoot(usergrp string) (string, string, error) {
var (
userID, groupID int
username, groupname string
)
idparts := strings.Split(usergrp, ":")
if len(idparts) > 2 {
return "", "", fmt.Errorf("Invalid user/group specification in --userns-remap: %q", usergrp)
}
if uid, err := strconv.ParseInt(idparts[0], 10, 32); err == nil {
// must be a uid; take it as valid
userID = int(uid)
luser, err := user.LookupUid(userID)
if err != nil {
return "", "", fmt.Errorf("Uid %d has no entry in /etc/passwd: %v", userID, err)
}
username = luser.Name
if len(idparts) == 1 {
// if the uid was numeric and no gid was specified, take the uid as the gid
groupID = userID
lgrp, err := user.LookupGid(groupID)
if err != nil {
return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err)
}
groupname = lgrp.Name
}
} else {
lookupName := idparts[0]
// special case: if the user specified "default", they want Docker to create or
// use (after creation) the "dockremap" user/group for root remapping
if lookupName == defaultIDSpecifier {
lookupName = defaultRemappedID
}
luser, err := user.LookupUser(lookupName)
if err != nil && idparts[0] != defaultIDSpecifier {
// error if the name requested isn't the special "dockremap" ID
return "", "", fmt.Errorf("Error during uid lookup for %q: %v", lookupName, err)
} else if err != nil {
// special case-- if the username == "default", then we have been asked
// to create a new entry pair in /etc/{passwd,group} for which the /etc/sub{uid,gid}
// ranges will be used for the user and group mappings in user namespaced containers
_, _, err := idtools.AddNamespaceRangesUser(defaultRemappedID)
if err == nil {
return defaultRemappedID, defaultRemappedID, nil
}
return "", "", fmt.Errorf("Error during %q user creation: %v", defaultRemappedID, err)
}
userID = luser.Uid
username = luser.Name
if len(idparts) == 1 {
// we only have a string username, and no group specified; look up gid from username as group
group, err := user.LookupGroup(lookupName)
if err != nil {
return "", "", fmt.Errorf("Error during gid lookup for %q: %v", lookupName, err)
}
groupID = group.Gid
groupname = group.Name
}
}
if len(idparts) == 2 {
// groupname or gid is separately specified and must be resolved
// to a unsigned 32-bit gid
if gid, err := strconv.ParseInt(idparts[1], 10, 32); err == nil {
// must be a gid, take it as valid
groupID = int(gid)
lgrp, err := user.LookupGid(groupID)
if err != nil {
return "", "", fmt.Errorf("Gid %d has no entry in /etc/passwd: %v", groupID, err)
}
groupname = lgrp.Name
} else {
// not a number; attempt a lookup
group, err := user.LookupGroup(idparts[1])
if err != nil {
return "", "", fmt.Errorf("Error during gid lookup for %q: %v", idparts[1], err)
}
groupID = group.Gid
groupname = idparts[1]
}
}
return username, groupname, nil
}
func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) {
if runtime.GOOS != "linux" && config.RemappedRoot != "" {
return nil, nil, fmt.Errorf("User namespaces are only supported on Linux")
}
// if the daemon was started with remapped root option, parse
// the config option to the int uid,gid values
var (
uidMaps, gidMaps []idtools.IDMap
)
if config.RemappedRoot != "" {
username, groupname, err := parseRemappedRoot(config.RemappedRoot)
if err != nil {
return nil, nil, err
}
if username == "root" {
// Cannot setup user namespaces with a 1-to-1 mapping; "--root=0:0" is a no-op
// effectively
logrus.Warnf("User namespaces: root cannot be remapped with itself; user namespaces are OFF")
return uidMaps, gidMaps, nil
}
logrus.Infof("User namespaces: ID ranges will be mapped to subuid/subgid ranges of: %s:%s", username, groupname)
// update remapped root setting now that we have resolved them to actual names
config.RemappedRoot = fmt.Sprintf("%s:%s", username, groupname)
uidMaps, gidMaps, err = idtools.CreateIDMappings(username, groupname)
if err != nil {
return nil, nil, fmt.Errorf("Can't create ID mappings: %v", err)
}
}
return uidMaps, gidMaps, nil
}
func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error {
config.Root = rootDir
// the docker root metadata directory needs to have execute permissions for all users (o+x)
// so that syscalls executing as non-root, operating on subdirectories of the graph root
// (e.g. mounted layers of a container) can traverse this path.
// The user namespace support will create subdirectories for the remapped root host uid:gid
// pair owned by that same uid:gid pair for proper write access to those needed metadata and
// layer content subtrees.
if _, err := os.Stat(rootDir); err == nil {
// root current exists; verify the access bits are correct by setting them
if err = os.Chmod(rootDir, 0701); err != nil {
return err
}
} else if os.IsNotExist(err) {
// no root exists yet, create it 0701 with root:root ownership
if err := os.MkdirAll(rootDir, 0701); err != nil {
return err
}
}
// if user namespaces are enabled we will create a subtree underneath the specified root
// with any/all specified remapped root uid/gid options on the daemon creating
// a new subdirectory with ownership set to the remapped uid/gid (so as to allow
// `chdir()` to work for containers namespaced to that uid/gid)
if config.RemappedRoot != "" {
config.Root = filepath.Join(rootDir, fmt.Sprintf("%d.%d", rootUID, rootGID))
logrus.Debugf("Creating user namespaced daemon root: %s", config.Root)
// Create the root directory if it doesn't exists
if err := idtools.MkdirAllAs(config.Root, 0700, rootUID, rootGID); err != nil {
return fmt.Errorf("Cannot create daemon root: %s: %v", config.Root, err)
}
}
return nil
}
// registerLinks writes the links to a file.
func (daemon *Daemon) registerLinks(container *container.Container, hostConfig *containertypes.HostConfig) error {
if hostConfig == nil {

View file

@ -4,6 +4,7 @@ import (
"encoding/json"
"errors"
"fmt"
"os"
"path/filepath"
"runtime"
"strings"
@ -18,6 +19,7 @@ import (
containertypes "github.com/docker/engine-api/types/container"
// register the windows graph driver
"github.com/docker/docker/daemon/graphdriver/windows"
"github.com/docker/docker/pkg/idtools"
"github.com/docker/docker/pkg/system"
"github.com/docker/libnetwork"
blkiodev "github.com/opencontainers/runc/libcontainer/configs"
@ -135,6 +137,19 @@ func (daemon *Daemon) cleanupMounts() error {
return nil
}
func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) {
return nil, nil, nil
}
func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error {
config.Root = rootDir
// Create the root directory if it doesn't exists
if err := system.MkdirAll(config.Root, 0700); err != nil && !os.IsExist(err) {
return err
}
return nil
}
// conditionalMountOnStart is a platform specific helper function during the
// container start to call mount.
func (daemon *Daemon) conditionalMountOnStart(container *container.Container) error {

View file

@ -62,6 +62,7 @@ weight = -1
--tlscert="~/.docker/cert.pem" Path to TLS certificate file
--tlskey="~/.docker/key.pem" Path to TLS key file
--tlsverify Use TLS and verify the remote
--userns-remap="default" Enable user namespace remapping
--userland-proxy=true Use userland proxy for loopback traffic
Options with [] may be specified multiple times.
@ -632,6 +633,133 @@ For information about how to create an authorization plugin, see [authorization
plugin](../../extend/authorization.md) section in the Docker extend section of this documentation.
## Daemon user namespace options
The Linux kernel [user namespace support](http://man7.org/linux/man-pages/man7/user_namespaces.7.html) provides additional security by enabling
a process, and therefore a container, to have a unique range of user and
group IDs which are outside the traditional user and group range utilized by
the host system. Potentially the most important security improvement is that,
by default, container processes running as the `root` user will have expected
administrative privilege (with some restrictions) inside the container but will
effectively be mapped to an unprivileged `uid` on the host.
When user namespace support is enabled, Docker creates a single daemon-wide mapping
for all containers running on the same engine instance. The mappings will
utilize the existing subordinate user and group ID feature available on all modern
Linux distributions.
The [`/etc/subuid`](http://man7.org/linux/man-pages/man5/subuid.5.html) and
[`/etc/subgid`](http://man7.org/linux/man-pages/man5/subgid.5.html) files will be
read for the user, and optional group, specified to the `--userns-remap`
parameter. If you do not wish to specify your own user and/or group, you can
provide `default` as the value to this flag, and a user will be created on your behalf
and provided subordinate uid and gid ranges. This default user will be named
`dockremap`, and entries will be created for it in `/etc/passwd` and
`/etc/group` using your distro's standard user and group creation tools.
> **Note**: The single mapping per-daemon restriction is in place for now
> because Docker shares image layers from its local cache across all
> containers running on the engine instance. Since file ownership must be
> the same for all containers sharing the same layer content, the decision
> was made to map the file ownership on `docker pull` to the daemon's user and
> group mappings so that there is no delay for running containers once the
> content is downloaded. This design preserves the same performance for `docker
> pull`, `docker push`, and container startup as users expect with
> user namespaces disabled.
### Starting the daemon with user namespaces enabled
To enable user namespace support, start the daemon with the
`--userns-remap` flag, which accepts values in the following formats:
- uid
- uid:gid
- username
- username:groupname
If numeric IDs are provided, translation back to valid user or group names
will occur so that the subordinate uid and gid information can be read, given
these resources are name-based, not id-based. If the numeric ID information
provided does not exist as entries in `/etc/passwd` or `/etc/group`, daemon
startup will fail with an error message.
*Example: starting with default Docker user management:*
```
$ docker daemon --userns-remap=default
```
When `default` is provided, Docker will create - or find the existing - user and group
named `dockremap`. If the user is created, and the Linux distribution has
appropriate support, the `/etc/subuid` and `/etc/subgid` files will be populated
with a contiguous 65536 length range of subordinate user and group IDs, starting
at an offset based on prior entries in those files. For example, Ubuntu will
create the following range, based on an existing user named `user1` already owning
the first 65536 range:
```
$ cat /etc/subuid
user1:100000:65536
dockremap:165536:65536
```
> **Note:** On a fresh Fedora install, we had to `touch` the
> `/etc/subuid` and `/etc/subgid` files to have ranges assigned when users
> were created. Once these files existed, range assignment on user creation
> worked properly.
If you have a preferred/self-managed user with subordinate ID mappings already
configured, you can provide that username or uid to the `--userns-remap` flag.
If you have a group that doesn't match the username, you may provide the `gid`
or group name as well; otherwise the username will be used as the group name
when querying the system for the subordinate group ID range.
### Detailed information on `subuid`/`subgid` ranges
Given potential advanced use of the subordinate ID ranges by power users, the
following paragraphs define how the Docker daemon currently uses the range entries
found within the subordinate range files.
The simplest case is that only one contiguous range is defined for the
provided user or group. In this case, Docker will use that entire contiguous
range for the mapping of host uids and gids to the container process. This
means that the first ID in the range will be the remapped root user, and the
IDs above that initial ID will map host ID 1 through the end of the range.
From the example `/etc/subid` content shown above, the remapped root
user would be uid 165536.
If the system administrator has set up multiple ranges for a single user or
group, the Docker daemon will read all the available ranges and use the
following algorithm to create the mapping ranges:
1. The range segments found for the particular user will be sorted by *start ID* ascending.
2. Map segments will be created from each range in increasing value with a length matching the length of each segment. Therefore the range segment with the lowest numeric starting value will be equal to the remapped root, and continue up through host uid/gid equal to the range segment length. As an example, if the lowest segment starts at ID 1000 and has a length of 100, then a map of 1000 -> 0 (the remapped root) up through 1100 -> 100 will be created from this segment. If the next segment starts at ID 10000, then the next map will start with mapping 10000 -> 101 up to the length of this second segment. This will continue until no more segments are found in the subordinate files for this user.
3. If more than five range segments exist for a single user, only the first five will be utilized, matching the kernel's limitation of only five entries in `/proc/self/uid_map` and `proc/self/gid_map`.
### User namespace known restrictions
The following standard Docker features are currently incompatible when
running a Docker daemon with user namespaces enabled:
- sharing PID or NET namespaces with the host (`--pid=host` or `--net=host`)
- sharing a network namespace with an existing container (`--net=container:*other*`)
- sharing an IPC namespace with an existing container (`--ipc=container:*other*`)
- A `--readonly` container filesystem (this is a Linux kernel restriction against remounting with modified flags of a currently mounted filesystem when inside a user namespace)
- external (volume or graph) drivers which are unaware/incapable of using daemon user mappings
- Using `--privileged` mode flag on `docker run`
In general, user namespaces are an advanced feature and will require
coordination with other capabilities. For example, if volumes are mounted from
the host, file ownership will have to be pre-arranged if the user or
administrator wishes the containers to have expected access to the volume
contents.
Finally, while the `root` user inside a user namespaced container process has
many of the expected admin privileges that go along with being the superuser, the
Linux kernel has restrictions based on internal knowledge that this is a user namespaced
process. The most notable restriction that we are aware of at this time is the
inability to use `mknod`. Permission will be denied for device creation even as
container `root` inside a user namespace.
## Miscellaneous options
IP masquerading uses address translation to allow containers without a public

View file

@ -72,7 +72,7 @@ to build a Docker binary with the experimental features enabled:
## Current experimental features
* [External graphdriver plugins](plugins_graphdriver.md)
* [User namespaces](userns.md)
* The user namespaces feature has graduated from experimental.
## How to comment on an experimental feature

View file

@ -1,119 +0,0 @@
# Experimental: User namespace support
Linux kernel [user namespace support](http://man7.org/linux/man-pages/man7/user_namespaces.7.html) provides additional security by enabling
a process--and therefore a container--to have a unique range of user and
group IDs which are outside the traditional user and group range utilized by
the host system. Potentially the most important security improvement is that,
by default, container processes running as the `root` user will have expected
administrative privilege (with some restrictions) inside the container but will
effectively be mapped to an unprivileged `uid` on the host.
In this experimental phase, the Docker daemon creates a single daemon-wide mapping
for all containers running on the same engine instance. The mappings will
utilize the existing subordinate user and group ID feature available on all modern
Linux distributions.
The [`/etc/subuid`](http://man7.org/linux/man-pages/man5/subuid.5.html) and
[`/etc/subgid`](http://man7.org/linux/man-pages/man5/subgid.5.html) files will be
read for the user, and optional group, specified to the `--userns-remap`
parameter. If you do not wish to specify your own user and/or group, you can
provide `default` as the value to this flag, and a user will be created on your behalf
and provided subordinate uid and gid ranges. This default user will be named
`dockremap`, and entries will be created for it in `/etc/passwd` and
`/etc/group` using your distro's standard user and group creation tools.
> **Note**: The single mapping per-daemon restriction exists for this experimental
> phase because Docker shares image layers from its local cache across all
> containers running on the engine instance. Since file ownership must be
> the same for all containers sharing the same layer content, the decision
> was made to map the file ownership on `docker pull` to the daemon's user and
> group mappings so that there is no delay for running containers once the
> content is downloaded--exactly the same performance characteristics as with
> user namespaces disabled.
## Starting the daemon with user namespaces enabled
To enable this experimental user namespace support for a Docker daemon instance,
start the daemon with the aforementioned `--userns-remap` flag, which accepts
values in the following formats:
- uid
- uid:gid
- username
- username:groupname
If numeric IDs are provided, translation back to valid user or group names
will occur so that the subordinate uid and gid information can be read, given
these resources are name-based, not id-based. If the numeric ID information
provided does not exist as entries in `/etc/passwd` or `/etc/group`, daemon
startup will fail with an error message.
*An example: starting with default Docker user management:*
```
$ docker daemon --userns-remap=default
```
In this case, Docker will create--or find the existing--user and group
named `dockremap`. If the user is created, and the Linux distribution has
appropriate support, the `/etc/subuid` and `/etc/subgid` files will be populated
with a contiguous 65536 length range of subordinate user and group IDs, starting
at an offset based on prior entries in those files. For example, Ubuntu will
create the following range, based on an existing user already having the first
65536 range:
```
$ cat /etc/subuid
user1:100000:65536
dockremap:165536:65536
```
> **Note:** On a fresh Fedora install, we found that we had to `touch` the
> `/etc/subuid` and `/etc/subgid` files to have ranges assigned when users
> were created. Once these files existed, range assignment on user creation
> worked properly.
If you have a preferred/self-managed user with subordinate ID mappings already
configured, you can provide that username or uid to the `--userns-remap` flag.
If you have a group that doesn't match the username, you may provide the `gid`
or group name as well; otherwise the username will be used as the group name
when querying the system for the subordinate group ID range.
## Detailed information on `subuid`/`subgid` ranges
Given there may be advanced use of the subordinate ID ranges by power users, we will
describe how the Docker daemon uses the range entries within these files under the
current experimental user namespace support.
The simplest case exists where only one contiguous range is defined for the
provided user or group. In this case, Docker will use that entire contiguous
range for the mapping of host uids and gids to the container process. This
means that the first ID in the range will be the remapped root user, and the
IDs above that initial ID will map host ID 1 through the end of the range.
From the example `/etc/subid` content shown above, that means the remapped root
user would be uid 165536.
If the system administrator has set up multiple ranges for a single user or
group, the Docker daemon will read all the available ranges and use the
following algorithm to create the mapping ranges:
1. The ranges will be sorted by *start ID* ascending
2. Maps will be created from each range with where the host ID will increment starting at 0 for the first range, 0+*range1* length for the second, and so on. This means that the lowest range start ID will be the remapped root, and all further ranges will map IDs from 1 through the uid or gid that equals the sum of all range lengths.
3. Ranges segments above five will be ignored as the kernel ignores any ID maps after five (in `/proc/self/{u,g}id_map`)
## User namespace known restrictions
The following standard Docker features are currently incompatible when
running a Docker daemon with experimental user namespaces enabled:
- sharing namespaces with the host (--pid=host, --net=host, etc.)
- sharing namespaces with other containers (--net=container:*other*)
- A `--readonly` container filesystem (a Linux kernel restriction on remount with new flags of a currently mounted filesystem when inside a user namespace)
- external (volume/graph) drivers which are unaware/incapable of using daemon user mappings
- Using `--privileged` mode containers
- volume use without pre-arranging proper file ownership in mounted volumes
Additionally, while the `root` user inside a user namespaced container
process has many of the privileges of the administrative root user, the
following operations will fail:
- Use of `mknod` - permission is denied for device creation by the container root
- others will be listed here when fully tested

View file

@ -99,7 +99,7 @@ if [ ! "$GOPATH" ]; then
exit 1
fi
if [ "$DOCKER_EXPERIMENTAL" ] || [ "$DOCKER_REMAP_ROOT" ]; then
if [ "$DOCKER_EXPERIMENTAL" ]; then
echo >&2 '# WARNING! DOCKER_EXPERIMENTAL is set: building experimental features'
echo >&2
DOCKER_BUILDTAGS+=" experimental pkcs11"

View file

@ -652,10 +652,14 @@ func (s *DockerSuite) TestContainerApiCreateWithDomainName(c *check.C) {
c.Assert(containerJSON.Config.Domainname, checker.Equals, domainName, check.Commentf("Mismatched Domainname"))
}
func (s *DockerSuite) TestContainerApiCreateNetworkMode(c *check.C) {
func (s *DockerSuite) TestContainerApiCreateBridgeNetworkMode(c *check.C) {
testRequires(c, DaemonIsLinux)
UtilCreateNetworkMode(c, "host")
UtilCreateNetworkMode(c, "bridge")
}
func (s *DockerSuite) TestContainerApiCreateOtherNetworkModes(c *check.C) {
testRequires(c, DaemonIsLinux, NotUserNamespace)
UtilCreateNetworkMode(c, "host")
UtilCreateNetworkMode(c, "container:web1")
}

View file

@ -53,6 +53,7 @@ docker-daemon - Enable daemon mode
[**--tlskey**[=*~/.docker/key.pem*]]
[**--tlsverify**]
[**--userland-proxy**[=*true*]]
[**--userns-remap**[=*default*]]
# DESCRIPTION
**docker** has two distinct functions. It is used for starting the Docker
@ -223,6 +224,9 @@ unix://[/path/to/socket] to use.
**--userland-proxy**=*true*|*false*
Rely on a userland proxy implementation for inter-container and outside-to-container loopback communications. Default is true.
**--userns-remap**=*default*|*uid:gid*|*user:group*|*user*|*uid*
Enable user namespaces for containers on the daemon. Specifying "default" will cause a new user and group to be created to handle UID and GID range remapping for the user namespace mappings used for contained processes. Specifying a user (or uid) and optionally a group (or gid) will cause the daemon to lookup the user and group's subordinate ID ranges for use as the user namespace mappings for contained processes.
# STORAGE DRIVER OPTIONS
Docker uses storage backends (known as "graphdrivers" in the Docker