diff --git a/daemon/config_experimental.go b/daemon/config_experimental.go new file mode 100644 index 0000000000..f1c4bb925d --- /dev/null +++ b/daemon/config_experimental.go @@ -0,0 +1,119 @@ +// +build experimental + +package daemon + +import ( + "fmt" + "strconv" + "strings" + + "github.com/docker/docker/pkg/idtools" + flag "github.com/docker/docker/pkg/mflag" + "github.com/opencontainers/runc/libcontainer/user" +) + +func (config *Config) attachExperimentalFlags(cmd *flag.FlagSet, usageFn func(string) string) { + cmd.StringVar(&config.RemappedRoot, []string{"-userns-remap"}, "", usageFn("User/Group setting for user namespaces")) +} + +const ( + defaultIDSpecifier string = "default" + defaultRemappedID string = "dockremap" +) + +// Parse the remapped root (user namespace) option, which can be one of: +// username - valid username from /etc/passwd +// username:groupname - valid username; valid groupname from /etc/group +// uid - 32-bit unsigned int valid Linux UID value +// uid:gid - uid value; 32-bit unsigned int Linux GID value +// +// If no groupname is specified, and a username is specified, an attempt +// will be made to lookup a gid for that username as a groupname +// +// If names are used, they are verified to exist in passwd/group +func parseRemappedRoot(usergrp string) (string, string, error) { + + var ( + userID, groupID int + username, groupname string + ) + + idparts := strings.Split(usergrp, ":") + if len(idparts) > 2 { + return "", "", fmt.Errorf("Invalid user/group specification in --userns-remap: %q", usergrp) + } + + if uid, err := strconv.ParseInt(idparts[0], 10, 32); err == nil { + // must be a uid; take it as valid + userID = int(uid) + luser, err := user.LookupUid(userID) + if err != nil { + return "", "", fmt.Errorf("Uid %d has no entry in /etc/passwd: %v", userID, err) + } + username = luser.Name + if len(idparts) == 1 { + // if the uid was numeric and no gid was specified, take the uid as the gid + groupID = userID + lgrp, err := 
user.LookupGid(groupID) + if err != nil { + return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err) + } + groupname = lgrp.Name + } + } else { + lookupName := idparts[0] + // special case: if the user specified "default", they want Docker to create or + // use (after creation) the "dockremap" user/group for root remapping + if lookupName == defaultIDSpecifier { + lookupName = defaultRemappedID + } + luser, err := user.LookupUser(lookupName) + if err != nil && idparts[0] != defaultIDSpecifier { + // error if the name requested isn't the special "dockremap" ID + return "", "", fmt.Errorf("Error during uid lookup for %q: %v", lookupName, err) + } else if err != nil { + // special case-- if the username == "default", then we have been asked + // to create a new entry pair in /etc/{passwd,group} for which the /etc/sub{uid,gid} + // ranges will be used for the user and group mappings in user namespaced containers + _, _, err := idtools.AddNamespaceRangesUser(defaultRemappedID) + if err == nil { + return defaultRemappedID, defaultRemappedID, nil + } + return "", "", fmt.Errorf("Error during %q user creation: %v", defaultRemappedID, err) + } + userID = luser.Uid + username = luser.Name + if len(idparts) == 1 { + // we only have a string username, and no group specified; look up gid from username as group + group, err := user.LookupGroup(lookupName) + if err != nil { + return "", "", fmt.Errorf("Error during gid lookup for %q: %v", lookupName, err) + } + groupID = group.Gid + groupname = group.Name + } + } + + if len(idparts) == 2 { + // groupname or gid is separately specified and must be resolved + // to a unsigned 32-bit gid + if gid, err := strconv.ParseInt(idparts[1], 10, 32); err == nil { + // must be a gid, take it as valid + groupID = int(gid) + lgrp, err := user.LookupGid(groupID) + if err != nil { + return "", "", fmt.Errorf("Gid %d has no entry in /etc/passwd: %v", groupID, err) + } + groupname = lgrp.Name + } else { + // not a number; 
attempt a lookup + group, err := user.LookupGroup(idparts[1]) + if err != nil { + return "", "", fmt.Errorf("Error during gid lookup for %q: %v", idparts[1], err) + } + groupID = group.Gid + groupname = idparts[1] + } + } + return username, groupname, nil +} diff --git a/daemon/config_stub.go b/daemon/config_stub.go new file mode 100644 index 0000000000..796e6b6e4e --- /dev/null +++ b/daemon/config_stub.go @@ -0,0 +1,8 @@ +// +build !experimental + +package daemon + +import flag "github.com/docker/docker/pkg/mflag" + +func (config *Config) attachExperimentalFlags(cmd *flag.FlagSet, usageFn func(string) string) { +} diff --git a/daemon/daemon_experimental.go b/daemon/daemon_experimental.go new file mode 100644 index 0000000000..b65ad7a5b0 --- /dev/null +++ b/daemon/daemon_experimental.go @@ -0,0 +1,110 @@ +// +build experimental + +package daemon + +import ( + "fmt" + "os" + "path/filepath" + "runtime" + + "github.com/Sirupsen/logrus" + "github.com/docker/docker/pkg/directory" + "github.com/docker/docker/pkg/idtools" + "github.com/docker/docker/runconfig" +) + +func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) { + if config.ExecDriver != "native" && config.RemappedRoot != "" { + return nil, nil, fmt.Errorf("User namespace remapping is only supported with the native execdriver") + } + if runtime.GOOS == "windows" && config.RemappedRoot != "" { + return nil, nil, fmt.Errorf("User namespaces are not supported on Windows") + } + + // if the daemon was started with remapped root option, parse + // the config option to the int uid,gid values + var ( + uidMaps, gidMaps []idtools.IDMap + ) + if config.RemappedRoot != "" { + username, groupname, err := parseRemappedRoot(config.RemappedRoot) + if err != nil { + return nil, nil, err + } + if username == "root" { + // Cannot setup user namespaces with a 1-to-1 mapping; "--root=0:0" is a no-op + // effectively + logrus.Warnf("User namespaces: root cannot be remapped with itself; user namespaces are 
OFF") + return uidMaps, gidMaps, nil + } + logrus.Infof("User namespaces: ID ranges will be mapped to subuid/subgid ranges of: %s:%s", username, groupname) + // update remapped root setting now that we have resolved them to actual names + config.RemappedRoot = fmt.Sprintf("%s:%s", username, groupname) + + uidMaps, gidMaps, err = idtools.CreateIDMappings(username, groupname) + if err != nil { + return nil, nil, fmt.Errorf("Can't create ID mappings: %v", err) + } + } + return uidMaps, gidMaps, nil +} + +func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error { + // the main docker root needs to be accessible by all users, as user namespace support + // will create subdirectories owned by either a) the real system root (when no remapping + // is setup) or b) the remapped root host ID (when --root=uid:gid is used) + // for "first time" users of user namespaces, we need to migrate the current directory + // contents to the "0.0" (root == root "namespace" daemon root) + nsRoot := "0.0" + if _, err := os.Stat(rootDir); err == nil { + // root current exists; we need to check for a prior migration + if _, err := os.Stat(filepath.Join(rootDir, nsRoot)); err != nil && os.IsNotExist(err) { + // need to migrate current root to "0.0" subroot + // 1. create non-usernamespaced root as "0.0" + if err := os.Mkdir(filepath.Join(rootDir, nsRoot), 0700); err != nil { + return fmt.Errorf("Cannot create daemon root %q: %v", filepath.Join(rootDir, nsRoot), err) + } + // 2. move current root content to "0.0" new subroot + if err := directory.MoveToSubdir(rootDir, nsRoot); err != nil { + return fmt.Errorf("Cannot migrate current daemon root %q for user namespaces: %v", rootDir, err) + } + // 3. 
chmod outer root to 755 + if chmodErr := os.Chmod(rootDir, 0755); chmodErr != nil { + return chmodErr + } + } + } else if os.IsNotExist(err) { + // no root exists yet, create it 0755 with root:root ownership + if err := os.MkdirAll(rootDir, 0755); err != nil { + return err + } + // create the "0.0" subroot (so no future "migration" happens of the root) + if err := os.Mkdir(filepath.Join(rootDir, nsRoot), 0700); err != nil { + return err + } + } + + // for user namespaces we will create a subtree underneath the specified root + // with any/all specified remapped root uid/gid options on the daemon creating + // a new subdirectory with ownership set to the remapped uid/gid (so as to allow + // `chdir()` to work for containers namespaced to that uid/gid) + if config.RemappedRoot != "" { + nsRoot = fmt.Sprintf("%d.%d", rootUID, rootGID) + } + config.Root = filepath.Join(rootDir, nsRoot) + logrus.Debugf("Creating actual daemon root: %s", config.Root) + + // Create the root directory if it doesn't exists + if err := idtools.MkdirAllAs(config.Root, 0700, rootUID, rootGID); err != nil { + return fmt.Errorf("Cannot create daemon root: %s: %v", config.Root, err) + } + return nil +} + +func (daemon *Daemon) verifyExperimentalContainerSettings(hostConfig *runconfig.HostConfig, config *runconfig.Config) ([]string, error) { + if hostConfig.Privileged && daemon.config().RemappedRoot != "" { + return nil, fmt.Errorf("Privileged mode is incompatible with user namespace mappings") + } + return nil, nil +} diff --git a/daemon/daemon_stub.go b/daemon/daemon_stub.go new file mode 100644 index 0000000000..8fbb0508e9 --- /dev/null +++ b/daemon/daemon_stub.go @@ -0,0 +1,28 @@ +// +build !experimental + +package daemon + +import ( + "os" + + "github.com/docker/docker/pkg/idtools" + "github.com/docker/docker/pkg/system" + "github.com/docker/docker/runconfig" +) + +func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) { + return nil, nil, nil +} + +func 
setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error { + config.Root = rootDir + // Create the root directory if it doesn't exists + if err := system.MkdirAll(config.Root, 0700); err != nil && !os.IsExist(err) { + return err + } + return nil +} + +func (daemon *Daemon) verifyExperimentalContainerSettings(hostConfig *runconfig.HostConfig, config *runconfig.Config) ([]string, error) { + return nil, nil +} diff --git a/experimental/README.md b/experimental/README.md index 74c7f68cc3..ca4f102261 100644 --- a/experimental/README.md +++ b/experimental/README.md @@ -71,6 +71,9 @@ to build a Docker binary with the experimental features enabled: ## Current experimental features + * [External graphdriver plugins](plugins_graphdriver.md) + * [User namespaces](userns.md) + ## How to comment on an experimental feature Each feature's documentation includes a list of proposal pull requests or PRs associated with the feature. If you want to comment on or suggest a change to a feature, please add it to the existing feature PR. diff --git a/experimental/userns.md b/experimental/userns.md new file mode 100644 index 0000000000..79e06ee4a9 --- /dev/null +++ b/experimental/userns.md @@ -0,0 +1,120 @@ +# Experimental: User namespace support + +Linux kernel [user namespace support](http://man7.org/linux/man-pages/man7/user_namespaces.7.html) provides additional security by enabling +a process--and therefore a container--to have a unique range of user and +group IDs which are outside the traditional user and group range utilized by +the host system. Potentially the most important security improvement is that, +by default, container processes running as the `root` user will have expected +administrative privilege (with some restrictions) inside the container but will +effectively be mapped to an unprivileged `uid` on the host. + +In this experimental phase, the Docker daemon creates a single daemon-wide mapping +for all containers running on the same engine instance. 
The mappings will +utilize the existing subordinate user and group ID feature available on all modern +Linux distributions. +The [`/etc/subuid`](http://man7.org/linux/man-pages/man5/subuid.5.html) and +[`/etc/subgid`](http://man7.org/linux/man-pages/man5/subgid.5.html) files will be +read for the user, and optional group, specified to the `--userns-remap` +parameter. If you do not wish to specify your own user and/or group, you can +provide `default` as the value to this flag, and a user will be created on your behalf +and provided subordinate uid and gid ranges. This default user will be named +`dockremap`, and entries will be created for it in `/etc/passwd` and +`/etc/group` using your distro's standard user and group creation tools. + +> **Note**: The single mapping per-daemon restriction exists for this experimental +> phase because Docker shares image layers from its local cache across all +> containers running on the engine instance. Since file ownership must be +> the same for all containers sharing the same layer content, the decision +> was made to map the file ownership on `docker pull` to the daemon's user and +> group mappings so that there is no delay for running containers once the +> content is downloaded--exactly the same performance characteristics as with +> user namespaces disabled. + +## Starting the daemon with user namespaces enabled +To enable this experimental user namespace support for a Docker daemon instance, +start the daemon with the aforementioned `--userns-remap` flag, which accepts +values in the following formats: + + - uid + - uid:gid + - username + - username:groupname + +If numeric IDs are provided, translation back to valid user or group names +will occur so that the subordinate uid and gid information can be read, given +these resources are name-based, not id-based. If the numeric ID information +provided does not exist as entries in `/etc/passwd` or `/etc/group`, daemon +startup will fail with an error message. 
+ +*An example: starting with default Docker user management:* + +``` + $ docker daemon --userns-remap=default +``` +In this case, Docker will create--or find the existing--user and group +named `dockremap`. If the user is created, and the Linux distribution has +appropriate support, the `/etc/subuid` and `/etc/subgid` files will be populated +with a contiguous 65536 length range of subordinate user and group IDs, starting +at an offset based on prior entries in those files. For example, Ubuntu will +create the following range, based on an existing user already having the first +65536 range: + +``` + $ cat /etc/subuid + user1:100000:65536 + dockremap:165536:65536 +``` + +> **Note:** On a fresh Fedora install, we found that we had to `touch` the +> `/etc/subuid` and `/etc/subgid` files to have ranges assigned when users +> were created. Once these files existed, range assignment on user creation +> worked properly. + +If you have a preferred/self-managed user with subordinate ID mappings already +configured, you can provide that username or uid to the `--userns-remap` flag. +If you have a group that doesn't match the username, you may provide the `gid` +or group name as well; otherwise the username will be used as the group name +when querying the system for the subordinate group ID range. + +## Detailed information on `subuid`/`subgid` ranges + +Given there may be advanced use of the subordinate ID ranges by power users, we will +describe how the Docker daemon uses the range entries within these files under the +current experimental user namespace support. + +The simplest case exists where only one contiguous range is defined for the +provided user or group. In this case, Docker will use that entire contiguous +range for the mapping of host uids and gids to the container process. This +means that the first ID in the range will be the remapped root user, and the +IDs above that initial ID will map host ID 1 through the end of the range. 
+ +From the example `/etc/subuid` content shown above, that means the remapped root +user would be uid 165536. + +If the system administrator has set up multiple ranges for a single user or +group, the Docker daemon will read all the available ranges and use the +following algorithm to create the mapping ranges: + +1. The ranges will be sorted by *start ID* ascending +2. Maps will be created from each range where the host ID will increment starting at 0 for the first range, 0+*range1* length for the second, and so on. This means that the lowest range start ID will be the remapped root, and all further ranges will map IDs from 1 through the uid or gid that equals the sum of all range lengths. +3. Range segments above five will be ignored as the kernel ignores any ID maps after five (in `/proc/self/{u,g}id_map`) + +## User namespace known restrictions + +The following standard Docker features are currently incompatible when +running a Docker daemon with experimental user namespaces enabled: + + - sharing namespaces with the host (--pid=host, --net=host, etc.) 
+ - sharing namespaces with other containers (--net=container:*other*) + - A `--readonly` container filesystem (a Linux kernel restriction on remount with new flags of a currently mounted filesystem when inside a user namespace) + - external (volume/graph) drivers which are unaware/incapable of using daemon user mappings + - Using `--privileged` mode containers + - Using the lxc execdriver (only the `native` execdriver is enabled to use user namespaces) + - volume use without pre-arranging proper file ownership in mounted volumes + +Additionally, while the `root` user inside a user namespaced container +process has many of the privileges of the administrative root user, the +following operations will fail: + + - Use of `mknod` - permission is denied for device creation by the container root + - others will be listed here when fully tested diff --git a/integration-cli/docker_cli_experimental_test.go b/integration-cli/docker_cli_experimental_test.go index 694222bf39..e0ed4c76cc 100644 --- a/integration-cli/docker_cli_experimental_test.go +++ b/integration-cli/docker_cli_experimental_test.go @@ -3,8 +3,15 @@ package main import ( + "fmt" + "io/ioutil" + "os" + "os/exec" + "path/filepath" + "strconv" "strings" + "github.com/docker/docker/pkg/system" "github.com/go-check/check" ) @@ -21,3 +28,57 @@ func (s *DockerSuite) TestExperimentalVersion(c *check.C) { c.Fatalf("docker version did not contain experimental: %s", out) } } + +// user namespaces test: run daemon with remapped root setting +// 1. validate uid/gid maps are set properly +// 2. 
verify that files created are owned by remapped root +func (s *DockerDaemonSuite) TestDaemonUserNamespaceRootSetting(c *check.C) { + testRequires(c, NativeExecDriver) + testRequires(c, SameHostDaemon) + + c.Assert(s.d.StartWithBusybox("--userns-remap", "default"), check.IsNil) + + tmpDir, err := ioutil.TempDir("", "userns") + if err != nil { + c.Fatal(err) + } + defer os.RemoveAll(tmpDir) + + // we need to find the uid and gid of the remapped root from the daemon's root dir info + uidgid := strings.Split(filepath.Base(s.d.root), ".") + c.Assert(len(uidgid), check.Equals, 2, check.Commentf("Should have gotten uid/gid strings from root dirname: %s", filepath.Base(s.d.root))) + uid, err := strconv.Atoi(uidgid[0]) + c.Assert(err, check.IsNil, check.Commentf("Can't parse uid: %v", err)) + gid, err := strconv.Atoi(uidgid[1]) + c.Assert(err, check.IsNil, check.Commentf("Can't parse gid: %v", err)) + + //writeable by the remapped root UID/GID pair + c.Assert(os.Chown(tmpDir, uid, gid), check.IsNil) + + out, err := s.d.Cmd("run", "-d", "--name", "userns", "-v", tmpDir+":/goofy", "busybox", "sh", "-c", "touch /goofy/testfile; top") + c.Assert(err, check.IsNil, check.Commentf("Output: %s", out)) + + pid, err := s.d.Cmd("inspect", "--format='{{.State.Pid}}'", "userns") + if err != nil { + c.Fatalf("Could not inspect running container: out: %q; err: %v", pid, err) + } + // check the uid and gid maps for the PID to ensure root is remapped + // (cmd = cat /proc//uid_map | grep -E '0\s+9999\s+1') + out, rc1, err := runCommandPipelineWithOutput( + exec.Command("cat", "/proc/"+strings.TrimSpace(pid)+"/uid_map"), + exec.Command("grep", "-E", fmt.Sprintf("0[[:space:]]+%d[[:space:]]+", uid))) + c.Assert(rc1, check.Equals, 0, check.Commentf("Didn't match uid_map: output: %s", out)) + + out, rc2, err := runCommandPipelineWithOutput( + exec.Command("cat", "/proc/"+strings.TrimSpace(pid)+"/gid_map"), + exec.Command("grep", "-E", fmt.Sprintf("0[[:space:]]+%d[[:space:]]+", gid))) + 
c.Assert(rc2, check.Equals, 0, check.Commentf("Didn't match gid_map: output: %s", out)) + + // check that the touched file is owned by remapped uid:gid + stat, err := system.Stat(filepath.Join(tmpDir, "testfile")) + if err != nil { + c.Fatal(err) + } + c.Assert(stat.UID(), check.Equals, uint32(uid), check.Commentf("Touched file not owned by remapped root UID")) + c.Assert(stat.Gid(), check.Equals, uint32(gid), check.Commentf("Touched file not owned by remapped root GID")) +}