mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
Merge pull request #19187 from estesp/lets-do-this
User namespaces: graduate from experimental
This commit is contained in:
commit
c72be040bb
12 changed files with 347 additions and 336 deletions
|
@ -2,118 +2,7 @@
|
|||
|
||||
package daemon
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/docker/docker/pkg/idtools"
|
||||
flag "github.com/docker/docker/pkg/mflag"
|
||||
"github.com/opencontainers/runc/libcontainer/user"
|
||||
)
|
||||
import flag "github.com/docker/docker/pkg/mflag"
|
||||
|
||||
func (config *Config) attachExperimentalFlags(cmd *flag.FlagSet, usageFn func(string) string) {
|
||||
cmd.StringVar(&config.RemappedRoot, []string{"-userns-remap"}, "", usageFn("User/Group setting for user namespaces"))
|
||||
}
|
||||
|
||||
const (
|
||||
defaultIDSpecifier string = "default"
|
||||
defaultRemappedID string = "dockremap"
|
||||
)
|
||||
|
||||
// Parse the remapped root (user namespace) option, which can be one of:
|
||||
// username - valid username from /etc/passwd
|
||||
// username:groupname - valid username; valid groupname from /etc/group
|
||||
// uid - 32-bit unsigned int valid Linux UID value
|
||||
// uid:gid - uid value; 32-bit unsigned int Linux GID value
|
||||
//
|
||||
// If no groupname is specified, and a username is specified, an attempt
|
||||
// will be made to lookup a gid for that username as a groupname
|
||||
//
|
||||
// If names are used, they are verified to exist in passwd/group
|
||||
func parseRemappedRoot(usergrp string) (string, string, error) {
|
||||
|
||||
var (
|
||||
userID, groupID int
|
||||
username, groupname string
|
||||
)
|
||||
|
||||
idparts := strings.Split(usergrp, ":")
|
||||
if len(idparts) > 2 {
|
||||
return "", "", fmt.Errorf("Invalid user/group specification in --userns-remap: %q", usergrp)
|
||||
}
|
||||
|
||||
if uid, err := strconv.ParseInt(idparts[0], 10, 32); err == nil {
|
||||
// must be a uid; take it as valid
|
||||
userID = int(uid)
|
||||
luser, err := user.LookupUid(userID)
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("Uid %d has no entry in /etc/passwd: %v", userID, err)
|
||||
}
|
||||
username = luser.Name
|
||||
if len(idparts) == 1 {
|
||||
// if the uid was numeric and no gid was specified, take the uid as the gid
|
||||
groupID = userID
|
||||
lgrp, err := user.LookupGid(groupID)
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err)
|
||||
}
|
||||
groupname = lgrp.Name
|
||||
}
|
||||
} else {
|
||||
lookupName := idparts[0]
|
||||
// special case: if the user specified "default", they want Docker to create or
|
||||
// use (after creation) the "dockremap" user/group for root remapping
|
||||
if lookupName == defaultIDSpecifier {
|
||||
lookupName = defaultRemappedID
|
||||
}
|
||||
luser, err := user.LookupUser(lookupName)
|
||||
if err != nil && idparts[0] != defaultIDSpecifier {
|
||||
// error if the name requested isn't the special "dockremap" ID
|
||||
return "", "", fmt.Errorf("Error during uid lookup for %q: %v", lookupName, err)
|
||||
} else if err != nil {
|
||||
// special case-- if the username == "default", then we have been asked
|
||||
// to create a new entry pair in /etc/{passwd,group} for which the /etc/sub{uid,gid}
|
||||
// ranges will be used for the user and group mappings in user namespaced containers
|
||||
_, _, err := idtools.AddNamespaceRangesUser(defaultRemappedID)
|
||||
if err == nil {
|
||||
return defaultRemappedID, defaultRemappedID, nil
|
||||
}
|
||||
return "", "", fmt.Errorf("Error during %q user creation: %v", defaultRemappedID, err)
|
||||
}
|
||||
userID = luser.Uid
|
||||
username = luser.Name
|
||||
if len(idparts) == 1 {
|
||||
// we only have a string username, and no group specified; look up gid from username as group
|
||||
group, err := user.LookupGroup(lookupName)
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("Error during gid lookup for %q: %v", lookupName, err)
|
||||
}
|
||||
groupID = group.Gid
|
||||
groupname = group.Name
|
||||
}
|
||||
}
|
||||
|
||||
if len(idparts) == 2 {
|
||||
// groupname or gid is separately specified and must be resolved
|
||||
// to an unsigned 32-bit gid
|
||||
if gid, err := strconv.ParseInt(idparts[1], 10, 32); err == nil {
|
||||
// must be a gid, take it as valid
|
||||
groupID = int(gid)
|
||||
lgrp, err := user.LookupGid(groupID)
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("Gid %d has no entry in /etc/passwd: %v", groupID, err)
|
||||
}
|
||||
groupname = lgrp.Name
|
||||
} else {
|
||||
// not a number; attempt a lookup
|
||||
group, err := user.LookupGroup(idparts[1])
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("Error during gid lookup for %q: %v", idparts[1], err)
|
||||
}
|
||||
groupID = group.Gid
|
||||
groupname = idparts[1]
|
||||
}
|
||||
}
|
||||
return username, groupname, nil
|
||||
}
|
||||
|
|
|
@ -79,6 +79,7 @@ func (config *Config) InstallFlags(cmd *flag.FlagSet, usageFn func(string) strin
|
|||
cmd.BoolVar(&config.EnableCors, []string{"#api-enable-cors", "#-api-enable-cors"}, false, usageFn("Enable CORS headers in the remote API, this is deprecated by --api-cors-header"))
|
||||
cmd.StringVar(&config.CorsHeaders, []string{"-api-cors-header"}, "", usageFn("Set CORS headers in the remote API"))
|
||||
cmd.StringVar(&config.CgroupParent, []string{"-cgroup-parent"}, "", usageFn("Set parent cgroup for all containers"))
|
||||
cmd.StringVar(&config.RemappedRoot, []string{"-userns-remap"}, "", usageFn("User/Group setting for user namespaces"))
|
||||
|
||||
config.attachExperimentalFlags(cmd, usageFn)
|
||||
}
|
||||
|
|
|
@ -2,88 +2,8 @@
|
|||
|
||||
package daemon
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
"github.com/docker/docker/pkg/idtools"
|
||||
"github.com/docker/engine-api/types/container"
|
||||
)
|
||||
|
||||
func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) {
|
||||
if runtime.GOOS != "linux" && config.RemappedRoot != "" {
|
||||
return nil, nil, fmt.Errorf("User namespaces are only supported on Linux")
|
||||
}
|
||||
|
||||
// if the daemon was started with remapped root option, parse
|
||||
// the config option to the int uid,gid values
|
||||
var (
|
||||
uidMaps, gidMaps []idtools.IDMap
|
||||
)
|
||||
if config.RemappedRoot != "" {
|
||||
username, groupname, err := parseRemappedRoot(config.RemappedRoot)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
if username == "root" {
|
||||
// Cannot setup user namespaces with a 1-to-1 mapping; "--userns-remap=0:0" is a no-op
|
||||
// effectively
|
||||
logrus.Warnf("User namespaces: root cannot be remapped with itself; user namespaces are OFF")
|
||||
return uidMaps, gidMaps, nil
|
||||
}
|
||||
logrus.Infof("User namespaces: ID ranges will be mapped to subuid/subgid ranges of: %s:%s", username, groupname)
|
||||
// update remapped root setting now that we have resolved them to actual names
|
||||
config.RemappedRoot = fmt.Sprintf("%s:%s", username, groupname)
|
||||
|
||||
uidMaps, gidMaps, err = idtools.CreateIDMappings(username, groupname)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("Can't create ID mappings: %v", err)
|
||||
}
|
||||
}
|
||||
return uidMaps, gidMaps, nil
|
||||
}
|
||||
|
||||
func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error {
|
||||
config.Root = rootDir
|
||||
// the docker root metadata directory needs to have execute permissions for all users (o+x)
|
||||
// so that syscalls executing as non-root, operating on subdirectories of the graph root
|
||||
// (e.g. mounted layers of a container) can traverse this path.
|
||||
// The user namespace support will create subdirectories for the remapped root host uid:gid
|
||||
// pair owned by that same uid:gid pair for proper write access to those needed metadata and
|
||||
// layer content subtrees.
|
||||
if _, err := os.Stat(rootDir); err == nil {
|
||||
// root currently exists; verify the access bits are correct by setting them
|
||||
if err = os.Chmod(rootDir, 0701); err != nil {
|
||||
return err
|
||||
}
|
||||
} else if os.IsNotExist(err) {
|
||||
// no root exists yet, create it 0701 with root:root ownership
|
||||
if err := os.MkdirAll(rootDir, 0701); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// if user namespaces are enabled we will create a subtree underneath the specified root
|
||||
// with any/all specified remapped root uid/gid options on the daemon creating
|
||||
// a new subdirectory with ownership set to the remapped uid/gid (so as to allow
|
||||
// `chdir()` to work for containers namespaced to that uid/gid)
|
||||
if config.RemappedRoot != "" {
|
||||
config.Root = filepath.Join(rootDir, fmt.Sprintf("%d.%d", rootUID, rootGID))
|
||||
logrus.Debugf("Creating user namespaced daemon root: %s", config.Root)
|
||||
// Create the root directory if it doesn't exist
|
||||
if err := idtools.MkdirAllAs(config.Root, 0700, rootUID, rootGID); err != nil {
|
||||
return fmt.Errorf("Cannot create daemon root: %s: %v", config.Root, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
import "github.com/docker/engine-api/types/container"
|
||||
|
||||
func (daemon *Daemon) verifyExperimentalContainerSettings(hostConfig *container.HostConfig, config *container.Config) ([]string, error) {
|
||||
if hostConfig.Privileged && daemon.configStore.RemappedRoot != "" {
|
||||
return nil, fmt.Errorf("Privileged mode is incompatible with user namespace mappings")
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
|
|
@ -2,26 +2,7 @@
|
|||
|
||||
package daemon
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"github.com/docker/docker/pkg/idtools"
|
||||
"github.com/docker/docker/pkg/system"
|
||||
"github.com/docker/engine-api/types/container"
|
||||
)
|
||||
|
||||
func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) {
|
||||
return nil, nil, nil
|
||||
}
|
||||
|
||||
func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error {
|
||||
config.Root = rootDir
|
||||
// Create the root directory if it doesn't exist
|
||||
if err := system.MkdirAll(config.Root, 0700); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
import "github.com/docker/engine-api/types/container"
|
||||
|
||||
func (daemon *Daemon) verifyExperimentalContainerSettings(hostConfig *container.HostConfig, config *container.Config) ([]string, error) {
|
||||
return nil, nil
|
||||
|
|
|
@ -7,6 +7,7 @@ import (
|
|||
"net"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
@ -33,6 +34,7 @@ import (
|
|||
"github.com/docker/libnetwork/types"
|
||||
blkiodev "github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/label"
|
||||
"github.com/opencontainers/runc/libcontainer/user"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -42,6 +44,9 @@ const (
|
|||
platformSupported = true
|
||||
// It's not kernel limit, we want this 4M limit to supply a reasonable functional container
|
||||
linuxMinMemory = 4194304
|
||||
// constants for remapped root settings
|
||||
defaultIDSpecifier string = "default"
|
||||
defaultRemappedID string = "dockremap"
|
||||
)
|
||||
|
||||
func getBlkioWeightDevices(config *containertypes.HostConfig) ([]*blkiodev.WeightDevice, error) {
|
||||
|
@ -375,6 +380,24 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.
|
|||
warnings = append(warnings, "IPv4 forwarding is disabled. Networking will not work.")
|
||||
logrus.Warnf("IPv4 forwarding is disabled. Networking will not work")
|
||||
}
|
||||
// check for various conflicting options with user namespaces
|
||||
if daemon.configStore.RemappedRoot != "" {
|
||||
if hostConfig.Privileged {
|
||||
return warnings, fmt.Errorf("Privileged mode is incompatible with user namespaces.")
|
||||
}
|
||||
if hostConfig.NetworkMode.IsHost() || hostConfig.NetworkMode.IsContainer() {
|
||||
return warnings, fmt.Errorf("Cannot share the host or a container's network namespace when user namespaces are enabled.")
|
||||
}
|
||||
if hostConfig.PidMode.IsHost() {
|
||||
return warnings, fmt.Errorf("Cannot share the host PID namespace when user namespaces are enabled.")
|
||||
}
|
||||
if hostConfig.IpcMode.IsContainer() {
|
||||
return warnings, fmt.Errorf("Cannot share a container's IPC namespace when user namespaces are enabled.")
|
||||
}
|
||||
if hostConfig.ReadonlyRootfs {
|
||||
return warnings, fmt.Errorf("Cannot use the --read-only option when user namespaces are enabled.")
|
||||
}
|
||||
}
|
||||
return warnings, nil
|
||||
}
|
||||
|
||||
|
@ -674,6 +697,171 @@ func setupInitLayer(initLayer string, rootUID, rootGID int) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// Parse the remapped root (user namespace) option, which can be one of:
|
||||
// username - valid username from /etc/passwd
|
||||
// username:groupname - valid username; valid groupname from /etc/group
|
||||
// uid - 32-bit unsigned int valid Linux UID value
|
||||
// uid:gid - uid value; 32-bit unsigned int Linux GID value
|
||||
//
|
||||
// If no groupname is specified, and a username is specified, an attempt
|
||||
// will be made to lookup a gid for that username as a groupname
|
||||
//
|
||||
// If names are used, they are verified to exist in passwd/group
|
||||
func parseRemappedRoot(usergrp string) (string, string, error) {
|
||||
|
||||
var (
|
||||
userID, groupID int
|
||||
username, groupname string
|
||||
)
|
||||
|
||||
idparts := strings.Split(usergrp, ":")
|
||||
if len(idparts) > 2 {
|
||||
return "", "", fmt.Errorf("Invalid user/group specification in --userns-remap: %q", usergrp)
|
||||
}
|
||||
|
||||
if uid, err := strconv.ParseInt(idparts[0], 10, 32); err == nil {
|
||||
// must be a uid; take it as valid
|
||||
userID = int(uid)
|
||||
luser, err := user.LookupUid(userID)
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("Uid %d has no entry in /etc/passwd: %v", userID, err)
|
||||
}
|
||||
username = luser.Name
|
||||
if len(idparts) == 1 {
|
||||
// if the uid was numeric and no gid was specified, take the uid as the gid
|
||||
groupID = userID
|
||||
lgrp, err := user.LookupGid(groupID)
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err)
|
||||
}
|
||||
groupname = lgrp.Name
|
||||
}
|
||||
} else {
|
||||
lookupName := idparts[0]
|
||||
// special case: if the user specified "default", they want Docker to create or
|
||||
// use (after creation) the "dockremap" user/group for root remapping
|
||||
if lookupName == defaultIDSpecifier {
|
||||
lookupName = defaultRemappedID
|
||||
}
|
||||
luser, err := user.LookupUser(lookupName)
|
||||
if err != nil && idparts[0] != defaultIDSpecifier {
|
||||
// error if the name requested isn't the special "dockremap" ID
|
||||
return "", "", fmt.Errorf("Error during uid lookup for %q: %v", lookupName, err)
|
||||
} else if err != nil {
|
||||
// special case-- if the username == "default", then we have been asked
|
||||
// to create a new entry pair in /etc/{passwd,group} for which the /etc/sub{uid,gid}
|
||||
// ranges will be used for the user and group mappings in user namespaced containers
|
||||
_, _, err := idtools.AddNamespaceRangesUser(defaultRemappedID)
|
||||
if err == nil {
|
||||
return defaultRemappedID, defaultRemappedID, nil
|
||||
}
|
||||
return "", "", fmt.Errorf("Error during %q user creation: %v", defaultRemappedID, err)
|
||||
}
|
||||
userID = luser.Uid
|
||||
username = luser.Name
|
||||
if len(idparts) == 1 {
|
||||
// we only have a string username, and no group specified; look up gid from username as group
|
||||
group, err := user.LookupGroup(lookupName)
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("Error during gid lookup for %q: %v", lookupName, err)
|
||||
}
|
||||
groupID = group.Gid
|
||||
groupname = group.Name
|
||||
}
|
||||
}
|
||||
|
||||
if len(idparts) == 2 {
|
||||
// groupname or gid is separately specified and must be resolved
|
||||
// to an unsigned 32-bit gid
|
||||
if gid, err := strconv.ParseInt(idparts[1], 10, 32); err == nil {
|
||||
// must be a gid, take it as valid
|
||||
groupID = int(gid)
|
||||
lgrp, err := user.LookupGid(groupID)
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("Gid %d has no entry in /etc/passwd: %v", groupID, err)
|
||||
}
|
||||
groupname = lgrp.Name
|
||||
} else {
|
||||
// not a number; attempt a lookup
|
||||
group, err := user.LookupGroup(idparts[1])
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("Error during gid lookup for %q: %v", idparts[1], err)
|
||||
}
|
||||
groupID = group.Gid
|
||||
groupname = idparts[1]
|
||||
}
|
||||
}
|
||||
return username, groupname, nil
|
||||
}
|
||||
|
||||
func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) {
|
||||
if runtime.GOOS != "linux" && config.RemappedRoot != "" {
|
||||
return nil, nil, fmt.Errorf("User namespaces are only supported on Linux")
|
||||
}
|
||||
|
||||
// if the daemon was started with remapped root option, parse
|
||||
// the config option to the int uid,gid values
|
||||
var (
|
||||
uidMaps, gidMaps []idtools.IDMap
|
||||
)
|
||||
if config.RemappedRoot != "" {
|
||||
username, groupname, err := parseRemappedRoot(config.RemappedRoot)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
if username == "root" {
|
||||
// Cannot setup user namespaces with a 1-to-1 mapping; "--userns-remap=0:0" is a no-op
|
||||
// effectively
|
||||
logrus.Warnf("User namespaces: root cannot be remapped with itself; user namespaces are OFF")
|
||||
return uidMaps, gidMaps, nil
|
||||
}
|
||||
logrus.Infof("User namespaces: ID ranges will be mapped to subuid/subgid ranges of: %s:%s", username, groupname)
|
||||
// update remapped root setting now that we have resolved them to actual names
|
||||
config.RemappedRoot = fmt.Sprintf("%s:%s", username, groupname)
|
||||
|
||||
uidMaps, gidMaps, err = idtools.CreateIDMappings(username, groupname)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("Can't create ID mappings: %v", err)
|
||||
}
|
||||
}
|
||||
return uidMaps, gidMaps, nil
|
||||
}
|
||||
|
||||
func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error {
|
||||
config.Root = rootDir
|
||||
// the docker root metadata directory needs to have execute permissions for all users (o+x)
|
||||
// so that syscalls executing as non-root, operating on subdirectories of the graph root
|
||||
// (e.g. mounted layers of a container) can traverse this path.
|
||||
// The user namespace support will create subdirectories for the remapped root host uid:gid
|
||||
// pair owned by that same uid:gid pair for proper write access to those needed metadata and
|
||||
// layer content subtrees.
|
||||
if _, err := os.Stat(rootDir); err == nil {
|
||||
// root currently exists; verify the access bits are correct by setting them
|
||||
if err = os.Chmod(rootDir, 0701); err != nil {
|
||||
return err
|
||||
}
|
||||
} else if os.IsNotExist(err) {
|
||||
// no root exists yet, create it 0701 with root:root ownership
|
||||
if err := os.MkdirAll(rootDir, 0701); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// if user namespaces are enabled we will create a subtree underneath the specified root
|
||||
// with any/all specified remapped root uid/gid options on the daemon creating
|
||||
// a new subdirectory with ownership set to the remapped uid/gid (so as to allow
|
||||
// `chdir()` to work for containers namespaced to that uid/gid)
|
||||
if config.RemappedRoot != "" {
|
||||
config.Root = filepath.Join(rootDir, fmt.Sprintf("%d.%d", rootUID, rootGID))
|
||||
logrus.Debugf("Creating user namespaced daemon root: %s", config.Root)
|
||||
// Create the root directory if it doesn't exist
|
||||
if err := idtools.MkdirAllAs(config.Root, 0700, rootUID, rootGID); err != nil {
|
||||
return fmt.Errorf("Cannot create daemon root: %s: %v", config.Root, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// registerLinks writes the links to a file.
|
||||
func (daemon *Daemon) registerLinks(container *container.Container, hostConfig *containertypes.HostConfig) error {
|
||||
if hostConfig == nil {
|
||||
|
|
|
@ -4,6 +4,7 @@ import (
|
|||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
|
@ -18,6 +19,7 @@ import (
|
|||
containertypes "github.com/docker/engine-api/types/container"
|
||||
// register the windows graph driver
|
||||
"github.com/docker/docker/daemon/graphdriver/windows"
|
||||
"github.com/docker/docker/pkg/idtools"
|
||||
"github.com/docker/docker/pkg/system"
|
||||
"github.com/docker/libnetwork"
|
||||
blkiodev "github.com/opencontainers/runc/libcontainer/configs"
|
||||
|
@ -135,6 +137,19 @@ func (daemon *Daemon) cleanupMounts() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) {
|
||||
return nil, nil, nil
|
||||
}
|
||||
|
||||
func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error {
|
||||
config.Root = rootDir
|
||||
// Create the root directory if it doesn't exist
|
||||
if err := system.MkdirAll(config.Root, 0700); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// conditionalMountOnStart is a platform specific helper function during the
|
||||
// container start to call mount.
|
||||
func (daemon *Daemon) conditionalMountOnStart(container *container.Container) error {
|
||||
|
|
|
@ -62,6 +62,7 @@ weight = -1
|
|||
--tlscert="~/.docker/cert.pem" Path to TLS certificate file
|
||||
--tlskey="~/.docker/key.pem" Path to TLS key file
|
||||
--tlsverify Use TLS and verify the remote
|
||||
--userns-remap="default" Enable user namespace remapping
|
||||
--userland-proxy=true Use userland proxy for loopback traffic
|
||||
|
||||
Options with [] may be specified multiple times.
|
||||
|
@ -632,6 +633,133 @@ For information about how to create an authorization plugin, see [authorization
|
|||
plugin](../../extend/authorization.md) section in the Docker extend section of this documentation.
|
||||
|
||||
|
||||
## Daemon user namespace options
|
||||
|
||||
The Linux kernel [user namespace support](http://man7.org/linux/man-pages/man7/user_namespaces.7.html) provides additional security by enabling
|
||||
a process, and therefore a container, to have a unique range of user and
|
||||
group IDs which are outside the traditional user and group range utilized by
|
||||
the host system. Potentially the most important security improvement is that,
|
||||
by default, container processes running as the `root` user will have expected
|
||||
administrative privilege (with some restrictions) inside the container but will
|
||||
effectively be mapped to an unprivileged `uid` on the host.
|
||||
|
||||
When user namespace support is enabled, Docker creates a single daemon-wide mapping
|
||||
for all containers running on the same engine instance. The mappings will
|
||||
utilize the existing subordinate user and group ID feature available on all modern
|
||||
Linux distributions.
|
||||
The [`/etc/subuid`](http://man7.org/linux/man-pages/man5/subuid.5.html) and
|
||||
[`/etc/subgid`](http://man7.org/linux/man-pages/man5/subgid.5.html) files will be
|
||||
read for the user, and optional group, specified to the `--userns-remap`
|
||||
parameter. If you do not wish to specify your own user and/or group, you can
|
||||
provide `default` as the value to this flag, and a user will be created on your behalf
|
||||
and provided subordinate uid and gid ranges. This default user will be named
|
||||
`dockremap`, and entries will be created for it in `/etc/passwd` and
|
||||
`/etc/group` using your distro's standard user and group creation tools.
|
||||
|
||||
> **Note**: The single mapping per-daemon restriction is in place for now
|
||||
> because Docker shares image layers from its local cache across all
|
||||
> containers running on the engine instance. Since file ownership must be
|
||||
> the same for all containers sharing the same layer content, the decision
|
||||
> was made to map the file ownership on `docker pull` to the daemon's user and
|
||||
> group mappings so that there is no delay for running containers once the
|
||||
> content is downloaded. This design preserves the same performance for `docker
|
||||
> pull`, `docker push`, and container startup as users expect with
|
||||
> user namespaces disabled.
|
||||
|
||||
### Starting the daemon with user namespaces enabled
|
||||
|
||||
To enable user namespace support, start the daemon with the
|
||||
`--userns-remap` flag, which accepts values in the following formats:
|
||||
|
||||
- uid
|
||||
- uid:gid
|
||||
- username
|
||||
- username:groupname
|
||||
|
||||
If numeric IDs are provided, translation back to valid user or group names
|
||||
will occur so that the subordinate uid and gid information can be read, given
|
||||
these resources are name-based, not id-based. If the numeric ID information
|
||||
provided does not exist as entries in `/etc/passwd` or `/etc/group`, daemon
|
||||
startup will fail with an error message.
|
||||
|
||||
*Example: starting with default Docker user management:*
|
||||
|
||||
```
|
||||
$ docker daemon --userns-remap=default
|
||||
```
|
||||
When `default` is provided, Docker will create - or find the existing - user and group
|
||||
named `dockremap`. If the user is created, and the Linux distribution has
|
||||
appropriate support, the `/etc/subuid` and `/etc/subgid` files will be populated
|
||||
with a contiguous 65536 length range of subordinate user and group IDs, starting
|
||||
at an offset based on prior entries in those files. For example, Ubuntu will
|
||||
create the following range, based on an existing user named `user1` already owning
|
||||
the first 65536 range:
|
||||
|
||||
```
|
||||
$ cat /etc/subuid
|
||||
user1:100000:65536
|
||||
dockremap:165536:65536
|
||||
```
|
||||
|
||||
> **Note:** On a fresh Fedora install, we had to `touch` the
|
||||
> `/etc/subuid` and `/etc/subgid` files to have ranges assigned when users
|
||||
> were created. Once these files existed, range assignment on user creation
|
||||
> worked properly.
|
||||
|
||||
If you have a preferred/self-managed user with subordinate ID mappings already
|
||||
configured, you can provide that username or uid to the `--userns-remap` flag.
|
||||
If you have a group that doesn't match the username, you may provide the `gid`
|
||||
or group name as well; otherwise the username will be used as the group name
|
||||
when querying the system for the subordinate group ID range.
|
||||
|
||||
### Detailed information on `subuid`/`subgid` ranges
|
||||
|
||||
Given potential advanced use of the subordinate ID ranges by power users, the
|
||||
following paragraphs define how the Docker daemon currently uses the range entries
|
||||
found within the subordinate range files.
|
||||
|
||||
The simplest case is that only one contiguous range is defined for the
|
||||
provided user or group. In this case, Docker will use that entire contiguous
|
||||
range for the mapping of host uids and gids to the container process. This
|
||||
means that the first ID in the range will be the remapped root user, and the
|
||||
IDs above that initial ID will map to container IDs 1 through the end of the range.
|
||||
|
||||
From the example `/etc/subuid` content shown above, the remapped root
|
||||
user would be uid 165536.
|
||||
|
||||
If the system administrator has set up multiple ranges for a single user or
|
||||
group, the Docker daemon will read all the available ranges and use the
|
||||
following algorithm to create the mapping ranges:
|
||||
|
||||
1. The range segments found for the particular user will be sorted by *start ID* ascending.
|
||||
2. Map segments will be created from each range in increasing value with a length matching the length of each segment. Therefore the range segment with the lowest numeric starting value will begin at the remapped root (container ID 0) and continue up through the container uid/gid equal to the range segment length minus one. As an example, if the lowest segment starts at ID 1000 and has a length of 100, then a map of 1000 -> 0 (the remapped root) up through 1099 -> 99 will be created from this segment. If the next segment starts at ID 10000, then the next map will start with mapping 10000 -> 100 up to the length of this second segment. This will continue until no more segments are found in the subordinate files for this user.
|
||||
3. If more than five range segments exist for a single user, only the first five will be utilized, matching the kernel's limitation of only five entries in `/proc/self/uid_map` and `/proc/self/gid_map`.
|
||||
|
||||
### User namespace known restrictions
|
||||
|
||||
The following standard Docker features are currently incompatible when
|
||||
running a Docker daemon with user namespaces enabled:
|
||||
|
||||
- sharing PID or NET namespaces with the host (`--pid=host` or `--net=host`)
|
||||
- sharing a network namespace with an existing container (`--net=container:*other*`)
|
||||
- sharing an IPC namespace with an existing container (`--ipc=container:*other*`)
|
||||
- A `--read-only` container filesystem (this is a Linux kernel restriction against remounting with modified flags of a currently mounted filesystem when inside a user namespace)
|
||||
- external (volume or graph) drivers which are unaware/incapable of using daemon user mappings
|
||||
- Using `--privileged` mode flag on `docker run`
|
||||
|
||||
In general, user namespaces are an advanced feature and will require
|
||||
coordination with other capabilities. For example, if volumes are mounted from
|
||||
the host, file ownership will have to be pre-arranged if the user or
|
||||
administrator wishes the containers to have expected access to the volume
|
||||
contents.
|
||||
|
||||
Finally, while the `root` user inside a user namespaced container process has
|
||||
many of the expected admin privileges that go along with being the superuser, the
|
||||
Linux kernel has restrictions based on internal knowledge that this is a user namespaced
|
||||
process. The most notable restriction that we are aware of at this time is the
|
||||
inability to use `mknod`. Permission will be denied for device creation even as
|
||||
container `root` inside a user namespace.
|
||||
|
||||
## Miscellaneous options
|
||||
|
||||
IP masquerading uses address translation to allow containers without a public
|
||||
|
|
|
@ -72,7 +72,7 @@ to build a Docker binary with the experimental features enabled:
|
|||
## Current experimental features
|
||||
|
||||
* [External graphdriver plugins](plugins_graphdriver.md)
|
||||
* [User namespaces](userns.md)
|
||||
* The user namespaces feature has graduated from experimental.
|
||||
|
||||
## How to comment on an experimental feature
|
||||
|
||||
|
|
|
@ -1,119 +0,0 @@
|
|||
# Experimental: User namespace support
|
||||
|
||||
Linux kernel [user namespace support](http://man7.org/linux/man-pages/man7/user_namespaces.7.html) provides additional security by enabling
|
||||
a process--and therefore a container--to have a unique range of user and
|
||||
group IDs which are outside the traditional user and group range utilized by
|
||||
the host system. Potentially the most important security improvement is that,
|
||||
by default, container processes running as the `root` user will have expected
|
||||
administrative privilege (with some restrictions) inside the container but will
|
||||
effectively be mapped to an unprivileged `uid` on the host.
|
||||
|
||||
In this experimental phase, the Docker daemon creates a single daemon-wide mapping
|
||||
for all containers running on the same engine instance. The mappings will
|
||||
utilize the existing subordinate user and group ID feature available on all modern
|
||||
Linux distributions.
|
||||
The [`/etc/subuid`](http://man7.org/linux/man-pages/man5/subuid.5.html) and
|
||||
[`/etc/subgid`](http://man7.org/linux/man-pages/man5/subgid.5.html) files will be
|
||||
read for the user, and optional group, specified to the `--userns-remap`
|
||||
parameter. If you do not wish to specify your own user and/or group, you can
|
||||
provide `default` as the value to this flag, and a user will be created on your behalf
|
||||
and provided subordinate uid and gid ranges. This default user will be named
|
||||
`dockremap`, and entries will be created for it in `/etc/passwd` and
|
||||
`/etc/group` using your distro's standard user and group creation tools.
|
||||
|
||||
> **Note**: The single mapping per-daemon restriction exists for this experimental
|
||||
> phase because Docker shares image layers from its local cache across all
|
||||
> containers running on the engine instance. Since file ownership must be
|
||||
> the same for all containers sharing the same layer content, the decision
|
||||
> was made to map the file ownership on `docker pull` to the daemon's user and
|
||||
> group mappings so that there is no delay for running containers once the
|
||||
> content is downloaded--exactly the same performance characteristics as with
|
||||
> user namespaces disabled.
|
||||
|
||||
## Starting the daemon with user namespaces enabled
|
||||
To enable this experimental user namespace support for a Docker daemon instance,
|
||||
start the daemon with the aforementioned `--userns-remap` flag, which accepts
|
||||
values in the following formats:
|
||||
|
||||
- uid
|
||||
- uid:gid
|
||||
- username
|
||||
- username:groupname
|
||||
|
||||
If numeric IDs are provided, translation back to valid user or group names
|
||||
will occur so that the subordinate uid and gid information can be read, given
|
||||
these resources are name-based, not id-based. If the numeric ID information
|
||||
provided does not exist as entries in `/etc/passwd` or `/etc/group`, daemon
|
||||
startup will fail with an error message.
|
||||
|
||||
*An example: starting with default Docker user management:*
|
||||
|
||||
```
|
||||
$ docker daemon --userns-remap=default
|
||||
```
|
||||
In this case, Docker will create--or find the existing--user and group
|
||||
named `dockremap`. If the user is created, and the Linux distribution has
|
||||
appropriate support, the `/etc/subuid` and `/etc/subgid` files will be populated
|
||||
with a contiguous 65536 length range of subordinate user and group IDs, starting
|
||||
at an offset based on prior entries in those files. For example, Ubuntu will
|
||||
create the following range, based on an existing user already having the first
|
||||
65536 range:
|
||||
|
||||
```
|
||||
$ cat /etc/subuid
|
||||
user1:100000:65536
|
||||
dockremap:165536:65536
|
||||
```
|
||||
|
||||
> **Note:** On a fresh Fedora install, we found that we had to `touch` the
|
||||
> `/etc/subuid` and `/etc/subgid` files to have ranges assigned when users
|
||||
> were created. Once these files existed, range assignment on user creation
|
||||
> worked properly.
|
||||
|
||||
If you have a preferred/self-managed user with subordinate ID mappings already
|
||||
configured, you can provide that username or uid to the `--userns-remap` flag.
|
||||
If you have a group that doesn't match the username, you may provide the `gid`
|
||||
or group name as well; otherwise the username will be used as the group name
|
||||
when querying the system for the subordinate group ID range.
|
||||
|
||||
## Detailed information on `subuid`/`subgid` ranges
|
||||
|
||||
Given there may be advanced use of the subordinate ID ranges by power users, we will
|
||||
describe how the Docker daemon uses the range entries within these files under the
|
||||
current experimental user namespace support.
|
||||
|
||||
The simplest case exists where only one contiguous range is defined for the
|
||||
provided user or group. In this case, Docker will use that entire contiguous
|
||||
range for the mapping of host uids and gids to the container process. This
|
||||
means that the first ID in the range will be the remapped root user, and the
|
||||
IDs above that initial ID will map host ID 1 through the end of the range.
|
||||
|
||||
From the example `/etc/subuid` content shown above, that means the remapped root
|
||||
user would be uid 165536.
|
||||
|
||||
If the system administrator has set up multiple ranges for a single user or
|
||||
group, the Docker daemon will read all the available ranges and use the
|
||||
following algorithm to create the mapping ranges:
|
||||
|
||||
1. The ranges will be sorted by *start ID* ascending
|
||||
2. Maps will be created from each range, where the host ID will increment starting at 0 for the first range, 0+*range1* length for the second, and so on. This means that the lowest range start ID will be the remapped root, and all further ranges will map IDs from 1 through the uid or gid that equals the sum of all range lengths.
|
||||
3. Range segments above five will be ignored as the kernel ignores any ID maps after five (in `/proc/self/{u,g}id_map`)
|
||||
|
||||
## User namespace known restrictions
|
||||
|
||||
The following standard Docker features are currently incompatible when
|
||||
running a Docker daemon with experimental user namespaces enabled:
|
||||
|
||||
- sharing namespaces with the host (--pid=host, --net=host, etc.)
|
||||
- sharing namespaces with other containers (--net=container:*other*)
|
||||
- A `--read-only` container filesystem (a Linux kernel restriction on remount with new flags of a currently mounted filesystem when inside a user namespace)
|
||||
- external (volume/graph) drivers which are unaware/incapable of using daemon user mappings
|
||||
- Using `--privileged` mode containers
|
||||
- volume use without pre-arranging proper file ownership in mounted volumes
|
||||
|
||||
Additionally, while the `root` user inside a user namespaced container
|
||||
process has many of the privileges of the administrative root user, the
|
||||
following operations will fail:
|
||||
|
||||
- Use of `mknod` - permission is denied for device creation by the container root
|
||||
- others will be listed here when fully tested
|
|
@ -99,7 +99,7 @@ if [ ! "$GOPATH" ]; then
|
|||
exit 1
|
||||
fi
|
||||
|
||||
if [ "$DOCKER_EXPERIMENTAL" ] || [ "$DOCKER_REMAP_ROOT" ]; then
|
||||
if [ "$DOCKER_EXPERIMENTAL" ]; then
|
||||
echo >&2 '# WARNING! DOCKER_EXPERIMENTAL is set: building experimental features'
|
||||
echo >&2
|
||||
DOCKER_BUILDTAGS+=" experimental pkcs11"
|
||||
|
|
|
@ -652,10 +652,14 @@ func (s *DockerSuite) TestContainerApiCreateWithDomainName(c *check.C) {
|
|||
c.Assert(containerJSON.Config.Domainname, checker.Equals, domainName, check.Commentf("Mismatched Domainname"))
|
||||
}
|
||||
|
||||
func (s *DockerSuite) TestContainerApiCreateNetworkMode(c *check.C) {
|
||||
func (s *DockerSuite) TestContainerApiCreateBridgeNetworkMode(c *check.C) {
|
||||
testRequires(c, DaemonIsLinux)
|
||||
UtilCreateNetworkMode(c, "host")
|
||||
UtilCreateNetworkMode(c, "bridge")
|
||||
}
|
||||
|
||||
func (s *DockerSuite) TestContainerApiCreateOtherNetworkModes(c *check.C) {
|
||||
testRequires(c, DaemonIsLinux, NotUserNamespace)
|
||||
UtilCreateNetworkMode(c, "host")
|
||||
UtilCreateNetworkMode(c, "container:web1")
|
||||
}
|
||||
|
||||
|
|
|
@ -53,6 +53,7 @@ docker-daemon - Enable daemon mode
|
|||
[**--tlskey**[=*~/.docker/key.pem*]]
|
||||
[**--tlsverify**]
|
||||
[**--userland-proxy**[=*true*]]
|
||||
[**--userns-remap**[=*default*]]
|
||||
|
||||
# DESCRIPTION
|
||||
**docker** has two distinct functions. It is used for starting the Docker
|
||||
|
@ -223,6 +224,9 @@ unix://[/path/to/socket] to use.
|
|||
**--userland-proxy**=*true*|*false*
|
||||
Rely on a userland proxy implementation for inter-container and outside-to-container loopback communications. Default is true.
|
||||
|
||||
**--userns-remap**=*default*|*uid:gid*|*user:group*|*user*|*uid*
|
||||
Enable user namespaces for containers on the daemon. Specifying "default" will cause a new user and group to be created to handle UID and GID range remapping for the user namespace mappings used for contained processes. Specifying a user (or uid) and optionally a group (or gid) will cause the daemon to look up the user and group's subordinate ID ranges for use as the user namespace mappings for contained processes.
|
||||
|
||||
# STORAGE DRIVER OPTIONS
|
||||
|
||||
Docker uses storage backends (known as "graphdrivers" in the Docker
|
||||
|
|
Loading…
Reference in a new issue