diff --git a/pkg/cgroups/apply_raw.go b/pkg/cgroups/apply_raw.go index 47a2a002b8..5fe317937a 100644 --- a/pkg/cgroups/apply_raw.go +++ b/pkg/cgroups/apply_raw.go @@ -49,6 +49,9 @@ func rawApply(c *Cgroup, pid int) (ActiveCgroup, error) { if err := raw.setupCpu(c, pid); err != nil { return nil, err } + if err := raw.setupCpuset(c, pid); err != nil { + return nil, err + } return raw, nil } @@ -170,6 +173,25 @@ func (raw *rawCgroup) setupCpu(c *Cgroup, pid int) (err error) { return nil } +func (raw *rawCgroup) setupCpuset(c *Cgroup, pid int) (err error) { + if c.CpusetCpus != "" { + dir, err := raw.join("cpuset", pid) + if err != nil { + return err + } + defer func() { + if err != nil { + os.RemoveAll(dir) + } + }() + + if err := writeFile(dir, "cpuset.cpus", c.CpusetCpus); err != nil { + return err + } + } + return nil +} + func (raw *rawCgroup) Cleanup() error { get := func(subsystem string) string { path, _ := raw.path(subsystem) @@ -180,6 +202,7 @@ func (raw *rawCgroup) Cleanup() error { get("memory"), get("devices"), get("cpu"), + get("cpuset"), } { if path != "" { os.RemoveAll(path) diff --git a/pkg/cgroups/cgroups.go b/pkg/cgroups/cgroups.go index 8554ba9376..5fe10346df 100644 --- a/pkg/cgroups/cgroups.go +++ b/pkg/cgroups/cgroups.go @@ -15,10 +15,11 @@ type Cgroup struct { Name string `json:"name,omitempty"` Parent string `json:"parent,omitempty"` - DeviceAccess bool `json:"device_access,omitempty"` // name of parent cgroup or slice - Memory int64 `json:"memory,omitempty"` // Memory limit (in bytes) - MemorySwap int64 `json:"memory_swap,omitempty"` // Total memory usage (memory + swap); set `-1' to disable swap - CpuShares int64 `json:"cpu_shares,omitempty"` // CPU shares (relative weight vs. other containers) + DeviceAccess bool `json:"device_access,omitempty"` // name of parent cgroup or slice + Memory int64 `json:"memory,omitempty"` // Memory limit (in bytes) + MemorySwap int64 `json:"memory_swap,omitempty"` // Total memory usage (memory + swap); set `-1' to disable swap + CpuShares int64 `json:"cpu_shares,omitempty"` // CPU shares (relative weight vs. other containers) + CpusetCpus string `json:"cpuset_cpus,omitempty"` // CPU to use UnitProperties [][2]string `json:"unit_properties,omitempty"` // systemd unit properties } diff --git a/pkg/libcontainer/capabilities/capabilities.go b/pkg/libcontainer/capabilities/capabilities.go index fbf73538e0..4b81e708c7 100644 --- a/pkg/libcontainer/capabilities/capabilities.go +++ b/pkg/libcontainer/capabilities/capabilities.go @@ -27,7 +27,9 @@ func DropCapabilities(container *libcontainer.Container) error { func getCapabilitiesMask(container *libcontainer.Container) []capability.Cap { drop := []capability.Cap{} for _, c := range container.CapabilitiesMask { - drop = append(drop, c.Value) + if !c.Enabled { + drop = append(drop, c.Value) + } } return drop } diff --git a/pkg/libcontainer/network/netns.go b/pkg/libcontainer/network/netns.go new file mode 100644 index 0000000000..7e311f22d8 --- /dev/null +++ b/pkg/libcontainer/network/netns.go @@ -0,0 +1,34 @@ +package network + +import ( + "fmt" + "os" + "syscall" + + "github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/pkg/system" +) + +// crosbymichael: could make a network strategy that instead of returning veth pair names it returns a pid to an existing network namespace +type NetNS struct { +} + +func (v *NetNS) Create(n *libcontainer.Network, nspid int, context libcontainer.Context) error { + context["nspath"] = n.Context["nspath"] + return nil +} + +func (v *NetNS) Initialize(config *libcontainer.Network, context libcontainer.Context) error { + nspath, exists := context["nspath"] + if !exists { + return fmt.Errorf("nspath does not exist in network context") + } + f, err := os.OpenFile(nspath, os.O_RDONLY, 0) + if err != nil { + return fmt.Errorf("failed get network namespace fd: %v", err) + } + if err := system.Setns(f.Fd(), syscall.CLONE_NEWNET); err != nil { + return fmt.Errorf("failed to setns current network namespace: %v", err) + } + return nil +} diff --git a/pkg/libcontainer/network/strategy.go b/pkg/libcontainer/network/strategy.go index 693790d280..e41ecc3ea6 100644 --- a/pkg/libcontainer/network/strategy.go +++ b/pkg/libcontainer/network/strategy.go @@ -2,6 +2,7 @@ package network import ( "errors" + "github.com/dotcloud/docker/pkg/libcontainer" ) @@ -12,6 +13,7 @@ var ( var strategies = map[string]NetworkStrategy{ "veth": &Veth{}, "loopback": &Loopback{}, + "netns": &NetNS{}, } // NetworkStrategy represents a specific network configuration for diff --git a/pkg/libcontainer/nsinit/command.go b/pkg/libcontainer/nsinit/command.go index 5546065b6d..153a48ab59 100644 --- a/pkg/libcontainer/nsinit/command.go +++ b/pkg/libcontainer/nsinit/command.go @@ -39,7 +39,9 @@ func (c *DefaultCommandFactory) Create(container *libcontainer.Container, consol // flags on clone, unshare, and setns func GetNamespaceFlags(namespaces libcontainer.Namespaces) (flag int) { for _, ns := range namespaces { - flag |= ns.Value + if ns.Enabled { + flag |= ns.Value + } } return flag } diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index a44faafe0e..73842f729f 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -3,13 +3,14 @@ package nsinit import ( + "os" + "os/exec" + "syscall" + "github.com/dotcloud/docker/pkg/cgroups" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/network" "github.com/dotcloud/docker/pkg/system" - "os" - "os/exec" - "syscall" ) // Exec performes setup outside of a namespace so that a container can be diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index 5aa5f9f5b5..85182326ee 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -4,6 +4,10 @@ package nsinit import ( "fmt" + "os" + "runtime" + "syscall" + "github.com/dotcloud/docker/pkg/label" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/apparmor" @@ -12,9 +16,6 @@ import ( "github.com/dotcloud/docker/pkg/libcontainer/utils" "github.com/dotcloud/docker/pkg/system" "github.com/dotcloud/docker/pkg/user" - "os" - "runtime" - "syscall" ) // Init is the init process that first runs inside a new namespace to setup mounts, users, networking, @@ -58,13 +59,13 @@ func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consol if err := system.ParentDeathSignal(uintptr(syscall.SIGTERM)); err != nil { return fmt.Errorf("parent death signal %s", err) } + if err := setupNetwork(container, context); err != nil { + return fmt.Errorf("setup networking %s", err) + } ns.logger.Println("setup mount namespace") if err := setupNewMountNamespace(rootfs, container.Mounts, console, container.ReadonlyFs, container.NoPivotRoot, container.Context["mount_label"]); err != nil { return fmt.Errorf("setup mount namespace %s", err) } - if err := setupNetwork(container, context); err != nil { - return fmt.Errorf("setup networking %s", err) - } if err := system.Sethostname(container.Hostname); err != nil { return fmt.Errorf("sethostname %s", err) } diff --git a/pkg/libcontainer/nsinit/mount.go b/pkg/libcontainer/nsinit/mount.go index 796143c68e..a10f2b540e 100644 --- a/pkg/libcontainer/nsinit/mount.go +++ b/pkg/libcontainer/nsinit/mount.go @@ -32,11 +32,6 @@ func setupNewMountNamespace(rootfs string, bindMounts []libcontainer.Mount, cons if err := system.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil { return fmt.Errorf("mouting %s as bind %s", rootfs, err) } - if readonly { - if err := system.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, ""); err != nil { - return fmt.Errorf("mounting %s as readonly %s", rootfs, err) - } - } if err := mountSystem(rootfs, mountLabel); err != nil { return fmt.Errorf("mount system %s", err) } @@ -82,6 +77,12 @@ func setupNewMountNamespace(rootfs string, bindMounts []libcontainer.Mount, cons } } + if readonly { + if err := system.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, ""); err != nil { + return fmt.Errorf("mounting %s as readonly %s", rootfs, err) + } + } + system.Umask(0022) return nil diff --git a/pkg/libcontainer/nsinit/nsinit/main.go b/pkg/libcontainer/nsinit/nsinit/main.go index df32d0b49e..37aa784981 100644 --- a/pkg/libcontainer/nsinit/nsinit/main.go +++ b/pkg/libcontainer/nsinit/nsinit/main.go @@ -3,14 +3,15 @@ package main import ( "encoding/json" "flag" - "github.com/dotcloud/docker/pkg/libcontainer" - "github.com/dotcloud/docker/pkg/libcontainer/nsinit" "io" "io/ioutil" "log" "os" "path/filepath" "strconv" + + "github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/pkg/libcontainer/nsinit" ) var ( diff --git a/pkg/libcontainer/types.go b/pkg/libcontainer/types.go index 94fe876554..ffeb55a022 100644 --- a/pkg/libcontainer/types.go +++ b/pkg/libcontainer/types.go @@ -1,7 +1,6 @@ package libcontainer import ( - "encoding/json" "errors" "github.com/syndtr/gocapability/capability" ) @@ -19,29 +18,30 @@ var ( namespaceList = Namespaces{} capabilityList = Capabilities{ - {Key: "SETPCAP", Value: capability.CAP_SETPCAP}, - {Key: "SYS_MODULE", Value: capability.CAP_SYS_MODULE}, - {Key: "SYS_RAWIO", Value: capability.CAP_SYS_RAWIO}, - {Key: "SYS_PACCT", Value: capability.CAP_SYS_PACCT}, - {Key: "SYS_ADMIN", Value: capability.CAP_SYS_ADMIN}, - {Key: "SYS_NICE", Value: capability.CAP_SYS_NICE}, - {Key: "SYS_RESOURCE", Value: capability.CAP_SYS_RESOURCE}, - {Key: "SYS_TIME", Value: capability.CAP_SYS_TIME}, - {Key: "SYS_TTY_CONFIG", Value: capability.CAP_SYS_TTY_CONFIG}, - {Key: "MKNOD", Value: capability.CAP_MKNOD}, - {Key: "AUDIT_WRITE", Value: capability.CAP_AUDIT_WRITE}, - {Key: "AUDIT_CONTROL", Value: capability.CAP_AUDIT_CONTROL}, - {Key: "MAC_OVERRIDE", Value: capability.CAP_MAC_OVERRIDE}, - {Key: "MAC_ADMIN", Value: capability.CAP_MAC_ADMIN}, - {Key: "NET_ADMIN", Value: capability.CAP_NET_ADMIN}, + {Key: "SETPCAP", Value: capability.CAP_SETPCAP, Enabled: false}, + {Key: "SYS_MODULE", Value: capability.CAP_SYS_MODULE, Enabled: false}, + {Key: "SYS_RAWIO", Value: capability.CAP_SYS_RAWIO, Enabled: false}, + {Key: "SYS_PACCT", Value: capability.CAP_SYS_PACCT, Enabled: false}, + {Key: "SYS_ADMIN", Value: capability.CAP_SYS_ADMIN, Enabled: false}, + {Key: "SYS_NICE", Value: capability.CAP_SYS_NICE, Enabled: false}, + {Key: "SYS_RESOURCE", Value: capability.CAP_SYS_RESOURCE, Enabled: false}, + {Key: "SYS_TIME", Value: capability.CAP_SYS_TIME, Enabled: false}, + {Key: "SYS_TTY_CONFIG", Value: capability.CAP_SYS_TTY_CONFIG, Enabled: false}, + {Key: "MKNOD", Value: capability.CAP_MKNOD, Enabled: false}, + {Key: "AUDIT_WRITE", Value: capability.CAP_AUDIT_WRITE, Enabled: false}, + {Key: "AUDIT_CONTROL", Value: capability.CAP_AUDIT_CONTROL, Enabled: false}, + {Key: "MAC_OVERRIDE", Value: capability.CAP_MAC_OVERRIDE, Enabled: false}, + {Key: "MAC_ADMIN", Value: capability.CAP_MAC_ADMIN, Enabled: false}, + {Key: "NET_ADMIN", Value: capability.CAP_NET_ADMIN, Enabled: false}, } ) type ( Namespace struct { - Key string - Value int - File string + Key string `json:"key,omitempty"` + Enabled bool `json:"enabled,omitempty"` + Value int `json:"value,omitempty"` + File string `json:"file,omitempty"` } Namespaces []*Namespace ) @@ -50,27 +50,11 @@ func (ns *Namespace) String() string { return ns.Key } -func (ns *Namespace) MarshalJSON() ([]byte, error) { - return json.Marshal(ns.Key) -} - -func (ns *Namespace) UnmarshalJSON(src []byte) error { - var nsName string - if err := json.Unmarshal(src, &nsName); err != nil { - return err - } - ret := GetNamespace(nsName) - if ret == nil { - return ErrUnkownNamespace - } - *ns = *ret - return nil -} - func GetNamespace(key string) *Namespace { for _, ns := range namespaceList { if ns.Key == key { - return ns + cpy := *ns + return &cpy } } return nil @@ -79,18 +63,23 @@ func GetNamespace(key string) *Namespace { // Contains returns true if the specified Namespace is // in the slice func (n Namespaces) Contains(ns string) bool { + return n.Get(ns) != nil +} + +func (n Namespaces) Get(ns string) *Namespace { for _, nsp := range n { if nsp.Key == ns { - return true + return nsp } } - return false + return nil } type ( Capability struct { - Key string - Value capability.Cap + Key string `json:"key,omitempty"` + Enabled bool `json:"enabled"` + Value capability.Cap `json:"value,omitempty"` } Capabilities []*Capability ) @@ -99,27 +88,11 @@ func (c *Capability) String() string { return c.Key } -func (c *Capability) MarshalJSON() ([]byte, error) { - return json.Marshal(c.Key) -} - -func (c *Capability) UnmarshalJSON(src []byte) error { - var capName string - if err := json.Unmarshal(src, &capName); err != nil { - return err - } - ret := GetCapability(capName) - if ret == nil { - return ErrUnkownCapability - } - *c = *ret - return nil -} - func GetCapability(key string) *Capability { for _, capp := range capabilityList { if capp.Key == key { - return capp + cpy := *capp + return &cpy } } return nil @@ -128,10 +101,14 @@ func GetCapability(key string) *Capability { // Contains returns true if the specified Capability is // in the slice func (c Capabilities) Contains(capp string) bool { + return c.Get(capp) != nil +} + +func (c Capabilities) Get(capp string) *Capability { for _, cap := range c { if cap.Key == capp { - return true + return cap } } - return false + return nil } diff --git a/pkg/libcontainer/types_linux.go b/pkg/libcontainer/types_linux.go index c14531df20..1f937e0c97 100644 --- a/pkg/libcontainer/types_linux.go +++ b/pkg/libcontainer/types_linux.go @@ -6,11 +6,11 @@ import ( func init() { namespaceList = Namespaces{ - {Key: "NEWNS", Value: syscall.CLONE_NEWNS, File: "mnt"}, - {Key: "NEWUTS", Value: syscall.CLONE_NEWUTS, File: "uts"}, - {Key: "NEWIPC", Value: syscall.CLONE_NEWIPC, File: "ipc"}, - {Key: "NEWUSER", Value: syscall.CLONE_NEWUSER, File: "user"}, - {Key: "NEWPID", Value: syscall.CLONE_NEWPID, File: "pid"}, - {Key: "NEWNET", Value: syscall.CLONE_NEWNET, File: "net"}, + {Key: "NEWNS", Value: syscall.CLONE_NEWNS, File: "mnt", Enabled: true}, + {Key: "NEWUTS", Value: syscall.CLONE_NEWUTS, File: "uts", Enabled: true}, + {Key: "NEWIPC", Value: syscall.CLONE_NEWIPC, File: "ipc", Enabled: true}, + {Key: "NEWUSER", Value: syscall.CLONE_NEWUSER, File: "user", Enabled: true}, + {Key: "NEWPID", Value: syscall.CLONE_NEWPID, File: "pid", Enabled: true}, + {Key: "NEWNET", Value: syscall.CLONE_NEWNET, File: "net", Enabled: true}, } } diff --git a/runconfig/parse.go b/runconfig/parse.go index db4ac351a9..c93ec26ed1 100644 --- a/runconfig/parse.go +++ b/runconfig/parse.go @@ -4,10 +4,8 @@ import ( "fmt" "github.com/dotcloud/docker/nat" "github.com/dotcloud/docker/opts" - "github.com/dotcloud/docker/pkg/label" flag "github.com/dotcloud/docker/pkg/mflag" "github.com/dotcloud/docker/pkg/sysinfo" - "github.com/dotcloud/docker/runtime/execdriver" "github.com/dotcloud/docker/utils" "io/ioutil" "path" @@ -34,10 +32,6 @@ func ParseSubcommand(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) } func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Config, *HostConfig, *flag.FlagSet, error) { - var ( - processLabel string - mountLabel string - ) var ( // FIXME: use utils.ListOpts for attach and volumes? flAttach = opts.NewListOpts(opts.ValidateAttach) @@ -68,7 +62,6 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf flUser = cmd.String([]string{"u", "-user"}, "", "Username or UID") flWorkingDir = cmd.String([]string{"w", "-workdir"}, "", "Working directory inside the container") flCpuShares = cmd.Int64([]string{"c", "-cpu-shares"}, 0, "CPU shares (relative weight)") - flLabelOptions = cmd.String([]string{"Z", "-label"}, "", "Options to pass to underlying labeling system") // For documentation purpose _ = cmd.Bool([]string{"#sig-proxy", "-sig-proxy"}, true, "Proxify all received signal to the process (even in non-tty mode)") @@ -161,15 +154,6 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf entrypoint = []string{*flEntrypoint} } - if !*flPrivileged { - pLabel, mLabel, e := label.GenLabels(*flLabelOptions) - if e != nil { - return nil, nil, cmd, fmt.Errorf("Invalid security labels : %s", e) - } - processLabel = pLabel - mountLabel = mLabel - } - lxcConf, err := parseKeyValueOpts(flLxcOpts) if err != nil { return nil, nil, cmd, err @@ -238,10 +222,6 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf VolumesFrom: strings.Join(flVolumesFrom.GetAll(), ","), Entrypoint: entrypoint, WorkingDir: *flWorkingDir, - Context: execdriver.Context{ - "mount_label": mountLabel, - "process_label": processLabel, - }, } driverOptions, err := parseDriverOpts(flDriverOpts) @@ -279,6 +259,8 @@ func parseDriverOpts(opts opts.ListOpts) (map[string][]string, error) { parts := strings.SplitN(o, ".", 2) if len(parts) < 2 { return nil, fmt.Errorf("invalid opt format %s", o) + } else if strings.TrimSpace(parts[0]) == "" { + return nil, fmt.Errorf("key cannot be empty %s", o) } values, exists := out[parts[0]] if !exists { diff --git a/runtime/container.go b/runtime/container.go index ca7b23e62b..ed68fd0844 100644 --- a/runtime/container.go +++ b/runtime/container.go @@ -404,7 +404,6 @@ func populateCommand(c *Container) { User: c.Config.User, Config: driverConfig, Resources: resources, - Context: c.Config.Context, } c.command.SysProcAttr = &syscall.SysProcAttr{Setsid: true} } diff --git a/runtime/execdriver/driver.go b/runtime/execdriver/driver.go index 096ea0790d..d067973419 100644 --- a/runtime/execdriver/driver.go +++ b/runtime/execdriver/driver.go @@ -125,7 +125,6 @@ type Command struct { Arguments []string `json:"arguments"` WorkingDir string `json:"working_dir"` ConfigPath string `json:"config_path"` // this should be able to be removed when the lxc template is moved into the driver - Context Context `json:"context"` // generic context for specific options (apparmor, selinux) Tty bool `json:"tty"` Network *Network `json:"network"` Config map[string][]string `json:"config"` // generic values that specific drivers can consume diff --git a/runtime/execdriver/lxc/driver.go b/runtime/execdriver/lxc/driver.go index 086e35f643..896f215366 100644 --- a/runtime/execdriver/lxc/driver.go +++ b/runtime/execdriver/lxc/driver.go @@ -3,6 +3,7 @@ package lxc import ( "fmt" "github.com/dotcloud/docker/pkg/cgroups" + "github.com/dotcloud/docker/pkg/label" "github.com/dotcloud/docker/runtime/execdriver" "github.com/dotcloud/docker/utils" "io/ioutil" @@ -378,19 +379,34 @@ func rootIsShared() bool { } func (d *driver) generateLXCConfig(c *execdriver.Command) (string, error) { - root := path.Join(d.root, "containers", c.ID, "config.lxc") + var ( + process, mount string + root = path.Join(d.root, "containers", c.ID, "config.lxc") + labels = c.Config["label"] + ) fo, err := os.Create(root) if err != nil { return "", err } defer fo.Close() + if len(labels) > 0 { + process, mount, err = label.GenLabels(labels[0]) + if err != nil { + return "", err + } + } + if err := LxcTemplateCompiled.Execute(fo, struct { *execdriver.Command - AppArmor bool + AppArmor bool + ProcessLabel string + MountLabel string }{ - Command: c, - AppArmor: d.apparmor, + Command: c, + AppArmor: d.apparmor, + ProcessLabel: process, + MountLabel: mount, }); err != nil { return "", err } diff --git a/runtime/execdriver/lxc/lxc_template.go b/runtime/execdriver/lxc/lxc_template.go index f325ffcaef..e5248375a8 100644 --- a/runtime/execdriver/lxc/lxc_template.go +++ b/runtime/execdriver/lxc/lxc_template.go @@ -30,9 +30,9 @@ lxc.pts = 1024 # disable the main console lxc.console = none -{{if getProcessLabel .Context}} -lxc.se_context = {{ getProcessLabel .Context}} -{{$MOUNTLABEL := getMountLabel .Context}} +{{if .ProcessLabel}} +lxc.se_context = {{ .ProcessLabel}} +{{$MOUNTLABEL := .MountLabel}} {{end}} # no controlling tty at all @@ -125,7 +125,7 @@ lxc.cgroup.cpu.shares = {{.Resources.CpuShares}} {{if .Config.lxc}} {{range $value := .Config.lxc}} -{{$value}} +lxc.{{$value}} {{end}} {{end}} ` @@ -147,12 +147,23 @@ func getMemorySwap(v *execdriver.Resources) int64 { return v.Memory * 2 } -func getProcessLabel(c execdriver.Context) string { - return c["process_label"] +func getProcessLabel(c map[string][]string) string { + return getLabel(c, "process") } -func getMountLabel(c execdriver.Context) string { - return c["mount_label"] +func getMountLabel(c map[string][]string) string { + return getLabel(c, "mount") +} + +func getLabel(c map[string][]string, name string) string { + label := c["label"] + for _, l := range label { + parts := strings.SplitN(l, "=", 2) + if strings.TrimSpace(parts[0]) == name { + return strings.TrimSpace(parts[1]) + } + } + return "" } func init() { diff --git a/runtime/execdriver/native/configuration/parse.go b/runtime/execdriver/native/configuration/parse.go new file mode 100644 index 0000000000..6d6c643919 --- /dev/null +++ b/runtime/execdriver/native/configuration/parse.go @@ -0,0 +1,186 @@ +package configuration + +import ( + "fmt" + "github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/utils" + "os/exec" + "path/filepath" + "strconv" + "strings" +) + +type Action func(*libcontainer.Container, interface{}, string) error + +var actions = map[string]Action{ + "cap.add": addCap, // add a cap + "cap.drop": dropCap, // drop a cap + + "ns.add": addNamespace, // add a namespace + "ns.drop": dropNamespace, // drop a namespace when cloning + + "net.join": joinNetNamespace, // join another containers net namespace + + "cgroups.cpu_shares": cpuShares, // set the cpu shares + "cgroups.memory": memory, // set the memory limit + "cgroups.memory_swap": memorySwap, // set the memory swap limit + "cgroups.cpuset.cpus": cpusetCpus, // set the cpus used + + "apparmor_profile": apparmorProfile, // set the apparmor profile to apply + + "fs.readonly": readonlyFs, // make the rootfs of the container read only +} + +func cpusetCpus(container *libcontainer.Container, context interface{}, value string) error { + if container.Cgroups == nil { + return fmt.Errorf("cannot set cgroups when they are disabled") + } + container.Cgroups.CpusetCpus = value + + return nil +} + +func apparmorProfile(container *libcontainer.Container, context interface{}, value string) error { + container.Context["apparmor_profile"] = value + return nil +} + +func cpuShares(container *libcontainer.Container, context interface{}, value string) error { + if container.Cgroups == nil { + return fmt.Errorf("cannot set cgroups when they are disabled") + } + v, err := strconv.ParseInt(value, 10, 0) + if err != nil { + return err + } + container.Cgroups.CpuShares = v + return nil +} + +func memory(container *libcontainer.Container, context interface{}, value string) error { + if container.Cgroups == nil { + return fmt.Errorf("cannot set cgroups when they are disabled") + } + + v, err := utils.RAMInBytes(value) + if err != nil { + return err + } + container.Cgroups.Memory = v + return nil +} + +func memorySwap(container *libcontainer.Container, context interface{}, value string) error { + if container.Cgroups == nil { + return fmt.Errorf("cannot set cgroups when they are disabled") + } + v, err := strconv.ParseInt(value, 0, 64) + if err != nil { + return err + } + container.Cgroups.MemorySwap = v + return nil +} + +func addCap(container *libcontainer.Container, context interface{}, value string) error { + c := container.CapabilitiesMask.Get(value) + if c == nil { + return fmt.Errorf("%s is not a valid capability", value) + } + c.Enabled = true + return nil +} + +func dropCap(container *libcontainer.Container, context interface{}, value string) error { + c := container.CapabilitiesMask.Get(value) + if c == nil { + return fmt.Errorf("%s is not a valid capability", value) + } + c.Enabled = false + return nil +} + +func addNamespace(container *libcontainer.Container, context interface{}, value string) error { + ns := container.Namespaces.Get(value) + if ns == nil { + return fmt.Errorf("%s is not a valid namespace", value[1:]) + } + ns.Enabled = true + return nil +} + +func dropNamespace(container *libcontainer.Container, context interface{}, value string) error { + ns := container.Namespaces.Get(value) + if ns == nil { + return fmt.Errorf("%s is not a valid namespace", value[1:]) + } + ns.Enabled = false + return nil +} + +func readonlyFs(container *libcontainer.Container, context interface{}, value string) error { + switch value { + case "1", "true": + container.ReadonlyFs = true + default: + container.ReadonlyFs = false + } + return nil +} + +func joinNetNamespace(container *libcontainer.Container, context interface{}, value string) error { + var ( + running = context.(map[string]*exec.Cmd) + cmd = running[value] + ) + + if cmd == nil || cmd.Process == nil { + return fmt.Errorf("%s is not a valid running container to join", value) + } + nspath := filepath.Join("/proc", fmt.Sprint(cmd.Process.Pid), "ns", "net") + container.Networks = append(container.Networks, &libcontainer.Network{ + Type: "netns", + Context: libcontainer.Context{ + "nspath": nspath, + }, + }) + return nil +} + +func vethMacAddress(container *libcontainer.Container, context interface{}, value string) error { + var veth *libcontainer.Network + for _, network := range container.Networks { + if network.Type == "veth" { + veth = network + break + } + } + if veth == nil { + return fmt.Errorf("not veth configured for container") + } + veth.Context["mac"] = value + return nil +} + +// configureCustomOptions takes string commands from the user and allows modification of the +// container's default configuration. +// +// TODO: this can be moved to a general utils or parser in pkg +func ParseConfiguration(container *libcontainer.Container, running map[string]*exec.Cmd, opts []string) error { + for _, opt := range opts { + kv := strings.SplitN(opt, "=", 2) + if len(kv) < 2 { + return fmt.Errorf("invalid format for %s", opt) + } + + action, exists := actions[kv[0]] + if !exists { + return fmt.Errorf("%s is not a valid option for the native driver", kv[0]) + } + + if err := action(container, running, kv[1]); err != nil { + return err + } + } + return nil +} diff --git a/runtime/execdriver/native/configuration/parse_test.go b/runtime/execdriver/native/configuration/parse_test.go new file mode 100644 index 0000000000..8001358766 --- /dev/null +++ b/runtime/execdriver/native/configuration/parse_test.go @@ -0,0 +1,166 @@ +package configuration + +import ( + "github.com/dotcloud/docker/runtime/execdriver/native/template" + "testing" +) + +func TestSetReadonlyRootFs(t *testing.T) { + var ( + container = template.New() + opts = []string{ + "fs.readonly=true", + } + ) + + if container.ReadonlyFs { + t.Fatal("container should not have a readonly rootfs by default") + } + if err := ParseConfiguration(container, nil, opts); err != nil { + t.Fatal(err) + } + + if !container.ReadonlyFs { + t.Fatal("container should have a readonly rootfs") + } +} + +func TestConfigurationsDoNotConflict(t *testing.T) { + var ( + container1 = template.New() + container2 = template.New() + opts = []string{ + "cap.add=NET_ADMIN", + } + ) + + if err := ParseConfiguration(container1, nil, opts); err != nil { + t.Fatal(err) + } + + if !container1.CapabilitiesMask.Get("NET_ADMIN").Enabled { + t.Fatal("container one should have NET_ADMIN enabled") + } + if container2.CapabilitiesMask.Get("NET_ADMIN").Enabled { + t.Fatal("container two should not have NET_ADMIN enabled") + } +} + +func TestCpusetCpus(t *testing.T) { + var ( + container = template.New() + opts = []string{ + "cgroups.cpuset.cpus=1,2", + } + ) + if err := ParseConfiguration(container, nil, opts); err != nil { + t.Fatal(err) + } + + if expected := "1,2"; container.Cgroups.CpusetCpus != expected { + t.Fatalf("expected %s got %s for cpuset.cpus", expected, container.Cgroups.CpusetCpus) + } +} + +func TestAppArmorProfile(t *testing.T) { + var ( + container = template.New() + opts = []string{ + "apparmor_profile=koye-the-protector", + } + ) + if err := ParseConfiguration(container, nil, opts); err != nil { + t.Fatal(err) + } + if expected := "koye-the-protector"; container.Context["apparmor_profile"] != expected { + t.Fatalf("expected profile %s got %s", expected, container.Context["apparmor_profile"]) + } +} + +func TestCpuShares(t *testing.T) { + var ( + container = template.New() + opts = []string{ + "cgroups.cpu_shares=1048", + } + ) + if err := ParseConfiguration(container, nil, opts); err != nil { + t.Fatal(err) + } + + if expected := int64(1048); container.Cgroups.CpuShares != expected { + t.Fatalf("expected cpu shares %d got %d", expected, container.Cgroups.CpuShares) + } +} + +func TestCgroupMemory(t *testing.T) { + var ( + container = template.New() + opts = []string{ + "cgroups.memory=500m", + } + ) + if err := ParseConfiguration(container, nil, opts); err != nil { + t.Fatal(err) + } + + if expected := int64(500 * 1024 * 1024); container.Cgroups.Memory != expected { + t.Fatalf("expected memory %d got %d", expected, container.Cgroups.Memory) + } +} + +func TestAddCap(t *testing.T) { + var ( + container = template.New() + opts = []string{ + "cap.add=MKNOD", + "cap.add=SYS_ADMIN", + } + ) + if err := ParseConfiguration(container, nil, opts); err != nil { + t.Fatal(err) + } + + if !container.CapabilitiesMask.Get("MKNOD").Enabled { + t.Fatal("container should have MKNOD enabled") + } + if !container.CapabilitiesMask.Get("SYS_ADMIN").Enabled { + t.Fatal("container should have SYS_ADMIN enabled") + } +} + +func TestDropCap(t *testing.T) { + var ( + container = template.New() + opts = []string{ + "cap.drop=MKNOD", + } + ) + // enabled all caps like in privileged mode + for _, c := range container.CapabilitiesMask { + c.Enabled = true + } + if err := ParseConfiguration(container, nil, opts); err != nil { + t.Fatal(err) + } + + if container.CapabilitiesMask.Get("MKNOD").Enabled { + t.Fatal("container should not have MKNOD enabled") + } +} + +func TestDropNamespace(t *testing.T) { + var ( + container = template.New() + opts = []string{ + "ns.drop=NEWNET", + } + ) + if err := ParseConfiguration(container, nil, opts); err != nil { + t.Fatal(err) + } + + if container.Namespaces.Get("NEWNET").Enabled { + t.Fatal("container should not have NEWNET enabled") + } +} diff --git a/runtime/execdriver/native/create.go b/runtime/execdriver/native/create.go new file mode 100644 index 0000000000..71fab3e064 --- /dev/null +++ b/runtime/execdriver/native/create.go @@ -0,0 +1,114 @@ +package native + +import ( + "fmt" + "os" + + "github.com/dotcloud/docker/pkg/label" + "github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/runtime/execdriver" + "github.com/dotcloud/docker/runtime/execdriver/native/configuration" + "github.com/dotcloud/docker/runtime/execdriver/native/template" +) + +// createContainer populates and configures the container type with the +// data provided by the execdriver.Command +func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Container, error) { + container := template.New() + + container.Hostname = getEnv("HOSTNAME", c.Env) + container.Tty = c.Tty + container.User = c.User + container.WorkingDir = c.WorkingDir + container.Env = c.Env + container.Cgroups.Name = c.ID + // check to see if we are running in ramdisk to disable pivot root + container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != "" + + if err := d.createNetwork(container, c); err != nil { + return nil, err + } + if c.Privileged { + if err := d.setPrivileged(container); err != nil { + return nil, err + } + } + if err := d.setupCgroups(container, c); err != nil { + return nil, err + } + if err := d.setupMounts(container, c); err != nil { + return nil, err + } + if err := d.setupLabels(container, c); err != nil { + return nil, err + } + if err := configuration.ParseConfiguration(container, d.activeContainers, c.Config["native"]); err != nil { + return nil, err + } + return container, nil +} + +func (d *driver) createNetwork(container *libcontainer.Container, c *execdriver.Command) error { + container.Networks = []*libcontainer.Network{ + { + Mtu: c.Network.Mtu, + Address: fmt.Sprintf("%s/%d", "127.0.0.1", 0), + Gateway: "localhost", + Type: "loopback", + Context: libcontainer.Context{}, + }, + } + + if c.Network.Interface != nil { + vethNetwork := libcontainer.Network{ + Mtu: c.Network.Mtu, + Address: fmt.Sprintf("%s/%d", c.Network.Interface.IPAddress, c.Network.Interface.IPPrefixLen), + Gateway: c.Network.Interface.Gateway, + Type: "veth", + Context: libcontainer.Context{ + "prefix": "veth", + "bridge": c.Network.Interface.Bridge, + }, + } + container.Networks = append(container.Networks, &vethNetwork) + } + return nil +} + +func (d *driver) setPrivileged(container *libcontainer.Container) error { + for _, c := range container.CapabilitiesMask { + c.Enabled = true + } + container.Cgroups.DeviceAccess = true + container.Context["apparmor_profile"] = "unconfined" + return nil +} + +func (d *driver) setupCgroups(container *libcontainer.Container, c *execdriver.Command) error { + if c.Resources != nil { + container.Cgroups.CpuShares = c.Resources.CpuShares + container.Cgroups.Memory = c.Resources.Memory + container.Cgroups.MemorySwap = c.Resources.MemorySwap + } + return nil +} + +func (d *driver) setupMounts(container *libcontainer.Container, c *execdriver.Command) error { + for _, m := range c.Mounts { + container.Mounts = append(container.Mounts, libcontainer.Mount{m.Source, m.Destination, m.Writable, m.Private}) + } + return nil +} + +func (d *driver) setupLabels(container *libcontainer.Container, c *execdriver.Command) error { + labels := c.Config["label"] + if len(labels) > 0 { + process, mount, err := label.GenLabels(labels[0]) + if err != nil { + return err + } + container.Context["mount_label"] = mount + container.Context["process_label"] = process + } + return nil +} diff --git a/runtime/execdriver/native/default_template.go b/runtime/execdriver/native/default_template.go deleted file mode 100644 index 32886f2396..0000000000 --- a/runtime/execdriver/native/default_template.go +++ /dev/null @@ -1,122 +0,0 @@ -package native - -import ( - "fmt" - "github.com/dotcloud/docker/pkg/cgroups" - "github.com/dotcloud/docker/pkg/libcontainer" - "github.com/dotcloud/docker/runtime/execdriver" - "github.com/dotcloud/docker/utils" - "os" -) - -// createContainer populates and configures the container type with the -// data provided by the execdriver.Command -func createContainer(c *execdriver.Command) *libcontainer.Container { - container := getDefaultTemplate() - - container.Hostname = getEnv("HOSTNAME", c.Env) - container.Tty = c.Tty - container.User = c.User - container.WorkingDir = c.WorkingDir - container.Env = c.Env - container.Context["mount_label"] = c.Context["mount_label"] - container.Context["process_label"] = c.Context["process_label"] - - loopbackNetwork := libcontainer.Network{ - Mtu: c.Network.Mtu, - Address: fmt.Sprintf("%s/%d", "127.0.0.1", 0), - Gateway: "localhost", - Type: "loopback", - Context: libcontainer.Context{}, - } - - container.Networks = []*libcontainer.Network{ - &loopbackNetwork, - } - - if c.Network.Interface != nil { - vethNetwork := libcontainer.Network{ - Mtu: c.Network.Mtu, - Address: fmt.Sprintf("%s/%d", c.Network.Interface.IPAddress, c.Network.Interface.IPPrefixLen), - Gateway: c.Network.Interface.Gateway, - Type: "veth", - Context: libcontainer.Context{ - "prefix": "veth", - "bridge": c.Network.Interface.Bridge, - }, - } - container.Networks = append(container.Networks, &vethNetwork) - } - - container.Cgroups.Name = c.ID - if c.Privileged { - container.CapabilitiesMask = nil - container.Cgroups.DeviceAccess = true - container.Context["apparmor_profile"] = "unconfined" - } - if c.Resources != nil { - container.Cgroups.CpuShares = c.Resources.CpuShares - container.Cgroups.Memory = c.Resources.Memory - container.Cgroups.MemorySwap = c.Resources.MemorySwap - } - - if opts, ok := c.Config["unit"]; ok { - props := [][2]string{} - for _, opt := range opts { - key, value, err := utils.ParseKeyValueOpt(opt) - if err == nil { - props = append(props, [2]string{key, value}) - } else { - props = append(props, [2]string{opt, ""}) - } - } - container.Cgroups.UnitProperties = props - } - - // check to see if we are running in ramdisk to disable pivot root - container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != "" - - for _, m := range c.Mounts { - container.Mounts = append(container.Mounts, libcontainer.Mount{m.Source, m.Destination, m.Writable, m.Private}) - } - - return container -} - -// getDefaultTemplate returns the docker default for -// the libcontainer configuration file -func getDefaultTemplate() *libcontainer.Container { - return &libcontainer.Container{ - CapabilitiesMask: libcontainer.Capabilities{ - libcontainer.GetCapability("SETPCAP"), - libcontainer.GetCapability("SYS_MODULE"), - libcontainer.GetCapability("SYS_RAWIO"), - libcontainer.GetCapability("SYS_PACCT"), - libcontainer.GetCapability("SYS_ADMIN"), - libcontainer.GetCapability("SYS_NICE"), - libcontainer.GetCapability("SYS_RESOURCE"), - libcontainer.GetCapability("SYS_TIME"), - libcontainer.GetCapability("SYS_TTY_CONFIG"), - libcontainer.GetCapability("MKNOD"), - libcontainer.GetCapability("AUDIT_WRITE"), - libcontainer.GetCapability("AUDIT_CONTROL"), - libcontainer.GetCapability("MAC_OVERRIDE"), - libcontainer.GetCapability("MAC_ADMIN"), - libcontainer.GetCapability("NET_ADMIN"), - }, - Namespaces: libcontainer.Namespaces{ - libcontainer.GetNamespace("NEWNS"), - libcontainer.GetNamespace("NEWUTS"), - libcontainer.GetNamespace("NEWIPC"), - libcontainer.GetNamespace("NEWPID"), - libcontainer.GetNamespace("NEWNET"), - }, - Cgroups: &cgroups.Cgroup{ - Parent: "docker", - DeviceAccess: false, - }, - Context: libcontainer.Context{ - "apparmor_profile": "docker-default", - }, - } -} diff --git a/runtime/execdriver/native/driver.go b/runtime/execdriver/native/driver.go index db974cbf04..4acc4b388c 100644 --- a/runtime/execdriver/native/driver.go +++ b/runtime/execdriver/native/driver.go @@ -57,8 +57,9 @@ func init() { } type driver struct { - root string - initPath string + root string + initPath string + activeContainers map[string]*exec.Cmd } func NewDriver(root, initPath string) (*driver, error) { @@ -69,18 +70,22 @@ func NewDriver(root, initPath string) (*driver, error) { return nil, err } return &driver{ - root: root, - initPath: initPath, + root: root, + initPath: initPath, + activeContainers: make(map[string]*exec.Cmd), }, nil } func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) { - if err := d.validateCommand(c); err != nil { + // take the Command and populate the libcontainer.Container from it + container, err := d.createContainer(c) + if err != nil { return -1, err } + d.activeContainers[c.ID] = &c.Cmd + var ( term nsinit.Terminal - container = createContainer(c) factory = &dockerCommandFactory{c: c, driver: d} stateWriter = &dockerStateWriter{ callback: startCallback, @@ -181,16 +186,6 @@ func (d *driver) removeContainerRoot(id string) error { return os.RemoveAll(filepath.Join(d.root, id)) } -func (d *driver) validateCommand(c *execdriver.Command) error { - // we need to check the Config of the command to make sure that we - // do not have any of the lxc-conf variables - lxc := c.Config["lxc"] - if lxc != nil && len(lxc) > 0 { - return fmt.Errorf("lxc config options are not supported by the native driver") - } - return nil -} - func getEnv(key string, env []string) string { for _, pair := range env { parts := strings.Split(pair, "=") diff --git a/runtime/execdriver/native/template/default_template.go b/runtime/execdriver/native/template/default_template.go new file mode 100644 index 0000000000..b9eb87713e --- /dev/null +++ b/runtime/execdriver/native/template/default_template.go @@ -0,0 +1,43 @@ +package template + +import ( + "github.com/dotcloud/docker/pkg/cgroups" + "github.com/dotcloud/docker/pkg/libcontainer" +) + +// New returns the docker default configuration for libcontainer +func New() *libcontainer.Container { + return &libcontainer.Container{ + CapabilitiesMask: libcontainer.Capabilities{ + libcontainer.GetCapability("SETPCAP"), + libcontainer.GetCapability("SYS_MODULE"), + libcontainer.GetCapability("SYS_RAWIO"), + libcontainer.GetCapability("SYS_PACCT"), + libcontainer.GetCapability("SYS_ADMIN"), + libcontainer.GetCapability("SYS_NICE"), + libcontainer.GetCapability("SYS_RESOURCE"), + libcontainer.GetCapability("SYS_TIME"), + libcontainer.GetCapability("SYS_TTY_CONFIG"), + libcontainer.GetCapability("MKNOD"), + libcontainer.GetCapability("AUDIT_WRITE"), + libcontainer.GetCapability("AUDIT_CONTROL"), + libcontainer.GetCapability("MAC_OVERRIDE"), + libcontainer.GetCapability("MAC_ADMIN"), + libcontainer.GetCapability("NET_ADMIN"), + }, + Namespaces: libcontainer.Namespaces{ + libcontainer.GetNamespace("NEWNS"), + libcontainer.GetNamespace("NEWUTS"), + libcontainer.GetNamespace("NEWIPC"), + libcontainer.GetNamespace("NEWPID"), + libcontainer.GetNamespace("NEWNET"), + }, + Cgroups: &cgroups.Cgroup{ + Parent: "docker", + DeviceAccess: false, + }, + Context: libcontainer.Context{ + "apparmor_profile": "docker-default", + }, + } +} diff --git a/runtime/utils_test.go b/runtime/utils_test.go new file mode 100644 index 0000000000..bdf3543a49 --- /dev/null +++ b/runtime/utils_test.go @@ -0,0 +1,29 @@ +package runtime + +import ( + "testing" + + "github.com/dotcloud/docker/runconfig" + "github.com/dotcloud/docker/utils" +) + +func TestMergeLxcConfig(t *testing.T) { + var ( + hostConfig = &runconfig.HostConfig{ + LxcConf: []utils.KeyValuePair{ + {Key: "lxc.cgroups.cpuset", Value: "1,2"}, + }, + } + driverConfig = make(map[string][]string) + ) + + mergeLxcConfIntoOptions(hostConfig, driverConfig) + if l := len(driverConfig["lxc"]); l > 1 { + t.Fatalf("expected lxc options len of 1 got %d", l) + } + + cpuset := driverConfig["lxc"][0] + if expected := "cgroups.cpuset=1,2"; cpuset != expected { + t.Fatalf("expected %s got %s", expected, cpuset) + } +}