diff --git a/daemon/execdriver/driver.go b/daemon/execdriver/driver.go index 0c06403cb3..7d44ee5354 100644 --- a/daemon/execdriver/driver.go +++ b/daemon/execdriver/driver.go @@ -150,7 +150,7 @@ type Resources struct { CpusetMems string `json:"cpuset_mems"` CPUPeriod int64 `json:"cpu_period"` CPUQuota int64 `json:"cpu_quota"` - BlkioWeight int64 `json:"blkio_weight"` + BlkioWeight uint16 `json:"blkio_weight"` Rlimits []*ulimit.Rlimit `json:"rlimits"` OomKillDisable bool `json:"oom_kill_disable"` MemorySwappiness int64 `json:"memory_swappiness"` diff --git a/daemon/execdriver/driver_unix.go b/daemon/execdriver/driver_unix.go index 2fde9485ab..3baa3e0965 100644 --- a/daemon/execdriver/driver_unix.go +++ b/daemon/execdriver/driver_unix.go @@ -12,6 +12,7 @@ import ( "time" "github.com/docker/docker/daemon/execdriver/native/template" + "github.com/docker/docker/pkg/mount" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/cgroups/fs" "github.com/opencontainers/runc/libcontainer/configs" @@ -37,7 +38,7 @@ func InitContainer(c *Command) *configs.Config { container.Devices = c.AutoCreatedDevices container.Rootfs = c.Rootfs container.Readonlyfs = c.ReadonlyRootfs - container.Privatefs = true + container.RootPropagation = mount.RPRIVATE // check to see if we are running in ramdisk to disable pivot root container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != "" diff --git a/hack/vendor.sh b/hack/vendor.sh index b086559b2b..d42b14d613 100755 --- a/hack/vendor.sh +++ b/hack/vendor.sh @@ -44,8 +44,8 @@ clone git github.com/endophage/gotuf 9bcdad0308e34a49f38448b8ad436ad8860825ce clone git github.com/jfrazelle/go 6e461eb70cb4187b41a84e9a567d7137bdbe0f16 clone git github.com/agl/ed25519 d2b94fd789ea21d12fac1a4443dd3a3f79cda72c -clone git github.com/opencontainers/runc fba07bce72e72ce5b2dd618e4f67dd86ccb49c82 # libcontainer -# libcontainer deps (see src/github.com/docker/libcontainer/update-vendor.sh) +clone git github.com/opencontainers/runc 
902c012e85cdae6bb68d8c7a0df69a42f818ce96 # libcontainer +# libcontainer deps (see src/github.com/opencontainers/runc/Godeps/Godeps.json) clone git github.com/coreos/go-systemd v3 clone git github.com/godbus/dbus v2 clone git github.com/syndtr/gocapability 66ef2aa7a23ba682594e2b6f74cf40c0692b49fb diff --git a/pkg/mflag/flag.go b/pkg/mflag/flag.go index dd3188462c..afab4c9c60 100644 --- a/pkg/mflag/flag.go +++ b/pkg/mflag/flag.go @@ -200,6 +200,24 @@ func (i *uint64Value) Get() interface{} { return uint64(*i) } func (i *uint64Value) String() string { return fmt.Sprintf("%v", *i) } +// -- uint16 Value +type uint16Value uint16 + +func newUint16Value(val uint16, p *uint16) *uint16Value { + *p = val + return (*uint16Value)(p) +} + +func (i *uint16Value) Set(s string) error { + v, err := strconv.ParseUint(s, 0, 16) + *i = uint16Value(v) + return err +} + +func (i *uint16Value) Get() interface{} { return uint16(*i) } + +func (i *uint16Value) String() string { return fmt.Sprintf("%v", *i) } + // -- string Value type stringValue string @@ -757,6 +775,32 @@ func Uint64(names []string, value uint64, usage string) *uint64 { return CommandLine.Uint64(names, value, usage) } +// Uint16Var defines a uint16 flag with specified name, default value, and usage string. +// The argument p points to a uint16 variable in which to store the value of the flag. +func (fs *FlagSet) Uint16Var(p *uint16, names []string, value uint16, usage string) { + fs.Var(newUint16Value(value, p), names, usage) +} + +// Uint16Var defines a uint16 flag with specified name, default value, and usage string. +// The argument p points to a uint16 variable in which to store the value of the flag. +func Uint16Var(p *uint16, names []string, value uint16, usage string) { + CommandLine.Var(newUint16Value(value, p), names, usage) +} + +// Uint16 defines a uint16 flag with specified name, default value, and usage string. +// The return value is the address of a uint16 variable that stores the value of the flag. 
+func (fs *FlagSet) Uint16(names []string, value uint16, usage string) *uint16 { + p := new(uint16) + fs.Uint16Var(p, names, value, usage) + return p +} + +// Uint16 defines a uint16 flag with specified name, default value, and usage string. +// The return value is the address of a uint16 variable that stores the value of the flag. +func Uint16(names []string, value uint16, usage string) *uint16 { + return CommandLine.Uint16(names, value, usage) +} + // StringVar defines a string flag with specified name, default value, and usage string. // The argument p points to a string variable in which to store the value of the flag. func (fs *FlagSet) StringVar(p *string, names []string, value string, usage string) { diff --git a/runconfig/hostconfig.go b/runconfig/hostconfig.go index 7ef99328f9..7a7b8f3b37 100644 --- a/runconfig/hostconfig.go +++ b/runconfig/hostconfig.go @@ -226,7 +226,7 @@ type HostConfig struct { CpusetCpus string // CpusetCpus 0-2, 0,1 CpusetMems string // CpusetMems 0-2, 0,1 CPUQuota int64 `json:"CpuQuota"` // CPU CFS (Completely Fair Scheduler) quota - BlkioWeight int64 // Block IO weight (relative weight vs. other containers) + BlkioWeight uint16 // Block IO weight (relative weight vs. 
other containers) OomKillDisable bool // Whether to disable OOM Killer or not MemorySwappiness *int64 // Tuning container memory swappiness behaviour Privileged bool // Is the container in privileged mode diff --git a/runconfig/parse.go b/runconfig/parse.go index ac44ba7345..4e6267ab8b 100644 --- a/runconfig/parse.go +++ b/runconfig/parse.go @@ -86,7 +86,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*Config, *HostConfig, *flag.FlagSe flCPUQuota = cmd.Int64([]string{"-cpu-quota"}, 0, "Limit CPU CFS (Completely Fair Scheduler) quota") flCpusetCpus = cmd.String([]string{"#-cpuset", "-cpuset-cpus"}, "", "CPUs in which to allow execution (0-3, 0,1)") flCpusetMems = cmd.String([]string{"-cpuset-mems"}, "", "MEMs in which to allow execution (0-3, 0,1)") - flBlkioWeight = cmd.Int64([]string{"-blkio-weight"}, 0, "Block IO (relative weight), between 10 and 1000") + flBlkioWeight = cmd.Uint16([]string{"-blkio-weight"}, 0, "Block IO (relative weight), between 10 and 1000") flSwappiness = cmd.Int64([]string{"-memory-swappiness"}, -1, "Tuning container memory swappiness (0 to 100)") flNetMode = cmd.String([]string{"-net"}, "default", "Set the Network mode for the container") flMacAddress = cmd.String([]string{"-mac-address"}, "", "Container MAC address (e.g. 
92:d0:c6:0a:29:33)") diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go b/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go index 3b9405542d..db886bcf1d 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go @@ -32,33 +32,41 @@ func (s *BlkioGroup) Apply(d *data) error { func (s *BlkioGroup) Set(path string, cgroup *configs.Cgroup) error { if cgroup.BlkioWeight != 0 { - if err := writeFile(path, "blkio.weight", strconv.FormatInt(cgroup.BlkioWeight, 10)); err != nil { + if err := writeFile(path, "blkio.weight", strconv.FormatUint(uint64(cgroup.BlkioWeight), 10)); err != nil { return err } } - if cgroup.BlkioWeightDevice != "" { - if err := writeFile(path, "blkio.weight_device", cgroup.BlkioWeightDevice); err != nil { + if cgroup.BlkioLeafWeight != 0 { + if err := writeFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(cgroup.BlkioLeafWeight), 10)); err != nil { return err } } - if cgroup.BlkioThrottleReadBpsDevice != "" { - if err := writeFile(path, "blkio.throttle.read_bps_device", cgroup.BlkioThrottleReadBpsDevice); err != nil { + for _, wd := range cgroup.BlkioWeightDevice { + if err := writeFile(path, "blkio.weight_device", wd.WeightString()); err != nil { + return err + } + if err := writeFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil { return err } } - if cgroup.BlkioThrottleWriteBpsDevice != "" { - if err := writeFile(path, "blkio.throttle.write_bps_device", cgroup.BlkioThrottleWriteBpsDevice); err != nil { + for _, td := range cgroup.BlkioThrottleReadBpsDevice { + if err := writeFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil { return err } } - if cgroup.BlkioThrottleReadIOpsDevice != "" { - if err := writeFile(path, "blkio.throttle.read_iops_device", cgroup.BlkioThrottleReadIOpsDevice); err != nil { + for _, td := range 
cgroup.BlkioThrottleWriteBpsDevice { + if err := writeFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil { return err } } - if cgroup.BlkioThrottleWriteIOpsDevice != "" { - if err := writeFile(path, "blkio.throttle.write_iops_device", cgroup.BlkioThrottleWriteIOpsDevice); err != nil { + for _, td := range cgroup.BlkioThrottleReadIOPSDevice { + if err := writeFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil { + return err + } + } + for _, td := range cgroup.BlkioThrottleWriteIOPSDevice { + if err := writeFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil { return err } } diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go b/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go index 7f192a9ae2..a7d3a8700d 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go @@ -29,7 +29,7 @@ func (s *HugetlbGroup) Apply(d *data) error { func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error { for _, hugetlb := range cgroup.HugetlbLimit { - if err := writeFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.Itoa(hugetlb.Limit)); err != nil { + if err := writeFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.FormatUint(hugetlb.Limit, 10)); err != nil { return err } } diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go b/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go new file mode 100644 index 0000000000..99913802cf --- /dev/null +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go @@ -0,0 +1,25 @@ +package fs + +import ( + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type NameGroup struct 
{ +} + +func (s *NameGroup) Apply(d *data) error { + return nil +} + +func (s *NameGroup) Set(path string, cgroup *configs.Cgroup) error { + return nil +} + +func (s *NameGroup) Remove(d *data) error { + return nil +} + +func (s *NameGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go b/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go index 7b523da9be..f7f7ca2b11 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go @@ -34,17 +34,18 @@ type subsystem interface { } var subsystems = map[string]subsystem{ - "devices": &fs.DevicesGroup{}, - "memory": &fs.MemoryGroup{}, - "cpu": &fs.CpuGroup{}, - "cpuset": &fs.CpusetGroup{}, - "cpuacct": &fs.CpuacctGroup{}, - "blkio": &fs.BlkioGroup{}, - "hugetlb": &fs.HugetlbGroup{}, - "perf_event": &fs.PerfEventGroup{}, - "freezer": &fs.FreezerGroup{}, - "net_prio": &fs.NetPrioGroup{}, - "net_cls": &fs.NetClsGroup{}, + "devices": &fs.DevicesGroup{}, + "memory": &fs.MemoryGroup{}, + "cpu": &fs.CpuGroup{}, + "cpuset": &fs.CpusetGroup{}, + "cpuacct": &fs.CpuacctGroup{}, + "blkio": &fs.BlkioGroup{}, + "hugetlb": &fs.HugetlbGroup{}, + "perf_event": &fs.PerfEventGroup{}, + "freezer": &fs.FreezerGroup{}, + "net_prio": &fs.NetPrioGroup{}, + "net_cls": &fs.NetClsGroup{}, + "name=systemd": &fs.NameGroup{}, } const ( @@ -176,7 +177,6 @@ func (m *Manager) Apply(pid int) error { properties = append(properties, newProp("MemoryLimit", uint64(c.Memory))) } - // TODO: MemoryReservation and MemorySwap not available in systemd if c.CpuShares != 0 { properties = append(properties, @@ -212,6 +212,7 @@ func (m *Manager) Apply(pid int) error { return err } + // TODO: MemoryReservation and MemorySwap not available in systemd if err := joinMemory(c, pid); err 
!= nil { return err } @@ -236,6 +237,10 @@ func (m *Manager) Apply(pid int) error { if err := joinHugetlb(c, pid); err != nil { return err } + + if err := joinPerfEvent(c, pid); err != nil { + return err + } // FIXME: Systemd does have `BlockIODeviceWeight` property, but we got problem // using that (at least on systemd 208, see https://github.com/opencontainers/runc/libcontainer/pull/354), // so use fs work around for now. @@ -505,6 +510,12 @@ func joinMemory(c *configs.Cgroup, pid int) error { return err } } + if c.MemoryReservation > 0 { + err = writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(c.MemoryReservation, 10)) + if err != nil { + return err + } + } if c.OomKillDisable { if err := writeFile(path, "memory.oom_control", "1"); err != nil { return err @@ -547,28 +558,37 @@ func joinBlkio(c *configs.Cgroup, pid int) error { if err != nil { return err } - if c.BlkioWeightDevice != "" { - if err := writeFile(path, "blkio.weight_device", c.BlkioWeightDevice); err != nil { + // systemd doesn't directly support this in the dbus properties + if c.BlkioLeafWeight != 0 { + if err := writeFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(c.BlkioLeafWeight), 10)); err != nil { return err } } - if c.BlkioThrottleReadBpsDevice != "" { - if err := writeFile(path, "blkio.throttle.read_bps_device", c.BlkioThrottleReadBpsDevice); err != nil { + for _, wd := range c.BlkioWeightDevice { + if err := writeFile(path, "blkio.weight_device", wd.WeightString()); err != nil { + return err + } + if err := writeFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil { return err } } - if c.BlkioThrottleWriteBpsDevice != "" { - if err := writeFile(path, "blkio.throttle.write_bps_device", c.BlkioThrottleWriteBpsDevice); err != nil { + for _, td := range c.BlkioThrottleReadBpsDevice { + if err := writeFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil { return err } } - if c.BlkioThrottleReadIOpsDevice != "" { - if err := 
writeFile(path, "blkio.throttle.read_iops_device", c.BlkioThrottleReadIOpsDevice); err != nil { + for _, td := range c.BlkioThrottleWriteBpsDevice { + if err := writeFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil { return err } } - if c.BlkioThrottleWriteIOpsDevice != "" { - if err := writeFile(path, "blkio.throttle.write_iops_device", c.BlkioThrottleWriteIOpsDevice); err != nil { + for _, td := range c.BlkioThrottleReadIOPSDevice { + if err := writeFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil { + return err + } + } + for _, td := range c.BlkioThrottleWriteIOPSDevice { + if err := writeFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil { return err } } @@ -585,3 +605,13 @@ func joinHugetlb(c *configs.Cgroup, pid int) error { hugetlb := subsystems["hugetlb"] return hugetlb.Set(path, c) } + +func joinPerfEvent(c *configs.Cgroup, pid int) error { + path, err := join(c, "perf_event", pid) + if err != nil && !cgroups.IsNotFound(err) { + return err + } + + perfEvent := subsystems["perf_event"] + return perfEvent.Set(path, c) +} diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go b/vendor/src/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go new file mode 100644 index 0000000000..e0f3ca1653 --- /dev/null +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go @@ -0,0 +1,61 @@ +package configs + +import "fmt" + +// blockIODevice holds major:minor format supported in blkio cgroup +type blockIODevice struct { + // Major is the device's major number + Major int64 `json:"major"` + // Minor is the device's minor number + Minor int64 `json:"minor"` +} + +// WeightDevice struct holds a `major:minor weight`|`major:minor leaf_weight` pair +type WeightDevice struct { + blockIODevice + // Weight is the bandwidth rate for the device, range is from 10 to 1000 + Weight uint16 `json:"weight"` + // LeafWeight is the bandwidth rate 
for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only + LeafWeight uint16 `json:"leafWeight"` +} + +// NewWeightDevice returns a configured WeightDevice pointer +func NewWeightDevice(major, minor int64, weight, leafWeight uint16) *WeightDevice { + wd := &WeightDevice{} + wd.Major = major + wd.Minor = minor + wd.Weight = weight + wd.LeafWeight = leafWeight + return wd +} + +// WeightString formats the struct to be writable to the cgroup specific file +func (wd *WeightDevice) WeightString() string { + return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.Weight) +} + +// LeafWeightString formats the struct to be writable to the cgroup specific file +func (wd *WeightDevice) LeafWeightString() string { + return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.LeafWeight) +} + +// ThrottleDevice struct holds a `major:minor rate_per_second` pair +type ThrottleDevice struct { + blockIODevice + // Rate is the IO rate limit per cgroup per device + Rate uint64 `json:"rate"` +} + +// NewThrottleDevice returns a configured ThrottleDevice pointer +func NewThrottleDevice(major, minor int64, rate uint64) *ThrottleDevice { + td := &ThrottleDevice{} + td.Major = major + td.Minor = minor + td.Rate = rate + return td +} + +// String formats the struct to be writable to the cgroup specific file +func (td *ThrottleDevice) String() string { + return fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate) +} diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/configs/cgroup.go b/vendor/src/github.com/opencontainers/runc/libcontainer/configs/cgroup.go index 140b530d66..bad86b036a 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/configs/cgroup.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/configs/cgroup.go @@ -57,23 +57,26 @@ type Cgroup struct { // MEM to use CpusetMems string `json:"cpuset_mems"` - // IO read rate limit per cgroup per device, bytes per second. 
- BlkioThrottleReadBpsDevice string `json:"blkio_throttle_read_bps_device"` - - // IO write rate limit per cgroup per divice, bytes per second. - BlkioThrottleWriteBpsDevice string `json:"blkio_throttle_write_bps_device"` - - // IO read rate limit per cgroup per device, IO per second. - BlkioThrottleReadIOpsDevice string `json:"blkio_throttle_read_iops_device"` - - // IO write rate limit per cgroup per device, IO per second. - BlkioThrottleWriteIOpsDevice string `json:"blkio_throttle_write_iops_device"` - // Specifies per cgroup weight, range is from 10 to 1000. - BlkioWeight int64 `json:"blkio_weight"` + BlkioWeight uint16 `json:"blkio_weight"` + + // Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only + BlkioLeafWeight uint16 `json:"blkio_leaf_weight"` // Weight per cgroup per device, can override BlkioWeight. - BlkioWeightDevice string `json:"blkio_weight_device"` + BlkioWeightDevice []*WeightDevice `json:"blkio_weight_device"` + + // IO read rate limit per cgroup per device, bytes per second. + BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device"` + + // IO write rate limit per cgroup per divice, bytes per second. + BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device"` + + // IO read rate limit per cgroup per device, IO per second. + BlkioThrottleReadIOPSDevice []*ThrottleDevice `json:"blkio_throttle_read_iops_device"` + + // IO write rate limit per cgroup per device, IO per second. 
+ BlkioThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkio_throttle_write_iops_device"` // set the freeze value for the process Freezer FreezerState `json:"freezer"` diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/configs/config.go b/vendor/src/github.com/opencontainers/runc/libcontainer/configs/config.go index 4b298e1eae..7fd311db42 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/configs/config.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/configs/config.go @@ -92,8 +92,8 @@ type Config struct { // bind mounts are writtable. Readonlyfs bool `json:"readonlyfs"` - // Privatefs will mount the container's rootfs as private where mount points from the parent will not propogate - Privatefs bool `json:"privatefs"` + // Specifies the mount propagation flags to be applied to /. + RootPropagation int `json:"rootPropagation"` // Mounts specify additional source and destination paths that will be mounted inside the container's // rootfs and mount namespace if specified diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/configs/config_unix.go b/vendor/src/github.com/opencontainers/runc/libcontainer/configs/config_unix.go index 89f580bfa3..c447f3ef29 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/configs/config_unix.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/configs/config_unix.go @@ -21,7 +21,7 @@ func (c Config) HostUID() (int, error) { return 0, nil } -// Gets the root uid for the process on host which could be non-zero +// Gets the root gid for the process on host which could be non-zero // when user namespaces are enabled. 
func (c Config) HostGID() (int, error) { if c.Namespaces.Contains(NEWUSER) { @@ -30,11 +30,11 @@ func (c Config) HostGID() (int, error) { } id, found := c.hostIDFromMapping(0, c.GidMappings) if !found { - return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.") + return -1, fmt.Errorf("User namespaces enabled, but no root group mapping found.") } return id, nil } - // Return default root uid 0 + // Return default root gid 0 return 0, nil } diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go b/vendor/src/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go index 1cce8d09be..d30216380b 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go @@ -5,5 +5,5 @@ type HugepageLimit struct { Pagesize string `json:"page_size"` // usage limit for hugepage. - Limit int `json:"limit"` + Limit uint64 `json:"limit"` } diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c b/vendor/src/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c index 69b077bfb5..01450a90a6 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c @@ -23,7 +23,7 @@ struct clone_arg { * Reserve some space for clone() to locate arguments * and retcode in this place */ - char stack[4096] __attribute__ ((aligned(8))); + char stack[4096] __attribute__ ((aligned(16))); char stack_ptr[0]; jmp_buf *env; }; diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/rootfs_linux.go b/vendor/src/github.com/opencontainers/runc/libcontainer/rootfs_linux.go index a57281e662..65c404f140 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/rootfs_linux.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/rootfs_linux.go @@ -13,6 +13,7 @@ 
import ( "syscall" "time" + "github.com/docker/docker/pkg/mount" "github.com/docker/docker/pkg/symlink" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" @@ -341,7 +342,7 @@ func setupDevSymlinks(rootfs string) error { // symlinks are resolved locally. func reOpenDevNull() error { var stat, devNullStat syscall.Stat_t - file, err := os.Open("/dev/null") + file, err := os.OpenFile("/dev/null", os.O_RDWR, 0) if err != nil { return fmt.Errorf("Failed to open /dev/null - %s", err) } @@ -420,14 +421,89 @@ func mknodDevice(dest string, node *configs.Device) error { return syscall.Chown(dest, int(node.Uid), int(node.Gid)) } +func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info { + for _, m := range mountinfo { + if m.Mountpoint == dir { + return m + } + } + return nil +} + +// Get the parent mount point of directory passed in as argument. Also return +// optional fields. +func getParentMount(rootfs string) (string, string, error) { + var path string + + mountinfos, err := mount.GetMounts() + if err != nil { + return "", "", err + } + + mountinfo := getMountInfo(mountinfos, rootfs) + if mountinfo != nil { + return rootfs, mountinfo.Optional, nil + } + + path = rootfs + for { + path = filepath.Dir(path) + + mountinfo = getMountInfo(mountinfos, path) + if mountinfo != nil { + return path, mountinfo.Optional, nil + } + + if path == "/" { + break + } + } + + // If we are here, we did not find parent mount. Something is wrong. 
+ return "", "", fmt.Errorf("Could not find parent mount of %s", rootfs) +} + +// Make parent mount private if it was shared +func rootfsParentMountPrivate(config *configs.Config) error { + sharedMount := false + + parentMount, optionalOpts, err := getParentMount(config.Rootfs) + if err != nil { + return err + } + + optsSplit := strings.Split(optionalOpts, " ") + for _, opt := range optsSplit { + if strings.HasPrefix(opt, "shared:") { + sharedMount = true + break + } + } + + // Make parent mount PRIVATE if it was shared. It is needed for two + // reasons. First of all pivot_root() will fail if parent mount is + // shared. Secondly when we bind mount rootfs it will propagate to + // parent namespace and we don't want that to happen. + if sharedMount { + return syscall.Mount("", parentMount, "", syscall.MS_PRIVATE, "") + } + + return nil +} + func prepareRoot(config *configs.Config) error { flag := syscall.MS_SLAVE | syscall.MS_REC - if config.Privatefs { - flag = syscall.MS_PRIVATE | syscall.MS_REC + if config.RootPropagation != 0 { + flag = config.RootPropagation } if err := syscall.Mount("", "/", "", uintptr(flag), ""); err != nil { return err } + + if err := rootfsParentMountPrivate(config); err != nil { + return err + } + return syscall.Mount(config.Rootfs, config.Rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, "") } @@ -469,6 +545,13 @@ func pivotRoot(rootfs, pivotBaseDir string) error { } // path to pivot dir now changed, update pivotDir = filepath.Join(pivotBaseDir, filepath.Base(pivotDir)) + + // Make pivotDir rprivate to make sure any of the unmounts don't + // propagate to parent. 
+ if err := syscall.Mount("", pivotDir, "", syscall.MS_PRIVATE|syscall.MS_REC, ""); err != nil { + return err + } + if err := syscall.Unmount(pivotDir, syscall.MNT_DETACH); err != nil { return fmt.Errorf("unmount pivot_root dir %s", err) } diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/user/user.go b/vendor/src/github.com/opencontainers/runc/libcontainer/user/user.go index 964e31bfd4..e6375ea4dd 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/user/user.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/user/user.go @@ -349,21 +349,26 @@ func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) ( return user, nil } -// GetAdditionalGroups looks up a list of groups by name or group id against -// against the given /etc/group formatted data. If a group name cannot be found, -// an error will be returned. If a group id cannot be found, it will be returned -// as-is. +// GetAdditionalGroups looks up a list of groups by name or group id +// against the given /etc/group formatted data. If a group name cannot +// be found, an error will be returned. If a group id cannot be found, +// or the given group data is nil, the id will be returned as-is +// provided it is in the legal range. 
func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, error) { - groups, err := ParseGroupFilter(group, func(g Group) bool { - for _, ag := range additionalGroups { - if g.Name == ag || strconv.Itoa(g.Gid) == ag { - return true + var groups = []Group{} + if group != nil { + var err error + groups, err = ParseGroupFilter(group, func(g Group) bool { + for _, ag := range additionalGroups { + if g.Name == ag || strconv.Itoa(g.Gid) == ag { + return true + } } + return false + }) + if err != nil { + return nil, fmt.Errorf("Unable to find additional groups %v: %v", additionalGroups, err) } - return false - }) - if err != nil { - return nil, fmt.Errorf("Unable to find additional groups %v: %v", additionalGroups, err) } gidMap := make(map[int]struct{}) @@ -401,13 +406,18 @@ func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, err return gids, nil } -// Wrapper around GetAdditionalGroups that opens the groupPath given and gives -// it as an argument to GetAdditionalGroups. +// GetAdditionalGroupsPath is a wrapper around GetAdditionalGroups +// that opens the groupPath given and gives it as an argument to +// GetAdditionalGroups. If groupPath cannot be opened, GetAdditionalGroups +// is called with a nil reader. func GetAdditionalGroupsPath(additionalGroups []string, groupPath string) ([]int, error) { - group, err := os.Open(groupPath) - if err != nil { - return nil, fmt.Errorf("Failed to open group file: %v", err) + // Keep group as an io.Reader so that it stays a true nil interface + // when the open fails; a nil *os.File stored in an io.Reader would + // compare non-nil inside GetAdditionalGroups. + var group io.Reader + if groupFile, err := os.Open(groupPath); err == nil { + group = groupFile + defer groupFile.Close() } - defer group.Close() return GetAdditionalGroups(additionalGroups, group) }