diff --git a/libnetwork/vendor.conf b/libnetwork/vendor.conf index f2bb55223e..865704ab4e 100644 --- a/libnetwork/vendor.conf +++ b/libnetwork/vendor.conf @@ -32,7 +32,7 @@ github.com/mattn/go-shellwords v1.0.3 github.com/miekg/dns v1.0.7 github.com/opencontainers/go-digest v1.0.0-rc1 github.com/opencontainers/image-spec v1.0.1 -github.com/opencontainers/runc 69663f0bd4b60df09991c08812a60108003fa340 +github.com/opencontainers/runc 96ec2177ae841256168fcf76954f7177af9446eb github.com/opencontainers/runtime-spec v1.0.1 github.com/samuel/go-zookeeper d0e0d8e11f318e000a8cc434616d69e329edc374 github.com/sirupsen/logrus v1.0.3 diff --git a/libnetwork/vendor/github.com/opencontainers/runc/README.md b/libnetwork/vendor/github.com/opencontainers/runc/README.md index 5215e32c1f..e755fb7bcd 100644 --- a/libnetwork/vendor/github.com/opencontainers/runc/README.md +++ b/libnetwork/vendor/github.com/opencontainers/runc/README.md @@ -68,6 +68,7 @@ make BUILDTAGS='seccomp apparmor' | selinux | selinux process and mount labeling | | | apparmor | apparmor profile support | | | ambient | ambient capability support | kernel 4.3 | +| nokmem | disable kernel memory account | | ### Running the test suite @@ -87,6 +88,18 @@ You can run a specific test case by setting the `TESTFLAGS` variable. # make test TESTFLAGS="-run=SomeTestFunction" ``` +You can run a specific integration test by setting the `TESTPATH` variable. + +```bash +# make test TESTPATH="/checkpoint.bats" +``` + +You can run a test in your proxy environment by setting `DOCKER_BUILD_PROXY` and `DOCKER_RUN_PROXY` variables. + +```bash +# make test DOCKER_BUILD_PROXY="--build-arg HTTP_PROXY=http://yourproxy/" DOCKER_RUN_PROXY="-e HTTP_PROXY=http://yourproxy/" +``` + ### Dependencies Management `runc` uses [vndr](https://github.com/LK4D4/vndr) for dependencies management. @@ -251,3 +264,7 @@ PIDFile=/run/mycontainerid.pid [Install] WantedBy=multi-user.target ``` + +## License + +The code and docs are released under the [Apache 2.0 license](LICENSE). diff --git a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/README.md b/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/README.md index 42f3efe563..1d7fa04c08 100644 --- a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/README.md +++ b/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/README.md @@ -148,6 +148,7 @@ config := &configs.Config{ {Type: configs.NEWPID}, {Type: configs.NEWUSER}, {Type: configs.NEWNET}, + {Type: configs.NEWCGROUP}, }), Cgroups: &configs.Cgroup{ Name: "test-container", @@ -323,6 +324,7 @@ generated when building libcontainer with docker. ## Copyright and license -Code and documentation copyright 2014 Docker, inc. Code released under the Apache 2.0 license. -Docs released under Creative commons. - +Code and documentation copyright 2014 Docker, inc. +The code and documentation are released under the [Apache 2.0 license](../LICENSE). +The documentation is also released under Creative Commons Attribution 4.0 International License. +You may obtain a copy of the license, titled CC-BY-4.0, at http://creativecommons.org/licenses/by/4.0/. diff --git a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go b/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go deleted file mode 100644 index e0f3ca1653..0000000000 --- a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go +++ /dev/null @@ -1,61 +0,0 @@ -package configs - -import "fmt" - -// blockIODevice holds major:minor format supported in blkio cgroup -type blockIODevice struct { - // Major is the device's major number - Major int64 `json:"major"` - // Minor is the device's minor number - Minor int64 `json:"minor"` -} - -// WeightDevice struct holds a `major:minor weight`|`major:minor leaf_weight` pair -type WeightDevice struct { - blockIODevice - // Weight is the bandwidth rate for the device, range is from 10 to 1000 - Weight uint16 `json:"weight"` - // LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only - LeafWeight uint16 `json:"leafWeight"` -} - -// NewWeightDevice returns a configured WeightDevice pointer -func NewWeightDevice(major, minor int64, weight, leafWeight uint16) *WeightDevice { - wd := &WeightDevice{} - wd.Major = major - wd.Minor = minor - wd.Weight = weight - wd.LeafWeight = leafWeight - return wd -} - -// WeightString formats the struct to be writable to the cgroup specific file -func (wd *WeightDevice) WeightString() string { - return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.Weight) -} - -// LeafWeightString formats the struct to be writable to the cgroup specific file -func (wd *WeightDevice) LeafWeightString() string { - return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.LeafWeight) -} - -// ThrottleDevice struct holds a `major:minor rate_per_second` pair -type ThrottleDevice struct { - blockIODevice - // Rate is the IO rate limit per cgroup per device - Rate uint64 `json:"rate"` -} - -// NewThrottleDevice returns a configured ThrottleDevice pointer -func NewThrottleDevice(major, minor int64, rate uint64) *ThrottleDevice { - td := &ThrottleDevice{} - td.Major = major - td.Minor = minor - td.Rate = rate - return td -} - -// String formats the struct to be writable to the cgroup specific file -func (td *ThrottleDevice) String() string { - return fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate) -} diff --git a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go b/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go deleted file mode 100644 index e15a662f52..0000000000 --- a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go +++ /dev/null @@ -1,122 +0,0 @@ -package configs - -type FreezerState string - -const ( - Undefined FreezerState = "" - Frozen FreezerState = "FROZEN" - Thawed FreezerState = "THAWED" -) - -type Cgroup struct { - // Deprecated, use Path instead - Name string `json:"name,omitempty"` - - // name of parent of cgroup or slice - // Deprecated, use Path instead - Parent string `json:"parent,omitempty"` - - // Path specifies the path to cgroups that are created and/or joined by the container. - // The path is assumed to be relative to the host system cgroup mountpoint. - Path string `json:"path"` - - // ScopePrefix describes prefix for the scope name - ScopePrefix string `json:"scope_prefix"` - - // Paths represent the absolute cgroups paths to join. - // This takes precedence over Path. - Paths map[string]string - - // Resources contains various cgroups settings to apply - *Resources -} - -type Resources struct { - // If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list. - // Deprecated - AllowAllDevices *bool `json:"allow_all_devices,omitempty"` - // Deprecated - AllowedDevices []*Device `json:"allowed_devices,omitempty"` - // Deprecated - DeniedDevices []*Device `json:"denied_devices,omitempty"` - - Devices []*Device `json:"devices"` - - // Memory limit (in bytes) - Memory int64 `json:"memory"` - - // Memory reservation or soft_limit (in bytes) - MemoryReservation int64 `json:"memory_reservation"` - - // Total memory usage (memory + swap); set `-1` to enable unlimited swap - MemorySwap int64 `json:"memory_swap"` - - // Kernel memory limit (in bytes) - KernelMemory int64 `json:"kernel_memory"` - - // Kernel memory limit for TCP use (in bytes) - KernelMemoryTCP int64 `json:"kernel_memory_tcp"` - - // CPU shares (relative weight vs. other containers) - CpuShares uint64 `json:"cpu_shares"` - - // CPU hardcap limit (in usecs). Allowed cpu time in a given period. - CpuQuota int64 `json:"cpu_quota"` - - // CPU period to be used for hardcapping (in usecs). 0 to use system default. - CpuPeriod uint64 `json:"cpu_period"` - - // How many time CPU will use in realtime scheduling (in usecs). - CpuRtRuntime int64 `json:"cpu_rt_quota"` - - // CPU period to be used for realtime scheduling (in usecs). - CpuRtPeriod uint64 `json:"cpu_rt_period"` - - // CPU to use - CpusetCpus string `json:"cpuset_cpus"` - - // MEM to use - CpusetMems string `json:"cpuset_mems"` - - // Process limit; set <= `0' to disable limit. - PidsLimit int64 `json:"pids_limit"` - - // Specifies per cgroup weight, range is from 10 to 1000. - BlkioWeight uint16 `json:"blkio_weight"` - - // Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only - BlkioLeafWeight uint16 `json:"blkio_leaf_weight"` - - // Weight per cgroup per device, can override BlkioWeight. - BlkioWeightDevice []*WeightDevice `json:"blkio_weight_device"` - - // IO read rate limit per cgroup per device, bytes per second. - BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device"` - - // IO write rate limit per cgroup per device, bytes per second. - BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device"` - - // IO read rate limit per cgroup per device, IO per second. - BlkioThrottleReadIOPSDevice []*ThrottleDevice `json:"blkio_throttle_read_iops_device"` - - // IO write rate limit per cgroup per device, IO per second. - BlkioThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkio_throttle_write_iops_device"` - - // set the freeze value for the process - Freezer FreezerState `json:"freezer"` - - // Hugetlb limit (in bytes) - HugetlbLimit []*HugepageLimit `json:"hugetlb_limit"` - - // Whether to disable OOM Killer - OomKillDisable bool `json:"oom_kill_disable"` - - // Tuning swappiness behaviour per cgroup - MemorySwappiness *uint64 `json:"memory_swappiness"` - - // Set priority of network traffic for container - NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"` - - // Set class identifier for container's network packets - NetClsClassid uint32 `json:"net_cls_classid_u"` -} diff --git a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_windows.go b/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_windows.go deleted file mode 100644 index d74847b0db..0000000000 --- a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_windows.go +++ /dev/null @@ -1,6 +0,0 @@ -package configs - -// TODO Windows: This can ultimately be entirely factored out on Windows as -// cgroups are a Unix-specific construct. -type Cgroup struct { -} diff --git a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go b/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go deleted file mode 100644 index 3cae4fd8d9..0000000000 --- a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go +++ /dev/null @@ -1,348 +0,0 @@ -package configs - -import ( - "bytes" - "encoding/json" - "fmt" - "os/exec" - "time" - - "github.com/opencontainers/runtime-spec/specs-go" - - "github.com/sirupsen/logrus" -) - -type Rlimit struct { - Type int `json:"type"` - Hard uint64 `json:"hard"` - Soft uint64 `json:"soft"` -} - -// IDMap represents UID/GID Mappings for User Namespaces. -type IDMap struct { - ContainerID int `json:"container_id"` - HostID int `json:"host_id"` - Size int `json:"size"` -} - -// Seccomp represents syscall restrictions -// By default, only the native architecture of the kernel is allowed to be used -// for syscalls. Additional architectures can be added by specifying them in -// Architectures. -type Seccomp struct { - DefaultAction Action `json:"default_action"` - Architectures []string `json:"architectures"` - Syscalls []*Syscall `json:"syscalls"` -} - -// Action is taken upon rule match in Seccomp -type Action int - -const ( - Kill Action = iota + 1 - Errno - Trap - Allow - Trace -) - -// Operator is a comparison operator to be used when matching syscall arguments in Seccomp -type Operator int - -const ( - EqualTo Operator = iota + 1 - NotEqualTo - GreaterThan - GreaterThanOrEqualTo - LessThan - LessThanOrEqualTo - MaskEqualTo -) - -// Arg is a rule to match a specific syscall argument in Seccomp -type Arg struct { - Index uint `json:"index"` - Value uint64 `json:"value"` - ValueTwo uint64 `json:"value_two"` - Op Operator `json:"op"` -} - -// Syscall is a rule to match a syscall in Seccomp -type Syscall struct { - Name string `json:"name"` - Action Action `json:"action"` - Args []*Arg `json:"args"` -} - -// TODO Windows. Many of these fields should be factored out into those parts -// which are common across platforms, and those which are platform specific. - -// Config defines configuration options for executing a process inside a contained environment. -type Config struct { - // NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs - // This is a common option when the container is running in ramdisk - NoPivotRoot bool `json:"no_pivot_root"` - - // ParentDeathSignal specifies the signal that is sent to the container's process in the case - // that the parent process dies. - ParentDeathSignal int `json:"parent_death_signal"` - - // Path to a directory containing the container's root filesystem. - Rootfs string `json:"rootfs"` - - // Readonlyfs will remount the container's rootfs as readonly where only externally mounted - // bind mounts are writtable. - Readonlyfs bool `json:"readonlyfs"` - - // Specifies the mount propagation flags to be applied to /. - RootPropagation int `json:"rootPropagation"` - - // Mounts specify additional source and destination paths that will be mounted inside the container's - // rootfs and mount namespace if specified - Mounts []*Mount `json:"mounts"` - - // The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well! - Devices []*Device `json:"devices"` - - MountLabel string `json:"mount_label"` - - // Hostname optionally sets the container's hostname if provided - Hostname string `json:"hostname"` - - // Namespaces specifies the container's namespaces that it should setup when cloning the init process - // If a namespace is not provided that namespace is shared from the container's parent process - Namespaces Namespaces `json:"namespaces"` - - // Capabilities specify the capabilities to keep when executing the process inside the container - // All capabilities not specified will be dropped from the processes capability mask - Capabilities *Capabilities `json:"capabilities"` - - // Networks specifies the container's network setup to be created - Networks []*Network `json:"networks"` - - // Routes can be specified to create entries in the route table as the container is started - Routes []*Route `json:"routes"` - - // Cgroups specifies specific cgroup settings for the various subsystems that the container is - // placed into to limit the resources the container has available - Cgroups *Cgroup `json:"cgroups"` - - // AppArmorProfile specifies the profile to apply to the process running in the container and is - // change at the time the process is execed - AppArmorProfile string `json:"apparmor_profile,omitempty"` - - // ProcessLabel specifies the label to apply to the process running in the container. It is - // commonly used by selinux - ProcessLabel string `json:"process_label,omitempty"` - - // Rlimits specifies the resource limits, such as max open files, to set in the container - // If Rlimits are not set, the container will inherit rlimits from the parent process - Rlimits []Rlimit `json:"rlimits,omitempty"` - - // OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores - // for a process. Valid values are between the range [-1000, '1000'], where processes with - // higher scores are preferred for being killed. - // More information about kernel oom score calculation here: https://lwn.net/Articles/317814/ - OomScoreAdj int `json:"oom_score_adj"` - - // UidMappings is an array of User ID mappings for User Namespaces - UidMappings []IDMap `json:"uid_mappings"` - - // GidMappings is an array of Group ID mappings for User Namespaces - GidMappings []IDMap `json:"gid_mappings"` - - // MaskPaths specifies paths within the container's rootfs to mask over with a bind - // mount pointing to /dev/null as to prevent reads of the file. - MaskPaths []string `json:"mask_paths"` - - // ReadonlyPaths specifies paths within the container's rootfs to remount as read-only - // so that these files prevent any writes. - ReadonlyPaths []string `json:"readonly_paths"` - - // Sysctl is a map of properties and their values. It is the equivalent of using - // sysctl -w my.property.name value in Linux. - Sysctl map[string]string `json:"sysctl"` - - // Seccomp allows actions to be taken whenever a syscall is made within the container. - // A number of rules are given, each having an action to be taken if a syscall matches it. - // A default action to be taken if no rules match is also given. - Seccomp *Seccomp `json:"seccomp"` - - // NoNewPrivileges controls whether processes in the container can gain additional privileges. - NoNewPrivileges bool `json:"no_new_privileges,omitempty"` - - // Hooks are a collection of actions to perform at various container lifecycle events. - // CommandHooks are serialized to JSON, but other hooks are not. - Hooks *Hooks - - // Version is the version of opencontainer specification that is supported. - Version string `json:"version"` - - // Labels are user defined metadata that is stored in the config and populated on the state - Labels []string `json:"labels"` - - // NoNewKeyring will not allocated a new session keyring for the container. It will use the - // callers keyring in this case. - NoNewKeyring bool `json:"no_new_keyring"` - - // Rootless specifies whether the container is a rootless container. - Rootless bool `json:"rootless"` - - // IntelRdt specifies settings for Intel RDT/CAT group that the container is placed into - // to limit the resources (e.g., L3 cache) the container has available - IntelRdt *IntelRdt `json:"intel_rdt,omitempty"` -} - -type Hooks struct { - // Prestart commands are executed after the container namespaces are created, - // but before the user supplied command is executed from init. - Prestart []Hook - - // Poststart commands are executed after the container init process starts. - Poststart []Hook - - // Poststop commands are executed after the container init process exits. - Poststop []Hook -} - -type Capabilities struct { - // Bounding is the set of capabilities checked by the kernel. - Bounding []string - // Effective is the set of capabilities checked by the kernel. - Effective []string - // Inheritable is the capabilities preserved across execve. - Inheritable []string - // Permitted is the limiting superset for effective capabilities. - Permitted []string - // Ambient is the ambient set of capabilities that are kept. - Ambient []string -} - -func (hooks *Hooks) UnmarshalJSON(b []byte) error { - var state struct { - Prestart []CommandHook - Poststart []CommandHook - Poststop []CommandHook - } - - if err := json.Unmarshal(b, &state); err != nil { - return err - } - - deserialize := func(shooks []CommandHook) (hooks []Hook) { - for _, shook := range shooks { - hooks = append(hooks, shook) - } - - return hooks - } - - hooks.Prestart = deserialize(state.Prestart) - hooks.Poststart = deserialize(state.Poststart) - hooks.Poststop = deserialize(state.Poststop) - return nil -} - -func (hooks Hooks) MarshalJSON() ([]byte, error) { - serialize := func(hooks []Hook) (serializableHooks []CommandHook) { - for _, hook := range hooks { - switch chook := hook.(type) { - case CommandHook: - serializableHooks = append(serializableHooks, chook) - default: - logrus.Warnf("cannot serialize hook of type %T, skipping", hook) - } - } - - return serializableHooks - } - - return json.Marshal(map[string]interface{}{ - "prestart": serialize(hooks.Prestart), - "poststart": serialize(hooks.Poststart), - "poststop": serialize(hooks.Poststop), - }) -} - -// HookState is the payload provided to a hook on execution. -type HookState specs.State - -type Hook interface { - // Run executes the hook with the provided state. - Run(HookState) error -} - -// NewFunctionHook will call the provided function when the hook is run. -func NewFunctionHook(f func(HookState) error) FuncHook { - return FuncHook{ - run: f, - } -} - -type FuncHook struct { - run func(HookState) error -} - -func (f FuncHook) Run(s HookState) error { - return f.run(s) -} - -type Command struct { - Path string `json:"path"` - Args []string `json:"args"` - Env []string `json:"env"` - Dir string `json:"dir"` - Timeout *time.Duration `json:"timeout"` -} - -// NewCommandHook will execute the provided command when the hook is run. -func NewCommandHook(cmd Command) CommandHook { - return CommandHook{ - Command: cmd, - } -} - -type CommandHook struct { - Command -} - -func (c Command) Run(s HookState) error { - b, err := json.Marshal(s) - if err != nil { - return err - } - var stdout, stderr bytes.Buffer - cmd := exec.Cmd{ - Path: c.Path, - Args: c.Args, - Env: c.Env, - Stdin: bytes.NewReader(b), - Stdout: &stdout, - Stderr: &stderr, - } - if err := cmd.Start(); err != nil { - return err - } - errC := make(chan error, 1) - go func() { - err := cmd.Wait() - if err != nil { - err = fmt.Errorf("error running hook: %v, stdout: %s, stderr: %s", err, stdout.String(), stderr.String()) - } - errC <- err - }() - var timerCh <-chan time.Time - if c.Timeout != nil { - timer := time.NewTimer(*c.Timeout) - defer timer.Stop() - timerCh = timer.C - } - select { - case err := <-errC: - return err - case <-timerCh: - cmd.Process.Kill() - cmd.Wait() - return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds()) - } -} diff --git a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go b/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go deleted file mode 100644 index 07da108045..0000000000 --- a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go +++ /dev/null @@ -1,61 +0,0 @@ -package configs - -import "fmt" - -// HostUID gets the translated uid for the process on host which could be -// different when user namespaces are enabled. -func (c Config) HostUID(containerId int) (int, error) { - if c.Namespaces.Contains(NEWUSER) { - if c.UidMappings == nil { - return -1, fmt.Errorf("User namespaces enabled, but no uid mappings found.") - } - id, found := c.hostIDFromMapping(containerId, c.UidMappings) - if !found { - return -1, fmt.Errorf("User namespaces enabled, but no user mapping found.") - } - return id, nil - } - // Return unchanged id. - return containerId, nil -} - -// HostRootUID gets the root uid for the process on host which could be non-zero -// when user namespaces are enabled. -func (c Config) HostRootUID() (int, error) { - return c.HostUID(0) -} - -// HostGID gets the translated gid for the process on host which could be -// different when user namespaces are enabled. -func (c Config) HostGID(containerId int) (int, error) { - if c.Namespaces.Contains(NEWUSER) { - if c.GidMappings == nil { - return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.") - } - id, found := c.hostIDFromMapping(containerId, c.GidMappings) - if !found { - return -1, fmt.Errorf("User namespaces enabled, but no group mapping found.") - } - return id, nil - } - // Return unchanged id. - return containerId, nil -} - -// HostRootGID gets the root gid for the process on host which could be non-zero -// when user namespaces are enabled. -func (c Config) HostRootGID() (int, error) { - return c.HostGID(0) -} - -// Utility function that gets a host ID for a container ID from user namespace map -// if that ID is present in the map. -func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) { - for _, m := range uMap { - if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) { - hostID := m.HostID + (containerID - m.ContainerID) - return hostID, true - } - } - return -1, false -} diff --git a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/device.go b/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/device.go deleted file mode 100644 index 8701bb212d..0000000000 --- a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/device.go +++ /dev/null @@ -1,57 +0,0 @@ -package configs - -import ( - "fmt" - "os" -) - -const ( - Wildcard = -1 -) - -// TODO Windows: This can be factored out in the future - -type Device struct { - // Device type, block, char, etc. - Type rune `json:"type"` - - // Path to the device. - Path string `json:"path"` - - // Major is the device's major number. - Major int64 `json:"major"` - - // Minor is the device's minor number. - Minor int64 `json:"minor"` - - // Cgroup permissions format, rwm. - Permissions string `json:"permissions"` - - // FileMode permission bits for the device. - FileMode os.FileMode `json:"file_mode"` - - // Uid of the device. - Uid uint32 `json:"uid"` - - // Gid of the device. - Gid uint32 `json:"gid"` - - // Write the file to the allowed list - Allow bool `json:"allow"` -} - -func (d *Device) CgroupString() string { - return fmt.Sprintf("%c %s:%s %s", d.Type, deviceNumberString(d.Major), deviceNumberString(d.Minor), d.Permissions) -} - -func (d *Device) Mkdev() int { - return int((d.Major << 8) | (d.Minor & 0xff) | ((d.Minor & 0xfff00) << 12)) -} - -// deviceNumberString converts the device number to a string return result. -func deviceNumberString(number int64) string { - if number == Wildcard { - return "*" - } - return fmt.Sprint(number) -} diff --git a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go b/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go deleted file mode 100644 index e4f423c523..0000000000 --- a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go +++ /dev/null @@ -1,111 +0,0 @@ -// +build linux - -package configs - -var ( - // DefaultSimpleDevices are devices that are to be both allowed and created. - DefaultSimpleDevices = []*Device{ - // /dev/null and zero - { - Path: "/dev/null", - Type: 'c', - Major: 1, - Minor: 3, - Permissions: "rwm", - FileMode: 0666, - }, - { - Path: "/dev/zero", - Type: 'c', - Major: 1, - Minor: 5, - Permissions: "rwm", - FileMode: 0666, - }, - - { - Path: "/dev/full", - Type: 'c', - Major: 1, - Minor: 7, - Permissions: "rwm", - FileMode: 0666, - }, - - // consoles and ttys - { - Path: "/dev/tty", - Type: 'c', - Major: 5, - Minor: 0, - Permissions: "rwm", - FileMode: 0666, - }, - - // /dev/urandom,/dev/random - { - Path: "/dev/urandom", - Type: 'c', - Major: 1, - Minor: 9, - Permissions: "rwm", - FileMode: 0666, - }, - { - Path: "/dev/random", - Type: 'c', - Major: 1, - Minor: 8, - Permissions: "rwm", - FileMode: 0666, - }, - } - DefaultAllowedDevices = append([]*Device{ - // allow mknod for any device - { - Type: 'c', - Major: Wildcard, - Minor: Wildcard, - Permissions: "m", - }, - { - Type: 'b', - Major: Wildcard, - Minor: Wildcard, - Permissions: "m", - }, - - { - Path: "/dev/console", - Type: 'c', - Major: 5, - Minor: 1, - Permissions: "rwm", - }, - // /dev/pts/ - pts namespaces are "coming soon" - { - Path: "", - Type: 'c', - Major: 136, - Minor: Wildcard, - Permissions: "rwm", - }, - { - Path: "", - Type: 'c', - Major: 5, - Minor: 2, - Permissions: "rwm", - }, - - // tuntap - { - Path: "", - Type: 'c', - Major: 10, - Minor: 200, - Permissions: "rwm", - }, - }, DefaultSimpleDevices...) - DefaultAutoCreatedDevices = append([]*Device{}, DefaultSimpleDevices...) -) diff --git a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go b/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go deleted file mode 100644 index d30216380b..0000000000 --- a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go +++ /dev/null @@ -1,9 +0,0 @@ -package configs - -type HugepageLimit struct { - // which type of hugepage to limit. - Pagesize string `json:"page_size"` - - // usage limit for hugepage. - Limit uint64 `json:"limit"` -} diff --git a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go b/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go deleted file mode 100644 index 36bd5f96a1..0000000000 --- a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go +++ /dev/null @@ -1,7 +0,0 @@ -package configs - -type IntelRdt struct { - // The schema for L3 cache id and capacity bitmask (CBM) - // Format: "L3:=;=;..." - L3CacheSchema string `json:"l3_cache_schema,omitempty"` -} diff --git a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/interface_priority_map.go b/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/interface_priority_map.go deleted file mode 100644 index 9a0395eaf5..0000000000 --- a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/interface_priority_map.go +++ /dev/null @@ -1,14 +0,0 @@ -package configs - -import ( - "fmt" -) - -type IfPrioMap struct { - Interface string `json:"interface"` - Priority int64 `json:"priority"` -} - -func (i *IfPrioMap) CgroupString() string { - return fmt.Sprintf("%s %d", i.Interface, i.Priority) -} diff --git a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go b/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go deleted file mode 100644 index 670757ddb5..0000000000 --- a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go +++ /dev/null @@ -1,39 +0,0 @@ -package configs - -const ( - // EXT_COPYUP is a directive to copy up the contents of a directory when - // a tmpfs is mounted over it. - EXT_COPYUP = 1 << iota -) - -type Mount struct { - // Source path for the mount. - Source string `json:"source"` - - // Destination path for the mount inside the container. - Destination string `json:"destination"` - - // Device the mount is for. - Device string `json:"device"` - - // Mount flags. - Flags int `json:"flags"` - - // Propagation Flags - PropagationFlags []int `json:"propagation_flags"` - - // Mount data applied to the mount. - Data string `json:"data"` - - // Relabel source if set, "z" indicates shared, "Z" indicates unshared. - Relabel string `json:"relabel"` - - // Extensions are additional flags that are specific to runc. - Extensions int `json:"extensions"` - - // Optional Command to be run before Source is mounted. - PremountCmds []Command `json:"premount_cmds"` - - // Optional Command to be run after Source is mounted. - PostmountCmds []Command `json:"postmount_cmds"` -} diff --git a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go b/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go deleted file mode 100644 index a3329a31a9..0000000000 --- a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go +++ /dev/null @@ -1,5 +0,0 @@ -package configs - -type NamespaceType string - -type Namespaces []Namespace diff --git a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go b/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go deleted file mode 100644 index 5fc171a57b..0000000000 --- a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go +++ /dev/null @@ -1,122 +0,0 @@ -package configs - -import ( - "fmt" - "os" - "sync" -) - -const ( - NEWNET NamespaceType = "NEWNET" - NEWPID NamespaceType = "NEWPID" - NEWNS NamespaceType = "NEWNS" - NEWUTS NamespaceType = "NEWUTS" - NEWIPC NamespaceType = "NEWIPC" - NEWUSER NamespaceType = "NEWUSER" -) - -var ( - nsLock sync.Mutex - supportedNamespaces = make(map[NamespaceType]bool) -) - -// NsName converts the namespace type to its filename -func NsName(ns NamespaceType) string { - switch ns { - case NEWNET: - return "net" - case NEWNS: - return "mnt" - case NEWPID: - return "pid" - case NEWIPC: - return "ipc" - case NEWUSER: - return "user" - case NEWUTS: - return "uts" - } - return "" -} - -// IsNamespaceSupported returns whether a namespace is available or -// not -func IsNamespaceSupported(ns NamespaceType) bool { - nsLock.Lock() - defer nsLock.Unlock() - supported, ok := supportedNamespaces[ns] - if ok { - return supported - } - nsFile := NsName(ns) - // if the namespace type is unknown, just return false - if nsFile == "" { - return false - } - _, err := os.Stat(fmt.Sprintf("/proc/self/ns/%s", nsFile)) - // a namespace is supported if it exists and we have permissions to read it - supported = err == nil - supportedNamespaces[ns] = supported - return supported -} - -func NamespaceTypes() []NamespaceType { - return []NamespaceType{ - NEWUSER, // Keep user NS always first, don't move it. - NEWIPC, - NEWUTS, - NEWNET, - NEWPID, - NEWNS, - } -} - -// Namespace defines configuration for each namespace. It specifies an -// alternate path that is able to be joined via setns. -type Namespace struct { - Type NamespaceType `json:"type"` - Path string `json:"path"` -} - -func (n *Namespace) GetPath(pid int) string { - return fmt.Sprintf("/proc/%d/ns/%s", pid, NsName(n.Type)) -} - -func (n *Namespaces) Remove(t NamespaceType) bool { - i := n.index(t) - if i == -1 { - return false - } - *n = append((*n)[:i], (*n)[i+1:]...) - return true -} - -func (n *Namespaces) Add(t NamespaceType, path string) { - i := n.index(t) - if i == -1 { - *n = append(*n, Namespace{Type: t, Path: path}) - return - } - (*n)[i].Path = path -} - -func (n *Namespaces) index(t NamespaceType) int { - for i, ns := range *n { - if ns.Type == t { - return i - } - } - return -1 -} - -func (n *Namespaces) Contains(t NamespaceType) bool { - return n.index(t) != -1 -} - -func (n *Namespaces) PathOf(t NamespaceType) string { - i := n.index(t) - if i == -1 { - return "" - } - return (*n)[i].Path -} diff --git a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go b/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go deleted file mode 100644 index 4ce6813d23..0000000000 --- a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go +++ /dev/null @@ -1,31 +0,0 @@ -// +build linux - -package configs - -import "golang.org/x/sys/unix" - -func (n *Namespace) Syscall() int { - return namespaceInfo[n.Type] -} - -var namespaceInfo = map[NamespaceType]int{ - NEWNET: unix.CLONE_NEWNET, - NEWNS: unix.CLONE_NEWNS, - NEWUSER: unix.CLONE_NEWUSER, - NEWIPC: unix.CLONE_NEWIPC, - NEWUTS: unix.CLONE_NEWUTS, - NEWPID: unix.CLONE_NEWPID, -} - -// CloneFlags parses the container's Namespaces options to set the correct -// flags on clone, unshare. This function returns flags only for new namespaces. -func (n *Namespaces) CloneFlags() uintptr { - var flag int - for _, v := range *n { - if v.Path != "" { - continue - } - flag |= namespaceInfo[v.Type] - } - return uintptr(flag) -} diff --git a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go b/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go deleted file mode 100644 index 5d9a5c81f3..0000000000 --- a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go +++ /dev/null @@ -1,13 +0,0 @@ -// +build !linux,!windows - -package configs - -func (n *Namespace) Syscall() int { - panic("No namespace syscall support") -} - -// CloneFlags parses the container's Namespaces options to set the correct -// flags on clone, unshare. This function returns flags only for new namespaces. -func (n *Namespaces) CloneFlags() uintptr { - panic("No namespace syscall support") -} diff --git a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go b/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go deleted file mode 100644 index 19bf713de3..0000000000 --- a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go +++ /dev/null @@ -1,8 +0,0 @@ -// +build !linux - -package configs - -// Namespace defines configuration for each namespace. It specifies an -// alternate path that is able to be joined via setns. -type Namespace struct { -} diff --git a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/network.go b/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/network.go deleted file mode 100644 index ccdb228e14..0000000000 --- a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/configs/network.go +++ /dev/null @@ -1,72 +0,0 @@ -package configs - -// Network defines configuration for a container's networking stack -// -// The network configuration can be omitted from a container causing the -// container to be setup with the host's networking stack -type Network struct { - // Type sets the networks type, commonly veth and loopback - Type string `json:"type"` - - // Name of the network interface - Name string `json:"name"` - - // The bridge to use. - Bridge string `json:"bridge"` - - // MacAddress contains the MAC address to set on the network interface - MacAddress string `json:"mac_address"` - - // Address contains the IPv4 and mask to set on the network interface - Address string `json:"address"` - - // Gateway sets the gateway address that is used as the default for the interface - Gateway string `json:"gateway"` - - // IPv6Address contains the IPv6 and mask to set on the network interface - IPv6Address string `json:"ipv6_address"` - - // IPv6Gateway sets the ipv6 gateway address that is used as the default for the interface - IPv6Gateway string `json:"ipv6_gateway"` - - // Mtu sets the mtu value for the interface and will be mirrored on both the host and - // container's interfaces if a pair is created, specifically in the case of type veth - // Note: This does not apply to loopback interfaces. - Mtu int `json:"mtu"` - - // TxQueueLen sets the tx_queuelen value for the interface and will be mirrored on both the host and - // container's interfaces if a pair is created, specifically in the case of type veth - // Note: This does not apply to loopback interfaces. - TxQueueLen int `json:"txqueuelen"` - - // HostInterfaceName is a unique name of a veth pair that resides on in the host interface of the - // container. - HostInterfaceName string `json:"host_interface_name"` - - // HairpinMode specifies if hairpin NAT should be enabled on the virtual interface - // bridge port in the case of type veth - // Note: This is unsupported on some systems. - // Note: This does not apply to loopback interfaces. - HairpinMode bool `json:"hairpin_mode"` -} - -// Routes can be specified to create entries in the route table as the container is started -// -// All of destination, source, and gateway should be either IPv4 or IPv6. -// One of the three options must be present, and omitted entries will use their -// IP family default for the route table. For IPv4 for example, setting the -// gateway to 1.2.3.4 and the interface to eth0 will set up a standard -// destination of 0.0.0.0(or *) when viewed in the route table. -type Route struct { - // Sets the destination and mask, should be a CIDR. Accepts IPv4 and IPv6 - Destination string `json:"destination"` - - // Sets the source and mask, should be a CIDR. Accepts IPv4 and IPv6 - Source string `json:"source"` - - // Sets the gateway. Accepts IPv4 and IPv6 - Gateway string `json:"gateway"` - - // The device to set this route up for, for example: eth0 - InterfaceName string `json:"interface_name"` -} diff --git a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/nsenter/README.md b/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/nsenter/README.md index 5757013712..9ec6c39316 100644 --- a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/nsenter/README.md +++ b/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/nsenter/README.md @@ -10,8 +10,8 @@ The `nsenter` package will `import "C"` and it uses [cgo](https://golang.org/cmd package. In cgo, if the import of "C" is immediately preceded by a comment, that comment, called the preamble, is used as a header when compiling the C parts of the package. So every time we import package `nsenter`, the C code function `nsexec()` would be -called. And package `nsenter` is now only imported in `main_unix.go`, so every time -before we call `cmd.Start` on linux, that C code would run. +called. And package `nsenter` is only imported in `init.go`, so every time the runc +`init` command is invoked, that C code is run. Because `nsexec()` must be run before the Go runtime in order to use the Linux kernel namespace, you must `import` this library into a package if @@ -37,7 +37,7 @@ the parent `nsexec()` will exit and the child `nsexec()` process will return to allow the Go runtime take over. NOTE: We do both `setns(2)` and `clone(2)` even if we don't have any -CLONE_NEW* clone flags because we must fork a new process in order to +`CLONE_NEW*` clone flags because we must fork a new process in order to enter the PID namespace. diff --git a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c b/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c index 2c69cee5d6..28269dfc02 100644 --- a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c +++ b/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c @@ -42,6 +42,12 @@ enum sync_t { SYNC_ERR = 0xFF, /* Fatal error, no turning back. The error code follows. */ }; +/* + * Synchronisation value for cgroup namespace setup. + * The same constant is defined in process_linux.go as "createCgroupns". + */ +#define CREATECGROUPNS 0x80 + /* longjmp() arguments. */ #define JUMP_PARENT 0x00 #define JUMP_CHILD 0xA0 @@ -82,7 +88,7 @@ struct nlconfig_t { uint8_t is_setgroup; /* Rootless container settings. */ - uint8_t is_rootless; + uint8_t is_rootless_euid; /* boolean */ char *uidmappath; size_t uidmappath_len; char *gidmappath; @@ -100,7 +106,7 @@ struct nlconfig_t { #define GIDMAP_ATTR 27284 #define SETGROUP_ATTR 27285 #define OOM_SCORE_ADJ_ATTR 27286 -#define ROOTLESS_ATTR 27287 +#define ROOTLESS_EUID_ATTR 27287 #define UIDMAPPATH_ATTR 27288 #define GIDMAPPATH_ATTR 27289 @@ -211,7 +217,7 @@ static int try_mapping_tool(const char *app, int pid, char *map, size_t map_len) /* * If @app is NULL, execve will segfault. Just check it here and bail (if - * we're in this path, the caller is already getting desparate and there + * we're in this path, the caller is already getting desperate and there * isn't a backup to this failing). This usually would be a configuration * or programming issue. */ @@ -419,8 +425,8 @@ static void nl_parse(int fd, struct nlconfig_t *config) case CLONE_FLAGS_ATTR: config->cloneflags = readint32(current); break; - case ROOTLESS_ATTR: - config->is_rootless = readint8(current); + case ROOTLESS_EUID_ATTR: + config->is_rootless_euid = readint8(current); /* boolean */ break; case OOM_SCORE_ADJ_ATTR: config->oom_score_adj = current; @@ -505,7 +511,8 @@ void join_namespaces(char *nslist) ns->fd = fd; ns->ns = nsflag(namespace); - strncpy(ns->path, path, PATH_MAX); + strncpy(ns->path, path, PATH_MAX - 1); + ns->path[PATH_MAX - 1] = '\0'; } while ((namespace = strtok_r(NULL, ",", &saveptr)) != NULL); /* @@ -639,7 +646,6 @@ void nsexec(void) case JUMP_PARENT:{ int len; pid_t child, first_child = -1; - char buf[JSON_MAX]; bool ready = false; /* For debugging. */ @@ -678,17 +684,15 @@ void nsexec(void) /* * Enable setgroups(2) if we've been asked to. But we also * have to explicitly disable setgroups(2) if we're - * creating a rootless container (this is required since - * Linux 3.19). + * creating a rootless container for single-entry mapping. + * i.e. config.is_setgroup == false. + * (this is required since Linux 3.19). + * + * For rootless multi-entry mapping, config.is_setgroup shall be true and + * newuidmap/newgidmap shall be used. */ - if (config.is_rootless && config.is_setgroup) { - kill(child, SIGKILL); - bail("cannot allow setgroup in an unprivileged user namespace setup"); - } - if (config.is_setgroup) - update_setgroups(child, SETGROUPS_ALLOW); - if (config.is_rootless) + if (config.is_rootless_euid && !config.is_setgroup) update_setgroups(child, SETGROUPS_DENY); /* Set up mappings. */ @@ -717,6 +721,18 @@ void nsexec(void) kill(child, SIGKILL); bail("failed to sync with child: write(SYNC_RECVPID_ACK)"); } + + /* Send the init_func pid back to our parent. + * + * Send the init_func pid and the pid of the first child back to our parent. + * We need to send both back because we can't reap the first child we created (CLONE_PARENT). + * It becomes the responsibility of our parent to reap the first child. + */ + len = dprintf(pipenum, "{\"pid\": %d, \"pid_first\": %d}\n", child, first_child); + if (len < 0) { + kill(child, SIGKILL); + bail("unable to generate JSON for child pid"); + } } break; case SYNC_CHILD_READY: @@ -760,23 +776,6 @@ void nsexec(void) bail("unexpected sync value: %u", s); } } - - /* - * Send the init_func pid and the pid of the first child back to our parent. - * - * We need to send both back because we can't reap the first child we created (CLONE_PARENT). - * It becomes the responsibility of our parent to reap the first child. - */ - len = snprintf(buf, JSON_MAX, "{\"pid\": %d, \"pid_first\": %d}\n", child, first_child); - if (len < 0) { - kill(child, SIGKILL); - bail("unable to generate JSON for child pid"); - } - if (write(pipenum, buf, len) != len) { - kill(child, SIGKILL); - bail("unable to send child pid to bootstrapper"); - } - exit(0); } @@ -809,25 +808,30 @@ void nsexec(void) if (config.namespaces) join_namespaces(config.namespaces); - /* - * Unshare all of the namespaces. Now, it should be noted that this - * ordering might break in the future (especially with rootless - * containers). But for now, it's not possible to split this into - * CLONE_NEWUSER + [the rest] because of some RHEL SELinux issues. - * - * Note that we don't merge this with clone() because there were - * some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID) - * was broken, so we'll just do it the long way anyway. - */ - if (unshare(config.cloneflags) < 0) - bail("failed to unshare namespaces"); - /* * Deal with user namespaces first. They are quite special, as they * affect our ability to unshare other namespaces and are used as * context for privilege checks. + * + * We don't unshare all namespaces in one go. The reason for this + * is that, while the kernel documentation may claim otherwise, + * there are certain cases where unsharing all namespaces at once + * will result in namespace objects being owned incorrectly. + * Ideally we should just fix these kernel bugs, but it's better to + * be safe than sorry, and fix them separately. + * + * A specific case of this is that the SELinux label of the + * internal kern-mount that mqueue uses will be incorrect if the + * UTS namespace is cloned before the USER namespace is mapped. + * I've also heard of similar problems with the network namespace + * in some scenarios. This also mirrors how LXC deals with this + * problem. */ if (config.cloneflags & CLONE_NEWUSER) { + if (unshare(CLONE_NEWUSER) < 0) + bail("failed to unshare user namespace"); + config.cloneflags &= ~CLONE_NEWUSER; + /* * We don't have the privileges to do any mapping here (see the * clone_parent rant). So signal our parent to hook us up. @@ -853,7 +857,23 @@ void nsexec(void) if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0) bail("failed to set process as dumpable"); } + + /* Become root in the namespace proper. */ + if (setresuid(0, 0, 0) < 0) + bail("failed to become root in user namespace"); } + /* + * Unshare all of the namespaces. Now, it should be noted that this + * ordering might break in the future (especially with rootless + * containers). But for now, it's not possible to split this into + * CLONE_NEWUSER + [the rest] because of some RHEL SELinux issues. + * + * Note that we don't merge this with clone() because there were + * some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID) + * was broken, so we'll just do it the long way anyway. + */ + if (unshare(config.cloneflags & ~CLONE_NEWCGROUP) < 0) + bail("failed to unshare namespaces"); /* * TODO: What about non-namespace clone flags that we're dropping here? @@ -936,11 +956,23 @@ void nsexec(void) if (setgid(0) < 0) bail("setgid failed"); - if (!config.is_rootless && config.is_setgroup) { + if (!config.is_rootless_euid && config.is_setgroup) { if (setgroups(0, NULL) < 0) bail("setgroups failed"); } + /* ... wait until our topmost parent has finished cgroup setup in p.manager.Apply() ... */ + if (config.cloneflags & CLONE_NEWCGROUP) { + uint8_t value; + if (read(pipenum, &value, sizeof(value)) != sizeof(value)) + bail("read synchronisation value failed"); + if (value == CREATECGROUPNS) { + if (unshare(CLONE_NEWCGROUP) < 0) + bail("failed to unshare cgroup namespace"); + } else + bail("received unknown synchronisation value"); + } + s = SYNC_CHILD_READY; if (write(syncfd, &s, sizeof(s)) != sizeof(s)) bail("failed to sync with patent: write(SYNC_CHILD_READY)"); diff --git a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go b/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go index c45e300411..92b5ae8de0 100644 --- a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go +++ b/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go @@ -5,6 +5,7 @@ package user import ( "io" "os" + "strconv" "golang.org/x/sys/unix" ) @@ -114,3 +115,30 @@ func CurrentUser() (User, error) { func CurrentGroup() (Group, error) { return LookupGid(unix.Getgid()) } + +func currentUserSubIDs(fileName string) ([]SubID, error) { + u, err := CurrentUser() + if err != nil { + return nil, err + } + filter := func(entry SubID) bool { + return entry.Name == u.Name || entry.Name == strconv.Itoa(u.Uid) + } + return ParseSubIDFileFilter(fileName, filter) +} + +func CurrentUserSubUIDs() ([]SubID, error) { + return currentUserSubIDs("/etc/subuid") +} + +func CurrentUserSubGIDs() ([]SubID, error) { + return currentUserSubIDs("/etc/subgid") +} + +func CurrentProcessUIDMap() ([]IDMap, error) { + return ParseIDMapFile("/proc/self/uid_map") +} + +func CurrentProcessGIDMap() ([]IDMap, error) { + return ParseIDMapFile("/proc/self/gid_map") +} diff --git a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/user/user.go b/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/user/user.go index 93414516ca..7b912bbf8b 100644 --- a/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/user/user.go +++ b/libnetwork/vendor/github.com/opencontainers/runc/libcontainer/user/user.go @@ -75,12 +75,29 @@ func groupFromOS(g *user.Group) (Group, error) { return newGroup, nil } +// SubID represents an entry in /etc/sub{u,g}id +type SubID struct { + Name string + SubID int64 + Count int64 +} + +// IDMap represents an entry in /proc/PID/{u,g}id_map +type IDMap struct { + ID int64 + ParentID int64 + Count int64 +} + func parseLine(line string, v ...interface{}) { - if line == "" { + parseParts(strings.Split(line, ":"), v...) +} + +func parseParts(parts []string, v ...interface{}) { + if len(parts) == 0 { return } - parts := strings.Split(line, ":") for i, p := range parts { // Ignore cases where we don't have enough fields to populate the arguments. // Some configuration files like to misbehave. @@ -96,6 +113,8 @@ func parseLine(line string, v ...interface{}) { case *int: // "numbers", with conversion errors ignored because of some misbehaving configuration files. *e, _ = strconv.Atoi(p) + case *int64: + *e, _ = strconv.ParseInt(p, 10, 64) case *[]string: // Comma-separated lists. if p != "" { @@ -105,7 +124,7 @@ func parseLine(line string, v ...interface{}) { } default: // Someone goof'd when writing code using this function. Scream so they can hear us. - panic(fmt.Sprintf("parseLine only accepts {*string, *int, *[]string} as arguments! %#v is not a pointer!", e)) + panic(fmt.Sprintf("parseLine only accepts {*string, *int, *int64, *[]string} as arguments! %#v is not a pointer!", e)) } } } @@ -479,3 +498,111 @@ func GetAdditionalGroupsPath(additionalGroups []string, groupPath string) ([]int } return GetAdditionalGroups(additionalGroups, group) } + +func ParseSubIDFile(path string) ([]SubID, error) { + subid, err := os.Open(path) + if err != nil { + return nil, err + } + defer subid.Close() + return ParseSubID(subid) +} + +func ParseSubID(subid io.Reader) ([]SubID, error) { + return ParseSubIDFilter(subid, nil) +} + +func ParseSubIDFileFilter(path string, filter func(SubID) bool) ([]SubID, error) { + subid, err := os.Open(path) + if err != nil { + return nil, err + } + defer subid.Close() + return ParseSubIDFilter(subid, filter) +} + +func ParseSubIDFilter(r io.Reader, filter func(SubID) bool) ([]SubID, error) { + if r == nil { + return nil, fmt.Errorf("nil source for subid-formatted data") + } + + var ( + s = bufio.NewScanner(r) + out = []SubID{} + ) + + for s.Scan() { + if err := s.Err(); err != nil { + return nil, err + } + + line := strings.TrimSpace(s.Text()) + if line == "" { + continue + } + + // see: man 5 subuid + p := SubID{} + parseLine(line, &p.Name, &p.SubID, &p.Count) + + if filter == nil || filter(p) { + out = append(out, p) + } + } + + return out, nil +} + +func ParseIDMapFile(path string) ([]IDMap, error) { + r, err := os.Open(path) + if err != nil { + return nil, err + } + defer r.Close() + return ParseIDMap(r) +} + +func ParseIDMap(r io.Reader) ([]IDMap, error) { + return ParseIDMapFilter(r, nil) +} + +func ParseIDMapFileFilter(path string, filter func(IDMap) bool) ([]IDMap, error) { + r, err := os.Open(path) + if err != nil { + return nil, err + } + defer r.Close() + return ParseIDMapFilter(r, filter) +} + +func ParseIDMapFilter(r io.Reader, filter func(IDMap) bool) ([]IDMap, error) { + if r == nil { + return nil, fmt.Errorf("nil source for idmap-formatted data") + } + + var ( + s = bufio.NewScanner(r) + out = []IDMap{} + ) + + for s.Scan() { + if err := s.Err(); err != nil { + return nil, err + } + + line := strings.TrimSpace(s.Text()) + if line == "" { + continue + } + + // see: man 7 user_namespaces + p := IDMap{} + parseParts(strings.Fields(line), &p.ID, &p.ParentID, &p.Count) + + if filter == nil || filter(p) { + out = append(out, p) + } + } + + return out, nil +} diff --git a/libnetwork/vendor/github.com/opencontainers/runc/vendor.conf b/libnetwork/vendor/github.com/opencontainers/runc/vendor.conf index e2b519e673..fadbe07071 100644 --- a/libnetwork/vendor/github.com/opencontainers/runc/vendor.conf +++ b/libnetwork/vendor/github.com/opencontainers/runc/vendor.conf @@ -1,7 +1,7 @@ # OCI runtime-spec. When updating this, make sure you use a version tag rather # than a commit ID so it's much more obvious what version of the spec we are # using. -github.com/opencontainers/runtime-spec v1.0.0 +github.com/opencontainers/runtime-spec 5684b8af48c1ac3b1451fa499724e30e3c20a294 # Core libcontainer functionality. github.com/mrunalp/fileutils ed869b029674c0e9ce4c0dfa781405c2d9946d08 github.com/opencontainers/selinux v1.0.0-rc1