mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
Merge pull request #38873 from thaJeztah/update_containerd_1.2.5
Update containerd v1.2.5, runc 2b18fe1d885ee5083ef9f0838fee39b62d653e30
This commit is contained in:
commit
05e7d000f2
24 changed files with 918 additions and 168 deletions
|
@ -4,7 +4,7 @@
|
|||
# containerd is also pinned in vendor.conf. When updating the binary
|
||||
# version you may also need to update the vendor version to pick up bug
|
||||
# fixes or new APIs.
|
||||
CONTAINERD_COMMIT=e6b3f5632f50dbc4e9cb6288d911bf4f5e95b18e # v1.2.4
|
||||
CONTAINERD_COMMIT=bb71b10fd8f58240ca47fbb579b9d1028eea7c84 # v1.2.5
|
||||
|
||||
install_containerd() {
|
||||
echo "Install containerd version $CONTAINERD_COMMIT"
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
# The version of runc should match the version that is used by the containerd
|
||||
# version that is used. If you need to update runc, open a pull request in
|
||||
# the containerd project first, and update both after that is merged.
|
||||
RUNC_COMMIT=6635b4f0c6af3810594d2770f662f34ddc15b40d
|
||||
RUNC_COMMIT=2b18fe1d885ee5083ef9f0838fee39b62d653e30
|
||||
|
||||
install_runc() {
|
||||
# If using RHEL7 kernels (3.10.0 el7), disable kmem accounting/limiting
|
||||
|
|
|
@ -79,7 +79,7 @@ google.golang.org/grpc v1.12.0
|
|||
# the containerd project first, and update both after that is merged.
|
||||
# This commit does not need to match RUNC_COMMIT as it is used for helper
|
||||
# packages but should be newer or equal.
|
||||
github.com/opencontainers/runc 12f6a991201fdb8f82579582d5e00e28fba06d0a
|
||||
github.com/opencontainers/runc 2b18fe1d885ee5083ef9f0838fee39b62d653e30
|
||||
github.com/opencontainers/runtime-spec 29686dbc5559d93fb1ef402eeda3e35c38d75af4 # v1.0.1-59-g29686db
|
||||
github.com/opencontainers/image-spec v1.0.1
|
||||
github.com/seccomp/libseccomp-golang 32f571b70023028bd57d9288c20efbcb237f3ce0
|
||||
|
@ -118,10 +118,10 @@ github.com/googleapis/gax-go v2.0.0
|
|||
google.golang.org/genproto 694d95ba50e67b2e363f3483057db5d4910c18f9
|
||||
|
||||
# containerd
|
||||
github.com/containerd/containerd e6b3f5632f50dbc4e9cb6288d911bf4f5e95b18e # v1.2.4
|
||||
github.com/containerd/containerd bb71b10fd8f58240ca47fbb579b9d1028eea7c84 # v1.2.5
|
||||
github.com/containerd/fifo 3d5202aec260678c48179c56f40e6f38a095738c
|
||||
github.com/containerd/continuity 004b46473808b3e7a4a3049c20e4376c91eb966d
|
||||
github.com/containerd/cgroups 5e610833b72089b37d0e615de9a92dfc043757c2
|
||||
github.com/containerd/cgroups dbea6f2bd41658b84b00417ceefa416b979cbf10
|
||||
github.com/containerd/console c12b1e7919c14469339a5d38f2f8ed9b64a9de23
|
||||
github.com/containerd/go-runc 5a6d9f37cfa36b15efba46dc7ea349fa9b7143c3
|
||||
github.com/containerd/typeurl a93fcdb778cd272c6e9b3028b2f42d813e785d40
|
||||
|
|
14
vendor/github.com/containerd/cgroups/README.md
generated
vendored
14
vendor/github.com/containerd/cgroups/README.md
generated
vendored
|
@ -1,8 +1,9 @@
|
|||
# cgroups
|
||||
|
||||
[![Build Status](https://travis-ci.org/containerd/cgroups.svg?branch=master)](https://travis-ci.org/containerd/cgroups)
|
||||
|
||||
[![codecov](https://codecov.io/gh/containerd/cgroups/branch/master/graph/badge.svg)](https://codecov.io/gh/containerd/cgroups)
|
||||
[![GoDoc](https://godoc.org/github.com/containerd/cgroups?status.svg)](https://godoc.org/github.com/containerd/cgroups)
|
||||
[![Go Report Card](https://goreportcard.com/badge/github.com/containerd/cgroups)](https://goreportcard.com/report/github.com/containerd/cgroups)
|
||||
|
||||
Go package for creating, managing, inspecting, and destroying cgroups.
|
||||
The resources format for settings on the cgroup uses the OCI runtime-spec found
|
||||
|
@ -110,3 +111,14 @@ err := control.MoveTo(destination)
|
|||
```go
|
||||
subCgroup, err := control.New("child", resources)
|
||||
```
|
||||
|
||||
## Project details
|
||||
|
||||
Cgroups is a containerd sub-project, licensed under the [Apache 2.0 license](./LICENSE).
|
||||
As a containerd sub-project, you will find the:
|
||||
|
||||
* [Project governance](https://github.com/containerd/project/blob/master/GOVERNANCE.md),
|
||||
* [Maintainers](https://github.com/containerd/project/blob/master/MAINTAINERS),
|
||||
* and [Contributing guidelines](https://github.com/containerd/project/blob/master/CONTRIBUTING.md)
|
||||
|
||||
information in our [`containerd/project`](https://github.com/containerd/project) repository.
|
||||
|
|
57
vendor/github.com/containerd/cgroups/blkio.go
generated
vendored
57
vendor/github.com/containerd/cgroups/blkio.go
generated
vendored
|
@ -191,31 +191,42 @@ func (b *blkioController) readEntry(devices map[deviceKey]string, path, name str
|
|||
}
|
||||
|
||||
func createBlkioSettings(blkio *specs.LinuxBlockIO) []blkioSettings {
|
||||
settings := []blkioSettings{
|
||||
{
|
||||
name: "weight",
|
||||
value: blkio.Weight,
|
||||
format: uintf,
|
||||
},
|
||||
{
|
||||
name: "leaf_weight",
|
||||
value: blkio.LeafWeight,
|
||||
format: uintf,
|
||||
},
|
||||
}
|
||||
for _, wd := range blkio.WeightDevice {
|
||||
settings := []blkioSettings{}
|
||||
|
||||
if blkio.Weight != nil {
|
||||
settings = append(settings,
|
||||
blkioSettings{
|
||||
name: "weight_device",
|
||||
value: wd,
|
||||
format: weightdev,
|
||||
},
|
||||
blkioSettings{
|
||||
name: "leaf_weight_device",
|
||||
value: wd,
|
||||
format: weightleafdev,
|
||||
name: "weight",
|
||||
value: blkio.Weight,
|
||||
format: uintf,
|
||||
})
|
||||
}
|
||||
if blkio.LeafWeight != nil {
|
||||
settings = append(settings,
|
||||
blkioSettings{
|
||||
name: "leaf_weight",
|
||||
value: blkio.LeafWeight,
|
||||
format: uintf,
|
||||
})
|
||||
}
|
||||
for _, wd := range blkio.WeightDevice {
|
||||
if wd.Weight != nil {
|
||||
settings = append(settings,
|
||||
blkioSettings{
|
||||
name: "weight_device",
|
||||
value: wd,
|
||||
format: weightdev,
|
||||
})
|
||||
}
|
||||
if wd.LeafWeight != nil {
|
||||
settings = append(settings,
|
||||
blkioSettings{
|
||||
name: "leaf_weight_device",
|
||||
value: wd,
|
||||
format: weightleafdev,
|
||||
})
|
||||
}
|
||||
}
|
||||
for _, t := range []struct {
|
||||
name string
|
||||
list []specs.LinuxThrottleDevice
|
||||
|
@ -265,12 +276,12 @@ func uintf(v interface{}) []byte {
|
|||
|
||||
func weightdev(v interface{}) []byte {
|
||||
wd := v.(specs.LinuxWeightDevice)
|
||||
return []byte(fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.Weight))
|
||||
return []byte(fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, *wd.Weight))
|
||||
}
|
||||
|
||||
func weightleafdev(v interface{}) []byte {
|
||||
wd := v.(specs.LinuxWeightDevice)
|
||||
return []byte(fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.LeafWeight))
|
||||
return []byte(fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, *wd.LeafWeight))
|
||||
}
|
||||
|
||||
func throttleddev(v interface{}) []byte {
|
||||
|
|
92
vendor/github.com/containerd/cgroups/cgroup.go
generated
vendored
92
vendor/github.com/containerd/cgroups/cgroup.go
generated
vendored
|
@ -30,47 +30,84 @@ import (
|
|||
)
|
||||
|
||||
// New returns a new control via the cgroup cgroups interface
|
||||
func New(hierarchy Hierarchy, path Path, resources *specs.LinuxResources) (Cgroup, error) {
|
||||
func New(hierarchy Hierarchy, path Path, resources *specs.LinuxResources, opts ...InitOpts) (Cgroup, error) {
|
||||
config := newInitConfig()
|
||||
for _, o := range opts {
|
||||
if err := o(config); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
subsystems, err := hierarchy()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var active []Subsystem
|
||||
for _, s := range subsystems {
|
||||
// check if subsystem exists
|
||||
if err := initializeSubsystem(s, path, resources); err != nil {
|
||||
if err == ErrControllerNotActive {
|
||||
if config.InitCheck != nil {
|
||||
if skerr := config.InitCheck(s, path, err); skerr != nil {
|
||||
if skerr != ErrIgnoreSubsystem {
|
||||
return nil, skerr
|
||||
}
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
active = append(active, s)
|
||||
}
|
||||
return &cgroup{
|
||||
path: path,
|
||||
subsystems: subsystems,
|
||||
subsystems: active,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Load will load an existing cgroup and allow it to be controlled
|
||||
func Load(hierarchy Hierarchy, path Path) (Cgroup, error) {
|
||||
func Load(hierarchy Hierarchy, path Path, opts ...InitOpts) (Cgroup, error) {
|
||||
config := newInitConfig()
|
||||
for _, o := range opts {
|
||||
if err := o(config); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
var activeSubsystems []Subsystem
|
||||
subsystems, err := hierarchy()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// check the the subsystems still exist
|
||||
// check that the subsystems still exist, and keep only those that actually exist
|
||||
for _, s := range pathers(subsystems) {
|
||||
p, err := path(s.Name())
|
||||
if err != nil {
|
||||
if os.IsNotExist(errors.Cause(err)) {
|
||||
return nil, ErrCgroupDeleted
|
||||
}
|
||||
if err == ErrControllerNotActive {
|
||||
if config.InitCheck != nil {
|
||||
if skerr := config.InitCheck(s, path, err); skerr != nil {
|
||||
if skerr != ErrIgnoreSubsystem {
|
||||
return nil, skerr
|
||||
}
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
if _, err := os.Lstat(s.Path(p)); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil, ErrCgroupDeleted
|
||||
continue
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
activeSubsystems = append(activeSubsystems, s)
|
||||
}
|
||||
return &cgroup{
|
||||
path: path,
|
||||
subsystems: subsystems,
|
||||
subsystems: activeSubsystems,
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
@ -319,6 +356,49 @@ func (c *cgroup) processes(subsystem Name, recursive bool) ([]Process, error) {
|
|||
return processes, err
|
||||
}
|
||||
|
||||
// Tasks returns the tasks running inside the cgroup along
|
||||
// with the subsystem used, pid, and path
|
||||
func (c *cgroup) Tasks(subsystem Name, recursive bool) ([]Task, error) {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
if c.err != nil {
|
||||
return nil, c.err
|
||||
}
|
||||
return c.tasks(subsystem, recursive)
|
||||
}
|
||||
|
||||
func (c *cgroup) tasks(subsystem Name, recursive bool) ([]Task, error) {
|
||||
s := c.getSubsystem(subsystem)
|
||||
sp, err := c.path(subsystem)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
path := s.(pather).Path(sp)
|
||||
var tasks []Task
|
||||
err = filepath.Walk(path, func(p string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !recursive && info.IsDir() {
|
||||
if p == path {
|
||||
return nil
|
||||
}
|
||||
return filepath.SkipDir
|
||||
}
|
||||
dir, name := filepath.Split(p)
|
||||
if name != cgroupTasks {
|
||||
return nil
|
||||
}
|
||||
procs, err := readTasksPids(dir, subsystem)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
tasks = append(tasks, procs...)
|
||||
return nil
|
||||
})
|
||||
return tasks, err
|
||||
}
|
||||
|
||||
// Freeze freezes the entire cgroup and all the processes inside it
|
||||
func (c *cgroup) Freeze() error {
|
||||
c.mu.Lock()
|
||||
|
|
11
vendor/github.com/containerd/cgroups/control.go
generated
vendored
11
vendor/github.com/containerd/cgroups/control.go
generated
vendored
|
@ -44,6 +44,15 @@ type Process struct {
|
|||
Path string
|
||||
}
|
||||
|
||||
type Task struct {
|
||||
// Subsystem is the name of the subsystem that the task is in
|
||||
Subsystem Name
|
||||
// Pid is the process id of the task
|
||||
Pid int
|
||||
// Path is the full path of the subsystem and location that the task is in
|
||||
Path string
|
||||
}
|
||||
|
||||
// Cgroup handles interactions with the individual groups to perform
|
||||
// actions on them as them main interface to this cgroup package
|
||||
type Cgroup interface {
|
||||
|
@ -64,6 +73,8 @@ type Cgroup interface {
|
|||
Update(resources *specs.LinuxResources) error
|
||||
// Processes returns all the processes in a select subsystem for the cgroup
|
||||
Processes(Name, bool) ([]Process, error)
|
||||
// Tasks returns all the tasks in a select subsystem for the cgroup
|
||||
Tasks(Name, bool) ([]Task, error)
|
||||
// Freeze freezes or pauses all processes inside the cgroup
|
||||
Freeze() error
|
||||
// Thaw thaw or resumes all processes inside the cgroup
|
||||
|
|
10
vendor/github.com/containerd/cgroups/cpuset.go
generated
vendored
10
vendor/github.com/containerd/cgroups/cpuset.go
generated
vendored
|
@ -57,21 +57,21 @@ func (c *cpusetController) Create(path string, resources *specs.LinuxResources)
|
|||
if resources.CPU != nil {
|
||||
for _, t := range []struct {
|
||||
name string
|
||||
value *string
|
||||
value string
|
||||
}{
|
||||
{
|
||||
name: "cpus",
|
||||
value: &resources.CPU.Cpus,
|
||||
value: resources.CPU.Cpus,
|
||||
},
|
||||
{
|
||||
name: "mems",
|
||||
value: &resources.CPU.Mems,
|
||||
value: resources.CPU.Mems,
|
||||
},
|
||||
} {
|
||||
if t.value != nil {
|
||||
if t.value != "" {
|
||||
if err := ioutil.WriteFile(
|
||||
filepath.Join(c.Path(path), fmt.Sprintf("cpuset.%s", t.name)),
|
||||
[]byte(*t.value),
|
||||
[]byte(t.value),
|
||||
defaultFilePerm,
|
||||
); err != nil {
|
||||
return err
|
||||
|
|
3
vendor/github.com/containerd/cgroups/devices.go
generated
vendored
3
vendor/github.com/containerd/cgroups/devices.go
generated
vendored
|
@ -58,6 +58,9 @@ func (d *devicesController) Create(path string, resources *specs.LinuxResources)
|
|||
if device.Allow {
|
||||
file = allowDeviceFile
|
||||
}
|
||||
if device.Type == "" {
|
||||
device.Type = "a"
|
||||
}
|
||||
if err := ioutil.WriteFile(
|
||||
filepath.Join(d.Path(path), file),
|
||||
[]byte(deviceString(device)),
|
||||
|
|
2
vendor/github.com/containerd/cgroups/net_prio.go
generated
vendored
2
vendor/github.com/containerd/cgroups/net_prio.go
generated
vendored
|
@ -50,7 +50,7 @@ func (n *netprioController) Create(path string, resources *specs.LinuxResources)
|
|||
if resources.Network != nil {
|
||||
for _, prio := range resources.Network.Priorities {
|
||||
if err := ioutil.WriteFile(
|
||||
filepath.Join(n.Path(path), "net_prio_ifpriomap"),
|
||||
filepath.Join(n.Path(path), "net_prio.ifpriomap"),
|
||||
formatPrio(prio.Name, prio.Priority),
|
||||
defaultFilePerm,
|
||||
); err != nil {
|
||||
|
|
61
vendor/github.com/containerd/cgroups/opts.go
generated
vendored
Normal file
61
vendor/github.com/containerd/cgroups/opts.go
generated
vendored
Normal file
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
var (
|
||||
// ErrIgnoreSubsystem allows the specific subsystem to be skipped
|
||||
ErrIgnoreSubsystem = errors.New("skip subsystem")
|
||||
// ErrDevicesRequired is returned when the devices subsystem is required but
|
||||
// does not exist or is not active
|
||||
ErrDevicesRequired = errors.New("devices subsystem is required")
|
||||
)
|
||||
|
||||
// InitOpts allows configuration for the creation or loading of a cgroup
|
||||
type InitOpts func(*InitConfig) error
|
||||
|
||||
// InitConfig provides configuration options for the creation
|
||||
// or loading of a cgroup and its subsystems
|
||||
type InitConfig struct {
|
||||
// InitCheck can be used to check initialization errors from the subsystem
|
||||
InitCheck InitCheck
|
||||
}
|
||||
|
||||
func newInitConfig() *InitConfig {
|
||||
return &InitConfig{
|
||||
InitCheck: RequireDevices,
|
||||
}
|
||||
}
|
||||
|
||||
// InitCheck allows subsystems errors to be checked when initialized or loaded
|
||||
type InitCheck func(Subsystem, Path, error) error
|
||||
|
||||
// AllowAny allows any subsystem errors to be skipped
|
||||
func AllowAny(s Subsystem, p Path, err error) error {
|
||||
return ErrIgnoreSubsystem
|
||||
}
|
||||
|
||||
// RequireDevices requires the device subsystem but no others
|
||||
func RequireDevices(s Subsystem, p Path, err error) error {
|
||||
if s.Name() == Devices {
|
||||
return ErrDevicesRequired
|
||||
}
|
||||
return ErrIgnoreSubsystem
|
||||
}
|
5
vendor/github.com/containerd/cgroups/paths.go
generated
vendored
5
vendor/github.com/containerd/cgroups/paths.go
generated
vendored
|
@ -57,6 +57,9 @@ func PidPath(pid int) Path {
|
|||
return existingPath(paths, "")
|
||||
}
|
||||
|
||||
// ErrControllerNotActive is returned when a controller is not supported or enabled
|
||||
var ErrControllerNotActive = errors.New("controller is not supported")
|
||||
|
||||
func existingPath(paths map[string]string, suffix string) Path {
|
||||
// localize the paths based on the root mount dest for nested cgroups
|
||||
for n, p := range paths {
|
||||
|
@ -77,7 +80,7 @@ func existingPath(paths map[string]string, suffix string) Path {
|
|||
root, ok := paths[string(name)]
|
||||
if !ok {
|
||||
if root, ok = paths[fmt.Sprintf("name=%s", name)]; !ok {
|
||||
return "", fmt.Errorf("unable to find %q in controller set", name)
|
||||
return "", ErrControllerNotActive
|
||||
}
|
||||
}
|
||||
if suffix != "" {
|
||||
|
|
2
vendor/github.com/containerd/cgroups/subsystem.go
generated
vendored
2
vendor/github.com/containerd/cgroups/subsystem.go
generated
vendored
|
@ -42,7 +42,7 @@ const (
|
|||
)
|
||||
|
||||
// Subsystems returns a complete list of the default cgroups
|
||||
// avaliable on most linux systems
|
||||
// available on most linux systems
|
||||
func Subsystems() []Name {
|
||||
n := []Name{
|
||||
Hugetlb,
|
||||
|
|
33
vendor/github.com/containerd/cgroups/systemd.go
generated
vendored
33
vendor/github.com/containerd/cgroups/systemd.go
generated
vendored
|
@ -32,6 +32,11 @@ const (
|
|||
defaultSlice = "system.slice"
|
||||
)
|
||||
|
||||
var (
|
||||
canDelegate bool
|
||||
once sync.Once
|
||||
)
|
||||
|
||||
func Systemd() ([]Subsystem, error) {
|
||||
root, err := v1MountPoint()
|
||||
if err != nil {
|
||||
|
@ -54,7 +59,7 @@ func Slice(slice, name string) Path {
|
|||
slice = defaultSlice
|
||||
}
|
||||
return func(subsystem Name) (string, error) {
|
||||
return filepath.Join(slice, unitName(name)), nil
|
||||
return filepath.Join(slice, name), nil
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -80,15 +85,39 @@ func (s *SystemdController) Create(path string, resources *specs.LinuxResources)
|
|||
}
|
||||
defer conn.Close()
|
||||
slice, name := splitName(path)
|
||||
// We need to see if systemd can handle the delegate property
|
||||
// Systemd will return an error if it cannot handle delegate regardless
|
||||
// of its bool setting.
|
||||
checkDelegate := func() {
|
||||
canDelegate = true
|
||||
dlSlice := newProperty("Delegate", true)
|
||||
if _, err := conn.StartTransientUnit(slice, "testdelegate", []systemdDbus.Property{dlSlice}, nil); err != nil {
|
||||
if dbusError, ok := err.(dbus.Error); ok {
|
||||
// Starting with systemd v237, Delegate is not even a property of slices anymore,
|
||||
// so the D-Bus call fails with "InvalidArgs" error.
|
||||
if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") || strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.InvalidArgs") {
|
||||
canDelegate = false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
conn.StopUnit(slice, "testDelegate", nil)
|
||||
}
|
||||
once.Do(checkDelegate)
|
||||
properties := []systemdDbus.Property{
|
||||
systemdDbus.PropDescription(fmt.Sprintf("cgroup %s", name)),
|
||||
systemdDbus.PropWants(slice),
|
||||
newProperty("DefaultDependencies", false),
|
||||
newProperty("Delegate", true),
|
||||
newProperty("MemoryAccounting", true),
|
||||
newProperty("CPUAccounting", true),
|
||||
newProperty("BlockIOAccounting", true),
|
||||
}
|
||||
|
||||
// If we can delegate, we add the property back in
|
||||
if canDelegate {
|
||||
properties = append(properties, newProperty("Delegate", true))
|
||||
}
|
||||
|
||||
ch := make(chan string)
|
||||
_, err = conn.StartTransientUnit(name, "replace", properties, ch)
|
||||
if err != nil {
|
||||
|
|
29
vendor/github.com/containerd/cgroups/utils.go
generated
vendored
29
vendor/github.com/containerd/cgroups/utils.go
generated
vendored
|
@ -111,7 +111,7 @@ func remove(path string) error {
|
|||
return fmt.Errorf("cgroups: unable to remove path %q", path)
|
||||
}
|
||||
|
||||
// readPids will read all the pids in a cgroup by the provided path
|
||||
// readPids will read all the pids of processes in a cgroup by the provided path
|
||||
func readPids(path string, subsystem Name) ([]Process, error) {
|
||||
f, err := os.Open(filepath.Join(path, cgroupProcs))
|
||||
if err != nil {
|
||||
|
@ -138,6 +138,33 @@ func readPids(path string, subsystem Name) ([]Process, error) {
|
|||
return out, nil
|
||||
}
|
||||
|
||||
// readTasksPids will read all the pids of tasks in a cgroup by the provided path
|
||||
func readTasksPids(path string, subsystem Name) ([]Task, error) {
|
||||
f, err := os.Open(filepath.Join(path, cgroupTasks))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
var (
|
||||
out []Task
|
||||
s = bufio.NewScanner(f)
|
||||
)
|
||||
for s.Scan() {
|
||||
if t := s.Text(); t != "" {
|
||||
pid, err := strconv.Atoi(t)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, Task{
|
||||
Pid: pid,
|
||||
Subsystem: subsystem,
|
||||
Path: path,
|
||||
})
|
||||
}
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func hugePageSizes() ([]string, error) {
|
||||
var (
|
||||
pageSizes []string
|
||||
|
|
2
vendor/github.com/containerd/containerd/archive/tar.go
generated
vendored
2
vendor/github.com/containerd/containerd/archive/tar.go
generated
vendored
|
@ -194,7 +194,7 @@ func applyNaive(ctx context.Context, root string, tr *tar.Reader, options ApplyO
|
|||
parentPath = filepath.Dir(path)
|
||||
}
|
||||
if _, err := os.Lstat(parentPath); err != nil && os.IsNotExist(err) {
|
||||
err = mkdirAll(parentPath, 0700)
|
||||
err = mkdirAll(parentPath, 0755)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
|
32
vendor/github.com/containerd/containerd/image.go
generated
vendored
32
vendor/github.com/containerd/containerd/image.go
generated
vendored
|
@ -170,26 +170,22 @@ func (i *image) Unpack(ctx context.Context, snapshotterName string) error {
|
|||
chain = append(chain, layer.Diff.Digest)
|
||||
}
|
||||
|
||||
if unpacked {
|
||||
desc, err := i.i.Config(ctx, cs, i.platform)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
rootfs := identity.ChainID(chain).String()
|
||||
|
||||
cinfo := content.Info{
|
||||
Digest: desc.Digest,
|
||||
Labels: map[string]string{
|
||||
fmt.Sprintf("containerd.io/gc.ref.snapshot.%s", snapshotterName): rootfs,
|
||||
},
|
||||
}
|
||||
if _, err := cs.Update(ctx, cinfo, fmt.Sprintf("labels.containerd.io/gc.ref.snapshot.%s", snapshotterName)); err != nil {
|
||||
return err
|
||||
}
|
||||
desc, err := i.i.Config(ctx, cs, i.platform)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
rootfs := identity.ChainID(chain).String()
|
||||
|
||||
cinfo := content.Info{
|
||||
Digest: desc.Digest,
|
||||
Labels: map[string]string{
|
||||
fmt.Sprintf("containerd.io/gc.ref.snapshot.%s", snapshotterName): rootfs,
|
||||
},
|
||||
}
|
||||
|
||||
_, err = cs.Update(ctx, cinfo, fmt.Sprintf("labels.containerd.io/gc.ref.snapshot.%s", snapshotterName))
|
||||
return err
|
||||
}
|
||||
|
||||
func (i *image) getLayers(ctx context.Context, platform platforms.MatchComparer) ([]rootfs.Layer, error) {
|
||||
|
|
138
vendor/github.com/containerd/containerd/metadata/gc.go
generated
vendored
138
vendor/github.com/containerd/containerd/metadata/gc.go
generated
vendored
|
@ -64,6 +64,18 @@ func scanRoots(ctx context.Context, tx *bolt.Tx, nc chan<- gc.Node) error {
|
|||
// iterate through each namespace
|
||||
v1c := v1bkt.Cursor()
|
||||
|
||||
// cerr indicates the scan did not successfully send all
|
||||
// the roots. The scan does not need to be cancelled but
|
||||
// must return error at the end.
|
||||
var cerr error
|
||||
fn := func(n gc.Node) {
|
||||
select {
|
||||
case nc <- n:
|
||||
case <-ctx.Done():
|
||||
cerr = ctx.Err()
|
||||
}
|
||||
}
|
||||
|
||||
for k, v := v1c.First(); k != nil; k, v = v1c.Next() {
|
||||
if v != nil {
|
||||
continue
|
||||
|
@ -92,11 +104,7 @@ func scanRoots(ctx context.Context, tx *bolt.Tx, nc chan<- gc.Node) error {
|
|||
}
|
||||
}
|
||||
|
||||
select {
|
||||
case nc <- gcnode(ResourceLease, ns, string(k)):
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
}
|
||||
fn(gcnode(ResourceLease, ns, string(k)))
|
||||
|
||||
// Emit content and snapshots as roots instead of implementing
|
||||
// in references. Since leases cannot be referenced there is
|
||||
|
@ -106,11 +114,7 @@ func scanRoots(ctx context.Context, tx *bolt.Tx, nc chan<- gc.Node) error {
|
|||
cbkt := libkt.Bucket(bucketKeyObjectContent)
|
||||
if cbkt != nil {
|
||||
if err := cbkt.ForEach(func(k, v []byte) error {
|
||||
select {
|
||||
case nc <- gcnode(ResourceContent, ns, string(k)):
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
}
|
||||
fn(gcnode(ResourceContent, ns, string(k)))
|
||||
return nil
|
||||
}); err != nil {
|
||||
return err
|
||||
|
@ -126,11 +130,7 @@ func scanRoots(ctx context.Context, tx *bolt.Tx, nc chan<- gc.Node) error {
|
|||
snbkt := sbkt.Bucket(sk)
|
||||
|
||||
return snbkt.ForEach(func(k, v []byte) error {
|
||||
select {
|
||||
case nc <- gcnode(ResourceSnapshot, ns, fmt.Sprintf("%s/%s", sk, k)):
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
}
|
||||
fn(gcnode(ResourceSnapshot, ns, fmt.Sprintf("%s/%s", sk, k)))
|
||||
return nil
|
||||
})
|
||||
}); err != nil {
|
||||
|
@ -141,11 +141,7 @@ func scanRoots(ctx context.Context, tx *bolt.Tx, nc chan<- gc.Node) error {
|
|||
ibkt := libkt.Bucket(bucketKeyObjectIngests)
|
||||
if ibkt != nil {
|
||||
if err := ibkt.ForEach(func(k, v []byte) error {
|
||||
select {
|
||||
case nc <- gcnode(ResourceIngest, ns, string(k)):
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
}
|
||||
fn(gcnode(ResourceIngest, ns, string(k)))
|
||||
return nil
|
||||
}); err != nil {
|
||||
return err
|
||||
|
@ -168,18 +164,9 @@ func scanRoots(ctx context.Context, tx *bolt.Tx, nc chan<- gc.Node) error {
|
|||
target := ibkt.Bucket(k).Bucket(bucketKeyTarget)
|
||||
if target != nil {
|
||||
contentKey := string(target.Get(bucketKeyDigest))
|
||||
select {
|
||||
case nc <- gcnode(ResourceContent, ns, contentKey):
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
}
|
||||
fn(gcnode(ResourceContent, ns, contentKey))
|
||||
}
|
||||
return sendSnapshotRefs(ns, ibkt.Bucket(k), func(n gc.Node) {
|
||||
select {
|
||||
case nc <- n:
|
||||
case <-ctx.Done():
|
||||
}
|
||||
})
|
||||
return sendLabelRefs(ns, ibkt.Bucket(k), fn)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -200,11 +187,7 @@ func scanRoots(ctx context.Context, tx *bolt.Tx, nc chan<- gc.Node) error {
|
|||
if ea == nil || expThreshold.After(*ea) {
|
||||
return nil
|
||||
}
|
||||
select {
|
||||
case nc <- gcnode(ResourceIngest, ns, string(k)):
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
}
|
||||
fn(gcnode(ResourceIngest, ns, string(k)))
|
||||
return nil
|
||||
}); err != nil {
|
||||
return err
|
||||
|
@ -216,7 +199,12 @@ func scanRoots(ctx context.Context, tx *bolt.Tx, nc chan<- gc.Node) error {
|
|||
if v != nil {
|
||||
return nil
|
||||
}
|
||||
return sendRootRef(ctx, nc, gcnode(ResourceContent, ns, string(k)), cbkt.Bucket(k))
|
||||
|
||||
if isRootRef(cbkt.Bucket(k)) {
|
||||
fn(gcnode(ResourceContent, ns, string(k)))
|
||||
}
|
||||
|
||||
return nil
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -229,23 +217,15 @@ func scanRoots(ctx context.Context, tx *bolt.Tx, nc chan<- gc.Node) error {
|
|||
if v != nil {
|
||||
return nil
|
||||
}
|
||||
snapshotter := string(cbkt.Bucket(k).Get(bucketKeySnapshotter))
|
||||
|
||||
cibkt := cbkt.Bucket(k)
|
||||
snapshotter := string(cibkt.Get(bucketKeySnapshotter))
|
||||
if snapshotter != "" {
|
||||
ss := string(cbkt.Bucket(k).Get(bucketKeySnapshotKey))
|
||||
select {
|
||||
case nc <- gcnode(ResourceSnapshot, ns, fmt.Sprintf("%s/%s", snapshotter, ss)):
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
}
|
||||
ss := string(cibkt.Get(bucketKeySnapshotKey))
|
||||
fn(gcnode(ResourceSnapshot, ns, fmt.Sprintf("%s/%s", snapshotter, ss)))
|
||||
}
|
||||
|
||||
// TODO: Send additional snapshot refs through labels
|
||||
return sendSnapshotRefs(ns, cbkt.Bucket(k), func(n gc.Node) {
|
||||
select {
|
||||
case nc <- n:
|
||||
case <-ctx.Done():
|
||||
}
|
||||
})
|
||||
return sendLabelRefs(ns, cibkt, fn)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -263,15 +243,17 @@ func scanRoots(ctx context.Context, tx *bolt.Tx, nc chan<- gc.Node) error {
|
|||
if v != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return sendRootRef(ctx, nc, gcnode(ResourceSnapshot, ns, fmt.Sprintf("%s/%s", sk, k)), snbkt.Bucket(k))
|
||||
if isRootRef(snbkt.Bucket(k)) {
|
||||
fn(gcnode(ResourceSnapshot, ns, fmt.Sprintf("%s/%s", sk, k)))
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
return cerr
|
||||
}
|
||||
|
||||
func references(ctx context.Context, tx *bolt.Tx, node gc.Node, fn func(gc.Node)) error {
|
||||
|
@ -282,10 +264,7 @@ func references(ctx context.Context, tx *bolt.Tx, node gc.Node, fn func(gc.Node)
|
|||
return nil
|
||||
}
|
||||
|
||||
if err := sendSnapshotRefs(node.Namespace, bkt, fn); err != nil {
|
||||
return err
|
||||
}
|
||||
return sendContentRefs(node.Namespace, bkt, fn)
|
||||
return sendLabelRefs(node.Namespace, bkt, fn)
|
||||
} else if node.Type == ResourceSnapshot {
|
||||
parts := strings.SplitN(node.Key, "/", 2)
|
||||
if len(parts) != 2 {
|
||||
|
@ -304,7 +283,7 @@ func references(ctx context.Context, tx *bolt.Tx, node gc.Node, fn func(gc.Node)
|
|||
fn(gcnode(ResourceSnapshot, node.Namespace, fmt.Sprintf("%s/%s", ss, pv)))
|
||||
}
|
||||
|
||||
return sendSnapshotRefs(node.Namespace, bkt, fn)
|
||||
return sendLabelRefs(node.Namespace, bkt, fn)
|
||||
} else if node.Type == ResourceIngest {
|
||||
// Send expected value
|
||||
bkt := getBucket(tx, bucketKeyVersion, []byte(node.Namespace), bucketKeyObjectContent, bucketKeyObjectIngests, []byte(node.Key))
|
||||
|
@ -456,25 +435,8 @@ func remove(ctx context.Context, tx *bolt.Tx, node gc.Node) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// sendSnapshotRefs sends all snapshot references referred to by the labels in the bkt
|
||||
func sendSnapshotRefs(ns string, bkt *bolt.Bucket, fn func(gc.Node)) error {
|
||||
lbkt := bkt.Bucket(bucketKeyObjectLabels)
|
||||
if lbkt != nil {
|
||||
lc := lbkt.Cursor()
|
||||
|
||||
for k, v := lc.Seek(labelGCSnapRef); k != nil && strings.HasPrefix(string(k), string(labelGCSnapRef)); k, v = lc.Next() {
|
||||
snapshotter := k[len(labelGCSnapRef):]
|
||||
if i := bytes.IndexByte(snapshotter, '/'); i >= 0 {
|
||||
snapshotter = snapshotter[:i]
|
||||
}
|
||||
fn(gcnode(ResourceSnapshot, ns, fmt.Sprintf("%s/%s", snapshotter, v)))
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// sendContentRefs sends all content references referred to by the labels in the bkt
|
||||
func sendContentRefs(ns string, bkt *bolt.Bucket, fn func(gc.Node)) error {
|
||||
// sendLabelRefs sends all snapshot and content references referred to by the labels in the bkt
|
||||
func sendLabelRefs(ns string, bkt *bolt.Bucket, fn func(gc.Node)) error {
|
||||
lbkt := bkt.Bucket(bucketKeyObjectLabels)
|
||||
if lbkt != nil {
|
||||
lc := lbkt.Cursor()
|
||||
|
@ -490,6 +452,15 @@ func sendContentRefs(ns string, bkt *bolt.Bucket, fn func(gc.Node)) error {
|
|||
|
||||
fn(gcnode(ResourceContent, ns, string(v)))
|
||||
}
|
||||
|
||||
for k, v := lc.Seek(labelGCSnapRef); k != nil && strings.HasPrefix(string(k), string(labelGCSnapRef)); k, v = lc.Next() {
|
||||
snapshotter := k[len(labelGCSnapRef):]
|
||||
if i := bytes.IndexByte(snapshotter, '/'); i >= 0 {
|
||||
snapshotter = snapshotter[:i]
|
||||
}
|
||||
fn(gcnode(ResourceSnapshot, ns, fmt.Sprintf("%s/%s", snapshotter, v)))
|
||||
}
|
||||
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
@ -506,17 +477,6 @@ func isRootRef(bkt *bolt.Bucket) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
func sendRootRef(ctx context.Context, nc chan<- gc.Node, n gc.Node, bkt *bolt.Bucket) error {
|
||||
if isRootRef(bkt) {
|
||||
select {
|
||||
case nc <- n:
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func gcnode(t gc.ResourceType, ns, key string) gc.Node {
|
||||
return gc.Node{
|
||||
Type: t,
|
||||
|
|
6
vendor/github.com/containerd/containerd/vendor.conf
generated
vendored
6
vendor/github.com/containerd/containerd/vendor.conf
generated
vendored
|
@ -1,6 +1,6 @@
|
|||
github.com/containerd/go-runc 5a6d9f37cfa36b15efba46dc7ea349fa9b7143c3
|
||||
github.com/containerd/console c12b1e7919c14469339a5d38f2f8ed9b64a9de23
|
||||
github.com/containerd/cgroups 5e610833b72089b37d0e615de9a92dfc043757c2
|
||||
github.com/containerd/cgroups dbea6f2bd41658b84b00417ceefa416b979cbf10
|
||||
github.com/containerd/typeurl a93fcdb778cd272c6e9b3028b2f42d813e785d40
|
||||
github.com/containerd/fifo 3d5202aec260678c48179c56f40e6f38a095738c
|
||||
github.com/containerd/btrfs 2e1aa0ddf94f91fa282b6ed87c23bf0d64911244
|
||||
|
@ -20,7 +20,7 @@ github.com/gogo/protobuf v1.0.0
|
|||
github.com/gogo/googleapis 08a7655d27152912db7aaf4f983275eaf8d128ef
|
||||
github.com/golang/protobuf v1.1.0
|
||||
github.com/opencontainers/runtime-spec eba862dc2470385a233c7507392675cbeadf7353 # v1.0.1-45-geba862d
|
||||
github.com/opencontainers/runc 6635b4f0c6af3810594d2770f662f34ddc15b40d
|
||||
github.com/opencontainers/runc 2b18fe1d885ee5083ef9f0838fee39b62d653e30
|
||||
github.com/sirupsen/logrus v1.0.0
|
||||
github.com/urfave/cli 7bc6a0acffa589f415f88aca16cc1de5ffd66f9c
|
||||
golang.org/x/net b3756b4b77d7b13260a0a2ec658753cf48922eac
|
||||
|
@ -43,7 +43,7 @@ github.com/google/go-cmp v0.1.0
|
|||
go.etcd.io/bbolt v1.3.1-etcd.8
|
||||
|
||||
# cri dependencies
|
||||
github.com/containerd/cri da0c016c830b2ea97fd1d737c49a568a816bf964 # release/1.2 branch
|
||||
github.com/containerd/cri a92c40017473cbe0239ce180125f12669757e44f # release/1.2 branch
|
||||
github.com/containerd/go-cni 40bcf8ec8acd7372be1d77031d585d5d8e561c90
|
||||
github.com/blang/semver v3.1.0
|
||||
github.com/containernetworking/cni v0.6.0
|
||||
|
|
5
vendor/github.com/opencontainers/runc/README.md
generated
vendored
5
vendor/github.com/opencontainers/runc/README.md
generated
vendored
|
@ -16,10 +16,9 @@ This means that `runc` 1.0.0 should implement the 1.0 version of the specificati
|
|||
|
||||
You can find official releases of `runc` on the [release](https://github.com/opencontainers/runc/releases) page.
|
||||
|
||||
### Security
|
||||
## Security
|
||||
|
||||
If you wish to report a security issue, please disclose the issue responsibly
|
||||
to security@opencontainers.org.
|
||||
Reporting process and disclosure communications are outlined in [/org/security](https://github.com/opencontainers/org/blob/master/security/)
|
||||
|
||||
## Building
|
||||
|
||||
|
|
42
vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
generated
vendored
42
vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
generated
vendored
|
@ -14,6 +14,7 @@ import (
|
|||
"time"
|
||||
|
||||
units "github.com/docker/go-units"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -463,11 +464,40 @@ func WriteCgroupProc(dir string, pid int) error {
|
|||
return fmt.Errorf("no such directory for %s", CgroupProcesses)
|
||||
}
|
||||
|
||||
// Don't attach any pid to the cgroup if -1 is specified as a pid
|
||||
if pid != -1 {
|
||||
if err := ioutil.WriteFile(filepath.Join(dir, CgroupProcesses), []byte(strconv.Itoa(pid)), 0700); err != nil {
|
||||
return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
|
||||
}
|
||||
// Dont attach any pid to the cgroup if -1 is specified as a pid
|
||||
if pid == -1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
cgroupProcessesFile, err := os.OpenFile(filepath.Join(dir, CgroupProcesses), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0700)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
|
||||
}
|
||||
defer cgroupProcessesFile.Close()
|
||||
|
||||
for i := 0; i < 5; i++ {
|
||||
_, err = cgroupProcessesFile.WriteString(strconv.Itoa(pid))
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// EINVAL might mean that the task being added to cgroup.procs is in state
|
||||
// TASK_NEW. We should attempt to do so again.
|
||||
if isEINVAL(err) {
|
||||
time.Sleep(30 * time.Millisecond)
|
||||
continue
|
||||
}
|
||||
|
||||
return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func isEINVAL(err error) bool {
|
||||
switch err := err.(type) {
|
||||
case *os.PathError:
|
||||
return err.Err == unix.EINVAL
|
||||
default:
|
||||
return false
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
516
vendor/github.com/opencontainers/runc/libcontainer/nsenter/cloned_binary.c
generated
vendored
Normal file
516
vendor/github.com/opencontainers/runc/libcontainer/nsenter/cloned_binary.c
generated
vendored
Normal file
|
@ -0,0 +1,516 @@
|
|||
/*
|
||||
* Copyright (C) 2019 Aleksa Sarai <cyphar@cyphar.com>
|
||||
* Copyright (C) 2019 SUSE LLC
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#define _GNU_SOURCE
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/statfs.h>
|
||||
#include <sys/vfs.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/mount.h>
|
||||
#include <sys/sendfile.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
/* Use our own wrapper for memfd_create. */
|
||||
#if !defined(SYS_memfd_create) && defined(__NR_memfd_create)
|
||||
# define SYS_memfd_create __NR_memfd_create
|
||||
#endif
|
||||
/* memfd_create(2) flags -- copied from <linux/memfd.h>. */
|
||||
#ifndef MFD_CLOEXEC
|
||||
# define MFD_CLOEXEC 0x0001U
|
||||
# define MFD_ALLOW_SEALING 0x0002U
|
||||
#endif
|
||||
int memfd_create(const char *name, unsigned int flags)
|
||||
{
|
||||
#ifdef SYS_memfd_create
|
||||
return syscall(SYS_memfd_create, name, flags);
|
||||
#else
|
||||
errno = ENOSYS;
|
||||
return -1;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/* This comes directly from <linux/fcntl.h>. */
|
||||
#ifndef F_LINUX_SPECIFIC_BASE
|
||||
# define F_LINUX_SPECIFIC_BASE 1024
|
||||
#endif
|
||||
#ifndef F_ADD_SEALS
|
||||
# define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
|
||||
# define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
|
||||
#endif
|
||||
#ifndef F_SEAL_SEAL
|
||||
# define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
|
||||
# define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
|
||||
# define F_SEAL_GROW 0x0004 /* prevent file from growing */
|
||||
# define F_SEAL_WRITE 0x0008 /* prevent writes */
|
||||
#endif
|
||||
|
||||
#define CLONED_BINARY_ENV "_LIBCONTAINER_CLONED_BINARY"
|
||||
#define RUNC_MEMFD_COMMENT "runc_cloned:/proc/self/exe"
|
||||
#define RUNC_MEMFD_SEALS \
|
||||
(F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE)
|
||||
|
||||
static void *must_realloc(void *ptr, size_t size)
|
||||
{
|
||||
void *old = ptr;
|
||||
do {
|
||||
ptr = realloc(old, size);
|
||||
} while(!ptr);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/*
|
||||
* Verify whether we are currently in a self-cloned program (namely, is
|
||||
* /proc/self/exe a memfd). F_GET_SEALS will only succeed for memfds (or rather
|
||||
* for shmem files), and we want to be sure it's actually sealed.
|
||||
*/
|
||||
static int is_self_cloned(void)
|
||||
{
|
||||
int fd, ret, is_cloned = 0;
|
||||
struct stat statbuf = {};
|
||||
struct statfs fsbuf = {};
|
||||
|
||||
fd = open("/proc/self/exe", O_RDONLY|O_CLOEXEC);
|
||||
if (fd < 0)
|
||||
return -ENOTRECOVERABLE;
|
||||
|
||||
/*
|
||||
* Is the binary a fully-sealed memfd? We don't need CLONED_BINARY_ENV for
|
||||
* this, because you cannot write to a sealed memfd no matter what (so
|
||||
* sharing it isn't a bad thing -- and an admin could bind-mount a sealed
|
||||
* memfd to /usr/bin/runc to allow re-use).
|
||||
*/
|
||||
ret = fcntl(fd, F_GET_SEALS);
|
||||
if (ret >= 0) {
|
||||
is_cloned = (ret == RUNC_MEMFD_SEALS);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* All other forms require CLONED_BINARY_ENV, since they are potentially
|
||||
* writeable (or we can't tell if they're fully safe) and thus we must
|
||||
* check the environment as an extra layer of defence.
|
||||
*/
|
||||
if (!getenv(CLONED_BINARY_ENV)) {
|
||||
is_cloned = false;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Is the binary on a read-only filesystem? We can't detect bind-mounts in
|
||||
* particular (in-kernel they are identical to regular mounts) but we can
|
||||
* at least be sure that it's read-only. In addition, to make sure that
|
||||
* it's *our* bind-mount we check CLONED_BINARY_ENV.
|
||||
*/
|
||||
if (fstatfs(fd, &fsbuf) >= 0)
|
||||
is_cloned |= (fsbuf.f_flags & MS_RDONLY);
|
||||
|
||||
/*
|
||||
* Okay, we're a tmpfile -- or we're currently running on RHEL <=7.6
|
||||
* which appears to have a borked backport of F_GET_SEALS. Either way,
|
||||
* having a file which has no hardlinks indicates that we aren't using
|
||||
* a host-side "runc" binary and this is something that a container
|
||||
* cannot fake (because unlinking requires being able to resolve the
|
||||
* path that you want to unlink).
|
||||
*/
|
||||
if (fstat(fd, &statbuf) >= 0)
|
||||
is_cloned |= (statbuf.st_nlink == 0);
|
||||
|
||||
out:
|
||||
close(fd);
|
||||
return is_cloned;
|
||||
}
|
||||
|
||||
/* Read a given file into a new buffer, and providing the length. */
|
||||
static char *read_file(char *path, size_t *length)
|
||||
{
|
||||
int fd;
|
||||
char buf[4096], *copy = NULL;
|
||||
|
||||
if (!length)
|
||||
return NULL;
|
||||
|
||||
fd = open(path, O_RDONLY | O_CLOEXEC);
|
||||
if (fd < 0)
|
||||
return NULL;
|
||||
|
||||
*length = 0;
|
||||
for (;;) {
|
||||
ssize_t n;
|
||||
|
||||
n = read(fd, buf, sizeof(buf));
|
||||
if (n < 0)
|
||||
goto error;
|
||||
if (!n)
|
||||
break;
|
||||
|
||||
copy = must_realloc(copy, (*length + n) * sizeof(*copy));
|
||||
memcpy(copy + *length, buf, n);
|
||||
*length += n;
|
||||
}
|
||||
close(fd);
|
||||
return copy;
|
||||
|
||||
error:
|
||||
close(fd);
|
||||
free(copy);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* A poor-man's version of "xargs -0". Basically parses a given block of
|
||||
* NUL-delimited data, within the given length and adds a pointer to each entry
|
||||
* to the array of pointers.
|
||||
*/
|
||||
static int parse_xargs(char *data, int data_length, char ***output)
|
||||
{
|
||||
int num = 0;
|
||||
char *cur = data;
|
||||
|
||||
if (!data || *output != NULL)
|
||||
return -1;
|
||||
|
||||
while (cur < data + data_length) {
|
||||
num++;
|
||||
*output = must_realloc(*output, (num + 1) * sizeof(**output));
|
||||
(*output)[num - 1] = cur;
|
||||
cur += strlen(cur) + 1;
|
||||
}
|
||||
(*output)[num] = NULL;
|
||||
return num;
|
||||
}
|
||||
|
||||
/*
|
||||
* "Parse" out argv from /proc/self/cmdline.
|
||||
* This is necessary because we are running in a context where we don't have a
|
||||
* main() that we can just get the arguments from.
|
||||
*/
|
||||
static int fetchve(char ***argv)
|
||||
{
|
||||
char *cmdline = NULL;
|
||||
size_t cmdline_size;
|
||||
|
||||
cmdline = read_file("/proc/self/cmdline", &cmdline_size);
|
||||
if (!cmdline)
|
||||
goto error;
|
||||
|
||||
if (parse_xargs(cmdline, cmdline_size, argv) <= 0)
|
||||
goto error;
|
||||
|
||||
return 0;
|
||||
|
||||
error:
|
||||
free(cmdline);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
enum {
|
||||
EFD_NONE = 0,
|
||||
EFD_MEMFD,
|
||||
EFD_FILE,
|
||||
};
|
||||
|
||||
/*
|
||||
* This comes from <linux/fcntl.h>. We can't hard-code __O_TMPFILE because it
|
||||
* changes depending on the architecture. If we don't have O_TMPFILE we always
|
||||
* have the mkostemp(3) fallback.
|
||||
*/
|
||||
#ifndef O_TMPFILE
|
||||
# if defined(__O_TMPFILE) && defined(O_DIRECTORY)
|
||||
# define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
static int make_execfd(int *fdtype)
|
||||
{
|
||||
int fd = -1;
|
||||
char template[PATH_MAX] = {0};
|
||||
char *prefix = secure_getenv("_LIBCONTAINER_STATEDIR");
|
||||
|
||||
if (!prefix || *prefix != '/')
|
||||
prefix = "/tmp";
|
||||
if (snprintf(template, sizeof(template), "%s/runc.XXXXXX", prefix) < 0)
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* Now try memfd, it's much nicer than actually creating a file in STATEDIR
|
||||
* since it's easily detected thanks to sealing and also doesn't require
|
||||
* assumptions about STATEDIR.
|
||||
*/
|
||||
*fdtype = EFD_MEMFD;
|
||||
fd = memfd_create(RUNC_MEMFD_COMMENT, MFD_CLOEXEC | MFD_ALLOW_SEALING);
|
||||
if (fd >= 0)
|
||||
return fd;
|
||||
if (errno != ENOSYS && errno != EINVAL)
|
||||
goto error;
|
||||
|
||||
#ifdef O_TMPFILE
|
||||
/*
|
||||
* Try O_TMPFILE to avoid races where someone might snatch our file. Note
|
||||
* that O_EXCL isn't actually a security measure here (since you can just
|
||||
* fd re-open it and clear O_EXCL).
|
||||
*/
|
||||
*fdtype = EFD_FILE;
|
||||
fd = open(prefix, O_TMPFILE | O_EXCL | O_RDWR | O_CLOEXEC, 0700);
|
||||
if (fd >= 0) {
|
||||
struct stat statbuf = {};
|
||||
bool working_otmpfile = false;
|
||||
|
||||
/*
|
||||
* open(2) ignores unknown O_* flags -- yeah, I was surprised when I
|
||||
* found this out too. As a result we can't check for EINVAL. However,
|
||||
* if we get nlink != 0 (or EISDIR) then we know that this kernel
|
||||
* doesn't support O_TMPFILE.
|
||||
*/
|
||||
if (fstat(fd, &statbuf) >= 0)
|
||||
working_otmpfile = (statbuf.st_nlink == 0);
|
||||
|
||||
if (working_otmpfile)
|
||||
return fd;
|
||||
|
||||
/* Pretend that we got EISDIR since O_TMPFILE failed. */
|
||||
close(fd);
|
||||
errno = EISDIR;
|
||||
}
|
||||
if (errno != EISDIR)
|
||||
goto error;
|
||||
#endif /* defined(O_TMPFILE) */
|
||||
|
||||
/*
|
||||
* Our final option is to create a temporary file the old-school way, and
|
||||
* then unlink it so that nothing else sees it by accident.
|
||||
*/
|
||||
*fdtype = EFD_FILE;
|
||||
fd = mkostemp(template, O_CLOEXEC);
|
||||
if (fd >= 0) {
|
||||
if (unlink(template) >= 0)
|
||||
return fd;
|
||||
close(fd);
|
||||
}
|
||||
|
||||
error:
|
||||
*fdtype = EFD_NONE;
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int seal_execfd(int *fd, int fdtype)
|
||||
{
|
||||
switch (fdtype) {
|
||||
case EFD_MEMFD:
|
||||
return fcntl(*fd, F_ADD_SEALS, RUNC_MEMFD_SEALS);
|
||||
case EFD_FILE: {
|
||||
/* Need to re-open our pseudo-memfd as an O_PATH to avoid execve(2) giving -ETXTBSY. */
|
||||
int newfd;
|
||||
char fdpath[PATH_MAX] = {0};
|
||||
|
||||
if (fchmod(*fd, 0100) < 0)
|
||||
return -1;
|
||||
|
||||
if (snprintf(fdpath, sizeof(fdpath), "/proc/self/fd/%d", *fd) < 0)
|
||||
return -1;
|
||||
|
||||
newfd = open(fdpath, O_PATH | O_CLOEXEC);
|
||||
if (newfd < 0)
|
||||
return -1;
|
||||
|
||||
close(*fd);
|
||||
*fd = newfd;
|
||||
return 0;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int try_bindfd(void)
|
||||
{
|
||||
int fd, ret = -1;
|
||||
char template[PATH_MAX] = {0};
|
||||
char *prefix = secure_getenv("_LIBCONTAINER_STATEDIR");
|
||||
|
||||
if (!prefix || *prefix != '/')
|
||||
prefix = "/tmp";
|
||||
if (snprintf(template, sizeof(template), "%s/runc.XXXXXX", prefix) < 0)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* We need somewhere to mount it, mounting anything over /proc/self is a
|
||||
* BAD idea on the host -- even if we do it temporarily.
|
||||
*/
|
||||
fd = mkstemp(template);
|
||||
if (fd < 0)
|
||||
return ret;
|
||||
close(fd);
|
||||
|
||||
/*
|
||||
* For obvious reasons this won't work in rootless mode because we haven't
|
||||
* created a userns+mntns -- but getting that to work will be a bit
|
||||
* complicated and it's only worth doing if someone actually needs it.
|
||||
*/
|
||||
ret = -EPERM;
|
||||
if (mount("/proc/self/exe", template, "", MS_BIND, "") < 0)
|
||||
goto out;
|
||||
if (mount("", template, "", MS_REMOUNT | MS_BIND | MS_RDONLY, "") < 0)
|
||||
goto out_umount;
|
||||
|
||||
|
||||
/* Get read-only handle that we're sure can't be made read-write. */
|
||||
ret = open(template, O_PATH | O_CLOEXEC);
|
||||
|
||||
out_umount:
|
||||
/*
|
||||
* Make sure the MNT_DETACH works, otherwise we could get remounted
|
||||
* read-write and that would be quite bad (the fd would be made read-write
|
||||
* too, invalidating the protection).
|
||||
*/
|
||||
if (umount2(template, MNT_DETACH) < 0) {
|
||||
if (ret >= 0)
|
||||
close(ret);
|
||||
ret = -ENOTRECOVERABLE;
|
||||
}
|
||||
|
||||
out:
|
||||
/*
|
||||
* We don't care about unlink errors, the worst that happens is that
|
||||
* there's an empty file left around in STATEDIR.
|
||||
*/
|
||||
unlink(template);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t fd_to_fd(int outfd, int infd)
|
||||
{
|
||||
ssize_t total = 0;
|
||||
char buffer[4096];
|
||||
|
||||
for (;;) {
|
||||
ssize_t nread, nwritten = 0;
|
||||
|
||||
nread = read(infd, buffer, sizeof(buffer));
|
||||
if (nread < 0)
|
||||
return -1;
|
||||
if (!nread)
|
||||
break;
|
||||
|
||||
do {
|
||||
ssize_t n = write(outfd, buffer + nwritten, nread - nwritten);
|
||||
if (n < 0)
|
||||
return -1;
|
||||
nwritten += n;
|
||||
} while(nwritten < nread);
|
||||
|
||||
total += nwritten;
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
static int clone_binary(void)
|
||||
{
|
||||
int binfd, execfd;
|
||||
struct stat statbuf = {};
|
||||
size_t sent = 0;
|
||||
int fdtype = EFD_NONE;
|
||||
|
||||
/*
|
||||
* Before we resort to copying, let's try creating an ro-binfd in one shot
|
||||
* by getting a handle for a read-only bind-mount of the execfd.
|
||||
*/
|
||||
execfd = try_bindfd();
|
||||
if (execfd >= 0)
|
||||
return execfd;
|
||||
|
||||
/*
|
||||
* Dammit, that didn't work -- time to copy the binary to a safe place we
|
||||
* can seal the contents.
|
||||
*/
|
||||
execfd = make_execfd(&fdtype);
|
||||
if (execfd < 0 || fdtype == EFD_NONE)
|
||||
return -ENOTRECOVERABLE;
|
||||
|
||||
binfd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC);
|
||||
if (binfd < 0)
|
||||
goto error;
|
||||
|
||||
if (fstat(binfd, &statbuf) < 0)
|
||||
goto error_binfd;
|
||||
|
||||
while (sent < statbuf.st_size) {
|
||||
int n = sendfile(execfd, binfd, NULL, statbuf.st_size - sent);
|
||||
if (n < 0) {
|
||||
/* sendfile can fail so we fallback to a dumb user-space copy. */
|
||||
n = fd_to_fd(execfd, binfd);
|
||||
if (n < 0)
|
||||
goto error_binfd;
|
||||
}
|
||||
sent += n;
|
||||
}
|
||||
close(binfd);
|
||||
if (sent != statbuf.st_size)
|
||||
goto error;
|
||||
|
||||
if (seal_execfd(&execfd, fdtype) < 0)
|
||||
goto error;
|
||||
|
||||
return execfd;
|
||||
|
||||
error_binfd:
|
||||
close(binfd);
|
||||
error:
|
||||
close(execfd);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/* Get cheap access to the environment. */
|
||||
extern char **environ;
|
||||
|
||||
int ensure_cloned_binary(void)
|
||||
{
|
||||
int execfd;
|
||||
char **argv = NULL;
|
||||
|
||||
/* Check that we're not self-cloned, and if we are then bail. */
|
||||
int cloned = is_self_cloned();
|
||||
if (cloned > 0 || cloned == -ENOTRECOVERABLE)
|
||||
return cloned;
|
||||
|
||||
if (fetchve(&argv) < 0)
|
||||
return -EINVAL;
|
||||
|
||||
execfd = clone_binary();
|
||||
if (execfd < 0)
|
||||
return -EIO;
|
||||
|
||||
if (putenv(CLONED_BINARY_ENV "=1"))
|
||||
goto error;
|
||||
|
||||
fexecve(execfd, argv, environ);
|
||||
error:
|
||||
close(execfd);
|
||||
return -ENOEXEC;
|
||||
}
|
11
vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c
generated
vendored
11
vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c
generated
vendored
|
@ -534,6 +534,9 @@ void join_namespaces(char *nslist)
|
|||
free(namespaces);
|
||||
}
|
||||
|
||||
/* Defined in cloned_binary.c. */
|
||||
extern int ensure_cloned_binary(void);
|
||||
|
||||
void nsexec(void)
|
||||
{
|
||||
int pipenum;
|
||||
|
@ -549,6 +552,14 @@ void nsexec(void)
|
|||
if (pipenum == -1)
|
||||
return;
|
||||
|
||||
/*
|
||||
* We need to re-exec if we are not in a cloned binary. This is necessary
|
||||
* to ensure that containers won't be able to access the host binary
|
||||
* through /proc/self/exe. See CVE-2019-5736.
|
||||
*/
|
||||
if (ensure_cloned_binary() < 0)
|
||||
bail("could not ensure we are a cloned binary");
|
||||
|
||||
/* Parse all of the netlink configuration. */
|
||||
nl_parse(pipenum, &config);
|
||||
|
||||
|
|
5
vendor/github.com/opencontainers/runc/vendor.conf
generated
vendored
5
vendor/github.com/opencontainers/runc/vendor.conf
generated
vendored
|
@ -1,8 +1,9 @@
|
|||
# OCI runtime-spec. When updating this, make sure you use a version tag rather
|
||||
# than a commit ID so it's much more obvious what version of the spec we are
|
||||
# using.
|
||||
github.com/opencontainers/runtime-spec 5684b8af48c1ac3b1451fa499724e30e3c20a294
|
||||
github.com/opencontainers/runtime-spec 29686dbc5559d93fb1ef402eeda3e35c38d75af4
|
||||
# Core libcontainer functionality.
|
||||
github.com/checkpoint-restore/go-criu v3.11
|
||||
github.com/mrunalp/fileutils ed869b029674c0e9ce4c0dfa781405c2d9946d08
|
||||
github.com/opencontainers/selinux v1.0.0-rc1
|
||||
github.com/seccomp/libseccomp-golang 84e90a91acea0f4e51e62bc1a75de18b1fc0790f
|
||||
|
@ -18,7 +19,7 @@ github.com/golang/protobuf 18c9bb3261723cd5401db4d0c9fbc5c3b6c70fe8
|
|||
github.com/cyphar/filepath-securejoin v0.2.1
|
||||
github.com/docker/go-units v0.2.0
|
||||
github.com/urfave/cli d53eb991652b1d438abdd34ce4bfa3ef1539108e
|
||||
golang.org/x/sys 7ddbeae9ae08c6a06a59597f0c9edbc5ff2444ce https://github.com/golang/sys
|
||||
golang.org/x/sys 41f3e6584952bb034a481797859f6ab34b6803bd https://github.com/golang/sys
|
||||
|
||||
# console dependencies
|
||||
github.com/containerd/console 2748ece16665b45a47f884001d5831ec79703880
|
||||
|
|
Loading…
Reference in a new issue