Merge pull request #40174 from AkihiroSuda/cgroup2

support cgroup2
This commit is contained in:
Sebastiaan van Stijn 2020-01-09 20:09:11 +01:00 committed by GitHub
commit e6c1820ef5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 161 additions and 35 deletions

View File

@ -9,6 +9,7 @@ import (
"github.com/docker/docker/opts"
"github.com/docker/docker/rootless"
units "github.com/docker/go-units"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/pkg/errors"
"github.com/spf13/pflag"
)
@ -64,6 +65,10 @@ func installConfigFlags(conf *config.Config, flags *pflag.FlagSet) error {
// rootless needs to be explicitly specified for running "rootful" dockerd in rootless dockerd (#38702)
// Note that defaultUserlandProxyPath and honorXDG are configured according to the value of rootless.RunningWithRootlessKit, not the value of --rootless.
flags.BoolVar(&conf.Rootless, "rootless", rootless.RunningWithRootlessKit(), "Enable rootless mode; typically used with RootlessKit (experimental)")
flags.StringVar(&conf.CgroupNamespaceMode, "default-cgroupns-mode", config.DefaultCgroupNamespaceMode, `Default mode for containers cgroup namespace ("host" | "private")`)
defaultCgroupNamespaceMode := "host"
if cgroups.IsCgroup2UnifiedMode() {
defaultCgroupNamespaceMode = "private"
}
flags.StringVar(&conf.CgroupNamespaceMode, "default-cgroupns-mode", defaultCgroupNamespaceMode, `Default mode for containers cgroup namespace ("host" | "private")`)
return nil
}

View File

@ -11,8 +11,6 @@ import (
)
const (
// DefaultCgroupNamespaceMode is the default for a container's CgroupnsMode, if not set otherwise
DefaultCgroupNamespaceMode = "host" // TODO: change to private
// DefaultIpcMode is default for container's IpcMode, if not set otherwise
DefaultIpcMode = "private"
)

View File

@ -794,6 +794,7 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S
PluginStore: pluginStore,
startupDone: make(chan struct{}),
}
// Ensure the daemon is properly shutdown if there is a failure during
// initialization
defer func() {
@ -914,7 +915,7 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S
}
}
return pluginexec.New(ctx, getPluginExecRoot(config.Root), pluginCli, config.ContainerdPluginNamespace, m)
return pluginexec.New(ctx, getPluginExecRoot(config.Root), pluginCli, config.ContainerdPluginNamespace, m, d.useShimV2())
}
// Plugin system initialization should happen before restore. Do not change order.
@ -1063,7 +1064,7 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S
go d.execCommandGC()
d.containerd, err = libcontainerd.NewClient(ctx, d.containerdCli, filepath.Join(config.ExecRoot, "containerd"), config.ContainerdNamespace, d)
d.containerd, err = libcontainerd.NewClient(ctx, d.containerdCli, filepath.Join(config.ExecRoot, "containerd"), config.ContainerdNamespace, d, d.useShimV2())
if err != nil {
return nil, err
}

View File

@ -364,10 +364,15 @@ func (daemon *Daemon) adaptContainerSettings(hostConfig *containertypes.HostConf
// Set default cgroup namespace mode, if unset for container
if hostConfig.CgroupnsMode.IsEmpty() {
if hostConfig.Privileged {
// for cgroup v2: unshare cgroupns even for privileged containers
// https://github.com/containers/libpod/pull/4374#issuecomment-549776387
if hostConfig.Privileged && !cgroups.IsCgroup2UnifiedMode() {
hostConfig.CgroupnsMode = containertypes.CgroupnsMode("host")
} else {
m := config.DefaultCgroupNamespaceMode
m := "host"
if cgroups.IsCgroup2UnifiedMode() {
m = "private"
}
if daemon.configStore != nil {
m = daemon.configStore.CgroupNamespaceMode
}
@ -708,8 +713,8 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.
warnings = append(warnings, "Your kernel does not support cgroup namespaces. Cgroup namespace setting discarded.")
}
if hostConfig.Privileged {
return warnings, fmt.Errorf("privileged mode is incompatible with private cgroup namespaces. You must run the container in the host cgroup namespace when running privileged mode")
if hostConfig.Privileged && !cgroups.IsCgroup2UnifiedMode() {
return warnings, fmt.Errorf("privileged mode is incompatible with private cgroup namespaces on cgroup v1 host. You must run the container in the host cgroup namespace when running privileged mode")
}
}
@ -1594,6 +1599,10 @@ func (daemon *Daemon) initCgroupsPath(path string) error {
return nil
}
if cgroups.IsCgroup2UnifiedMode() {
return fmt.Errorf("daemon-scoped cpu-rt-period and cpu-rt-runtime are not implemented for cgroup v2")
}
// Recursively create cgroup to ensure that the system and all parent cgroups have values set
// for the period and runtime as this limits what the children can be set to.
daemon.initCgroupsPath(filepath.Dir(path))
@ -1639,3 +1648,7 @@ func (daemon *Daemon) setupSeccompProfile() error {
}
return nil
}
// useShimV2 reports whether containers should be started with the
// containerd shim v2 runtime ("io.containerd.runc.v2") instead of the
// legacy v1 shim. It is enabled when the host runs cgroup v2 in unified
// mode, since the v1 shim does not support cgroup v2.
func (daemon *Daemon) useShimV2() bool {
	return cgroups.IsCgroup2UnifiedMode()
}

View File

@ -653,3 +653,7 @@ func (daemon *Daemon) initRuntimes(_ map[string]types.Runtime) error {
// setupResolvConf is a no-op in this build; resolv.conf management is
// not applicable on this platform. The config parameter is accepted only
// to keep the signature consistent with the Linux implementation.
func setupResolvConf(config *config.Config) {
}
// useShimV2 reports whether the containerd shim v2 runtime should be
// used. In this (non-Linux) build it is unconditionally true; the
// cgroup-version check only applies to the Linux implementation.
func (daemon *Daemon) useShimV2() bool {
	return true
}

View File

@ -316,7 +316,9 @@ func WithNamespaces(daemon *Daemon, c *container.Container) coci.SpecOpts {
return fmt.Errorf("invalid cgroup namespace mode: %v", cgroupNsMode)
}
if cgroupNsMode.IsPrivate() && !c.HostConfig.Privileged {
// for cgroup v2: unshare cgroupns even for privileged containers
// https://github.com/containers/libpod/pull/4374#issuecomment-549776387
if cgroupNsMode.IsPrivate() && (cgroups.IsCgroup2UnifiedMode() || !c.HostConfig.Privileged) {
nsCgroup := specs.LinuxNamespace{Type: "cgroup"}
setNamespace(s, nsCgroup)
}

View File

@ -8,6 +8,7 @@ import (
"path/filepath"
"github.com/containerd/containerd/runtime/linux/runctypes"
v2runcoptions "github.com/containerd/containerd/runtime/v2/runc/options"
"github.com/docker/docker/container"
"github.com/docker/docker/errdefs"
"github.com/pkg/errors"
@ -43,6 +44,20 @@ func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Contain
if err != nil {
return nil, err
}
if daemon.useShimV2() {
opts := &v2runcoptions.Options{
BinaryName: path,
Root: filepath.Join(daemon.configStore.ExecRoot,
fmt.Sprintf("runtime-%s", container.HostConfig.Runtime)),
}
if UsingSystemd(daemon.configStore) {
opts.SystemdCgroup = true
}
return opts, nil
}
opts := &runctypes.RuncOptions{
Runtime: path,
RuntimeRoot: filepath.Join(daemon.configStore.ExecRoot,

View File

@ -115,7 +115,7 @@ func TestCgroupNamespacesRunPrivilegedAndPrivate(t *testing.T) {
skip.If(t, !requirement.CgroupNamespacesEnabled())
// Running with both privileged and cgroupns=private is not allowed
errStr := "privileged mode is incompatible with private cgroup namespaces. You must run the container in the host cgroup namespace when running privileged mode"
errStr := "privileged mode is incompatible with private cgroup namespaces on cgroup v1 host. You must run the container in the host cgroup namespace when running privileged mode"
testCreateFailureWithCgroupNs(t, "private", errStr, container.WithPrivileged(true), container.WithCgroupnsMode("private"))
}

View File

@ -9,6 +9,6 @@ import (
)
// NewClient creates a new libcontainerd client from a containerd client
func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend) (libcontainerdtypes.Client, error) {
return remote.NewClient(ctx, cli, stateDir, ns, b)
func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend, useShimV2 bool) (libcontainerdtypes.Client, error) {
return remote.NewClient(ctx, cli, stateDir, ns, b, useShimV2)
}

View File

@ -11,9 +11,10 @@ import (
)
// NewClient creates a new libcontainerd client from a containerd client
func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend) (libcontainerdtypes.Client, error) {
func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend, useShimV2 bool) (libcontainerdtypes.Client, error) {
if !system.ContainerdRuntimeSupported() {
// useShimV2 is ignored for windows
return local.NewClient(ctx, cli, stateDir, ns, b)
}
return remote.NewClient(ctx, cli, stateDir, ns, b)
return remote.NewClient(ctx, cli, stateDir, ns, b, useShimV2)
}

View File

@ -23,6 +23,7 @@ import (
"github.com/containerd/containerd/events"
"github.com/containerd/containerd/images"
"github.com/containerd/containerd/runtime/linux/runctypes"
v2runcoptions "github.com/containerd/containerd/runtime/v2/runc/options"
"github.com/containerd/typeurl"
"github.com/docker/docker/errdefs"
"github.com/docker/docker/libcontainerd/queue"
@ -45,21 +46,27 @@ type client struct {
logger *logrus.Entry
ns string
backend libcontainerdtypes.Backend
eventQ queue.Queue
oomMu sync.Mutex
oom map[string]bool
backend libcontainerdtypes.Backend
eventQ queue.Queue
oomMu sync.Mutex
oom map[string]bool
useShimV2 bool
v2runcoptionsMu sync.Mutex
// v2runcoptions is used for copying options specified on Create() to Start()
v2runcoptions map[string]v2runcoptions.Options
}
// NewClient creates a new libcontainerd client from a containerd client
func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend) (libcontainerdtypes.Client, error) {
func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend, useShimV2 bool) (libcontainerdtypes.Client, error) {
c := &client{
client: cli,
stateDir: stateDir,
logger: logrus.WithField("module", "libcontainerd").WithField("namespace", ns),
ns: ns,
backend: b,
oom: make(map[string]bool),
client: cli,
stateDir: stateDir,
logger: logrus.WithField("module", "libcontainerd").WithField("namespace", ns),
ns: ns,
backend: b,
oom: make(map[string]bool),
useShimV2: useShimV2,
v2runcoptions: make(map[string]v2runcoptions.Options),
}
go c.processEventStream(ctx, ns)
@ -126,9 +133,13 @@ func (c *client) Create(ctx context.Context, id string, ociSpec *specs.Spec, run
bdir := c.bundleDir(id)
c.logger.WithField("bundle", bdir).WithField("root", ociSpec.Root.Path).Debug("bundle dir created")
rt := runtimeName
if c.useShimV2 {
rt = shimV2RuntimeName
}
newOpts := []containerd.NewContainerOpts{
containerd.WithSpec(ociSpec),
containerd.WithRuntime(runtimeName, runtimeOptions),
containerd.WithRuntime(rt, runtimeOptions),
WithBundle(bdir, ociSpec),
}
opts = append(opts, newOpts...)
@ -140,6 +151,13 @@ func (c *client) Create(ctx context.Context, id string, ociSpec *specs.Spec, run
}
return wrapError(err)
}
if c.useShimV2 {
if x, ok := runtimeOptions.(*v2runcoptions.Options); ok {
c.v2runcoptionsMu.Lock()
c.v2runcoptions[id] = *x
c.v2runcoptionsMu.Unlock()
}
}
return nil
}
@ -200,11 +218,26 @@ func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin
if runtime.GOOS != "windows" {
taskOpts = append(taskOpts, func(_ context.Context, _ *containerd.Client, info *containerd.TaskInfo) error {
info.Options = &runctypes.CreateOptions{
IoUid: uint32(uid),
IoGid: uint32(gid),
NoPivotRoot: os.Getenv("DOCKER_RAMDISK") != "",
if c.useShimV2 {
// For v2, we need to inherit options specified on Create
c.v2runcoptionsMu.Lock()
opts, ok := c.v2runcoptions[id]
c.v2runcoptionsMu.Unlock()
if !ok {
opts = v2runcoptions.Options{}
}
opts.IoUid = uint32(uid)
opts.IoGid = uint32(gid)
opts.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
info.Options = &opts
} else {
info.Options = &runctypes.CreateOptions{
IoUid: uint32(uid),
IoGid: uint32(gid),
NoPivotRoot: os.Getenv("DOCKER_RAMDISK") != "",
}
}
return nil
})
} else {
@ -466,6 +499,9 @@ func (c *client) Delete(ctx context.Context, containerID string) error {
c.oomMu.Lock()
delete(c.oom, containerID)
c.oomMu.Unlock()
c.v2runcoptionsMu.Lock()
delete(c.v2runcoptions, containerID)
c.v2runcoptionsMu.Unlock()
if os.Getenv("LIBCONTAINERD_NOCLEAN") != "1" {
if err := os.RemoveAll(bundle); err != nil {
c.logger.WithError(err).WithFields(logrus.Fields{

View File

@ -16,7 +16,10 @@ import (
"github.com/sirupsen/logrus"
)
const runtimeName = "io.containerd.runtime.v1.linux"
const (
	// runtimeName is the legacy containerd shim v1 runtime used by
	// default for Linux containers.
	runtimeName = "io.containerd.runtime.v1.linux"
	// shimV2RuntimeName is the containerd shim v2 runtime, selected
	// when the client is created with useShimV2 enabled.
	shimV2RuntimeName = "io.containerd.runc.v2"
)
func summaryFromInterface(i interface{}) (*libcontainerdtypes.Summary, error) {
return &libcontainerdtypes.Summary{}, nil

View File

@ -16,7 +16,10 @@ import (
"github.com/sirupsen/logrus"
)
const runtimeName = "io.containerd.runhcs.v1"
const (
	// runtimeName is the containerd runtime used for Windows
	// containers (runhcs).
	runtimeName = "io.containerd.runhcs.v1"
	// shimV2RuntimeName aliases runtimeName: the same runtime is used
	// here whether or not shim v2 is requested.
	shimV2RuntimeName = runtimeName
)
func summaryFromInterface(i interface{}) (*libcontainerdtypes.Summary, error) {
switch pd := i.(type) {

View File

@ -60,6 +60,9 @@ func New(quiet bool) *SysInfo {
w := o(sysInfo, cgMounts)
warnings = append(warnings, w...)
}
if cgroups.IsCgroup2UnifiedMode() {
warnings = append(warnings, "Your system is running cgroup v2 (unsupported)")
}
if !quiet {
for _, w := range warnings {
logrus.Warn(w)
@ -70,6 +73,15 @@ func New(quiet bool) *SysInfo {
// applyMemoryCgroupInfo reads the memory information from the memory cgroup mount point.
func applyMemoryCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
if cgroups.IsCgroup2UnifiedMode() {
// TODO: check cgroup2 info correctly
info.MemoryLimit = true
info.SwapLimit = true
info.MemoryReservation = true
info.OomKillDisable = true
info.MemorySwappiness = true
return nil
}
var warnings []string
mountPoint, ok := cgMounts["memory"]
if !ok {
@ -108,6 +120,15 @@ func applyMemoryCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
// applyCPUCgroupInfo reads the cpu information from the cpu cgroup mount point.
func applyCPUCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
if cgroups.IsCgroup2UnifiedMode() {
// TODO: check cgroup2 info correctly
info.CPUShares = true
info.CPUCfsPeriod = true
info.CPUCfsQuota = true
info.CPURealtimePeriod = true
info.CPURealtimeRuntime = true
return nil
}
var warnings []string
mountPoint, ok := cgMounts["cpu"]
if !ok {
@ -145,6 +166,15 @@ func applyCPUCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
// applyBlkioCgroupInfo reads the blkio information from the blkio cgroup mount point.
func applyBlkioCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
if cgroups.IsCgroup2UnifiedMode() {
// TODO: check cgroup2 info correctly
info.BlkioWeight = true
info.BlkioReadBpsDevice = true
info.BlkioWriteBpsDevice = true
info.BlkioReadIOpsDevice = true
info.BlkioWriteIOpsDevice = true
return nil
}
var warnings []string
mountPoint, ok := cgMounts["blkio"]
if !ok {
@ -186,6 +216,11 @@ func applyBlkioCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
// applyCPUSetCgroupInfo reads the cpuset information from the cpuset cgroup mount point.
func applyCPUSetCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
if cgroups.IsCgroup2UnifiedMode() {
// TODO: check cgroup2 info correctly
info.Cpuset = true
return nil
}
var warnings []string
mountPoint, ok := cgMounts["cpuset"]
if !ok {
@ -213,6 +248,11 @@ func applyCPUSetCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
// applyPIDSCgroupInfo reads the pids information from the pids cgroup mount point.
func applyPIDSCgroupInfo(info *SysInfo, _ map[string]string) []string {
if cgroups.IsCgroup2UnifiedMode() {
// TODO: check cgroup2 info correctly
info.PidsLimit = true
return nil
}
var warnings []string
_, err := cgroups.FindCgroupMountpoint("", "pids")
if err != nil {
@ -225,6 +265,11 @@ func applyPIDSCgroupInfo(info *SysInfo, _ map[string]string) []string {
// applyDevicesCgroupInfo reads the pids information from the devices cgroup mount point.
func applyDevicesCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
if cgroups.IsCgroup2UnifiedMode() {
// TODO: check cgroup2 info correctly
info.CgroupDevicesEnabled = true
return nil
}
var warnings []string
_, ok := cgMounts["devices"]
info.CgroupDevicesEnabled = ok

View File

@ -26,13 +26,13 @@ type ExitHandler interface {
}
// New creates a new containerd plugin executor
func New(ctx context.Context, rootDir string, cli *containerd.Client, ns string, exitHandler ExitHandler) (*Executor, error) {
func New(ctx context.Context, rootDir string, cli *containerd.Client, ns string, exitHandler ExitHandler, useShimV2 bool) (*Executor, error) {
e := &Executor{
rootDir: rootDir,
exitHandler: exitHandler,
}
client, err := libcontainerd.NewClient(ctx, cli, rootDir, ns, e)
client, err := libcontainerd.NewClient(ctx, cli, rootDir, ns, e, useShimV2)
if err != nil {
return nil, errors.Wrap(err, "error creating containerd exec client")
}