Mirror of https://github.com/moby/moby.git (synced 2022-11-09 12:21:53 -05:00)
Commit e6c1820ef5: 15 changed files with 161 additions and 35 deletions

@@ -9,6 +9,7 @@ import (
     "github.com/docker/docker/opts"
     "github.com/docker/docker/rootless"
     units "github.com/docker/go-units"
+    "github.com/opencontainers/runc/libcontainer/cgroups"
     "github.com/pkg/errors"
     "github.com/spf13/pflag"
 )

@@ -64,6 +65,10 @@ func installConfigFlags(conf *config.Config, flags *pflag.FlagSet) error {
     // rootless needs to be explicitly specified for running "rootful" dockerd in rootless dockerd (#38702)
     // Note that defaultUserlandProxyPath and honorXDG are configured according to the value of rootless.RunningWithRootlessKit, not the value of --rootless.
     flags.BoolVar(&conf.Rootless, "rootless", rootless.RunningWithRootlessKit(), "Enable rootless mode; typically used with RootlessKit (experimental)")
-    flags.StringVar(&conf.CgroupNamespaceMode, "default-cgroupns-mode", config.DefaultCgroupNamespaceMode, `Default mode for containers cgroup namespace ("host" | "private")`)
+    defaultCgroupNamespaceMode := "host"
+    if cgroups.IsCgroup2UnifiedMode() {
+        defaultCgroupNamespaceMode = "private"
+    }
+    flags.StringVar(&conf.CgroupNamespaceMode, "default-cgroupns-mode", defaultCgroupNamespaceMode, `Default mode for containers cgroup namespace ("host" | "private")`)
     return nil
 }

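The hunk above computes the default for --default-cgroupns-mode at flag-registration time from the host's cgroup version instead of reading a package constant. The standalone sketch below shows the same detection via a statfs check on /sys/fs/cgroup, which is essentially what cgroups.IsCgroup2UnifiedMode checks on Linux; the helper name cgroup2UnifiedMode and the use of golang.org/x/sys/unix are assumptions for illustration, not part of the commit.

package main

import (
    "fmt"

    "golang.org/x/sys/unix"
)

// cgroup2UnifiedMode reports whether /sys/fs/cgroup is mounted as cgroup2
// (the "unified" hierarchy) by comparing the filesystem magic number.
func cgroup2UnifiedMode() bool {
    var st unix.Statfs_t
    if err := unix.Statfs("/sys/fs/cgroup", &st); err != nil {
        return false
    }
    return st.Type == unix.CGROUP2_SUPER_MAGIC
}

func main() {
    // Mirror of the flag-default logic in the hunk above: "private" on a
    // cgroup v2 host, "host" otherwise.
    mode := "host"
    if cgroup2UnifiedMode() {
        mode = "private"
    }
    fmt.Println("default cgroupns mode:", mode)
}
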
@@ -11,8 +11,6 @@ import (
 )

 const (
-    // DefaultCgroupNamespaceMode is the default for a container's CgroupnsMode, if not set otherwise
-    DefaultCgroupNamespaceMode = "host" // TODO: change to private
     // DefaultIpcMode is default for container's IpcMode, if not set otherwise
     DefaultIpcMode = "private"
 )

@@ -794,6 +794,7 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S
        PluginStore: pluginStore,
        startupDone: make(chan struct{}),
    }

    // Ensure the daemon is properly shutdown if there is a failure during
    // initialization
    defer func() {

@@ -914,7 +915,7 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S
         }
     }

-    return pluginexec.New(ctx, getPluginExecRoot(config.Root), pluginCli, config.ContainerdPluginNamespace, m)
+    return pluginexec.New(ctx, getPluginExecRoot(config.Root), pluginCli, config.ContainerdPluginNamespace, m, d.useShimV2())
 }

 // Plugin system initialization should happen before restore. Do not change order.

@@ -1063,7 +1064,7 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S

     go d.execCommandGC()

-    d.containerd, err = libcontainerd.NewClient(ctx, d.containerdCli, filepath.Join(config.ExecRoot, "containerd"), config.ContainerdNamespace, d)
+    d.containerd, err = libcontainerd.NewClient(ctx, d.containerdCli, filepath.Join(config.ExecRoot, "containerd"), config.ContainerdNamespace, d, d.useShimV2())
     if err != nil {
         return nil, err
     }

@@ -364,10 +364,15 @@ func (daemon *Daemon) adaptContainerSettings(hostConfig *containertypes.HostConf

     // Set default cgroup namespace mode, if unset for container
     if hostConfig.CgroupnsMode.IsEmpty() {
-        if hostConfig.Privileged {
+        // for cgroup v2: unshare cgroupns even for privileged containers
+        // https://github.com/containers/libpod/pull/4374#issuecomment-549776387
+        if hostConfig.Privileged && !cgroups.IsCgroup2UnifiedMode() {
             hostConfig.CgroupnsMode = containertypes.CgroupnsMode("host")
         } else {
-            m := config.DefaultCgroupNamespaceMode
+            m := "host"
+            if cgroups.IsCgroup2UnifiedMode() {
+                m = "private"
+            }
             if daemon.configStore != nil {
                 m = daemon.configStore.CgroupNamespaceMode
             }

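Taken together, the new branches resolve a container's cgroup namespace mode roughly as follows. This is a minimal sketch of the decision only, with plain bools and strings standing in for hostConfig, daemon.configStore.CgroupNamespaceMode, and cgroups.IsCgroup2UnifiedMode(); the function name is invented for illustration and is not the daemon's code.

package main

import "fmt"

// resolveCgroupnsMode sketches the post-change selection in adaptContainerSettings:
// on cgroup v1, privileged containers are pinned to the host cgroup namespace;
// otherwise the daemon-configured default wins, falling back to "private" on
// cgroup v2 hosts and "host" on cgroup v1 hosts.
func resolveCgroupnsMode(privileged, cgroupV2 bool, daemonDefault string) string {
    if privileged && !cgroupV2 {
        return "host"
    }
    m := "host"
    if cgroupV2 {
        m = "private"
    }
    if daemonDefault != "" {
        m = daemonDefault
    }
    return m
}

func main() {
    fmt.Println(resolveCgroupnsMode(true, false, ""))         // host (privileged on cgroup v1)
    fmt.Println(resolveCgroupnsMode(true, true, ""))          // private (privileged on cgroup v2)
    fmt.Println(resolveCgroupnsMode(false, false, "private")) // private (daemon default)
}
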
@@ -708,8 +713,8 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.
             warnings = append(warnings, "Your kernel does not support cgroup namespaces. Cgroup namespace setting discarded.")
         }

-        if hostConfig.Privileged {
-            return warnings, fmt.Errorf("privileged mode is incompatible with private cgroup namespaces. You must run the container in the host cgroup namespace when running privileged mode")
+        if hostConfig.Privileged && !cgroups.IsCgroup2UnifiedMode() {
+            return warnings, fmt.Errorf("privileged mode is incompatible with private cgroup namespaces on cgroup v1 host. You must run the container in the host cgroup namespace when running privileged mode")
         }
     }

@@ -1594,6 +1599,10 @@ func (daemon *Daemon) initCgroupsPath(path string) error {
         return nil
     }

+    if cgroups.IsCgroup2UnifiedMode() {
+        return fmt.Errorf("daemon-scoped cpu-rt-period and cpu-rt-runtime are not implemented for cgroup v2")
+    }
+
     // Recursively create cgroup to ensure that the system and all parent cgroups have values set
     // for the period and runtime as this limits what the children can be set to.
     daemon.initCgroupsPath(filepath.Dir(path))

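The new guard reflects that the daemon-wide --cpu-rt-period/--cpu-rt-runtime options are implemented by writing the cgroup v1 cpu controller's real-time interface files, which have no equivalent on the unified hierarchy. A rough illustration of that precondition follows; the mount path and helper name are assumptions for illustration, and real hosts may mount the cpu controller elsewhere.

package main

import (
    "fmt"
    "os"
)

// rtFilesPresent reports whether the cgroup v1 cpu controller exposes the
// real-time scheduling knobs that initCgroupsPath writes to. On a pure
// cgroup v2 host these files do not exist, which is why the daemon now
// returns an explicit error instead of failing later on a missing file.
func rtFilesPresent(cpuMount string) bool {
    for _, f := range []string{"cpu.rt_period_us", "cpu.rt_runtime_us"} {
        if _, err := os.Stat(cpuMount + "/" + f); err != nil {
            return false
        }
    }
    return true
}

func main() {
    // "/sys/fs/cgroup/cpu" is the conventional v1 mount point; adjust as needed.
    fmt.Println("cpu-rt files available:", rtFilesPresent("/sys/fs/cgroup/cpu"))
}
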
@@ -1639,3 +1648,7 @@ func (daemon *Daemon) setupSeccompProfile() error {
     }
     return nil
 }
+
+func (daemon *Daemon) useShimV2() bool {
+    return cgroups.IsCgroup2UnifiedMode()
+}

@@ -653,3 +653,7 @@ func (daemon *Daemon) initRuntimes(_ map[string]types.Runtime) error {

 func setupResolvConf(config *config.Config) {
 }
+
+func (daemon *Daemon) useShimV2() bool {
+    return true
+}

@@ -316,7 +316,9 @@ func WithNamespaces(daemon *Daemon, c *container.Container) coci.SpecOpts {
             return fmt.Errorf("invalid cgroup namespace mode: %v", cgroupNsMode)
         }

-        if cgroupNsMode.IsPrivate() && !c.HostConfig.Privileged {
+        // for cgroup v2: unshare cgroupns even for privileged containers
+        // https://github.com/containers/libpod/pull/4374#issuecomment-549776387
+        if cgroupNsMode.IsPrivate() && (cgroups.IsCgroup2UnifiedMode() || !c.HostConfig.Privileged) {
             nsCgroup := specs.LinuxNamespace{Type: "cgroup"}
             setNamespace(s, nsCgroup)
         }

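For reference, unsharing the cgroup namespace in an OCI runtime spec just means adding a namespace entry of type "cgroup" to spec.Linux.Namespaces, which is roughly what the setNamespace helper above does. A minimal standalone sketch, assuming the github.com/opencontainers/runtime-spec/specs-go package; the helper below is illustrative and is not the daemon's setNamespace.

package main

import (
    "encoding/json"
    "fmt"

    specs "github.com/opencontainers/runtime-spec/specs-go"
)

// addCgroupNamespace appends a private cgroup namespace to the spec unless
// one is already present (mirroring what a set-style helper would do).
func addCgroupNamespace(s *specs.Spec) {
    if s.Linux == nil {
        s.Linux = &specs.Linux{}
    }
    for _, ns := range s.Linux.Namespaces {
        if ns.Type == specs.CgroupNamespace {
            return // already unshared
        }
    }
    s.Linux.Namespaces = append(s.Linux.Namespaces, specs.LinuxNamespace{Type: specs.CgroupNamespace})
}

func main() {
    s := &specs.Spec{}
    addCgroupNamespace(s)
    out, _ := json.Marshal(s.Linux.Namespaces)
    fmt.Println(string(out)) // [{"type":"cgroup"}]
}
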
@@ -8,6 +8,7 @@ import (
     "path/filepath"

     "github.com/containerd/containerd/runtime/linux/runctypes"
+    v2runcoptions "github.com/containerd/containerd/runtime/v2/runc/options"
     "github.com/docker/docker/container"
     "github.com/docker/docker/errdefs"
     "github.com/pkg/errors"

@@ -43,6 +44,20 @@ func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Contain
     if err != nil {
         return nil, err
     }
+    if daemon.useShimV2() {
+        opts := &v2runcoptions.Options{
+            BinaryName: path,
+            Root: filepath.Join(daemon.configStore.ExecRoot,
+                fmt.Sprintf("runtime-%s", container.HostConfig.Runtime)),
+        }
+
+        if UsingSystemd(daemon.configStore) {
+            opts.SystemdCgroup = true
+        }
+
+        return opts, nil
+
+    }
     opts := &runctypes.RuncOptions{
         Runtime: path,
         RuntimeRoot: filepath.Join(daemon.configStore.ExecRoot,

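The v2 branch expresses the same information the existing runctypes path already passed, but the option type differs: the v1 shim's RuncOptions carry the runtime binary as Runtime and the runtime root directory as RuntimeRoot, while the v2 shim's Options use BinaryName and Root. A hedged sketch of that correspondence, using only the two containerd option packages this file already imports; the helper name is invented.

package main

import (
    "fmt"

    "github.com/containerd/containerd/runtime/linux/runctypes"
    v2runcoptions "github.com/containerd/containerd/runtime/v2/runc/options"
)

// toV2Options translates the fields this file previously set on the v1 shim's
// RuncOptions into their v2 shim equivalents (same data, different names).
func toV2Options(v1 *runctypes.RuncOptions) *v2runcoptions.Options {
    return &v2runcoptions.Options{
        BinaryName:    v1.Runtime,     // v1: Runtime     -> v2: BinaryName
        Root:          v1.RuntimeRoot, // v1: RuntimeRoot -> v2: Root
        SystemdCgroup: v1.SystemdCgroup,
    }
}

func main() {
    v1 := &runctypes.RuncOptions{Runtime: "runc", RuntimeRoot: "/var/run/docker/runtime-runc"}
    fmt.Printf("%+v\n", toV2Options(v1))
}
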
@@ -115,7 +115,7 @@ func TestCgroupNamespacesRunPrivilegedAndPrivate(t *testing.T) {
     skip.If(t, !requirement.CgroupNamespacesEnabled())

     // Running with both privileged and cgroupns=private is not allowed
-    errStr := "privileged mode is incompatible with private cgroup namespaces. You must run the container in the host cgroup namespace when running privileged mode"
+    errStr := "privileged mode is incompatible with private cgroup namespaces on cgroup v1 host. You must run the container in the host cgroup namespace when running privileged mode"
     testCreateFailureWithCgroupNs(t, "private", errStr, container.WithPrivileged(true), container.WithCgroupnsMode("private"))
 }

@@ -9,6 +9,6 @@ import (
 )

 // NewClient creates a new libcontainerd client from a containerd client
-func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend) (libcontainerdtypes.Client, error) {
-    return remote.NewClient(ctx, cli, stateDir, ns, b)
+func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend, useShimV2 bool) (libcontainerdtypes.Client, error) {
+    return remote.NewClient(ctx, cli, stateDir, ns, b, useShimV2)
 }

@@ -11,9 +11,10 @@ import (
 )

 // NewClient creates a new libcontainerd client from a containerd client
-func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend) (libcontainerdtypes.Client, error) {
+func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend, useShimV2 bool) (libcontainerdtypes.Client, error) {
     if !system.ContainerdRuntimeSupported() {
+        // useShimV2 is ignored for windows
         return local.NewClient(ctx, cli, stateDir, ns, b)
     }
-    return remote.NewClient(ctx, cli, stateDir, ns, b)
+    return remote.NewClient(ctx, cli, stateDir, ns, b, useShimV2)
 }

@@ -23,6 +23,7 @@ import (
     "github.com/containerd/containerd/events"
     "github.com/containerd/containerd/images"
     "github.com/containerd/containerd/runtime/linux/runctypes"
+    v2runcoptions "github.com/containerd/containerd/runtime/v2/runc/options"
     "github.com/containerd/typeurl"
     "github.com/docker/docker/errdefs"
     "github.com/docker/docker/libcontainerd/queue"

@@ -45,21 +46,27 @@ type client struct {
     logger   *logrus.Entry
     ns       string

-    backend libcontainerdtypes.Backend
-    eventQ  queue.Queue
-    oomMu   sync.Mutex
-    oom     map[string]bool
+    backend         libcontainerdtypes.Backend
+    eventQ          queue.Queue
+    oomMu           sync.Mutex
+    oom             map[string]bool
+    useShimV2       bool
+    v2runcoptionsMu sync.Mutex
+    // v2runcoptions is used for copying options specified on Create() to Start()
+    v2runcoptions map[string]v2runcoptions.Options
 }

 // NewClient creates a new libcontainerd client from a containerd client
-func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend) (libcontainerdtypes.Client, error) {
+func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend, useShimV2 bool) (libcontainerdtypes.Client, error) {
     c := &client{
-        client:   cli,
-        stateDir: stateDir,
-        logger:   logrus.WithField("module", "libcontainerd").WithField("namespace", ns),
-        ns:       ns,
-        backend:  b,
-        oom:      make(map[string]bool),
+        client:        cli,
+        stateDir:      stateDir,
+        logger:        logrus.WithField("module", "libcontainerd").WithField("namespace", ns),
+        ns:            ns,
+        backend:       b,
+        oom:           make(map[string]bool),
+        useShimV2:     useShimV2,
+        v2runcoptions: make(map[string]v2runcoptions.Options),
     }

     go c.processEventStream(ctx, ns)

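The new v2runcoptions map and its dedicated mutex form a small per-container cache: Create stores the runtime options it was given, Start reads them back, and Delete clears the entry. A generic sketch of that pattern, independent of the containerd types; the names below are invented for illustration.

package main

import (
    "fmt"
    "sync"
)

// optionCache keeps per-container options between a Create call and the
// later Start/Delete calls, guarded by its own mutex because those calls
// can arrive on different goroutines.
type optionCache struct {
    mu   sync.Mutex
    opts map[string]string // container ID -> options (string as a stand-in)
}

func newOptionCache() *optionCache {
    return &optionCache{opts: make(map[string]string)}
}

func (c *optionCache) put(id, o string) {
    c.mu.Lock()
    defer c.mu.Unlock()
    c.opts[id] = o
}

func (c *optionCache) get(id string) (string, bool) {
    c.mu.Lock()
    defer c.mu.Unlock()
    o, ok := c.opts[id]
    return o, ok
}

func (c *optionCache) drop(id string) {
    c.mu.Lock()
    defer c.mu.Unlock()
    delete(c.opts, id)
}

func main() {
    cache := newOptionCache()
    cache.put("abc123", "binary=runc root=/run/docker/runtime-runc") // Create
    if o, ok := cache.get("abc123"); ok {                            // Start
        fmt.Println("inherited options:", o)
    }
    cache.drop("abc123") // Delete
}
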
@@ -126,9 +133,13 @@ func (c *client) Create(ctx context.Context, id string, ociSpec *specs.Spec, run
     bdir := c.bundleDir(id)
     c.logger.WithField("bundle", bdir).WithField("root", ociSpec.Root.Path).Debug("bundle dir created")

+    rt := runtimeName
+    if c.useShimV2 {
+        rt = shimV2RuntimeName
+    }
     newOpts := []containerd.NewContainerOpts{
         containerd.WithSpec(ociSpec),
-        containerd.WithRuntime(runtimeName, runtimeOptions),
+        containerd.WithRuntime(rt, runtimeOptions),
         WithBundle(bdir, ociSpec),
     }
     opts = append(opts, newOpts...)

@@ -140,6 +151,13 @@ func (c *client) Create(ctx context.Context, id string, ociSpec *specs.Spec, run
         }
         return wrapError(err)
     }
+    if c.useShimV2 {
+        if x, ok := runtimeOptions.(*v2runcoptions.Options); ok {
+            c.v2runcoptionsMu.Lock()
+            c.v2runcoptions[id] = *x
+            c.v2runcoptionsMu.Unlock()
+        }
+    }
     return nil
 }

@@ -200,11 +218,26 @@ func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin

     if runtime.GOOS != "windows" {
         taskOpts = append(taskOpts, func(_ context.Context, _ *containerd.Client, info *containerd.TaskInfo) error {
-            info.Options = &runctypes.CreateOptions{
-                IoUid:       uint32(uid),
-                IoGid:       uint32(gid),
-                NoPivotRoot: os.Getenv("DOCKER_RAMDISK") != "",
+            if c.useShimV2 {
+                // For v2, we need to inherit options specified on Create
+                c.v2runcoptionsMu.Lock()
+                opts, ok := c.v2runcoptions[id]
+                c.v2runcoptionsMu.Unlock()
+                if !ok {
+                    opts = v2runcoptions.Options{}
+                }
+                opts.IoUid = uint32(uid)
+                opts.IoGid = uint32(gid)
+                opts.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
+                info.Options = &opts
+            } else {
+                info.Options = &runctypes.CreateOptions{
+                    IoUid:       uint32(uid),
+                    IoGid:       uint32(gid),
+                    NoPivotRoot: os.Getenv("DOCKER_RAMDISK") != "",
+                }
             }
+
             return nil
         })
     } else {

@@ -466,6 +499,9 @@ func (c *client) Delete(ctx context.Context, containerID string) error {
     c.oomMu.Lock()
     delete(c.oom, containerID)
     c.oomMu.Unlock()
+    c.v2runcoptionsMu.Lock()
+    delete(c.v2runcoptions, containerID)
+    c.v2runcoptionsMu.Unlock()
     if os.Getenv("LIBCONTAINERD_NOCLEAN") != "1" {
         if err := os.RemoveAll(bundle); err != nil {
             c.logger.WithError(err).WithFields(logrus.Fields{

@@ -16,7 +16,10 @@ import (
     "github.com/sirupsen/logrus"
 )

-const runtimeName = "io.containerd.runtime.v1.linux"
+const (
+    runtimeName       = "io.containerd.runtime.v1.linux"
+    shimV2RuntimeName = "io.containerd.runc.v2"
+)

 func summaryFromInterface(i interface{}) (*libcontainerdtypes.Summary, error) {
     return &libcontainerdtypes.Summary{}, nil

@@ -16,7 +16,10 @@ import (
     "github.com/sirupsen/logrus"
 )

-const runtimeName = "io.containerd.runhcs.v1"
+const (
+    runtimeName       = "io.containerd.runhcs.v1"
+    shimV2RuntimeName = runtimeName
+)

 func summaryFromInterface(i interface{}) (*libcontainerdtypes.Summary, error) {
     switch pd := i.(type) {

@@ -60,6 +60,9 @@ func New(quiet bool) *SysInfo {
         w := o(sysInfo, cgMounts)
         warnings = append(warnings, w...)
     }
+    if cgroups.IsCgroup2UnifiedMode() {
+        warnings = append(warnings, "Your system is running cgroup v2 (unsupported)")
+    }
     if !quiet {
         for _, w := range warnings {
             logrus.Warn(w)

@@ -70,6 +73,15 @@ func New(quiet bool) *SysInfo {

 // applyMemoryCgroupInfo reads the memory information from the memory cgroup mount point.
 func applyMemoryCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
+    if cgroups.IsCgroup2UnifiedMode() {
+        // TODO: check cgroup2 info correctly
+        info.MemoryLimit = true
+        info.SwapLimit = true
+        info.MemoryReservation = true
+        info.OomKillDisable = true
+        info.MemorySwappiness = true
+        return nil
+    }
     var warnings []string
     mountPoint, ok := cgMounts["memory"]
     if !ok {

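The repeated TODO ("check cgroup2 info correctly") refers to the fact that on the unified hierarchy the available controllers can be read from /sys/fs/cgroup/cgroup.controllers instead of being assumed. A rough sketch of that probe, assuming the standard mount point; the function and the printed fields are illustrative, not the eventual sysinfo implementation.

package main

import (
    "fmt"
    "os"
    "strings"
)

// cgroup2Controllers returns the controllers enabled at the root of the
// unified hierarchy, e.g. "cpu", "memory", "io", "pids", as listed in
// /sys/fs/cgroup/cgroup.controllers.
func cgroup2Controllers() (map[string]bool, error) {
    data, err := os.ReadFile("/sys/fs/cgroup/cgroup.controllers")
    if err != nil {
        return nil, err
    }
    set := make(map[string]bool)
    for _, c := range strings.Fields(string(data)) {
        set[c] = true
    }
    return set, nil
}

func main() {
    ctrls, err := cgroup2Controllers()
    if err != nil {
        fmt.Println("not a cgroup v2 host (or unreadable):", err)
        return
    }
    // A more careful version of applyMemoryCgroupInfo could gate the memory
    // capabilities on this instead of assuming them.
    fmt.Println("memory controller available:", ctrls["memory"])
    fmt.Println("pids controller available:", ctrls["pids"])
}
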
@@ -108,6 +120,15 @@ func applyMemoryCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {

 // applyCPUCgroupInfo reads the cpu information from the cpu cgroup mount point.
 func applyCPUCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
+    if cgroups.IsCgroup2UnifiedMode() {
+        // TODO: check cgroup2 info correctly
+        info.CPUShares = true
+        info.CPUCfsPeriod = true
+        info.CPUCfsQuota = true
+        info.CPURealtimePeriod = true
+        info.CPURealtimeRuntime = true
+        return nil
+    }
     var warnings []string
     mountPoint, ok := cgMounts["cpu"]
     if !ok {

@@ -145,6 +166,15 @@ func applyCPUCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {

 // applyBlkioCgroupInfo reads the blkio information from the blkio cgroup mount point.
 func applyBlkioCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
+    if cgroups.IsCgroup2UnifiedMode() {
+        // TODO: check cgroup2 info correctly
+        info.BlkioWeight = true
+        info.BlkioReadBpsDevice = true
+        info.BlkioWriteBpsDevice = true
+        info.BlkioReadIOpsDevice = true
+        info.BlkioWriteIOpsDevice = true
+        return nil
+    }
     var warnings []string
     mountPoint, ok := cgMounts["blkio"]
     if !ok {

@@ -186,6 +216,11 @@ func applyBlkioCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {

 // applyCPUSetCgroupInfo reads the cpuset information from the cpuset cgroup mount point.
 func applyCPUSetCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
+    if cgroups.IsCgroup2UnifiedMode() {
+        // TODO: check cgroup2 info correctly
+        info.Cpuset = true
+        return nil
+    }
     var warnings []string
     mountPoint, ok := cgMounts["cpuset"]
     if !ok {

@@ -213,6 +248,11 @@ func applyCPUSetCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {

 // applyPIDSCgroupInfo reads the pids information from the pids cgroup mount point.
 func applyPIDSCgroupInfo(info *SysInfo, _ map[string]string) []string {
+    if cgroups.IsCgroup2UnifiedMode() {
+        // TODO: check cgroup2 info correctly
+        info.PidsLimit = true
+        return nil
+    }
     var warnings []string
     _, err := cgroups.FindCgroupMountpoint("", "pids")
     if err != nil {

@@ -225,6 +265,11 @@ func applyPIDSCgroupInfo(info *SysInfo, _ map[string]string) []string {

 // applyDevicesCgroupInfo reads the pids information from the devices cgroup mount point.
 func applyDevicesCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
+    if cgroups.IsCgroup2UnifiedMode() {
+        // TODO: check cgroup2 info correctly
+        info.CgroupDevicesEnabled = true
+        return nil
+    }
     var warnings []string
     _, ok := cgMounts["devices"]
     info.CgroupDevicesEnabled = ok

@@ -26,13 +26,13 @@ type ExitHandler interface {
 }

 // New creates a new containerd plugin executor
-func New(ctx context.Context, rootDir string, cli *containerd.Client, ns string, exitHandler ExitHandler) (*Executor, error) {
+func New(ctx context.Context, rootDir string, cli *containerd.Client, ns string, exitHandler ExitHandler, useShimV2 bool) (*Executor, error) {
     e := &Executor{
         rootDir:     rootDir,
         exitHandler: exitHandler,
     }

-    client, err := libcontainerd.NewClient(ctx, cli, rootDir, ns, e)
+    client, err := libcontainerd.NewClient(ctx, cli, rootDir, ns, e, useShimV2)
     if err != nil {
         return nil, errors.Wrap(err, "error creating containerd exec client")
     }