mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
20833b06a0
Signed-off-by: John Howard <jhoward@microsoft.com> Also fixes https://github.com/moby/moby/issues/22874 This commit is a pre-requisite to moving moby/moby on Windows to using Containerd for its runtime. The reason for this is that the interface between moby and containerd for the runtime is an OCI spec which must be unambigious. It is the responsibility of the runtime (runhcs in the case of containerd on Windows) to ensure that arguments are escaped prior to calling into HCS and onwards to the Win32 CreateProcess call. Previously, the builder was always escaping arguments which has led to several bugs in moby. Because the local runtime in libcontainerd had context of whether or not arguments were escaped, it was possible to hack around in daemon/oci_windows.go with knowledge of the context of the call (from builder or not). With a remote runtime, this is not possible as there's rightly no context of the caller passed across in the OCI spec. Put another way, as I put above, the OCI spec must be unambigious. The other previous limitation (which leads to various subtle bugs) is that moby is coded entirely from a Linux-centric point of view. Unfortunately, Windows != Linux. Windows CreateProcess uses a command line, not an array of arguments. And it has very specific rules about how to escape a command line. Some interesting reading links about this are: https://blogs.msdn.microsoft.com/twistylittlepassagesallalike/2011/04/23/everyone-quotes-command-line-arguments-the-wrong-way/ https://stackoverflow.com/questions/31838469/how-do-i-convert-argv-to-lpcommandline-parameter-of-createprocess https://docs.microsoft.com/en-us/cpp/cpp/parsing-cpp-command-line-arguments?view=vs-2017 For this reason, the OCI spec has recently been updated to cater for more natural syntax by including a CommandLine option in Process. What does this commit do? Primary objective is to ensure that the built OCI spec is unambigious. It changes the builder so that `ArgsEscaped` as commited in a layer is only controlled by the use of CMD or ENTRYPOINT. Subsequently, when calling in to create a container from the builder, if follows a different path to both `docker run` and `docker create` using the added `ContainerCreateIgnoreImagesArgsEscaped`. This allows a RUN from the builder to control how to escape in the OCI spec. It changes the builder so that when shell form is used for RUN, CMD or ENTRYPOINT, it builds (for WCOW) a more natural command line using the original as put by the user in the dockerfile, not the parsed version as a set of args which loses fidelity. This command line is put into args[0] and `ArgsEscaped` is set to true for CMD or ENTRYPOINT. A RUN statement does not commit `ArgsEscaped` to the commited layer regardless or whether shell or exec form were used.
334 lines
11 KiB
Go
334 lines
11 KiB
Go
package daemon // import "github.com/docker/docker/daemon"
|
|
|
|
import (
|
|
"fmt"
|
|
"net"
|
|
"runtime"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/docker/docker/api/types"
|
|
containertypes "github.com/docker/docker/api/types/container"
|
|
networktypes "github.com/docker/docker/api/types/network"
|
|
"github.com/docker/docker/container"
|
|
"github.com/docker/docker/errdefs"
|
|
"github.com/docker/docker/image"
|
|
"github.com/docker/docker/pkg/idtools"
|
|
"github.com/docker/docker/pkg/system"
|
|
"github.com/docker/docker/runconfig"
|
|
"github.com/opencontainers/selinux/go-selinux/label"
|
|
"github.com/pkg/errors"
|
|
"github.com/sirupsen/logrus"
|
|
)
|
|
|
|
type createOpts struct {
|
|
params types.ContainerCreateConfig
|
|
managed bool
|
|
ignoreImagesArgsEscaped bool
|
|
}
|
|
|
|
// CreateManagedContainer creates a container that is managed by a Service
|
|
func (daemon *Daemon) CreateManagedContainer(params types.ContainerCreateConfig) (containertypes.ContainerCreateCreatedBody, error) {
|
|
return daemon.containerCreate(createOpts{
|
|
params: params,
|
|
managed: true,
|
|
ignoreImagesArgsEscaped: false})
|
|
}
|
|
|
|
// ContainerCreate creates a regular container
|
|
func (daemon *Daemon) ContainerCreate(params types.ContainerCreateConfig) (containertypes.ContainerCreateCreatedBody, error) {
|
|
return daemon.containerCreate(createOpts{
|
|
params: params,
|
|
managed: false,
|
|
ignoreImagesArgsEscaped: false})
|
|
}
|
|
|
|
// ContainerCreateIgnoreImagesArgsEscaped creates a regular container. This is called from the builder RUN case
|
|
// and ensures that we do not take the images ArgsEscaped
|
|
func (daemon *Daemon) ContainerCreateIgnoreImagesArgsEscaped(params types.ContainerCreateConfig) (containertypes.ContainerCreateCreatedBody, error) {
|
|
return daemon.containerCreate(createOpts{
|
|
params: params,
|
|
managed: false,
|
|
ignoreImagesArgsEscaped: true})
|
|
}
|
|
|
|
func (daemon *Daemon) containerCreate(opts createOpts) (containertypes.ContainerCreateCreatedBody, error) {
|
|
start := time.Now()
|
|
if opts.params.Config == nil {
|
|
return containertypes.ContainerCreateCreatedBody{}, errdefs.InvalidParameter(errors.New("Config cannot be empty in order to create a container"))
|
|
}
|
|
|
|
os := runtime.GOOS
|
|
if opts.params.Config.Image != "" {
|
|
img, err := daemon.imageService.GetImage(opts.params.Config.Image)
|
|
if err == nil {
|
|
os = img.OS
|
|
}
|
|
} else {
|
|
// This mean scratch. On Windows, we can safely assume that this is a linux
|
|
// container. On other platforms, it's the host OS (which it already is)
|
|
if runtime.GOOS == "windows" && system.LCOWSupported() {
|
|
os = "linux"
|
|
}
|
|
}
|
|
|
|
warnings, err := daemon.verifyContainerSettings(os, opts.params.HostConfig, opts.params.Config, false)
|
|
if err != nil {
|
|
return containertypes.ContainerCreateCreatedBody{Warnings: warnings}, errdefs.InvalidParameter(err)
|
|
}
|
|
|
|
err = verifyNetworkingConfig(opts.params.NetworkingConfig)
|
|
if err != nil {
|
|
return containertypes.ContainerCreateCreatedBody{Warnings: warnings}, errdefs.InvalidParameter(err)
|
|
}
|
|
|
|
if opts.params.HostConfig == nil {
|
|
opts.params.HostConfig = &containertypes.HostConfig{}
|
|
}
|
|
err = daemon.adaptContainerSettings(opts.params.HostConfig, opts.params.AdjustCPUShares)
|
|
if err != nil {
|
|
return containertypes.ContainerCreateCreatedBody{Warnings: warnings}, errdefs.InvalidParameter(err)
|
|
}
|
|
|
|
container, err := daemon.create(opts)
|
|
if err != nil {
|
|
return containertypes.ContainerCreateCreatedBody{Warnings: warnings}, err
|
|
}
|
|
containerActions.WithValues("create").UpdateSince(start)
|
|
|
|
return containertypes.ContainerCreateCreatedBody{ID: container.ID, Warnings: warnings}, nil
|
|
}
|
|
|
|
// Create creates a new container from the given configuration with a given name.
|
|
func (daemon *Daemon) create(opts createOpts) (retC *container.Container, retErr error) {
|
|
var (
|
|
container *container.Container
|
|
img *image.Image
|
|
imgID image.ID
|
|
err error
|
|
)
|
|
|
|
os := runtime.GOOS
|
|
if opts.params.Config.Image != "" {
|
|
img, err = daemon.imageService.GetImage(opts.params.Config.Image)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if img.OS != "" {
|
|
os = img.OS
|
|
} else {
|
|
// default to the host OS except on Windows with LCOW
|
|
if runtime.GOOS == "windows" && system.LCOWSupported() {
|
|
os = "linux"
|
|
}
|
|
}
|
|
imgID = img.ID()
|
|
|
|
if runtime.GOOS == "windows" && img.OS == "linux" && !system.LCOWSupported() {
|
|
return nil, errors.New("operating system on which parent image was created is not Windows")
|
|
}
|
|
} else {
|
|
if runtime.GOOS == "windows" {
|
|
os = "linux" // 'scratch' case.
|
|
}
|
|
}
|
|
|
|
// On WCOW, if are not being invoked by the builder to create this container (where
|
|
// ignoreImagesArgEscaped will be true) - if the image already has its arguments escaped,
|
|
// ensure that this is replicated across to the created container to avoid double-escaping
|
|
// of the arguments/command line when the runtime attempts to run the container.
|
|
if os == "windows" && !opts.ignoreImagesArgsEscaped && img != nil && img.RunConfig().ArgsEscaped {
|
|
opts.params.Config.ArgsEscaped = true
|
|
}
|
|
|
|
if err := daemon.mergeAndVerifyConfig(opts.params.Config, img); err != nil {
|
|
return nil, errdefs.InvalidParameter(err)
|
|
}
|
|
|
|
if err := daemon.mergeAndVerifyLogConfig(&opts.params.HostConfig.LogConfig); err != nil {
|
|
return nil, errdefs.InvalidParameter(err)
|
|
}
|
|
|
|
if container, err = daemon.newContainer(opts.params.Name, os, opts.params.Config, opts.params.HostConfig, imgID, opts.managed); err != nil {
|
|
return nil, err
|
|
}
|
|
defer func() {
|
|
if retErr != nil {
|
|
if err := daemon.cleanupContainer(container, true, true); err != nil {
|
|
logrus.Errorf("failed to cleanup container on create error: %v", err)
|
|
}
|
|
}
|
|
}()
|
|
|
|
if err := daemon.setSecurityOptions(container, opts.params.HostConfig); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
container.HostConfig.StorageOpt = opts.params.HostConfig.StorageOpt
|
|
|
|
// Fixes: https://github.com/moby/moby/issues/34074 and
|
|
// https://github.com/docker/for-win/issues/999.
|
|
// Merge the daemon's storage options if they aren't already present. We only
|
|
// do this on Windows as there's no effective sandbox size limit other than
|
|
// physical on Linux.
|
|
if runtime.GOOS == "windows" {
|
|
if container.HostConfig.StorageOpt == nil {
|
|
container.HostConfig.StorageOpt = make(map[string]string)
|
|
}
|
|
for _, v := range daemon.configStore.GraphOptions {
|
|
opt := strings.SplitN(v, "=", 2)
|
|
if _, ok := container.HostConfig.StorageOpt[opt[0]]; !ok {
|
|
container.HostConfig.StorageOpt[opt[0]] = opt[1]
|
|
}
|
|
}
|
|
}
|
|
|
|
// Set RWLayer for container after mount labels have been set
|
|
rwLayer, err := daemon.imageService.CreateLayer(container, setupInitLayer(daemon.idMapping))
|
|
if err != nil {
|
|
return nil, errdefs.System(err)
|
|
}
|
|
container.RWLayer = rwLayer
|
|
|
|
rootIDs := daemon.idMapping.RootPair()
|
|
|
|
if err := idtools.MkdirAndChown(container.Root, 0700, rootIDs); err != nil {
|
|
return nil, err
|
|
}
|
|
if err := idtools.MkdirAndChown(container.CheckpointDir(), 0700, rootIDs); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := daemon.setHostConfig(container, opts.params.HostConfig); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := daemon.createContainerOSSpecificSettings(container, opts.params.Config, opts.params.HostConfig); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var endpointsConfigs map[string]*networktypes.EndpointSettings
|
|
if opts.params.NetworkingConfig != nil {
|
|
endpointsConfigs = opts.params.NetworkingConfig.EndpointsConfig
|
|
}
|
|
// Make sure NetworkMode has an acceptable value. We do this to ensure
|
|
// backwards API compatibility.
|
|
runconfig.SetDefaultNetModeIfBlank(container.HostConfig)
|
|
|
|
daemon.updateContainerNetworkSettings(container, endpointsConfigs)
|
|
if err := daemon.Register(container); err != nil {
|
|
return nil, err
|
|
}
|
|
stateCtr.set(container.ID, "stopped")
|
|
daemon.LogContainerEvent(container, "create")
|
|
return container, nil
|
|
}
|
|
|
|
func toHostConfigSelinuxLabels(labels []string) []string {
|
|
for i, l := range labels {
|
|
labels[i] = "label=" + l
|
|
}
|
|
return labels
|
|
}
|
|
|
|
func (daemon *Daemon) generateSecurityOpt(hostConfig *containertypes.HostConfig) ([]string, error) {
|
|
for _, opt := range hostConfig.SecurityOpt {
|
|
con := strings.Split(opt, "=")
|
|
if con[0] == "label" {
|
|
// Caller overrode SecurityOpts
|
|
return nil, nil
|
|
}
|
|
}
|
|
ipcMode := hostConfig.IpcMode
|
|
pidMode := hostConfig.PidMode
|
|
privileged := hostConfig.Privileged
|
|
if ipcMode.IsHost() || pidMode.IsHost() || privileged {
|
|
return toHostConfigSelinuxLabels(label.DisableSecOpt()), nil
|
|
}
|
|
|
|
var ipcLabel []string
|
|
var pidLabel []string
|
|
ipcContainer := ipcMode.Container()
|
|
pidContainer := pidMode.Container()
|
|
if ipcContainer != "" {
|
|
c, err := daemon.GetContainer(ipcContainer)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
ipcLabel = label.DupSecOpt(c.ProcessLabel)
|
|
if pidContainer == "" {
|
|
return toHostConfigSelinuxLabels(ipcLabel), err
|
|
}
|
|
}
|
|
if pidContainer != "" {
|
|
c, err := daemon.GetContainer(pidContainer)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
pidLabel = label.DupSecOpt(c.ProcessLabel)
|
|
if ipcContainer == "" {
|
|
return toHostConfigSelinuxLabels(pidLabel), err
|
|
}
|
|
}
|
|
|
|
if pidLabel != nil && ipcLabel != nil {
|
|
for i := 0; i < len(pidLabel); i++ {
|
|
if pidLabel[i] != ipcLabel[i] {
|
|
return nil, fmt.Errorf("--ipc and --pid containers SELinux labels aren't the same")
|
|
}
|
|
}
|
|
return toHostConfigSelinuxLabels(pidLabel), nil
|
|
}
|
|
return nil, nil
|
|
}
|
|
|
|
func (daemon *Daemon) mergeAndVerifyConfig(config *containertypes.Config, img *image.Image) error {
|
|
if img != nil && img.Config != nil {
|
|
if err := merge(config, img.Config); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
// Reset the Entrypoint if it is [""]
|
|
if len(config.Entrypoint) == 1 && config.Entrypoint[0] == "" {
|
|
config.Entrypoint = nil
|
|
}
|
|
if len(config.Entrypoint) == 0 && len(config.Cmd) == 0 {
|
|
return fmt.Errorf("No command specified")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Checks if the client set configurations for more than one network while creating a container
|
|
// Also checks if the IPAMConfig is valid
|
|
func verifyNetworkingConfig(nwConfig *networktypes.NetworkingConfig) error {
|
|
if nwConfig == nil || len(nwConfig.EndpointsConfig) == 0 {
|
|
return nil
|
|
}
|
|
if len(nwConfig.EndpointsConfig) == 1 {
|
|
for k, v := range nwConfig.EndpointsConfig {
|
|
if v == nil {
|
|
return errdefs.InvalidParameter(errors.Errorf("no EndpointSettings for %s", k))
|
|
}
|
|
if v.IPAMConfig != nil {
|
|
if v.IPAMConfig.IPv4Address != "" && net.ParseIP(v.IPAMConfig.IPv4Address).To4() == nil {
|
|
return errors.Errorf("invalid IPv4 address: %s", v.IPAMConfig.IPv4Address)
|
|
}
|
|
if v.IPAMConfig.IPv6Address != "" {
|
|
n := net.ParseIP(v.IPAMConfig.IPv6Address)
|
|
// if the address is an invalid network address (ParseIP == nil) or if it is
|
|
// an IPv4 address (To4() != nil), then it is an invalid IPv6 address
|
|
if n == nil || n.To4() != nil {
|
|
return errors.Errorf("invalid IPv6 address: %s", v.IPAMConfig.IPv6Address)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
l := make([]string, 0, len(nwConfig.EndpointsConfig))
|
|
for k := range nwConfig.EndpointsConfig {
|
|
l = append(l, k)
|
|
}
|
|
return errors.Errorf("Container cannot be connected to network endpoints: %s", strings.Join(l, ", "))
|
|
}
|