1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00
moby--moby/daemon/create.go
John Howard 20833b06a0 Windows: (WCOW) Generate OCI spec that remote runtime can escape
Signed-off-by: John Howard <jhoward@microsoft.com>

Also fixes https://github.com/moby/moby/issues/22874

This commit is a pre-requisite to moving moby/moby on Windows to using
Containerd for its runtime.

The reason for this is that the interface between moby and containerd
for the runtime is an OCI spec which must be unambiguous.

It is the responsibility of the runtime (runhcs in the case of
containerd on Windows) to ensure that arguments are escaped prior
to calling into HCS and onwards to the Win32 CreateProcess call.

Previously, the builder was always escaping arguments which has
led to several bugs in moby. Because the local runtime in
libcontainerd had context of whether or not arguments were escaped,
it was possible to hack around in daemon/oci_windows.go with
knowledge of the context of the call (from builder or not).

With a remote runtime, this is not possible as there's rightly
no context of the caller passed across in the OCI spec. Put another
way, as I put above, the OCI spec must be unambiguous.

The other previous limitation (which leads to various subtle bugs)
is that moby is coded entirely from a Linux-centric point of view.

Unfortunately, Windows != Linux. Windows CreateProcess uses a
command line, not an array of arguments. And it has very specific
rules about how to escape a command line. Some interesting reading
links about this are:

https://blogs.msdn.microsoft.com/twistylittlepassagesallalike/2011/04/23/everyone-quotes-command-line-arguments-the-wrong-way/
https://stackoverflow.com/questions/31838469/how-do-i-convert-argv-to-lpcommandline-parameter-of-createprocess
https://docs.microsoft.com/en-us/cpp/cpp/parsing-cpp-command-line-arguments?view=vs-2017

For this reason, the OCI spec has recently been updated to cater
for more natural syntax by including a CommandLine option in
Process.

What does this commit do?

Primary objective is to ensure that the built OCI spec is unambiguous.

It changes the builder so that `ArgsEscaped` as committed in a
layer is only controlled by the use of CMD or ENTRYPOINT.

Subsequently, when calling in to create a container from the builder,
it follows a different path to both `docker run` and `docker create`
using the added `ContainerCreateIgnoreImagesArgsEscaped`. This allows
a RUN from the builder to control how to escape in the OCI spec.

It changes the builder so that when shell form is used for RUN,
CMD or ENTRYPOINT, it builds (for WCOW) a more natural command line
using the original as put by the user in the dockerfile, not
the parsed version as a set of args which loses fidelity.
This command line is put into args[0] and `ArgsEscaped` is set
to true for CMD or ENTRYPOINT. A RUN statement does not commit
`ArgsEscaped` to the committed layer regardless of whether shell
or exec form were used.
2019-03-12 18:41:55 -07:00

334 lines
11 KiB
Go

package daemon // import "github.com/docker/docker/daemon"
import (
"fmt"
"net"
"runtime"
"strings"
"time"
"github.com/docker/docker/api/types"
containertypes "github.com/docker/docker/api/types/container"
networktypes "github.com/docker/docker/api/types/network"
"github.com/docker/docker/container"
"github.com/docker/docker/errdefs"
"github.com/docker/docker/image"
"github.com/docker/docker/pkg/idtools"
"github.com/docker/docker/pkg/system"
"github.com/docker/docker/runconfig"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
// createOpts bundles the inputs that containerCreate and create need in order
// to build a container.
type createOpts struct {
	// params is the create request: container config, host config,
	// networking config, name and the AdjustCPUShares flag.
	params types.ContainerCreateConfig

	// managed is forwarded to newContainer. CreateManagedContainer sets it to
	// true; the other entry points leave it false.
	managed bool

	// ignoreImagesArgsEscaped, when true, stops create from copying the
	// image's ArgsEscaped flag onto the container config on Windows. It is
	// set by ContainerCreateIgnoreImagesArgsEscaped (the builder RUN path).
	ignoreImagesArgsEscaped bool
}
// CreateManagedContainer creates a container that is managed by a Service.
//
// It runs the same creation path as ContainerCreate, but with the managed
// flag set. The image's ArgsEscaped setting is honoured.
func (daemon *Daemon) CreateManagedContainer(params types.ContainerCreateConfig) (containertypes.ContainerCreateCreatedBody, error) {
	// ignoreImagesArgsEscaped is left at its zero value (false).
	opts := createOpts{
		params:  params,
		managed: true,
	}
	return daemon.containerCreate(opts)
}
// ContainerCreate creates a regular (non-managed) container.
//
// The image's ArgsEscaped setting is honoured; see
// ContainerCreateIgnoreImagesArgsEscaped for the variant that ignores it.
func (daemon *Daemon) ContainerCreate(params types.ContainerCreateConfig) (containertypes.ContainerCreateCreatedBody, error) {
	// managed and ignoreImagesArgsEscaped are left at their zero value (false).
	opts := createOpts{params: params}
	return daemon.containerCreate(opts)
}
// ContainerCreateIgnoreImagesArgsEscaped creates a regular (non-managed)
// container without inheriting ArgsEscaped from the image. It is called from
// the builder RUN case.
func (daemon *Daemon) ContainerCreateIgnoreImagesArgsEscaped(params types.ContainerCreateConfig) (containertypes.ContainerCreateCreatedBody, error) {
	// managed is left at its zero value (false).
	opts := createOpts{
		params:                  params,
		ignoreImagesArgsEscaped: true,
	}
	return daemon.containerCreate(opts)
}
// containerCreate validates a create request and, if it is acceptable, builds
// the container through create.
//
// Validation failures are returned wrapped in errdefs.InvalidParameter.
// Warnings produced by verifyContainerSettings are included in the response
// body on every path after that check, including error paths. On success the
// body also carries the new container's ID and the elapsed time is recorded
// under the "create" container action.
func (daemon *Daemon) containerCreate(opts createOpts) (containertypes.ContainerCreateCreatedBody, error) {
	begin := time.Now()

	cfg := opts.params.Config
	if cfg == nil {
		return containertypes.ContainerCreateCreatedBody{}, errdefs.InvalidParameter(errors.New("Config cannot be empty in order to create a container"))
	}

	// Work out which OS the settings should be validated against, starting
	// from the host OS.
	platform := runtime.GOOS
	switch {
	case cfg.Image != "":
		// A failed lookup leaves the host OS in place; create repeats the
		// lookup and reports the error.
		if img, lookupErr := daemon.imageService.GetImage(cfg.Image); lookupErr == nil {
			platform = img.OS
		}
	case runtime.GOOS == "windows" && system.LCOWSupported():
		// No image means scratch. On Windows, we can safely assume that this
		// is a linux container. On other platforms the host OS is already
		// the right answer.
		platform = "linux"
	}

	warnings, err := daemon.verifyContainerSettings(platform, opts.params.HostConfig, cfg, false)
	if err != nil {
		return containertypes.ContainerCreateCreatedBody{Warnings: warnings}, errdefs.InvalidParameter(err)
	}

	if err = verifyNetworkingConfig(opts.params.NetworkingConfig); err != nil {
		return containertypes.ContainerCreateCreatedBody{Warnings: warnings}, errdefs.InvalidParameter(err)
	}

	// Everything below dereferences the host config, so substitute an empty
	// one when the request did not carry any.
	if opts.params.HostConfig == nil {
		opts.params.HostConfig = &containertypes.HostConfig{}
	}

	if err = daemon.adaptContainerSettings(opts.params.HostConfig, opts.params.AdjustCPUShares); err != nil {
		return containertypes.ContainerCreateCreatedBody{Warnings: warnings}, errdefs.InvalidParameter(err)
	}

	created, err := daemon.create(opts)
	if err != nil {
		return containertypes.ContainerCreateCreatedBody{Warnings: warnings}, err
	}

	containerActions.WithValues("create").UpdateSince(begin)

	return containertypes.ContainerCreateCreatedBody{ID: created.ID, Warnings: warnings}, nil
}
// create builds a new container from opts and registers it with the daemon.
//
// The steps, in order:
//   - resolve the image (if any) and the container OS;
//   - on Windows, optionally inherit ArgsEscaped from the image;
//   - merge and verify the container config and log config;
//   - allocate the container, then apply security and storage options;
//   - create the RW layer and the container's root and checkpoint directories;
//   - apply host config, OS-specific settings and network settings;
//   - register the container, mark its state as "stopped" and log a "create"
//     event.
//
// opts.params.Config and opts.params.HostConfig must be non-nil; both are
// dereferenced here, and containerCreate guarantees this before calling.
//
// If any step after the container has been allocated fails, the deferred
// handler removes the partially created container via cleanupContainer.
// Config and log-config problems are returned as errdefs.InvalidParameter,
// and RW-layer failures as errdefs.System.
func (daemon *Daemon) create(opts createOpts) (retC *container.Container, retErr error) {
	var (
		container *container.Container
		img       *image.Image
		imgID     image.ID
		err       error
	)

	os := runtime.GOOS
	if opts.params.Config.Image != "" {
		img, err = daemon.imageService.GetImage(opts.params.Config.Image)
		if err != nil {
			return nil, err
		}
		if img.OS != "" {
			os = img.OS
		} else {
			// default to the host OS except on Windows with LCOW
			if runtime.GOOS == "windows" && system.LCOWSupported() {
				os = "linux"
			}
		}
		imgID = img.ID()

		if runtime.GOOS == "windows" && img.OS == "linux" && !system.LCOWSupported() {
			return nil, errors.New("operating system on which parent image was created is not Windows")
		}
	} else {
		if runtime.GOOS == "windows" {
			os = "linux" // 'scratch' case.
		}
	}

	// On WCOW, if we are not being invoked by the builder to create this
	// container (where ignoreImagesArgsEscaped will be true) - if the image
	// already has its arguments escaped, ensure that this is replicated across
	// to the created container to avoid double-escaping of the
	// arguments/command line when the runtime attempts to run the container.
	if os == "windows" && !opts.ignoreImagesArgsEscaped && img != nil && img.RunConfig().ArgsEscaped {
		opts.params.Config.ArgsEscaped = true
	}

	if err := daemon.mergeAndVerifyConfig(opts.params.Config, img); err != nil {
		return nil, errdefs.InvalidParameter(err)
	}

	if err := daemon.mergeAndVerifyLogConfig(&opts.params.HostConfig.LogConfig); err != nil {
		return nil, errdefs.InvalidParameter(err)
	}

	if container, err = daemon.newContainer(opts.params.Name, os, opts.params.Config, opts.params.HostConfig, imgID, opts.managed); err != nil {
		return nil, err
	}
	// From here on the container exists; undo it if any later step fails.
	defer func() {
		if retErr != nil {
			if err := daemon.cleanupContainer(container, true, true); err != nil {
				logrus.Errorf("failed to cleanup container on create error: %v", err)
			}
		}
	}()

	if err := daemon.setSecurityOptions(container, opts.params.HostConfig); err != nil {
		return nil, err
	}

	container.HostConfig.StorageOpt = opts.params.HostConfig.StorageOpt

	// Fixes: https://github.com/moby/moby/issues/34074 and
	// https://github.com/docker/for-win/issues/999.
	// Merge the daemon's storage options if they aren't already present. We only
	// do this on Windows as there's no effective sandbox size limit other than
	// physical on Linux.
	if runtime.GOOS == "windows" {
		if container.HostConfig.StorageOpt == nil {
			container.HostConfig.StorageOpt = make(map[string]string)
		}
		for _, v := range daemon.configStore.GraphOptions {
			opt := strings.SplitN(v, "=", 2)
			if len(opt) != 2 {
				// No "=" means there is no value to inherit. Skip the entry
				// rather than index past the end of opt.
				logrus.Debugf("ignoring daemon storage option without a value: %q", v)
				continue
			}
			// Per-container options take precedence over the daemon's.
			if _, ok := container.HostConfig.StorageOpt[opt[0]]; !ok {
				container.HostConfig.StorageOpt[opt[0]] = opt[1]
			}
		}
	}

	// Set RWLayer for container after mount labels have been set
	rwLayer, err := daemon.imageService.CreateLayer(container, setupInitLayer(daemon.idMapping))
	if err != nil {
		return nil, errdefs.System(err)
	}
	container.RWLayer = rwLayer

	rootIDs := daemon.idMapping.RootPair()

	if err := idtools.MkdirAndChown(container.Root, 0700, rootIDs); err != nil {
		return nil, err
	}
	if err := idtools.MkdirAndChown(container.CheckpointDir(), 0700, rootIDs); err != nil {
		return nil, err
	}

	if err := daemon.setHostConfig(container, opts.params.HostConfig); err != nil {
		return nil, err
	}

	if err := daemon.createContainerOSSpecificSettings(container, opts.params.Config, opts.params.HostConfig); err != nil {
		return nil, err
	}

	var endpointsConfigs map[string]*networktypes.EndpointSettings
	if opts.params.NetworkingConfig != nil {
		endpointsConfigs = opts.params.NetworkingConfig.EndpointsConfig
	}
	// Make sure NetworkMode has an acceptable value. We do this to ensure
	// backwards API compatibility.
	runconfig.SetDefaultNetModeIfBlank(container.HostConfig)

	daemon.updateContainerNetworkSettings(container, endpointsConfigs)
	if err := daemon.Register(container); err != nil {
		return nil, err
	}
	stateCtr.set(container.ID, "stopped")
	daemon.LogContainerEvent(container, "create")
	return container, nil
}
// toHostConfigSelinuxLabels prefixes every entry of labels with "label=".
//
// The slice is rewritten in place and then returned, so the result shares its
// backing array with the argument. A nil or empty slice is returned unchanged.
func toHostConfigSelinuxLabels(labels []string) []string {
	for idx := range labels {
		labels[idx] = "label=" + labels[idx]
	}
	return labels
}
// generateSecurityOpt works out the SELinux "label=..." security options that
// follow from hostConfig.
//
// Results, in order of precedence:
//   - nil, nil when SecurityOpt already contains a "label" entry, because the
//     caller has overridden the labels;
//   - the label.DisableSecOpt() options when the IPC or PID mode is host, or
//     the container is privileged;
//   - when the IPC and/or PID mode joins another container, options derived
//     from that container's ProcessLabel via label.DupSecOpt. If both are
//     joined, the two label sets must be identical or an error is returned;
//   - nil, nil otherwise.
//
// An error is also returned when a referenced IPC or PID container cannot be
// looked up.
func (daemon *Daemon) generateSecurityOpt(hostConfig *containertypes.HostConfig) ([]string, error) {
	for _, opt := range hostConfig.SecurityOpt {
		con := strings.Split(opt, "=")
		if con[0] == "label" {
			// Caller overrode SecurityOpts
			return nil, nil
		}
	}

	ipcMode := hostConfig.IpcMode
	pidMode := hostConfig.PidMode
	privileged := hostConfig.Privileged
	if ipcMode.IsHost() || pidMode.IsHost() || privileged {
		return toHostConfigSelinuxLabels(label.DisableSecOpt()), nil
	}

	var (
		ipcLabel []string
		pidLabel []string
	)
	ipcContainer := ipcMode.Container()
	pidContainer := pidMode.Container()

	if ipcContainer != "" {
		c, err := daemon.GetContainer(ipcContainer)
		if err != nil {
			return nil, err
		}
		ipcLabel = label.DupSecOpt(c.ProcessLabel)
		if pidContainer == "" {
			return toHostConfigSelinuxLabels(ipcLabel), nil
		}
	}

	if pidContainer != "" {
		c, err := daemon.GetContainer(pidContainer)
		if err != nil {
			return nil, err
		}
		pidLabel = label.DupSecOpt(c.ProcessLabel)
		if ipcContainer == "" {
			return toHostConfigSelinuxLabels(pidLabel), nil
		}
	}

	if pidLabel != nil && ipcLabel != nil {
		// Label sets of different length cannot be the same. Checking this
		// first also keeps the element-wise comparison below in bounds.
		if len(pidLabel) != len(ipcLabel) {
			return nil, fmt.Errorf("--ipc and --pid containers SELinux labels aren't the same")
		}
		for i := range pidLabel {
			if pidLabel[i] != ipcLabel[i] {
				return nil, fmt.Errorf("--ipc and --pid containers SELinux labels aren't the same")
			}
		}
		return toHostConfigSelinuxLabels(pidLabel), nil
	}
	return nil, nil
}
// mergeAndVerifyConfig merges the image's config (when the image has one)
// into config and then checks that the result has something to run.
//
// An Entrypoint of exactly [""] is reset to nil. An error is returned when
// merge fails, or when both Entrypoint and Cmd are empty after merging.
func (daemon *Daemon) mergeAndVerifyConfig(config *containertypes.Config, img *image.Image) error {
	hasImageConfig := img != nil && img.Config != nil
	if hasImageConfig {
		if mergeErr := merge(config, img.Config); mergeErr != nil {
			return mergeErr
		}
	}

	// Reset the Entrypoint if it is [""]
	if entrypoint := config.Entrypoint; len(entrypoint) == 1 && entrypoint[0] == "" {
		config.Entrypoint = nil
	}

	if len(config.Cmd) == 0 && len(config.Entrypoint) == 0 {
		return fmt.Errorf("No command specified")
	}
	return nil
}
// verifyNetworkingConfig checks the networking part of a create request.
//
// A nil config or one without endpoints is valid. A config with more than one
// endpoint is rejected, with the endpoint names listed in the error. For a
// single endpoint, its settings must be non-nil and any IPv4/IPv6 address in
// its IPAMConfig must parse as an address of that family.
func verifyNetworkingConfig(nwConfig *networktypes.NetworkingConfig) error {
	if nwConfig == nil {
		return nil
	}

	switch count := len(nwConfig.EndpointsConfig); {
	case count == 0:
		return nil
	case count > 1:
		names := make([]string, 0, count)
		for name := range nwConfig.EndpointsConfig {
			names = append(names, name)
		}
		return errors.Errorf("Container cannot be connected to network endpoints: %s", strings.Join(names, ", "))
	}

	// Exactly one endpoint from here on.
	for name, endpoint := range nwConfig.EndpointsConfig {
		if endpoint == nil {
			return errdefs.InvalidParameter(errors.Errorf("no EndpointSettings for %s", name))
		}

		ipam := endpoint.IPAMConfig
		if ipam == nil {
			continue
		}

		if v4 := ipam.IPv4Address; v4 != "" && net.ParseIP(v4).To4() == nil {
			return errors.Errorf("invalid IPv4 address: %s", v4)
		}

		if v6 := ipam.IPv6Address; v6 != "" {
			// The address is not valid IPv6 if it does not parse at all
			// (ParseIP == nil) or if it is an IPv4 address (To4() != nil).
			if parsed := net.ParseIP(v6); parsed == nil || parsed.To4() != nil {
				return errors.Errorf("invalid IPv6 address: %s", v6)
			}
		}
	}
	return nil
}