From 94d70d835500bec3b171425271916d3e40f29635 Mon Sep 17 00:00:00 2001 From: John Howard Date: Fri, 18 Mar 2016 11:53:27 -0700 Subject: [PATCH] Windows libcontainerd implementation Signed-off-by: John Howard Signed-off-by: John Starks Signed-off-by: Darren Stahl Signed-off-by: Tonis Tiigi --- container/container_windows.go | 11 +- container/mounts_windows.go | 8 + container/state_windows.go | 4 +- daemon/config_windows.go | 5 +- daemon/container_operations_windows.go | 136 +----- daemon/daemon_windows.go | 68 ++- daemon/exec_windows.go | 12 +- daemon/inspect_windows.go | 6 +- daemon/monitor_windows.go | 13 + daemon/oci_windows.go | 204 +++++++++ daemon/update_windows.go | 13 + daemon/volumes_windows.go | 18 +- docker/daemon_windows.go | 5 + libcontainerd/client_windows.go | 579 ++++++++++++++++++++++++ libcontainerd/container_windows.go | 204 +++++++++ libcontainerd/process_windows.go | 24 + libcontainerd/remote_windows.go | 28 ++ libcontainerd/types_windows.go | 18 + libcontainerd/utils_windows.go | 16 + libcontainerd/windowsoci/oci_windows.go | 188 ++++++++ libcontainerd/windowsoci/unsupported.go | 3 + oci/defaults_windows.go | 23 + 22 files changed, 1419 insertions(+), 167 deletions(-) create mode 100644 container/mounts_windows.go create mode 100644 daemon/monitor_windows.go create mode 100644 daemon/oci_windows.go create mode 100644 daemon/update_windows.go create mode 100644 libcontainerd/client_windows.go create mode 100644 libcontainerd/container_windows.go create mode 100644 libcontainerd/process_windows.go create mode 100644 libcontainerd/remote_windows.go create mode 100644 libcontainerd/types_windows.go create mode 100644 libcontainerd/utils_windows.go create mode 100644 libcontainerd/windowsoci/oci_windows.go create mode 100644 libcontainerd/windowsoci/unsupported.go create mode 100644 oci/defaults_windows.go diff --git a/container/container_windows.go b/container/container_windows.go index fb24ebb968..5c923960bb 100644 --- 
a/container/container_windows.go +++ b/container/container_windows.go @@ -7,7 +7,6 @@ import ( "os" "path/filepath" - "github.com/docker/docker/daemon/execdriver" "github.com/docker/docker/volume" containertypes "github.com/docker/engine-api/types/container" ) @@ -23,6 +22,12 @@ type Container struct { // Fields below here are platform specific. } +// ExitStatus provides exit reasons for a container. +type ExitStatus struct { + // The exit code with which the container exited. + ExitCode int +} + // CreateDaemonEnvironment creates a new environment variable slice for this container. func (container *Container) CreateDaemonEnvironment(linkedEnv []string) []string { // On Windows, nothing to link. Just return the container environment. @@ -35,7 +40,7 @@ func (container *Container) UnmountIpcMounts(unmount func(pth string) error) { } // IpcMounts returns the list of Ipc related mounts. -func (container *Container) IpcMounts() []execdriver.Mount { +func (container *Container) IpcMounts() []Mount { return nil } @@ -45,7 +50,7 @@ func (container *Container) UnmountVolumes(forceSyscall bool, volumeEventLog fun } // TmpfsMounts returns the list of tmpfs mounts -func (container *Container) TmpfsMounts() []execdriver.Mount { +func (container *Container) TmpfsMounts() []Mount { return nil } diff --git a/container/mounts_windows.go b/container/mounts_windows.go new file mode 100644 index 0000000000..01b327f788 --- /dev/null +++ b/container/mounts_windows.go @@ -0,0 +1,8 @@ +package container + +// Mount contains information for a mount operation. 
+type Mount struct { + Source string `json:"source"` + Destination string `json:"destination"` + Writable bool `json:"writable"` +} diff --git a/container/state_windows.go b/container/state_windows.go index 645c9348c3..02802a02a4 100644 --- a/container/state_windows.go +++ b/container/state_windows.go @@ -1,9 +1,7 @@ package container -import "github.com/docker/docker/daemon/execdriver" - // setFromExitStatus is a platform specific helper function to set the state // based on the ExitStatus structure. -func (s *State) setFromExitStatus(exitStatus *execdriver.ExitStatus) { +func (s *State) setFromExitStatus(exitStatus *ExitStatus) { s.ExitCode = exitStatus.ExitCode } diff --git a/daemon/config_windows.go b/daemon/config_windows.go index 81480ad80b..ca141b986c 100644 --- a/daemon/config_windows.go +++ b/daemon/config_windows.go @@ -7,8 +7,9 @@ import ( ) var ( - defaultPidFile = os.Getenv("programdata") + string(os.PathSeparator) + "docker.pid" - defaultGraph = os.Getenv("programdata") + string(os.PathSeparator) + "docker" + defaultPidFile = os.Getenv("programdata") + string(os.PathSeparator) + "docker.pid" + defaultGraph = os.Getenv("programdata") + string(os.PathSeparator) + "docker" + defaultExecRoot = defaultGraph ) // bridgeConfig stores all the bridge driver specific diff --git a/daemon/container_operations_windows.go b/daemon/container_operations_windows.go index 56e95abf25..701bfd8c90 100644 --- a/daemon/container_operations_windows.go +++ b/daemon/container_operations_windows.go @@ -4,14 +4,9 @@ package daemon import ( "fmt" - "strings" - - networktypes "github.com/docker/engine-api/types/network" "github.com/docker/docker/container" - "github.com/docker/docker/daemon/execdriver" - "github.com/docker/docker/daemon/execdriver/windows" - "github.com/docker/docker/layer" + networktypes "github.com/docker/engine-api/types/network" "github.com/docker/libnetwork" ) @@ -29,135 +24,6 @@ func (daemon *Daemon) DisconnectFromNetwork(container *container.Container, n li 
return fmt.Errorf("Windows does not support disconnecting a running container from a network") } -func (daemon *Daemon) populateCommand(c *container.Container, env []string) error { - en := &execdriver.Network{ - Interface: nil, - } - - var epList []string - - // Connect all the libnetwork allocated networks to the container - if c.NetworkSettings != nil { - for n := range c.NetworkSettings.Networks { - sn, err := daemon.FindNetwork(n) - if err != nil { - continue - } - - ep, err := c.GetEndpointInNetwork(sn) - if err != nil { - continue - } - - data, err := ep.DriverInfo() - if err != nil { - continue - } - if data["hnsid"] != nil { - epList = append(epList, data["hnsid"].(string)) - } - } - } - - if daemon.netController == nil { - parts := strings.SplitN(string(c.HostConfig.NetworkMode), ":", 2) - switch parts[0] { - case "none": - case "default", "": // empty string to support existing containers - if !c.Config.NetworkDisabled { - en.Interface = &execdriver.NetworkInterface{ - MacAddress: c.Config.MacAddress, - Bridge: daemon.configStore.bridgeConfig.Iface, - PortBindings: c.HostConfig.PortBindings, - - // TODO Windows. Include IPAddress. There already is a - // property IPAddress on execDrive.CommonNetworkInterface, - // but there is no CLI option in docker to pass through - // an IPAddress on docker run. - } - } - default: - return fmt.Errorf("invalid network mode: %s", c.HostConfig.NetworkMode) - } - } - - // TODO Windows. More resource controls to be implemented later. 
- resources := &execdriver.Resources{ - CommonResources: execdriver.CommonResources{ - CPUShares: c.HostConfig.CPUShares, - }, - } - - processConfig := execdriver.ProcessConfig{ - CommonProcessConfig: execdriver.CommonProcessConfig{ - Entrypoint: c.Path, - Arguments: c.Args, - Tty: c.Config.Tty, - }, - ConsoleSize: c.HostConfig.ConsoleSize, - } - - processConfig.Env = env - - var layerPaths []string - img, err := daemon.imageStore.Get(c.ImageID) - if err != nil { - return fmt.Errorf("Failed to graph.Get on ImageID %s - %s", c.ImageID, err) - } - - if img.RootFS != nil && img.RootFS.Type == "layers+base" { - max := len(img.RootFS.DiffIDs) - for i := 0; i <= max; i++ { - img.RootFS.DiffIDs = img.RootFS.DiffIDs[:i] - path, err := layer.GetLayerPath(daemon.layerStore, img.RootFS.ChainID()) - if err != nil { - return fmt.Errorf("Failed to get layer path from graphdriver %s for ImageID %s - %s", daemon.layerStore, img.RootFS.ChainID(), err) - } - // Reverse order, expecting parent most first - layerPaths = append([]string{path}, layerPaths...) - } - } - - m, err := c.RWLayer.Metadata() - if err != nil { - return fmt.Errorf("Failed to get layer metadata - %s", err) - } - layerFolder := m["dir"] - - var hvPartition bool - // Work out the isolation (whether it is a hypervisor partition) - if c.HostConfig.Isolation.IsDefault() { - // Not specified by caller. 
Take daemon default - hvPartition = windows.DefaultIsolation.IsHyperV() - } else { - // Take value specified by caller - hvPartition = c.HostConfig.Isolation.IsHyperV() - } - - c.Command = &execdriver.Command{ - CommonCommand: execdriver.CommonCommand{ - ID: c.ID, - Rootfs: c.BaseFS, - WorkingDir: c.Config.WorkingDir, - Network: en, - MountLabel: c.GetMountLabel(), - Resources: resources, - ProcessConfig: processConfig, - ProcessLabel: c.GetProcessLabel(), - }, - FirstStart: !c.HasBeenStartedBefore, - LayerFolder: layerFolder, - LayerPaths: layerPaths, - Hostname: c.Config.Hostname, - Isolation: string(c.HostConfig.Isolation), - ArgsEscaped: c.Config.ArgsEscaped, - HvPartition: hvPartition, - EpList: epList, - } - - return nil -} - // getSize returns real size & virtual size func (daemon *Daemon) getSize(container *container.Container) (int64, int64) { // TODO Windows diff --git a/daemon/daemon_windows.go b/daemon/daemon_windows.go index 27f19be50d..8f1e8f337f 100644 --- a/daemon/daemon_windows.go +++ b/daemon/daemon_windows.go @@ -18,11 +18,13 @@ import ( "github.com/docker/docker/layer" "github.com/docker/docker/reference" "github.com/docker/docker/runconfig" - containertypes "github.com/docker/engine-api/types/container" // register the windows graph driver "github.com/docker/docker/daemon/graphdriver/windows" "github.com/docker/docker/pkg/idtools" + "github.com/docker/docker/pkg/parsers" "github.com/docker/docker/pkg/system" + "github.com/docker/engine-api/types" + containertypes "github.com/docker/engine-api/types/container" "github.com/docker/libnetwork" nwconfig "github.com/docker/libnetwork/config" winlibnetwork "github.com/docker/libnetwork/drivers/windows" @@ -39,7 +41,7 @@ const ( windowsMaxCPUShares = 10000 ) -func getBlkioWeightDevices(config *containertypes.HostConfig) ([]*blkiodev.WeightDevice, error) { +func getBlkioWeightDevices(config *containertypes.HostConfig) ([]blkiodev.WeightDevice, error) { return nil, nil } @@ -47,19 +49,19 @@ func 
parseSecurityOpt(container *container.Container, config *containertypes.Hos return nil } -func getBlkioReadIOpsDevices(config *containertypes.HostConfig) ([]*blkiodev.ThrottleDevice, error) { +func getBlkioReadIOpsDevices(config *containertypes.HostConfig) ([]blkiodev.ThrottleDevice, error) { return nil, nil } -func getBlkioWriteIOpsDevices(config *containertypes.HostConfig) ([]*blkiodev.ThrottleDevice, error) { +func getBlkioWriteIOpsDevices(config *containertypes.HostConfig) ([]blkiodev.ThrottleDevice, error) { return nil, nil } -func getBlkioReadBpsDevices(config *containertypes.HostConfig) ([]*blkiodev.ThrottleDevice, error) { +func getBlkioReadBpsDevices(config *containertypes.HostConfig) ([]blkiodev.ThrottleDevice, error) { return nil, nil } -func getBlkioWriteBpsDevices(config *containertypes.HostConfig) ([]*blkiodev.ThrottleDevice, error) { +func getBlkioWriteBpsDevices(config *containertypes.HostConfig) ([]blkiodev.ThrottleDevice, error) { return nil, nil } @@ -287,6 +289,10 @@ func (daemon *Daemon) registerLinks(container *container.Container, hostConfig * return nil } +func (daemon *Daemon) cleanupMountsByID(in string) error { + return nil +} + func (daemon *Daemon) cleanupMounts() error { return nil } @@ -307,8 +313,19 @@ func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error // conditionalMountOnStart is a platform specific helper function during the // container start to call mount. func (daemon *Daemon) conditionalMountOnStart(container *container.Container) error { + + // Are we going to run as a Hyper-V container? + hv := false + if container.HostConfig.Isolation.IsDefault() { + // Container is set to use the default, so take the default from the daemon configuration + hv = daemon.defaultIsolation.IsHyperV() + } else { + // Container is requesting an isolation mode. Honour it. 
+ hv = container.HostConfig.Isolation.IsHyperV() + } + // We do not mount if a Hyper-V container - if !container.HostConfig.Isolation.IsHyperV() { + if !hv { if err := daemon.Mount(container); err != nil { return err } @@ -318,11 +335,12 @@ func (daemon *Daemon) conditionalMountOnStart(container *container.Container) er // conditionalUnmountOnCleanup is a platform specific helper function called // during the cleanup of a container to unmount. -func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container) { +func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container) error { // We do not unmount if a Hyper-V container if !container.HostConfig.Isolation.IsHyperV() { - daemon.Unmount(container) + return daemon.Unmount(container) } + return nil } func restoreCustomImage(is image.Store, ls layer.Store, rs reference.Store) error { @@ -404,3 +422,35 @@ func restoreCustomImage(is image.Store, ls layer.Store, rs reference.Store) erro func driverOptions(config *Config) []nwconfig.Option { return []nwconfig.Option{} } + +func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) { + return nil, nil +} + +// setDefaultIsolation determine the default isolation mode for the +// daemon to run in. 
This is only applicable on Windows +func (daemon *Daemon) setDefaultIsolation() error { + daemon.defaultIsolation = containertypes.Isolation("process") + for _, option := range daemon.configStore.ExecOptions { + key, val, err := parsers.ParseKeyValueOpt(option) + if err != nil { + return err + } + key = strings.ToLower(key) + switch key { + + case "isolation": + if !containertypes.Isolation(val).IsValid() { + return fmt.Errorf("Invalid exec-opt value for 'isolation':'%s'", val) + } + if containertypes.Isolation(val).IsHyperV() { + daemon.defaultIsolation = containertypes.Isolation("hyperv") + } + default: + return fmt.Errorf("Unrecognised exec-opt '%s'\n", key) + } + } + + logrus.Infof("Windows default isolation mode: %s", daemon.defaultIsolation) + return nil +} diff --git a/daemon/exec_windows.go b/daemon/exec_windows.go index 09efa82a2e..be25d20007 100644 --- a/daemon/exec_windows.go +++ b/daemon/exec_windows.go @@ -2,11 +2,13 @@ package daemon import ( "github.com/docker/docker/container" - "github.com/docker/docker/daemon/execdriver" - "github.com/docker/engine-api/types" + "github.com/docker/docker/daemon/exec" + "github.com/docker/docker/libcontainerd" ) -// setPlatformSpecificExecProcessConfig sets platform-specific fields in the -// ProcessConfig structure. This is a no-op on Windows -func setPlatformSpecificExecProcessConfig(config *types.ExecConfig, container *container.Container, pc *execdriver.ProcessConfig) { +func execSetPlatformOpt(c *container.Container, ec *exec.Config, p *libcontainerd.Process) error { + // Process arguments need to be escaped before sending to OCI. + // TODO (jstarks): escape the entrypoint too once the tests are fixed to not rely on this behavior + p.Args = append([]string{p.Args[0]}, escapeArgs(p.Args[1:])...) 
+ return nil } diff --git a/daemon/inspect_windows.go b/daemon/inspect_windows.go index f20571d052..22496e5b07 100644 --- a/daemon/inspect_windows.go +++ b/daemon/inspect_windows.go @@ -33,8 +33,8 @@ func (daemon *Daemon) containerInspectPre120(name string) (*types.ContainerJSON, func inspectExecProcessConfig(e *exec.Config) *backend.ExecProcessConfig { return &backend.ExecProcessConfig{ - Tty: e.ProcessConfig.Tty, - Entrypoint: e.ProcessConfig.Entrypoint, - Arguments: e.ProcessConfig.Arguments, + Tty: e.Tty, + Entrypoint: e.Entrypoint, + Arguments: e.Args, } } diff --git a/daemon/monitor_windows.go b/daemon/monitor_windows.go new file mode 100644 index 0000000000..b808ed3d03 --- /dev/null +++ b/daemon/monitor_windows.go @@ -0,0 +1,13 @@ +package daemon + +import ( + "github.com/docker/docker/container" + "github.com/docker/docker/libcontainerd" +) + +// platformConstructExitStatus returns a platform specific exit status structure +func platformConstructExitStatus(e libcontainerd.StateInfo) *container.ExitStatus { + return &container.ExitStatus{ + ExitCode: int(e.ExitCode), + } +} diff --git a/daemon/oci_windows.go b/daemon/oci_windows.go new file mode 100644 index 0000000000..2af99dc820 --- /dev/null +++ b/daemon/oci_windows.go @@ -0,0 +1,204 @@ +package daemon + +import ( + "fmt" + "strings" + "syscall" + + "github.com/docker/docker/container" + "github.com/docker/docker/layer" + "github.com/docker/docker/libcontainerd" + "github.com/docker/docker/libcontainerd/windowsoci" + "github.com/docker/docker/oci" +) + +func (daemon *Daemon) createSpec(c *container.Container) (*libcontainerd.Spec, error) { + s := oci.DefaultSpec() + + linkedEnv, err := daemon.setupLinkedContainers(c) + if err != nil { + return nil, err + } + + // TODO Windows - this can be removed. 
Not used (UID/GID) + rootUID, rootGID := daemon.GetRemappedUIDGID() + if err := c.SetupWorkingDirectory(rootUID, rootGID); err != nil { + return nil, err + } + + img, err := daemon.imageStore.Get(c.ImageID) + if err != nil { + return nil, fmt.Errorf("Failed to graph.Get on ImageID %s - %s", c.ImageID, err) + } + + // In base spec + s.Hostname = c.FullHostname() + + // In s.Mounts + mounts, err := daemon.setupMounts(c) + if err != nil { + return nil, err + } + for _, mount := range mounts { + s.Mounts = append(s.Mounts, windowsoci.Mount{ + Source: mount.Source, + Destination: mount.Destination, + Readonly: !mount.Writable, + }) + } + + // Are we going to run as a Hyper-V container? + hv := false + if c.HostConfig.Isolation.IsDefault() { + // Container is set to use the default, so take the default from the daemon configuration + hv = daemon.defaultIsolation.IsHyperV() + } else { + // Container is requesting an isolation mode. Honour it. + hv = c.HostConfig.Isolation.IsHyperV() + } + if hv { + // TODO We don't yet have the ImagePath hooked up. But set to + // something non-nil to pickup in libcontainerd. + s.Windows.HvRuntime = &windowsoci.HvRuntime{} + } + + // In s.Process + if c.Config.ArgsEscaped { + s.Process.Args = append([]string{c.Path}, c.Args...) + } else { + // TODO (jstarks): escape the entrypoint too once the tests are fixed to not rely on this behavior + s.Process.Args = append([]string{c.Path}, escapeArgs(c.Args)...) + } + s.Process.Cwd = c.Config.WorkingDir + s.Process.Env = c.CreateDaemonEnvironment(linkedEnv) + s.Process.InitialConsoleSize = c.HostConfig.ConsoleSize + s.Process.Terminal = c.Config.Tty + s.Process.User.User = c.Config.User + + // In spec.Root + s.Root.Path = c.BaseFS + s.Root.Readonly = c.HostConfig.ReadonlyRootfs + + // In s.Windows + s.Windows.FirstStart = !c.HasBeenStartedBefore + + // s.Windows.LayerFolder. 
+ m, err := c.RWLayer.Metadata() + if err != nil { + return nil, fmt.Errorf("Failed to get layer metadata - %s", err) + } + s.Windows.LayerFolder = m["dir"] + + // s.Windows.LayerPaths + var layerPaths []string + if img.RootFS != nil && img.RootFS.Type == "layers+base" { + max := len(img.RootFS.DiffIDs) + for i := 0; i <= max; i++ { + img.RootFS.DiffIDs = img.RootFS.DiffIDs[:i] + path, err := layer.GetLayerPath(daemon.layerStore, img.RootFS.ChainID()) + if err != nil { + return nil, fmt.Errorf("Failed to get layer path from graphdriver %s for ImageID %s - %s", daemon.layerStore, img.RootFS.ChainID(), err) + } + // Reverse order, expecting parent most first + layerPaths = append([]string{path}, layerPaths...) + } + } + s.Windows.LayerPaths = layerPaths + + // In s.Windows.Networking (TP5+ libnetwork way of doing things) + // Connect all the libnetwork allocated networks to the container + var epList []string + if c.NetworkSettings != nil { + for n := range c.NetworkSettings.Networks { + sn, err := daemon.FindNetwork(n) + if err != nil { + continue + } + + ep, err := c.GetEndpointInNetwork(sn) + if err != nil { + continue + } + + data, err := ep.DriverInfo() + if err != nil { + continue + } + if data["hnsid"] != nil { + epList = append(epList, data["hnsid"].(string)) + } + } + } + s.Windows.Networking = &windowsoci.Networking{ + EndpointList: epList, + } + + // In s.Windows.Networking (TP4 back compat) + // TODO Windows: Post TP4 - Remove this along with definitions from spec + // and changes to libcontainerd to not read these fields. 
+ if daemon.netController == nil { + parts := strings.SplitN(string(c.HostConfig.NetworkMode), ":", 2) + switch parts[0] { + case "none": + case "default", "": // empty string to support existing containers + if !c.Config.NetworkDisabled { + s.Windows.Networking = &windowsoci.Networking{ + MacAddress: c.Config.MacAddress, + Bridge: daemon.configStore.bridgeConfig.Iface, + PortBindings: c.HostConfig.PortBindings, + } + } + default: + return nil, fmt.Errorf("invalid network mode: %s", c.HostConfig.NetworkMode) + } + } + + // In s.Windows.Resources + // @darrenstahlmsft implement these resources + cpuShares := uint64(c.HostConfig.CPUShares) + s.Windows.Resources = &windowsoci.Resources{ + CPU: &windowsoci.CPU{ + //TODO Count: ..., + //TODO Percent: ..., + Shares: &cpuShares, + }, + Memory: &windowsoci.Memory{ + //TODO Limit: ..., + //TODO Reservation: ..., + }, + Network: &windowsoci.Network{ + //TODO Bandwidth: ..., + }, + Storage: &windowsoci.Storage{ + //TODO Bps: ..., + //TODO Iops: ..., + //TODO SandboxSize: ..., + }, + } + + // BUGBUG - Next problem. This was an exec opt. Where do we now get these? + // Come back to this when add Xenon support. + // var hvPartition bool + // // Work out the isolation (whether it is a hypervisor partition) + // if c.HostConfig.Isolation.IsDefault() { + // // Not specified by caller. 
Take daemon default + // hvPartition = windows.DefaultIsolation.IsHyperV() + // } else { + // // Take value specified by caller + // hvPartition = c.HostConfig.Isolation.IsHyperV() + // } + + // Isolation: string(c.HostConfig.Isolation), + // HvPartition: hvPartition, + // } + + return (*libcontainerd.Spec)(&s), nil +} + +func escapeArgs(args []string) []string { + escapedArgs := make([]string, len(args)) + for i, a := range args { + escapedArgs[i] = syscall.EscapeArg(a) + } + return escapedArgs +} diff --git a/daemon/update_windows.go b/daemon/update_windows.go new file mode 100644 index 0000000000..2cd0ff2618 --- /dev/null +++ b/daemon/update_windows.go @@ -0,0 +1,13 @@ +// +build windows + +package daemon + +import ( + "github.com/docker/docker/libcontainerd" + "github.com/docker/engine-api/types/container" +) + +func toContainerdResources(resources container.Resources) libcontainerd.Resources { + var r libcontainerd.Resources + return r +} diff --git a/daemon/volumes_windows.go b/daemon/volumes_windows.go index 23c6a3b5e3..b0ab4d6dd5 100644 --- a/daemon/volumes_windows.go +++ b/daemon/volumes_windows.go @@ -7,18 +7,22 @@ import ( "sort" "github.com/docker/docker/container" - "github.com/docker/docker/daemon/execdriver" "github.com/docker/docker/volume" ) // setupMounts configures the mount points for a container by appending each -// of the configured mounts on the container to the execdriver mount structure +// of the configured mounts on the container to the oci mount structure // which will ultimately be passed into the exec driver during container creation. // It also ensures each of the mounts are lexographically sorted. -func (daemon *Daemon) setupMounts(container *container.Container) ([]execdriver.Mount, error) { - var mnts []execdriver.Mount - for _, mount := range container.MountPoints { // type is volume.MountPoint - if err := daemon.lazyInitializeVolume(container.ID, mount); err != nil { + +// BUGBUG TODO Windows containerd. 
This would be much better if it returned +// an array of windowsoci mounts, not container mounts. Then no need to +// do multiple transitions. + +func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, error) { + var mnts []container.Mount + for _, mount := range c.MountPoints { // type is volume.MountPoint + if err := daemon.lazyInitializeVolume(c.ID, mount); err != nil { return nil, err } // If there is no source, take it from the volume path @@ -29,7 +33,7 @@ func (daemon *Daemon) setupMounts(container *container.Container) ([]execdriver. if s == "" { return nil, fmt.Errorf("No source for mount name '%s' driver %q destination '%s'", mount.Name, mount.Driver, mount.Destination) } - mnts = append(mnts, execdriver.Mount{ + mnts = append(mnts, container.Mount{ Source: s, Destination: mount.Destination, Writable: mount.RW, diff --git a/docker/daemon_windows.go b/docker/daemon_windows.go index 52649daf0b..ae8d737d6c 100644 --- a/docker/daemon_windows.go +++ b/docker/daemon_windows.go @@ -10,6 +10,7 @@ import ( "github.com/Sirupsen/logrus" apiserver "github.com/docker/docker/api/server" "github.com/docker/docker/daemon" + "github.com/docker/docker/libcontainerd" "github.com/docker/docker/pkg/mflag" "github.com/docker/docker/pkg/system" ) @@ -57,3 +58,7 @@ func setupConfigReloadTrap(configFile string, flags *mflag.FlagSet, reload func( } }() } + +func (cli *DaemonCli) getPlatformRemoteOptions() []libcontainerd.RemoteOption { + return nil +} diff --git a/libcontainerd/client_windows.go b/libcontainerd/client_windows.go new file mode 100644 index 0000000000..b97d03d234 --- /dev/null +++ b/libcontainerd/client_windows.go @@ -0,0 +1,579 @@ +package libcontainerd + +import ( + "encoding/json" + "errors" + "fmt" + "io" + "path/filepath" + "strconv" + "strings" + + "syscall" + "time" + + "github.com/Microsoft/hcsshim" + "github.com/Sirupsen/logrus" +) + +type client struct { + clientCommon + + // Platform specific properties below here (none presently on 
Windows) +} + +// defaultContainerNAT is the default name of the container NAT device that is +// preconfigured on the server. TODO Windows - Remove for TP5 support as not needed. +const defaultContainerNAT = "ContainerNAT" + +// Win32 error codes that are used for various workarounds +// These really should be ALL_CAPS to match golangs syscall library and standard +// Win32 error conventions, but golint insists on CamelCase. +const ( + CoEClassstring = syscall.Errno(0x800401F3) // Invalid class string + ErrorNoNetwork = syscall.Errno(1222) // The network is not present or not started + ErrorBadPathname = syscall.Errno(161) // The specified path is invalid + ErrorInvalidObject = syscall.Errno(0x800710D8) // The object identifier does not represent a valid object +) + +type layer struct { + ID string + Path string +} + +type defConfig struct { + DefFile string +} + +type portBinding struct { + Protocol string + InternalPort int + ExternalPort int +} + +type natSettings struct { + Name string + PortBindings []portBinding +} + +type networkConnection struct { + NetworkName string + //EnableNat bool + Nat natSettings +} +type networkSettings struct { + MacAddress string +} + +type device struct { + DeviceType string + Connection interface{} + Settings interface{} +} + +type mappedDir struct { + HostPath string + ContainerPath string + ReadOnly bool +} + +// TODO Windows RTM: @darrenstahlmsft Add ProcessorCount +type containerInit struct { + SystemType string // HCS requires this to be hard-coded to "Container" + Name string // Name of the container. We use the docker ID. + Owner string // The management platform that created this container + IsDummy bool // Used for development purposes. 
+ VolumePath string // Windows volume path for scratch space + Devices []device // Devices used by the container + IgnoreFlushesDuringBoot bool // Optimization hint for container startup in Windows + LayerFolderPath string // Where the layer folders are located + Layers []layer // List of storage layers + ProcessorWeight uint64 `json:",omitempty"` // CPU Shares 0..10000 on Windows; where 0 will be omitted and HCS will default. + ProcessorMaximum int64 `json:",omitempty"` // CPU maximum usage percent 1..100 + StorageIOPSMaximum uint64 `json:",omitempty"` // Maximum Storage IOPS + StorageBandwidthMaximum uint64 `json:",omitempty"` // Maximum Storage Bandwidth in bytes per second + StorageSandboxSize uint64 `json:",omitempty"` // Size in bytes that the container system drive should be expanded to if smaller + MemoryMaximumInMB int64 `json:",omitempty"` // Maximum memory available to the container in Megabytes + HostName string // Hostname + MappedDirectories []mappedDir // List of mapped directories (volumes/mounts) + SandboxPath string // Location of unmounted sandbox (used for Hyper-V containers) + HvPartition bool // True if it a Hyper-V Container + EndpointList []string // List of networking endpoints to be attached to container +} + +// defaultOwner is a tag passed to HCS to allow it to differentiate between +// container creator management stacks. We hard code "docker" in the case +// of docker. +const defaultOwner = "docker" + +// Create is the entrypoint to create a container from a spec, and if successfully +// created, start it too. 
+func (clnt *client) Create(containerID string, spec Spec, options ...CreateOption) error { + logrus.Debugln("LCD client.Create() with spec", spec) + + cu := &containerInit{ + SystemType: "Container", + Name: containerID, + Owner: defaultOwner, + + VolumePath: spec.Root.Path, + IgnoreFlushesDuringBoot: spec.Windows.FirstStart, + LayerFolderPath: spec.Windows.LayerFolder, + HostName: spec.Hostname, + } + + if spec.Windows.Networking != nil { + cu.EndpointList = spec.Windows.Networking.EndpointList + } + + if spec.Windows.Resources != nil { + if spec.Windows.Resources.CPU != nil { + if spec.Windows.Resources.CPU.Shares != nil { + cu.ProcessorWeight = *spec.Windows.Resources.CPU.Shares + } + if spec.Windows.Resources.CPU.Percent != nil { + cu.ProcessorMaximum = *spec.Windows.Resources.CPU.Percent * 100 // ProcessorMaximum is a value between 1 and 10000 + } + } + if spec.Windows.Resources.Memory != nil { + if spec.Windows.Resources.Memory.Limit != nil { + cu.MemoryMaximumInMB = *spec.Windows.Resources.Memory.Limit / 1024 / 1024 + } + } + if spec.Windows.Resources.Storage != nil { + if spec.Windows.Resources.Storage.Bps != nil { + cu.StorageBandwidthMaximum = *spec.Windows.Resources.Storage.Bps + } + if spec.Windows.Resources.Storage.Iops != nil { + cu.StorageIOPSMaximum = *spec.Windows.Resources.Storage.Iops + } + if spec.Windows.Resources.Storage.SandboxSize != nil { + cu.StorageSandboxSize = *spec.Windows.Resources.Storage.SandboxSize + } + } + } + + // TODO Ultimately need to set the path from HvRuntime.ImagePath + cu.HvPartition = (spec.Windows.HvRuntime != nil) + // if spec.Windows.HvRuntime != nil { + // cu.HvPartition = len(spec.Windows.HvRuntime.ImagePath) > 0 + // } + + if cu.HvPartition { + cu.SandboxPath = filepath.Dir(spec.Windows.LayerFolder) + } else { + cu.VolumePath = spec.Root.Path + cu.LayerFolderPath = spec.Windows.LayerFolder + } + + for _, layerPath := range spec.Windows.LayerPaths { + _, filename := filepath.Split(layerPath) + g, err := 
hcsshim.NameToGuid(filename) + if err != nil { + return err + } + cu.Layers = append(cu.Layers, layer{ + ID: g.ToString(), + Path: layerPath, + }) + } + + // Add the mounts (volumes, bind mounts etc) to the structure + mds := make([]mappedDir, len(spec.Mounts)) + for i, mount := range spec.Mounts { + mds[i] = mappedDir{ + HostPath: mount.Source, + ContainerPath: mount.Destination, + ReadOnly: mount.Readonly} + } + cu.MappedDirectories = mds + + // TODO Windows: vv START OF TP4 BLOCK OF CODE. REMOVE ONCE TP4 IS NO LONGER SUPPORTED + if hcsshim.IsTP4() && + spec.Windows.Networking != nil && + spec.Windows.Networking.Bridge != "" { + // Enumerate through the port bindings specified by the user and convert + // them into the internal structure matching the JSON blob that can be + // understood by the HCS. + var pbs []portBinding + for i, v := range spec.Windows.Networking.PortBindings { + proto := strings.ToUpper(i.Proto()) + if proto != "TCP" && proto != "UDP" { + return fmt.Errorf("invalid protocol %s", i.Proto()) + } + + if len(v) > 1 { + return fmt.Errorf("Windows does not support more than one host port in NAT settings") + } + + for _, v2 := range v { + var ( + iPort, ePort int + err error + ) + if len(v2.HostIP) != 0 { + return fmt.Errorf("Windows does not support host IP addresses in NAT settings") + } + if ePort, err = strconv.Atoi(v2.HostPort); err != nil { + return fmt.Errorf("invalid container port %s: %s", v2.HostPort, err) + } + if iPort, err = strconv.Atoi(i.Port()); err != nil { + return fmt.Errorf("invalid internal port %s: %s", i.Port(), err) + } + if iPort < 0 || iPort > 65535 || ePort < 0 || ePort > 65535 { + return fmt.Errorf("specified NAT port is not in allowed range") + } + pbs = append(pbs, + portBinding{ExternalPort: ePort, + InternalPort: iPort, + Protocol: proto}) + } + } + + dev := device{ + DeviceType: "Network", + Connection: &networkConnection{ + NetworkName: spec.Windows.Networking.Bridge, + Nat: natSettings{ + Name: defaultContainerNAT, 
+ PortBindings: pbs, + }, + }, + } + + if spec.Windows.Networking.MacAddress != "" { + windowsStyleMAC := strings.Replace( + spec.Windows.Networking.MacAddress, ":", "-", -1) + dev.Settings = networkSettings{ + MacAddress: windowsStyleMAC, + } + } + cu.Devices = append(cu.Devices, dev) + } else { + logrus.Debugln("No network interface") + } + // TODO Windows: ^^ END OF TP4 BLOCK OF CODE. REMOVE ONCE TP4 IS NO LONGER SUPPORTED + + configurationb, err := json.Marshal(cu) + if err != nil { + return err + } + + configuration := string(configurationb) + + // TODO Windows TP5 timeframe. Remove when TP4 is no longer supported. + // The following a workaround for Windows TP4 which has a networking + // bug which fairly frequently returns an error. Back off and retry. + if !hcsshim.IsTP4() { + if err := hcsshim.CreateComputeSystem(containerID, configuration); err != nil { + return err + } + } else { + maxAttempts := 5 + for i := 1; i <= maxAttempts; i++ { + err = hcsshim.CreateComputeSystem(containerID, configuration) + if err == nil { + break + } + + if herr, ok := err.(*hcsshim.HcsError); ok { + if herr.Err != syscall.ERROR_NOT_FOUND && // Element not found + herr.Err != syscall.ERROR_FILE_NOT_FOUND && // The system cannot find the file specified + herr.Err != ErrorNoNetwork && // The network is not present or not started + herr.Err != ErrorBadPathname && // The specified path is invalid + herr.Err != CoEClassstring && // Invalid class string + herr.Err != ErrorInvalidObject { // The object identifier does not represent a valid object + logrus.Debugln("Failed to create temporary container ", err) + return err + } + logrus.Warnf("Invoking Windows TP4 retry hack (%d of %d)", i, maxAttempts-1) + time.Sleep(50 * time.Millisecond) + } + } + } + + // Construct a container object for calling start on it. 
+ container := &container{ + containerCommon: containerCommon{ + process: process{ + processCommon: processCommon{ + containerID: containerID, + client: clnt, + friendlyName: InitFriendlyName, + }, + }, + processes: make(map[string]*process), + }, + ociSpec: spec, + } + + container.options = options + for _, option := range options { + if err := option.Apply(container); err != nil { + logrus.Error(err) + } + } + + // Call start, and if it fails, delete the container from our + // internal structure, and also keep HCS in sync by deleting the + // container there. + logrus.Debugf("Create() id=%s, Calling start()", containerID) + if err := container.start(); err != nil { + clnt.deleteContainer(containerID) + return err + } + + logrus.Debugf("Create() id=%s completed successfully", containerID) + return nil + +} + +// AddProcess is the handler for adding a process to an already running +// container. It's called through docker exec. +func (clnt *client) AddProcess(containerID, processFriendlyName string, procToAdd Process) error { + + clnt.lock(containerID) + defer clnt.unlock(containerID) + container, err := clnt.getContainer(containerID) + if err != nil { + return err + } + + createProcessParms := hcsshim.CreateProcessParams{ + EmulateConsole: procToAdd.Terminal, + ConsoleSize: procToAdd.InitialConsoleSize, + } + + // Take working directory from the process to add if it is defined, + // otherwise take from the first process. + if procToAdd.Cwd != "" { + createProcessParms.WorkingDirectory = procToAdd.Cwd + } else { + createProcessParms.WorkingDirectory = container.ociSpec.Process.Cwd + } + + // Configure the environment for the process + createProcessParms.Environment = setupEnvironmentVariables(procToAdd.Env) + createProcessParms.CommandLine = strings.Join(procToAdd.Args, " ") + + logrus.Debugf("commandLine: %s", createProcessParms.CommandLine) + + // Start the command running in the container. 
Note we always tell HCS to + // create stdout as it's required regardless of '-i' or '-t' options, so that + // docker can always grab the output through logs. We also tell HCS to always + // create stdin, even if it's not used - it will be closed shortly. Stderr + // is only created if it we're not -t. + var stdout, stderr io.ReadCloser + var pid uint32 + iopipe := &IOPipe{Terminal: procToAdd.Terminal} + pid, iopipe.Stdin, stdout, stderr, err = hcsshim.CreateProcessInComputeSystem( + containerID, + true, + true, + !procToAdd.Terminal, + createProcessParms) + if err != nil { + logrus.Errorf("AddProcess %s CreateProcessInComputeSystem() failed %s", containerID, err) + return err + } + + // Convert io.ReadClosers to io.Readers + if stdout != nil { + iopipe.Stdout = openReaderFromPipe(stdout) + } + if stderr != nil { + iopipe.Stderr = openReaderFromPipe(stderr) + } + + // Add the process to the containers list of processes + container.processes[processFriendlyName] = + &process{ + processCommon: processCommon{ + containerID: containerID, + friendlyName: processFriendlyName, + client: clnt, + systemPid: pid, + }, + } + + // Make sure the lock is not held while calling back into the daemon + clnt.unlock(containerID) + + // Tell the engine to attach streams back to the client + if err := clnt.backend.AttachStreams(processFriendlyName, *iopipe); err != nil { + return err + } + + // Lock again so that the defer unlock doesn't fail. (I really don't like this code) + clnt.lock(containerID) + + // Spin up a go routine waiting for exit to handle cleanup + go container.waitExit(pid, processFriendlyName, false) + + return nil +} + +// Signal handles `docker stop` on Windows. While Linux has support for +// the full range of signals, signals aren't really implemented on Windows. +// We fake supporting regular stop and -9 to force kill. 
+func (clnt *client) Signal(containerID string, sig int) error { + var ( + cont *container + err error + ) + + // Get the container as we need it to find the pid of the process. + clnt.lock(containerID) + defer clnt.unlock(containerID) + if cont, err = clnt.getContainer(containerID); err != nil { + return err + } + + logrus.Debugf("lcd: Signal() containerID=%s sig=%d pid=%d", containerID, sig, cont.systemPid) + context := fmt.Sprintf("Signal: sig=%d pid=%d", sig, cont.systemPid) + + if syscall.Signal(sig) == syscall.SIGKILL { + // Terminate the compute system + if err := hcsshim.TerminateComputeSystem(containerID, hcsshim.TimeoutInfinite, context); err != nil { + logrus.Errorf("Failed to terminate %s - %q", containerID, err) + } + + } else { + // Terminate Process + if err = hcsshim.TerminateProcessInComputeSystem(containerID, cont.systemPid); err != nil { + logrus.Warnf("Failed to terminate pid %d in %s: %q", cont.systemPid, containerID, err) + // Ignore errors + err = nil + } + + // Shutdown the compute system + if err := hcsshim.ShutdownComputeSystem(containerID, hcsshim.TimeoutInfinite, context); err != nil { + logrus.Errorf("Failed to shutdown %s - %q", containerID, err) + } + } + return nil +} + +// Resize handles a CLI event to resize an interactive docker run or docker exec +// window. 
+func (clnt *client) Resize(containerID, processFriendlyName string, width, height int) error { + // Get the libcontainerd container object + clnt.lock(containerID) + defer clnt.unlock(containerID) + cont, err := clnt.getContainer(containerID) + if err != nil { + return err + } + + if processFriendlyName == InitFriendlyName { + logrus.Debugln("Resizing systemPID in", containerID, cont.process.systemPid) + return hcsshim.ResizeConsoleInComputeSystem(containerID, cont.process.systemPid, height, width) + } + + for _, p := range cont.processes { + if p.friendlyName == processFriendlyName { + logrus.Debugln("Resizing exec'd process", containerID, p.systemPid) + return hcsshim.ResizeConsoleInComputeSystem(containerID, p.systemPid, height, width) + } + } + + return fmt.Errorf("Resize could not find containerID %s to resize", containerID) + +} + +// Pause handles pause requests for containers +func (clnt *client) Pause(containerID string) error { + return errors.New("Windows: Containers cannot be paused") +} + +// Resume handles resume requests for containers +func (clnt *client) Resume(containerID string) error { + return errors.New("Windows: Containers cannot be paused") +} + +// Stats handles stats requests for containers +func (clnt *client) Stats(containerID string) (*Stats, error) { + return nil, errors.New("Windows: Stats not implemented") +} + +// Restore is the handler for restoring a container +func (clnt *client) Restore(containerID string, unusedOnWindows ...CreateOption) error { + + logrus.Debugf("lcd Restore %s", containerID) + return clnt.backend.StateChanged(containerID, StateInfo{ + State: StateExit, + ExitCode: 1 << 31, + }) + + // var err error + // clnt.lock(containerID) + // defer clnt.unlock(containerID) + + // logrus.Debugf("restore container %s state %s", containerID) + + // if _, err := clnt.getContainer(containerID); err == nil { + // return fmt.Errorf("container %s is aleady active", containerID) + // } + + // defer func() { + // if err != nil { 
+ // clnt.deleteContainer(containerID) + // } + // }() + + // // ====> BUGBUG Where does linux get the pid from systemPid: pid, + // container := &container{ + // containerCommon: containerCommon{ + // process: process{ + // processCommon: processCommon{ + // containerID: containerID, + // client: clnt, + // friendlyName: InitFriendlyName, + // }, + // }, + // processes: make(map[string]*process), + // }, + // } + + // container.systemPid = systemPid(cont) + + // var terminal bool + // for _, p := range cont.Processes { + // if p.Pid == InitFriendlyName { + // terminal = p.Terminal + // } + // } + + // iopipe, err := container.openFifos(terminal) + // if err != nil { + // return err + // } + + // if err := clnt.backend.AttachStreams(containerID, *iopipe); err != nil { + // return err + // } + + // clnt.appendContainer(container) + + // err = clnt.backend.StateChanged(containerID, StateInfo{ + // State: StateRestore, + // Pid: container.systemPid, + // }) + + // if err != nil { + // return err + // } + + // return nil +} + +// GetPidsForContainers is not implemented on Windows. +func (clnt *client) GetPidsForContainer(containerID string) ([]int, error) { + return nil, errors.New("GetPidsForContainer: GetPidsForContainer() not implemented") +} + +func (clnt *client) UpdateResources(containerID string, resources Resources) error { + // Updating resource isn't supported on Windows + // but we should return nil for enabling updating container + return nil +} diff --git a/libcontainerd/container_windows.go b/libcontainerd/container_windows.go new file mode 100644 index 0000000000..f1df64aa4f --- /dev/null +++ b/libcontainerd/container_windows.go @@ -0,0 +1,204 @@ +package libcontainerd + +import ( + "io" + "strings" + "syscall" + + "github.com/Microsoft/hcsshim" + "github.com/Sirupsen/logrus" +) + +type container struct { + containerCommon + + // Platform specific fields are below here. There are none presently on Windows. 
+ options []CreateOption + + // The ociSpec is required, as client.Create() needs a spec, + // but can be called from the RestartManager context which does not + // otherwise have access to the Spec + ociSpec Spec +} + +func (ctr *container) newProcess(friendlyName string) *process { + return &process{ + processCommon: processCommon{ + containerID: ctr.containerID, + friendlyName: friendlyName, + client: ctr.client, + }, + } +} + +func (ctr *container) start() error { + var err error + + // Start the container + logrus.Debugln("Starting container ", ctr.containerID) + if err = hcsshim.StartComputeSystem(ctr.containerID); err != nil { + logrus.Errorf("Failed to start compute system: %s", err) + return err + } + + createProcessParms := hcsshim.CreateProcessParams{ + EmulateConsole: ctr.ociSpec.Process.Terminal, + WorkingDirectory: ctr.ociSpec.Process.Cwd, + ConsoleSize: ctr.ociSpec.Process.InitialConsoleSize, + } + + // Configure the environment for the process + createProcessParms.Environment = setupEnvironmentVariables(ctr.ociSpec.Process.Env) + createProcessParms.CommandLine = strings.Join(ctr.ociSpec.Process.Args, " ") + + iopipe := &IOPipe{Terminal: ctr.ociSpec.Process.Terminal} + + // Start the command running in the container. Note we always tell HCS to + // create stdout as it's required regardless of '-i' or '-t' options, so that + // docker can always grab the output through logs. We also tell HCS to always + // create stdin, even if it's not used - it will be closed shortly. Stderr + // is only created if it we're not -t. + var pid uint32 + var stdout, stderr io.ReadCloser + pid, iopipe.Stdin, stdout, stderr, err = hcsshim.CreateProcessInComputeSystem( + ctr.containerID, + true, + true, + !ctr.ociSpec.Process.Terminal, + createProcessParms) + if err != nil { + logrus.Errorf("CreateProcessInComputeSystem() failed %s", err) + + // Explicitly terminate the compute system here. 
+ if err2 := hcsshim.TerminateComputeSystem(ctr.containerID, hcsshim.TimeoutInfinite, "CreateProcessInComputeSystem failed"); err2 != nil { + // Ignore this error, there's not a lot we can do except log it + logrus.Warnf("Failed to TerminateComputeSystem after a failed CreateProcessInComputeSystem. Ignoring this.", err2) + } else { + logrus.Debugln("Cleaned up after failed CreateProcessInComputeSystem by calling TerminateComputeSystem") + } + return err + } + + // Convert io.ReadClosers to io.Readers + if stdout != nil { + iopipe.Stdout = openReaderFromPipe(stdout) + } + if stderr != nil { + iopipe.Stderr = openReaderFromPipe(stderr) + } + + // Save the PID + logrus.Debugf("Process started - PID %d", pid) + ctr.systemPid = uint32(pid) + + // Spin up a go routine waiting for exit to handle cleanup + go ctr.waitExit(pid, InitFriendlyName, true) + + ctr.client.appendContainer(ctr) + + if err := ctr.client.backend.AttachStreams(ctr.containerID, *iopipe); err != nil { + // OK to return the error here, as waitExit will handle tear-down in HCS + return err + } + + // Tell the docker engine that the container has started. + si := StateInfo{ + State: StateStart, + Pid: ctr.systemPid, // Not sure this is needed? Double-check monitor.go in daemon BUGBUG @jhowardmsft + } + return ctr.client.backend.StateChanged(ctr.containerID, si) + +} + +// waitExit runs as a goroutine waiting for the process to exit. It's +// equivalent to (in the linux containerd world) where events come in for +// state change notifications from containerd. +func (ctr *container) waitExit(pid uint32, processFriendlyName string, isFirstProcessToStart bool) error { + logrus.Debugln("waitExit on pid", pid) + + // Block indefinitely for the process to exit. 
+ exitCode, err := hcsshim.WaitForProcessInComputeSystem(ctr.containerID, pid, hcsshim.TimeoutInfinite) + if err != nil { + if herr, ok := err.(*hcsshim.HcsError); ok && herr.Err != syscall.ERROR_BROKEN_PIPE { + logrus.Warnf("WaitForProcessInComputeSystem failed (container may have been killed): %s", err) + } + // Fall through here, do not return. This ensures we attempt to continue the + // shutdown in HCS nad tell the docker engine that the process/container + // has exited to avoid a container being dropped on the floor. + } + + // Assume the container has exited + si := StateInfo{ + State: StateExit, + ExitCode: uint32(exitCode), + Pid: pid, + ProcessID: processFriendlyName, + } + + // But it could have been an exec'd process which exited + if !isFirstProcessToStart { + si.State = StateExitProcess + } + + // If this is the init process, always call into vmcompute.dll to + // shutdown the container after we have completed. + if isFirstProcessToStart { + logrus.Debugf("Shutting down container %s", ctr.containerID) + // Explicit timeout here rather than hcsshim.TimeoutInfinte to avoid a + // (remote) possibility that ShutdownComputeSystem hangs indefinitely. + const shutdownTimeout = 5 * 60 * 1000 // 5 minutes + if err := hcsshim.ShutdownComputeSystem(ctr.containerID, shutdownTimeout, "waitExit"); err != nil { + if herr, ok := err.(*hcsshim.HcsError); !ok || + (herr.Err != hcsshim.ERROR_SHUTDOWN_IN_PROGRESS && + herr.Err != ErrorBadPathname && + herr.Err != syscall.ERROR_PATH_NOT_FOUND) { + logrus.Warnf("Ignoring error from ShutdownComputeSystem %s", err) + } + } else { + logrus.Debugf("Completed shutting down container %s", ctr.containerID) + } + + // BUGBUG - Is taking the lock necessary here? Should it just be taken for + // the deleteContainer call, not for the restart logic? 
@jhowardmsft + ctr.client.lock(ctr.containerID) + defer ctr.client.unlock(ctr.containerID) + + if si.State == StateExit && ctr.restartManager != nil { + restart, wait, err := ctr.restartManager.ShouldRestart(uint32(exitCode)) + if err != nil { + logrus.Error(err) + } else if restart { + si.State = StateRestart + ctr.restarting = true + go func() { + err := <-wait + ctr.restarting = false + if err != nil { + si.State = StateExit + if err := ctr.client.backend.StateChanged(ctr.containerID, si); err != nil { + logrus.Error(err) + } + logrus.Error(err) + } else { + ctr.client.Create(ctr.containerID, ctr.ociSpec, ctr.options...) + } + }() + } + } + + // Remove process from list if we have exited + // We need to do so here in case the Message Handler decides to restart it. + if si.State == StateExit { + ctr.client.deleteContainer(ctr.friendlyName) + } + } + + // Call into the backend to notify it of the state change. + logrus.Debugf("waitExit() calling backend.StateChanged %v", si) + if err := ctr.client.backend.StateChanged(ctr.containerID, si); err != nil { + logrus.Error(err) + } + + logrus.Debugln("waitExit() completed OK") + return nil +} diff --git a/libcontainerd/process_windows.go b/libcontainerd/process_windows.go new file mode 100644 index 0000000000..2905fad1fb --- /dev/null +++ b/libcontainerd/process_windows.go @@ -0,0 +1,24 @@ +package libcontainerd + +import ( + "io" +) + +// process keeps the state for both main container process and exec process. + +// process keeps the state for both main container process and exec process. 
+type process struct { + processCommon +} + +func openReaderFromPipe(p io.ReadCloser) io.Reader { + r, w := io.Pipe() + go func() { + if _, err := io.Copy(w, p); err != nil { + r.CloseWithError(err) + } + w.Close() + p.Close() + }() + return r +} diff --git a/libcontainerd/remote_windows.go b/libcontainerd/remote_windows.go new file mode 100644 index 0000000000..b6133fe16c --- /dev/null +++ b/libcontainerd/remote_windows.go @@ -0,0 +1,28 @@ +package libcontainerd + +import "sync" + +type remote struct { +} + +func (r *remote) Client(b Backend) (Client, error) { + c := &client{ + clientCommon: clientCommon{ + backend: b, + containerMutexes: make(map[string]*sync.Mutex), + containers: make(map[string]*container), + }, + } + return c, nil +} + +// Cleanup is a no-op on Windows. It is here to implement the same interface +// to meet compilation requirements. +func (r *remote) Cleanup() { +} + +// New creates a fresh instance of libcontainerd remote. This is largely +// a no-op on Windows. +func New(_ string, _ ...RemoteOption) (Remote, error) { + return &remote{}, nil +} diff --git a/libcontainerd/types_windows.go b/libcontainerd/types_windows.go new file mode 100644 index 0000000000..69cfb27d07 --- /dev/null +++ b/libcontainerd/types_windows.go @@ -0,0 +1,18 @@ +package libcontainerd + +import "github.com/docker/docker/libcontainerd/windowsoci" + +// Spec is the base configuration for the container. +type Spec windowsoci.WindowsSpec + +// Process contains information to start a specific application inside the container. +type Process windowsoci.Process + +// User specifies user information for the containers main process. +type User windowsoci.User + +// Stats contains a stats properties from containerd. +type Stats struct{} + +// Resources defines updatable container resource values. 
+type Resources struct{} diff --git a/libcontainerd/utils_windows.go b/libcontainerd/utils_windows.go new file mode 100644 index 0000000000..a9d95d635f --- /dev/null +++ b/libcontainerd/utils_windows.go @@ -0,0 +1,16 @@ +package libcontainerd + +import "strings" + +// setupEnvironmentVariables convert a string array of environment variables +// into a map as required by the HCS. Source array is in format [v1=k1] [v2=k2] etc. +func setupEnvironmentVariables(a []string) map[string]string { + r := make(map[string]string) + for _, s := range a { + arr := strings.Split(s, "=") + if len(arr) == 2 { + r[arr[0]] = arr[1] + } + } + return r +} diff --git a/libcontainerd/windowsoci/oci_windows.go b/libcontainerd/windowsoci/oci_windows.go new file mode 100644 index 0000000000..c948805954 --- /dev/null +++ b/libcontainerd/windowsoci/oci_windows.go @@ -0,0 +1,188 @@ +package windowsoci + +// This file is a hack - essentially a mirror of OCI spec for Windows. + +import ( + "fmt" + + "github.com/docker/go-connections/nat" +) + +// WindowsSpec is the full specification for Windows containers. +type WindowsSpec struct { + Spec + + // Windows is platform specific configuration for Windows based containers. + Windows Windows `json:"windows"` +} + +// Spec is the base configuration for the container. It specifies platform +// independent configuration. This information must be included when the +// bundle is packaged for distribution. +type Spec struct { + + // Version is the version of the specification that is supported. + Version string `json:"ociVersion"` + // Platform is the host information for OS and Arch. + Platform Platform `json:"platform"` + // Process is the container's main process. + Process Process `json:"process"` + // Root is the root information for the container's filesystem. + Root Root `json:"root"` + // Hostname is the container's host name. 
+ Hostname string `json:"hostname,omitempty"` + // Mounts profile configuration for adding mounts to the container's filesystem. + Mounts []Mount `json:"mounts"` +} + +// Windows contains platform specific configuration for Windows based containers. +type Windows struct { + // Resources contain information for handling resource constraints for the container + Resources *Resources `json:"resources,omitempty"` + // Networking contains the platform specific network settings for the container. + Networking *Networking `json:"networking,omitempty"` + // FirstStart is used for an optimization on first boot of Windows + FirstStart bool `json:"first_start,omitempty"` + // LayerFolder is the path to the current layer folder + LayerFolder string `json:"layer_folder,omitempty"` + // Layer paths of the parent layers + LayerPaths []string `json:"layer_paths,omitempty"` + // HvRuntime contains settings specific to Hyper-V containers, omitted if not using Hyper-V isolation + HvRuntime *HvRuntime `json:"hv_runtime,omitempty"` +} + +// Process contains information to start a specific application inside the container. +type Process struct { + // Terminal indicates if stderr should NOT be attached for the container. + Terminal bool `json:"terminal"` + // ConsoleSize contains the initial h,w of the console size + InitialConsoleSize [2]int `json:"-"` + // User specifies user information for the process. + User User `json:"user"` + // Args specifies the binary and arguments for the application to execute. + Args []string `json:"args"` + // Env populates the process environment for the process. + Env []string `json:"env,omitempty"` + // Cwd is the current working directory for the process and must be + // relative to the container's root. + Cwd string `json:"cwd"` +} + +// User contains the user information for Windows +type User struct { + User string `json:"user,omitempty"` +} + +// Root contains information about the container's root filesystem on the host. 
type Root struct {
	// Path is the absolute path to the container's root filesystem.
	Path string `json:"path"`
	// Readonly makes the root filesystem for the container readonly before
	// the process is executed.
	Readonly bool `json:"readonly"`
}

// Platform specifies OS and arch information for the host system that the
// container is created for.
type Platform struct {
	// OS is the operating system.
	OS string `json:"os"`
	// Arch is the architecture.
	Arch string `json:"arch"`
}

// Mount specifies a mount for a container.
type Mount struct {
	// Destination is the path where the mount will be placed relative to the
	// container's root. The path and child directories MUST exist, a runtime
	// MUST NOT create directories automatically to a mount point.
	Destination string `json:"destination"`
	// Type specifies the mount kind.
	Type string `json:"type"`
	// Source specifies the source path of the mount. In the case of bind mounts
	// this would be the file on the host.
	Source string `json:"source"`
	// Readonly specifies if the mount should be read-only.
	Readonly bool `json:"readonly"`
}

// HvRuntime contains settings specific to Hyper-V containers.
type HvRuntime struct {
	// ImagePath is the path to the Utility VM image for this container.
	ImagePath string `json:"image_path,omitempty"`
}

// Networking contains the platform specific network settings for the container.
type Networking struct {
	// TODO Windows TP5. The following three fields are for 'legacy' non-
	// libnetwork networking through HCS. They can be removed once TP4 is
	// no longer supported. Also remove in libcontainerd\client_windows.go,
	// function Create(), and in daemon\oci_windows.go, function CreateSpec().
	MacAddress   string      `json:"mac,omitempty"`
	Bridge       string      `json:"bridge,omitempty"`
	PortBindings nat.PortMap `json:"port_bindings,omitempty"`
	// End of TODO Windows TP5.

	// EndpointList is the list of endpoints to be attached to the container.
	EndpointList []string `json:"endpoints,omitempty"`
}

// Storage contains storage resource management settings.
type Storage struct {
	// Iops specifies the maximum Iops for the system drive.
	Iops *uint64 `json:"iops,omitempty"`
	// Bps specifies the maximum bytes per second for the system drive.
	Bps *uint64 `json:"bps,omitempty"`
	// SandboxSize indicates the size to expand the system drive to if it is
	// currently smaller.
	SandboxSize *uint64 `json:"sandbox_size,omitempty"`
}

// Memory contains memory settings for the container.
type Memory struct {
	// Limit is the memory limit (in bytes).
	Limit *int64 `json:"limit,omitempty"`
	// Reservation is the memory reservation (in bytes).
	Reservation *uint64 `json:"reservation,omitempty"`
}

// CPU contains information for cpu resource management.
type CPU struct {
	// Count is the number of CPUs available to the container. This is an
	// approximation for Windows Server Containers.
	Count *uint64 `json:"count,omitempty"`
	// Shares is the CPU shares (relative weight (ratio) vs. other containers
	// with cpu shares). Range is from 1 to 10000.
	Shares *uint64 `json:"shares,omitempty"`
	// Percent is the percent of available CPUs usable by the container.
	Percent *int64 `json:"percent,omitempty"`
}

// Network contains network resource management information.
type Network struct {
	// Bandwidth is the maximum egress bandwidth in bytes per second.
	Bandwidth *uint64 `json:"bandwidth,omitempty"`
}

// Resources has container runtime resource constraints
// TODO Windows containerd. This structure needs ratifying with the old resources
// structure used on Windows and the latest OCI spec.
+type Resources struct { + // Memory restriction configuration + Memory *Memory `json:"memory,omitempty"` + // CPU resource restriction configuration + CPU *CPU `json:"cpu,omitempty"` + // Storage restriction configuration + Storage *Storage `json:"storage,omitempty"` + // Network restriction configuration + Network *Network `json:"network,omitempty"` +} + +const ( + // VersionMajor is for an API incompatible changes + VersionMajor = 0 + // VersionMinor is for functionality in a backwards-compatible manner + VersionMinor = 3 + // VersionPatch is for backwards-compatible bug fixes + VersionPatch = 0 + + // VersionDev indicates development branch. Releases will be empty string. + VersionDev = "" +) + +// Version is the specification version that the package types support. +var Version = fmt.Sprintf("%d.%d.%d%s (Windows)", VersionMajor, VersionMinor, VersionPatch, VersionDev) diff --git a/libcontainerd/windowsoci/unsupported.go b/libcontainerd/windowsoci/unsupported.go new file mode 100644 index 0000000000..a97c282995 --- /dev/null +++ b/libcontainerd/windowsoci/unsupported.go @@ -0,0 +1,3 @@ +// +build !windows + +package windowsoci diff --git a/oci/defaults_windows.go b/oci/defaults_windows.go new file mode 100644 index 0000000000..03dc942eb1 --- /dev/null +++ b/oci/defaults_windows.go @@ -0,0 +1,23 @@ +package oci + +import ( + "runtime" + + "github.com/docker/docker/libcontainerd/windowsoci" +) + +// DefaultSpec returns default spec used by docker. +func DefaultSpec() windowsoci.WindowsSpec { + s := windowsoci.Spec{ + Version: windowsoci.Version, + Platform: windowsoci.Platform{ + OS: runtime.GOOS, + Arch: runtime.GOARCH, + }, + } + + return windowsoci.WindowsSpec{ + Spec: s, + Windows: windowsoci.Windows{}, + } +}