package libcontainerd import ( "encoding/json" "errors" "fmt" "io" "path/filepath" "strconv" "strings" "syscall" "time" "github.com/Microsoft/hcsshim" "github.com/Sirupsen/logrus" ) type client struct { clientCommon // Platform specific properties below here (none presently on Windows) } // defaultContainerNAT is the default name of the container NAT device that is // preconfigured on the server. TODO Windows - Remove for TP5 support as not needed. const defaultContainerNAT = "ContainerNAT" // Win32 error codes that are used for various workarounds // These really should be ALL_CAPS to match golangs syscall library and standard // Win32 error conventions, but golint insists on CamelCase. const ( CoEClassstring = syscall.Errno(0x800401F3) // Invalid class string ErrorNoNetwork = syscall.Errno(1222) // The network is not present or not started ErrorBadPathname = syscall.Errno(161) // The specified path is invalid ErrorInvalidObject = syscall.Errno(0x800710D8) // The object identifier does not represent a valid object ) type layer struct { ID string Path string } type defConfig struct { DefFile string } type portBinding struct { Protocol string InternalPort int ExternalPort int } type natSettings struct { Name string PortBindings []portBinding } type networkConnection struct { NetworkName string Nat natSettings } type networkSettings struct { MacAddress string } type device struct { DeviceType string Connection interface{} Settings interface{} } type mappedDir struct { HostPath string ContainerPath string ReadOnly bool } // TODO Windows: @darrenstahlmsft Add ProcessorCount type containerInit struct { SystemType string // HCS requires this to be hard-coded to "Container" Name string // Name of the container. We use the docker ID. Owner string // The management platform that created this container IsDummy bool // Used for development purposes. VolumePath string // Windows volume path for scratch space Devices []device // Devices used by the container IgnoreFlushesDuringBoot bool // Optimization hint for container startup in Windows LayerFolderPath string // Where the layer folders are located Layers []layer // List of storage layers ProcessorWeight uint64 `json:",omitempty"` // CPU Shares 0..10000 on Windows; where 0 will be omitted and HCS will default. ProcessorMaximum int64 `json:",omitempty"` // CPU maximum usage percent 1..100 StorageIOPSMaximum uint64 `json:",omitempty"` // Maximum Storage IOPS StorageBandwidthMaximum uint64 `json:",omitempty"` // Maximum Storage Bandwidth in bytes per second StorageSandboxSize uint64 `json:",omitempty"` // Size in bytes that the container system drive should be expanded to if smaller MemoryMaximumInMB int64 `json:",omitempty"` // Maximum memory available to the container in Megabytes HostName string // Hostname MappedDirectories []mappedDir // List of mapped directories (volumes/mounts) SandboxPath string // Location of unmounted sandbox (used for Hyper-V containers) HvPartition bool // True if it a Hyper-V Container EndpointList []string // List of networking endpoints to be attached to container } // defaultOwner is a tag passed to HCS to allow it to differentiate between // container creator management stacks. We hard code "docker" in the case // of docker. const defaultOwner = "docker" // Create is the entrypoint to create a container from a spec, and if successfully // created, start it too. func (clnt *client) Create(containerID string, spec Spec, options ...CreateOption) error { logrus.Debugln("LCD client.Create() with spec", spec) cu := &containerInit{ SystemType: "Container", Name: containerID, Owner: defaultOwner, VolumePath: spec.Root.Path, IgnoreFlushesDuringBoot: spec.Windows.FirstStart, LayerFolderPath: spec.Windows.LayerFolder, HostName: spec.Hostname, } if spec.Windows.Networking != nil { cu.EndpointList = spec.Windows.Networking.EndpointList } if spec.Windows.Resources != nil { if spec.Windows.Resources.CPU != nil { if spec.Windows.Resources.CPU.Shares != nil { cu.ProcessorWeight = *spec.Windows.Resources.CPU.Shares } if spec.Windows.Resources.CPU.Percent != nil { cu.ProcessorMaximum = *spec.Windows.Resources.CPU.Percent * 100 // ProcessorMaximum is a value between 1 and 10000 } } if spec.Windows.Resources.Memory != nil { if spec.Windows.Resources.Memory.Limit != nil { cu.MemoryMaximumInMB = *spec.Windows.Resources.Memory.Limit / 1024 / 1024 } } if spec.Windows.Resources.Storage != nil { if spec.Windows.Resources.Storage.Bps != nil { cu.StorageBandwidthMaximum = *spec.Windows.Resources.Storage.Bps } if spec.Windows.Resources.Storage.Iops != nil { cu.StorageIOPSMaximum = *spec.Windows.Resources.Storage.Iops } if spec.Windows.Resources.Storage.SandboxSize != nil { cu.StorageSandboxSize = *spec.Windows.Resources.Storage.SandboxSize } } } cu.HvPartition = (spec.Windows.HvRuntime != nil) // TODO Windows @jhowardmsft. FIXME post TP5. // if spec.Windows.HvRuntime != nil { // if spec.WIndows.HVRuntime.ImagePath != "" { // cu.TBD = spec.Windows.HvRuntime.ImagePath // } // } if cu.HvPartition { cu.SandboxPath = filepath.Dir(spec.Windows.LayerFolder) } else { cu.VolumePath = spec.Root.Path cu.LayerFolderPath = spec.Windows.LayerFolder } for _, layerPath := range spec.Windows.LayerPaths { _, filename := filepath.Split(layerPath) g, err := hcsshim.NameToGuid(filename) if err != nil { return err } cu.Layers = append(cu.Layers, layer{ ID: g.ToString(), Path: layerPath, }) } // Add the mounts (volumes, bind mounts etc) to the structure mds := make([]mappedDir, len(spec.Mounts)) for i, mount := range spec.Mounts { mds[i] = mappedDir{ HostPath: mount.Source, ContainerPath: mount.Destination, ReadOnly: mount.Readonly} } cu.MappedDirectories = mds // TODO Windows: vv START OF TP4 BLOCK OF CODE. REMOVE ONCE TP4 IS NO LONGER SUPPORTED if hcsshim.IsTP4() && spec.Windows.Networking != nil && spec.Windows.Networking.Bridge != "" { // Enumerate through the port bindings specified by the user and convert // them into the internal structure matching the JSON blob that can be // understood by the HCS. var pbs []portBinding for i, v := range spec.Windows.Networking.PortBindings { proto := strings.ToUpper(i.Proto()) if proto != "TCP" && proto != "UDP" { return fmt.Errorf("invalid protocol %s", i.Proto()) } if len(v) > 1 { return fmt.Errorf("Windows does not support more than one host port in NAT settings") } for _, v2 := range v { var ( iPort, ePort int err error ) if len(v2.HostIP) != 0 { return fmt.Errorf("Windows does not support host IP addresses in NAT settings") } if ePort, err = strconv.Atoi(v2.HostPort); err != nil { return fmt.Errorf("invalid container port %s: %s", v2.HostPort, err) } if iPort, err = strconv.Atoi(i.Port()); err != nil { return fmt.Errorf("invalid internal port %s: %s", i.Port(), err) } if iPort < 0 || iPort > 65535 || ePort < 0 || ePort > 65535 { return fmt.Errorf("specified NAT port is not in allowed range") } pbs = append(pbs, portBinding{ExternalPort: ePort, InternalPort: iPort, Protocol: proto}) } } dev := device{ DeviceType: "Network", Connection: &networkConnection{ NetworkName: spec.Windows.Networking.Bridge, Nat: natSettings{ Name: defaultContainerNAT, PortBindings: pbs, }, }, } if spec.Windows.Networking.MacAddress != "" { windowsStyleMAC := strings.Replace( spec.Windows.Networking.MacAddress, ":", "-", -1) dev.Settings = networkSettings{ MacAddress: windowsStyleMAC, } } cu.Devices = append(cu.Devices, dev) } else { logrus.Debugln("No network interface") } // TODO Windows: ^^ END OF TP4 BLOCK OF CODE. REMOVE ONCE TP4 IS NO LONGER SUPPORTED configurationb, err := json.Marshal(cu) if err != nil { return err } configuration := string(configurationb) // TODO Windows TP5 timeframe. Remove when TP4 is no longer supported. // The following a workaround for Windows TP4 which has a networking // bug which fairly frequently returns an error. Back off and retry. if !hcsshim.IsTP4() { if err := hcsshim.CreateComputeSystem(containerID, configuration); err != nil { return err } } else { maxAttempts := 5 for i := 1; i <= maxAttempts; i++ { err = hcsshim.CreateComputeSystem(containerID, configuration) if err == nil { break } if herr, ok := err.(*hcsshim.HcsError); ok { if herr.Err != syscall.ERROR_NOT_FOUND && // Element not found herr.Err != syscall.ERROR_FILE_NOT_FOUND && // The system cannot find the file specified herr.Err != ErrorNoNetwork && // The network is not present or not started herr.Err != ErrorBadPathname && // The specified path is invalid herr.Err != CoEClassstring && // Invalid class string herr.Err != ErrorInvalidObject { // The object identifier does not represent a valid object logrus.Debugln("Failed to create temporary container ", err) return err } logrus.Warnf("Invoking Windows TP4 retry hack (%d of %d)", i, maxAttempts-1) time.Sleep(50 * time.Millisecond) } } } // Construct a container object for calling start on it. container := &container{ containerCommon: containerCommon{ process: process{ processCommon: processCommon{ containerID: containerID, client: clnt, friendlyName: InitFriendlyName, }, commandLine: strings.Join(spec.Process.Args, " "), }, processes: make(map[string]*process), }, ociSpec: spec, } container.options = options for _, option := range options { if err := option.Apply(container); err != nil { logrus.Error(err) } } // Call start, and if it fails, delete the container from our // internal structure, and also keep HCS in sync by deleting the // container there. logrus.Debugf("Create() id=%s, Calling start()", containerID) if err := container.start(); err != nil { clnt.deleteContainer(containerID) return err } logrus.Debugf("Create() id=%s completed successfully", containerID) return nil } // AddProcess is the handler for adding a process to an already running // container. It's called through docker exec. func (clnt *client) AddProcess(containerID, processFriendlyName string, procToAdd Process) error { clnt.lock(containerID) defer clnt.unlock(containerID) container, err := clnt.getContainer(containerID) if err != nil { return err } createProcessParms := hcsshim.CreateProcessParams{ EmulateConsole: procToAdd.Terminal, ConsoleSize: procToAdd.InitialConsoleSize, } // Take working directory from the process to add if it is defined, // otherwise take from the first process. if procToAdd.Cwd != "" { createProcessParms.WorkingDirectory = procToAdd.Cwd } else { createProcessParms.WorkingDirectory = container.ociSpec.Process.Cwd } // Configure the environment for the process createProcessParms.Environment = setupEnvironmentVariables(procToAdd.Env) createProcessParms.CommandLine = strings.Join(procToAdd.Args, " ") logrus.Debugf("commandLine: %s", createProcessParms.CommandLine) // Start the command running in the container. Note we always tell HCS to // create stdout as it's required regardless of '-i' or '-t' options, so that // docker can always grab the output through logs. We also tell HCS to always // create stdin, even if it's not used - it will be closed shortly. Stderr // is only created if it we're not -t. var stdout, stderr io.ReadCloser var pid uint32 iopipe := &IOPipe{Terminal: procToAdd.Terminal} pid, iopipe.Stdin, stdout, stderr, err = hcsshim.CreateProcessInComputeSystem( containerID, true, true, !procToAdd.Terminal, createProcessParms) if err != nil { logrus.Errorf("AddProcess %s CreateProcessInComputeSystem() failed %s", containerID, err) return err } // Convert io.ReadClosers to io.Readers if stdout != nil { iopipe.Stdout = openReaderFromPipe(stdout) } if stderr != nil { iopipe.Stderr = openReaderFromPipe(stderr) } // Add the process to the containers list of processes container.processes[processFriendlyName] = &process{ processCommon: processCommon{ containerID: containerID, friendlyName: processFriendlyName, client: clnt, systemPid: pid, }, commandLine: createProcessParms.CommandLine, } // Make sure the lock is not held while calling back into the daemon clnt.unlock(containerID) // Tell the engine to attach streams back to the client if err := clnt.backend.AttachStreams(processFriendlyName, *iopipe); err != nil { return err } // Lock again so that the defer unlock doesn't fail. (I really don't like this code) clnt.lock(containerID) // Spin up a go routine waiting for exit to handle cleanup go container.waitExit(pid, processFriendlyName, false) return nil } // Signal handles `docker stop` on Windows. While Linux has support for // the full range of signals, signals aren't really implemented on Windows. // We fake supporting regular stop and -9 to force kill. func (clnt *client) Signal(containerID string, sig int) error { var ( cont *container err error ) // Get the container as we need it to find the pid of the process. clnt.lock(containerID) defer clnt.unlock(containerID) if cont, err = clnt.getContainer(containerID); err != nil { return err } logrus.Debugf("lcd: Signal() containerID=%s sig=%d pid=%d", containerID, sig, cont.systemPid) context := fmt.Sprintf("Signal: sig=%d pid=%d", sig, cont.systemPid) if syscall.Signal(sig) == syscall.SIGKILL { // Terminate the compute system if err := hcsshim.TerminateComputeSystem(containerID, hcsshim.TimeoutInfinite, context); err != nil { logrus.Errorf("Failed to terminate %s - %q", containerID, err) } } else { // Terminate Process if err = hcsshim.TerminateProcessInComputeSystem(containerID, cont.systemPid); err != nil { logrus.Warnf("Failed to terminate pid %d in %s: %q", cont.systemPid, containerID, err) // Ignore errors err = nil } // Shutdown the compute system if err := hcsshim.ShutdownComputeSystem(containerID, hcsshim.TimeoutInfinite, context); err != nil { logrus.Errorf("Failed to shutdown %s - %q", containerID, err) } } return nil } // Resize handles a CLI event to resize an interactive docker run or docker exec // window. func (clnt *client) Resize(containerID, processFriendlyName string, width, height int) error { // Get the libcontainerd container object clnt.lock(containerID) defer clnt.unlock(containerID) cont, err := clnt.getContainer(containerID) if err != nil { return err } if processFriendlyName == InitFriendlyName { logrus.Debugln("Resizing systemPID in", containerID, cont.process.systemPid) return hcsshim.ResizeConsoleInComputeSystem(containerID, cont.process.systemPid, height, width) } for _, p := range cont.processes { if p.friendlyName == processFriendlyName { logrus.Debugln("Resizing exec'd process", containerID, p.systemPid) return hcsshim.ResizeConsoleInComputeSystem(containerID, p.systemPid, height, width) } } return fmt.Errorf("Resize could not find containerID %s to resize", containerID) } // Pause handles pause requests for containers func (clnt *client) Pause(containerID string) error { return errors.New("Windows: Containers cannot be paused") } // Resume handles resume requests for containers func (clnt *client) Resume(containerID string) error { return errors.New("Windows: Containers cannot be paused") } // Stats handles stats requests for containers func (clnt *client) Stats(containerID string) (*Stats, error) { return nil, errors.New("Windows: Stats not implemented") } // Restore is the handler for restoring a container func (clnt *client) Restore(containerID string, unusedOnWindows ...CreateOption) error { // TODO Windows: Implement this. For now, just tell the backend the container exited. logrus.Debugf("lcd Restore %s", containerID) return clnt.backend.StateChanged(containerID, StateInfo{ State: StateExit, ExitCode: 1 << 31, }) } // GetPidsForContainer returns a list of process IDs running in a container. // Although implemented, this is not used in Windows. func (clnt *client) GetPidsForContainer(containerID string) ([]int, error) { var pids []int clnt.lock(containerID) defer clnt.unlock(containerID) cont, err := clnt.getContainer(containerID) if err != nil { return nil, err } // Add the first process pids = append(pids, int(cont.containerCommon.systemPid)) // And add all the exec'd processes for _, p := range cont.processes { pids = append(pids, int(p.processCommon.systemPid)) } return pids, nil } // Summary returns a summary of the processes running in a container. // This is present in Windows to support docker top. In linux, the // engine shells out to ps to get process information. On Windows, as // the containers could be Hyper-V containers, they would not be // visible on the container host. However, libcontainerd does have // that information. func (clnt *client) Summary(containerID string) ([]Summary, error) { var s []Summary clnt.lock(containerID) defer clnt.unlock(containerID) cont, err := clnt.getContainer(containerID) if err != nil { return nil, err } // Add the first process s = append(s, Summary{ Pid: cont.containerCommon.systemPid, Command: cont.ociSpec.Process.Args[0]}) // And add all the exec'd processes for _, p := range cont.processes { s = append(s, Summary{ Pid: p.processCommon.systemPid, Command: p.commandLine}) } return s, nil } // UpdateResources updates resources for a running container. func (clnt *client) UpdateResources(containerID string, resources Resources) error { // Updating resource isn't supported on Windows // but we should return nil for enabling updating container return nil }