mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
52237787fa
Signed-off-by: John Howard <jhoward@microsoft.com>
567 lines
18 KiB
Go
567 lines
18 KiB
Go
package libcontainerd
|
|
|
|
import (
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"syscall"
|
|
"time"
|
|
|
|
"github.com/Microsoft/hcsshim"
|
|
"github.com/Sirupsen/logrus"
|
|
)
|
|
|
|
type client struct {
|
|
clientCommon
|
|
|
|
// Platform specific properties below here (none presently on Windows)
|
|
}
|
|
|
|
// defaultContainerNAT names the container NAT device that is preconfigured
// on the server.
//
// TODO Windows - Remove for TP5 support as not needed.
const defaultContainerNAT = "ContainerNAT"
|
|
|
|
// Win32 error codes consulted by the workarounds in this file.
//
// Both Go's syscall package and the Win32 convention would spell these in
// ALL_CAPS, but golint insists on CamelCase.
const (
	// CoEClassstring: invalid class string.
	CoEClassstring = syscall.Errno(0x800401F3)
	// ErrorNoNetwork: the network is not present or not started.
	ErrorNoNetwork = syscall.Errno(1222)
	// ErrorBadPathname: the specified path is invalid.
	ErrorBadPathname = syscall.Errno(161)
	// ErrorInvalidObject: the object identifier does not represent a valid object.
	ErrorInvalidObject = syscall.Errno(0x800710D8)
)
|
|
|
|
// layer describes one storage layer in the configuration sent to HCS.
type layer struct {
	// ID is the layer identifier; Create fills it with the string form of
	// the GUID derived from the layer folder name.
	ID string
	// Path is the layer path as given in the spec.
	Path string
}
|
|
|
|
// defConfig holds a single DefFile string.
// NOTE(review): not referenced in the visible part of this file; confirm
// whether it is still needed.
type defConfig struct {
	DefFile string
}
|
|
|
|
// portBinding is a single NAT port mapping in the form serialized for HCS.
type portBinding struct {
	// Protocol is the upper-cased protocol name ("TCP" or "UDP").
	Protocol string
	// InternalPort is the port inside the container.
	InternalPort int
	// ExternalPort is the port on the host.
	ExternalPort int
}
|
|
|
|
type natSettings struct {
|
|
Name string
|
|
PortBindings []portBinding
|
|
}
|
|
|
|
type networkConnection struct {
|
|
NetworkName string
|
|
Nat natSettings
|
|
}
|
|
// networkSettings carries optional per-device network settings.
type networkSettings struct {
	// MacAddress is the MAC address; Create stores it with '-' separators
	// in place of ':'.
	MacAddress string
}
|
|
|
|
// device is a generic device entry in the container configuration. The
// concrete types stored in Connection and Settings depend on DeviceType.
type device struct {
	// DeviceType names the kind of device (Create uses "Network").
	DeviceType string
	// Connection holds the device-specific connection description.
	Connection interface{}
	// Settings holds optional device-specific settings.
	Settings interface{}
}
|
|
|
|
// mappedDir describes one host directory made visible inside the container.
type mappedDir struct {
	// HostPath is the source path of the mount.
	HostPath string
	// ContainerPath is the destination path of the mount.
	ContainerPath string
	// ReadOnly mirrors the read-only flag of the mount.
	ReadOnly bool
}
|
|
|
|
// TODO Windows: @darrenstahlmsft Add ProcessorCount
|
|
type containerInit struct {
|
|
SystemType string // HCS requires this to be hard-coded to "Container"
|
|
Name string // Name of the container. We use the docker ID.
|
|
Owner string // The management platform that created this container
|
|
IsDummy bool // Used for development purposes.
|
|
VolumePath string // Windows volume path for scratch space
|
|
Devices []device // Devices used by the container
|
|
IgnoreFlushesDuringBoot bool // Optimization hint for container startup in Windows
|
|
LayerFolderPath string // Where the layer folders are located
|
|
Layers []layer // List of storage layers
|
|
ProcessorWeight uint64 `json:",omitempty"` // CPU Shares 0..10000 on Windows; where 0 will be omitted and HCS will default.
|
|
ProcessorMaximum int64 `json:",omitempty"` // CPU maximum usage percent 1..100
|
|
StorageIOPSMaximum uint64 `json:",omitempty"` // Maximum Storage IOPS
|
|
StorageBandwidthMaximum uint64 `json:",omitempty"` // Maximum Storage Bandwidth in bytes per second
|
|
StorageSandboxSize uint64 `json:",omitempty"` // Size in bytes that the container system drive should be expanded to if smaller
|
|
MemoryMaximumInMB int64 `json:",omitempty"` // Maximum memory available to the container in Megabytes
|
|
HostName string // Hostname
|
|
MappedDirectories []mappedDir // List of mapped directories (volumes/mounts)
|
|
SandboxPath string // Location of unmounted sandbox (used for Hyper-V containers)
|
|
HvPartition bool // True if it a Hyper-V Container
|
|
EndpointList []string // List of networking endpoints to be attached to container
|
|
}
|
|
|
|
// defaultOwner is the owner tag passed to HCS so that it can tell apart the
// management stacks that create containers. For docker it is hard-coded to
// "docker".
const defaultOwner = "docker"
|
|
|
|
// Create is the entrypoint to create a container from a spec, and if successfully
|
|
// created, start it too.
|
|
func (clnt *client) Create(containerID string, spec Spec, options ...CreateOption) error {
|
|
logrus.Debugln("LCD client.Create() with spec", spec)
|
|
|
|
cu := &containerInit{
|
|
SystemType: "Container",
|
|
Name: containerID,
|
|
Owner: defaultOwner,
|
|
|
|
VolumePath: spec.Root.Path,
|
|
IgnoreFlushesDuringBoot: spec.Windows.FirstStart,
|
|
LayerFolderPath: spec.Windows.LayerFolder,
|
|
HostName: spec.Hostname,
|
|
}
|
|
|
|
if spec.Windows.Networking != nil {
|
|
cu.EndpointList = spec.Windows.Networking.EndpointList
|
|
}
|
|
|
|
if spec.Windows.Resources != nil {
|
|
if spec.Windows.Resources.CPU != nil {
|
|
if spec.Windows.Resources.CPU.Shares != nil {
|
|
cu.ProcessorWeight = *spec.Windows.Resources.CPU.Shares
|
|
}
|
|
if spec.Windows.Resources.CPU.Percent != nil {
|
|
cu.ProcessorMaximum = *spec.Windows.Resources.CPU.Percent * 100 // ProcessorMaximum is a value between 1 and 10000
|
|
}
|
|
}
|
|
if spec.Windows.Resources.Memory != nil {
|
|
if spec.Windows.Resources.Memory.Limit != nil {
|
|
cu.MemoryMaximumInMB = *spec.Windows.Resources.Memory.Limit / 1024 / 1024
|
|
}
|
|
}
|
|
if spec.Windows.Resources.Storage != nil {
|
|
if spec.Windows.Resources.Storage.Bps != nil {
|
|
cu.StorageBandwidthMaximum = *spec.Windows.Resources.Storage.Bps
|
|
}
|
|
if spec.Windows.Resources.Storage.Iops != nil {
|
|
cu.StorageIOPSMaximum = *spec.Windows.Resources.Storage.Iops
|
|
}
|
|
if spec.Windows.Resources.Storage.SandboxSize != nil {
|
|
cu.StorageSandboxSize = *spec.Windows.Resources.Storage.SandboxSize
|
|
}
|
|
}
|
|
}
|
|
|
|
cu.HvPartition = (spec.Windows.HvRuntime != nil)
|
|
|
|
// TODO Windows @jhowardmsft. FIXME post TP5.
|
|
// if spec.Windows.HvRuntime != nil {
|
|
// if spec.WIndows.HVRuntime.ImagePath != "" {
|
|
// cu.TBD = spec.Windows.HvRuntime.ImagePath
|
|
// }
|
|
// }
|
|
|
|
if cu.HvPartition {
|
|
cu.SandboxPath = filepath.Dir(spec.Windows.LayerFolder)
|
|
} else {
|
|
cu.VolumePath = spec.Root.Path
|
|
cu.LayerFolderPath = spec.Windows.LayerFolder
|
|
}
|
|
|
|
for _, layerPath := range spec.Windows.LayerPaths {
|
|
_, filename := filepath.Split(layerPath)
|
|
g, err := hcsshim.NameToGuid(filename)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
cu.Layers = append(cu.Layers, layer{
|
|
ID: g.ToString(),
|
|
Path: layerPath,
|
|
})
|
|
}
|
|
|
|
// Add the mounts (volumes, bind mounts etc) to the structure
|
|
mds := make([]mappedDir, len(spec.Mounts))
|
|
for i, mount := range spec.Mounts {
|
|
mds[i] = mappedDir{
|
|
HostPath: mount.Source,
|
|
ContainerPath: mount.Destination,
|
|
ReadOnly: mount.Readonly}
|
|
}
|
|
cu.MappedDirectories = mds
|
|
|
|
// TODO Windows: vv START OF TP4 BLOCK OF CODE. REMOVE ONCE TP4 IS NO LONGER SUPPORTED
|
|
if hcsshim.IsTP4() &&
|
|
spec.Windows.Networking != nil &&
|
|
spec.Windows.Networking.Bridge != "" {
|
|
// Enumerate through the port bindings specified by the user and convert
|
|
// them into the internal structure matching the JSON blob that can be
|
|
// understood by the HCS.
|
|
var pbs []portBinding
|
|
for i, v := range spec.Windows.Networking.PortBindings {
|
|
proto := strings.ToUpper(i.Proto())
|
|
if proto != "TCP" && proto != "UDP" {
|
|
return fmt.Errorf("invalid protocol %s", i.Proto())
|
|
}
|
|
|
|
if len(v) > 1 {
|
|
return fmt.Errorf("Windows does not support more than one host port in NAT settings")
|
|
}
|
|
|
|
for _, v2 := range v {
|
|
var (
|
|
iPort, ePort int
|
|
err error
|
|
)
|
|
if len(v2.HostIP) != 0 {
|
|
return fmt.Errorf("Windows does not support host IP addresses in NAT settings")
|
|
}
|
|
if ePort, err = strconv.Atoi(v2.HostPort); err != nil {
|
|
return fmt.Errorf("invalid container port %s: %s", v2.HostPort, err)
|
|
}
|
|
if iPort, err = strconv.Atoi(i.Port()); err != nil {
|
|
return fmt.Errorf("invalid internal port %s: %s", i.Port(), err)
|
|
}
|
|
if iPort < 0 || iPort > 65535 || ePort < 0 || ePort > 65535 {
|
|
return fmt.Errorf("specified NAT port is not in allowed range")
|
|
}
|
|
pbs = append(pbs,
|
|
portBinding{ExternalPort: ePort,
|
|
InternalPort: iPort,
|
|
Protocol: proto})
|
|
}
|
|
}
|
|
|
|
dev := device{
|
|
DeviceType: "Network",
|
|
Connection: &networkConnection{
|
|
NetworkName: spec.Windows.Networking.Bridge,
|
|
Nat: natSettings{
|
|
Name: defaultContainerNAT,
|
|
PortBindings: pbs,
|
|
},
|
|
},
|
|
}
|
|
|
|
if spec.Windows.Networking.MacAddress != "" {
|
|
windowsStyleMAC := strings.Replace(
|
|
spec.Windows.Networking.MacAddress, ":", "-", -1)
|
|
dev.Settings = networkSettings{
|
|
MacAddress: windowsStyleMAC,
|
|
}
|
|
}
|
|
cu.Devices = append(cu.Devices, dev)
|
|
} else {
|
|
logrus.Debugln("No network interface")
|
|
}
|
|
// TODO Windows: ^^ END OF TP4 BLOCK OF CODE. REMOVE ONCE TP4 IS NO LONGER SUPPORTED
|
|
|
|
configurationb, err := json.Marshal(cu)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
configuration := string(configurationb)
|
|
|
|
// TODO Windows TP5 timeframe. Remove when TP4 is no longer supported.
|
|
// The following a workaround for Windows TP4 which has a networking
|
|
// bug which fairly frequently returns an error. Back off and retry.
|
|
if !hcsshim.IsTP4() {
|
|
if err := hcsshim.CreateComputeSystem(containerID, configuration); err != nil {
|
|
return err
|
|
}
|
|
} else {
|
|
maxAttempts := 5
|
|
for i := 1; i <= maxAttempts; i++ {
|
|
err = hcsshim.CreateComputeSystem(containerID, configuration)
|
|
if err == nil {
|
|
break
|
|
}
|
|
|
|
if herr, ok := err.(*hcsshim.HcsError); ok {
|
|
if herr.Err != syscall.ERROR_NOT_FOUND && // Element not found
|
|
herr.Err != syscall.ERROR_FILE_NOT_FOUND && // The system cannot find the file specified
|
|
herr.Err != ErrorNoNetwork && // The network is not present or not started
|
|
herr.Err != ErrorBadPathname && // The specified path is invalid
|
|
herr.Err != CoEClassstring && // Invalid class string
|
|
herr.Err != ErrorInvalidObject { // The object identifier does not represent a valid object
|
|
logrus.Debugln("Failed to create temporary container ", err)
|
|
return err
|
|
}
|
|
logrus.Warnf("Invoking Windows TP4 retry hack (%d of %d)", i, maxAttempts-1)
|
|
time.Sleep(50 * time.Millisecond)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Construct a container object for calling start on it.
|
|
container := &container{
|
|
containerCommon: containerCommon{
|
|
process: process{
|
|
processCommon: processCommon{
|
|
containerID: containerID,
|
|
client: clnt,
|
|
friendlyName: InitFriendlyName,
|
|
},
|
|
commandLine: strings.Join(spec.Process.Args, " "),
|
|
},
|
|
processes: make(map[string]*process),
|
|
},
|
|
ociSpec: spec,
|
|
}
|
|
|
|
container.options = options
|
|
for _, option := range options {
|
|
if err := option.Apply(container); err != nil {
|
|
logrus.Error(err)
|
|
}
|
|
}
|
|
|
|
// Call start, and if it fails, delete the container from our
|
|
// internal structure, and also keep HCS in sync by deleting the
|
|
// container there.
|
|
logrus.Debugf("Create() id=%s, Calling start()", containerID)
|
|
if err := container.start(); err != nil {
|
|
clnt.deleteContainer(containerID)
|
|
return err
|
|
}
|
|
|
|
logrus.Debugf("Create() id=%s completed successfully", containerID)
|
|
return nil
|
|
|
|
}
|
|
|
|
// AddProcess is the handler for adding a process to an already running
|
|
// container. It's called through docker exec.
|
|
func (clnt *client) AddProcess(containerID, processFriendlyName string, procToAdd Process) error {
|
|
|
|
clnt.lock(containerID)
|
|
defer clnt.unlock(containerID)
|
|
container, err := clnt.getContainer(containerID)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
createProcessParms := hcsshim.CreateProcessParams{
|
|
EmulateConsole: procToAdd.Terminal,
|
|
ConsoleSize: procToAdd.InitialConsoleSize,
|
|
}
|
|
|
|
// Take working directory from the process to add if it is defined,
|
|
// otherwise take from the first process.
|
|
if procToAdd.Cwd != "" {
|
|
createProcessParms.WorkingDirectory = procToAdd.Cwd
|
|
} else {
|
|
createProcessParms.WorkingDirectory = container.ociSpec.Process.Cwd
|
|
}
|
|
|
|
// Configure the environment for the process
|
|
createProcessParms.Environment = setupEnvironmentVariables(procToAdd.Env)
|
|
createProcessParms.CommandLine = strings.Join(procToAdd.Args, " ")
|
|
|
|
logrus.Debugf("commandLine: %s", createProcessParms.CommandLine)
|
|
|
|
// Start the command running in the container. Note we always tell HCS to
|
|
// create stdout as it's required regardless of '-i' or '-t' options, so that
|
|
// docker can always grab the output through logs. We also tell HCS to always
|
|
// create stdin, even if it's not used - it will be closed shortly. Stderr
|
|
// is only created if it we're not -t.
|
|
var stdout, stderr io.ReadCloser
|
|
var pid uint32
|
|
iopipe := &IOPipe{Terminal: procToAdd.Terminal}
|
|
pid, iopipe.Stdin, stdout, stderr, err = hcsshim.CreateProcessInComputeSystem(
|
|
containerID,
|
|
true,
|
|
true,
|
|
!procToAdd.Terminal,
|
|
createProcessParms)
|
|
if err != nil {
|
|
logrus.Errorf("AddProcess %s CreateProcessInComputeSystem() failed %s", containerID, err)
|
|
return err
|
|
}
|
|
|
|
// Convert io.ReadClosers to io.Readers
|
|
if stdout != nil {
|
|
iopipe.Stdout = openReaderFromPipe(stdout)
|
|
}
|
|
if stderr != nil {
|
|
iopipe.Stderr = openReaderFromPipe(stderr)
|
|
}
|
|
|
|
// Add the process to the containers list of processes
|
|
container.processes[processFriendlyName] =
|
|
&process{
|
|
processCommon: processCommon{
|
|
containerID: containerID,
|
|
friendlyName: processFriendlyName,
|
|
client: clnt,
|
|
systemPid: pid,
|
|
},
|
|
commandLine: createProcessParms.CommandLine,
|
|
}
|
|
|
|
// Make sure the lock is not held while calling back into the daemon
|
|
clnt.unlock(containerID)
|
|
|
|
// Tell the engine to attach streams back to the client
|
|
if err := clnt.backend.AttachStreams(processFriendlyName, *iopipe); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Lock again so that the defer unlock doesn't fail. (I really don't like this code)
|
|
clnt.lock(containerID)
|
|
|
|
// Spin up a go routine waiting for exit to handle cleanup
|
|
go container.waitExit(pid, processFriendlyName, false)
|
|
|
|
return nil
|
|
}
|
|
|
|
// Signal handles `docker stop` on Windows. While Linux has support for
|
|
// the full range of signals, signals aren't really implemented on Windows.
|
|
// We fake supporting regular stop and -9 to force kill.
|
|
func (clnt *client) Signal(containerID string, sig int) error {
|
|
var (
|
|
cont *container
|
|
err error
|
|
)
|
|
|
|
// Get the container as we need it to find the pid of the process.
|
|
clnt.lock(containerID)
|
|
defer clnt.unlock(containerID)
|
|
if cont, err = clnt.getContainer(containerID); err != nil {
|
|
return err
|
|
}
|
|
|
|
logrus.Debugf("lcd: Signal() containerID=%s sig=%d pid=%d", containerID, sig, cont.systemPid)
|
|
context := fmt.Sprintf("Signal: sig=%d pid=%d", sig, cont.systemPid)
|
|
|
|
if syscall.Signal(sig) == syscall.SIGKILL {
|
|
// Terminate the compute system
|
|
if err := hcsshim.TerminateComputeSystem(containerID, hcsshim.TimeoutInfinite, context); err != nil {
|
|
logrus.Errorf("Failed to terminate %s - %q", containerID, err)
|
|
}
|
|
|
|
} else {
|
|
// Terminate Process
|
|
if err = hcsshim.TerminateProcessInComputeSystem(containerID, cont.systemPid); err != nil {
|
|
logrus.Warnf("Failed to terminate pid %d in %s: %q", cont.systemPid, containerID, err)
|
|
// Ignore errors
|
|
err = nil
|
|
}
|
|
|
|
// Shutdown the compute system
|
|
if err := hcsshim.ShutdownComputeSystem(containerID, hcsshim.TimeoutInfinite, context); err != nil {
|
|
logrus.Errorf("Failed to shutdown %s - %q", containerID, err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Resize handles a CLI event to resize an interactive docker run or docker exec
|
|
// window.
|
|
func (clnt *client) Resize(containerID, processFriendlyName string, width, height int) error {
|
|
// Get the libcontainerd container object
|
|
clnt.lock(containerID)
|
|
defer clnt.unlock(containerID)
|
|
cont, err := clnt.getContainer(containerID)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if processFriendlyName == InitFriendlyName {
|
|
logrus.Debugln("Resizing systemPID in", containerID, cont.process.systemPid)
|
|
return hcsshim.ResizeConsoleInComputeSystem(containerID, cont.process.systemPid, height, width)
|
|
}
|
|
|
|
for _, p := range cont.processes {
|
|
if p.friendlyName == processFriendlyName {
|
|
logrus.Debugln("Resizing exec'd process", containerID, p.systemPid)
|
|
return hcsshim.ResizeConsoleInComputeSystem(containerID, p.systemPid, height, width)
|
|
}
|
|
}
|
|
|
|
return fmt.Errorf("Resize could not find containerID %s to resize", containerID)
|
|
|
|
}
|
|
|
|
// Pause handles pause requests for containers
|
|
func (clnt *client) Pause(containerID string) error {
|
|
return errors.New("Windows: Containers cannot be paused")
|
|
}
|
|
|
|
// Resume handles resume requests for containers
|
|
func (clnt *client) Resume(containerID string) error {
|
|
return errors.New("Windows: Containers cannot be paused")
|
|
}
|
|
|
|
// Stats handles stats requests for containers
|
|
func (clnt *client) Stats(containerID string) (*Stats, error) {
|
|
return nil, errors.New("Windows: Stats not implemented")
|
|
}
|
|
|
|
// Restore is the handler for restoring a container
|
|
func (clnt *client) Restore(containerID string, unusedOnWindows ...CreateOption) error {
|
|
// TODO Windows: Implement this. For now, just tell the backend the container exited.
|
|
logrus.Debugf("lcd Restore %s", containerID)
|
|
return clnt.backend.StateChanged(containerID, StateInfo{
|
|
State: StateExit,
|
|
ExitCode: 1 << 31,
|
|
})
|
|
}
|
|
|
|
// GetPidsForContainer returns a list of process IDs running in a container.
|
|
// Although implemented, this is not used in Windows.
|
|
func (clnt *client) GetPidsForContainer(containerID string) ([]int, error) {
|
|
var pids []int
|
|
clnt.lock(containerID)
|
|
defer clnt.unlock(containerID)
|
|
cont, err := clnt.getContainer(containerID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Add the first process
|
|
pids = append(pids, int(cont.containerCommon.systemPid))
|
|
// And add all the exec'd processes
|
|
for _, p := range cont.processes {
|
|
pids = append(pids, int(p.processCommon.systemPid))
|
|
}
|
|
return pids, nil
|
|
}
|
|
|
|
// Summary returns a summary of the processes running in a container.
|
|
// This is present in Windows to support docker top. In linux, the
|
|
// engine shells out to ps to get process information. On Windows, as
|
|
// the containers could be Hyper-V containers, they would not be
|
|
// visible on the container host. However, libcontainerd does have
|
|
// that information.
|
|
func (clnt *client) Summary(containerID string) ([]Summary, error) {
|
|
var s []Summary
|
|
clnt.lock(containerID)
|
|
defer clnt.unlock(containerID)
|
|
cont, err := clnt.getContainer(containerID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Add the first process
|
|
s = append(s, Summary{
|
|
Pid: cont.containerCommon.systemPid,
|
|
Command: cont.ociSpec.Process.Args[0]})
|
|
// And add all the exec'd processes
|
|
for _, p := range cont.processes {
|
|
s = append(s, Summary{
|
|
Pid: p.processCommon.systemPid,
|
|
Command: p.commandLine})
|
|
}
|
|
return s, nil
|
|
|
|
}
|
|
|
|
// UpdateResources updates resources for a running container.
|
|
func (clnt *client) UpdateResources(containerID string, resources Resources) error {
|
|
// Updating resource isn't supported on Windows
|
|
// but we should return nil for enabling updating container
|
|
return nil
|
|
}
|