diff --git a/Dockerfile b/Dockerfile index 1bcbca9924..aba9c9eda4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -249,6 +249,24 @@ RUN set -x \ && go build -v -o /usr/local/bin/rsrc github.com/akavel/rsrc \ && rm -rf "$GOPATH" +# Install runc +ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6 +RUN set -x \ + && export GOPATH="$(mktemp -d)" \ + && git clone git://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \ + && cd "$GOPATH/src/github.com/opencontainers/runc" \ + && git checkout -q "$RUNC_COMMIT" \ + && make BUILDTAGS="seccomp apparmor selinux" && make install + +# Install containerd +ENV CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091 +RUN set -x \ + && export GOPATH="$(mktemp -d)" \ + && git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \ + && cd "$GOPATH/src/github.com/docker/containerd" \ + && git checkout -q "$CONTAINERD_COMMIT" \ + && make && make install + # Wrap all commands in the "docker-in-docker" script to allow nested containers ENTRYPOINT ["hack/dind"] diff --git a/Dockerfile.aarch64 b/Dockerfile.aarch64 index f7a43da939..88cd6c2a69 100644 --- a/Dockerfile.aarch64 +++ b/Dockerfile.aarch64 @@ -186,6 +186,24 @@ RUN set -x \ && go build -v -o /usr/local/bin/tomlv github.com/BurntSushi/toml/cmd/tomlv \ && rm -rf "$GOPATH" +# Install runc +ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6 +RUN set -x \ + && export GOPATH="$(mktemp -d)" \ + && git clone git://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \ + && cd "$GOPATH/src/github.com/opencontainers/runc" \ + && git checkout -q "$RUNC_COMMIT" \ + && make BUILDTAGS="seccomp apparmor selinux" && make install + +# Install containerd +ENV CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091 +RUN set -x \ + && export GOPATH="$(mktemp -d)" \ + && git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \ + && cd 
"$GOPATH/src/github.com/docker/containerd" \ + && git checkout -q "$CONTAINERD_COMMIT" \ + && make && make install + # Wrap all commands in the "docker-in-docker" script to allow nested containers ENTRYPOINT ["hack/dind"] diff --git a/Dockerfile.armhf b/Dockerfile.armhf index f5b3420894..97dd14fba8 100644 --- a/Dockerfile.armhf +++ b/Dockerfile.armhf @@ -205,6 +205,24 @@ RUN set -x \ && go build -v -o /usr/local/bin/rsrc github.com/akavel/rsrc \ && rm -rf "$GOPATH" +# Install runc +ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6 +RUN set -x \ + && export GOPATH="$(mktemp -d)" \ + && git clone git://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \ + && cd "$GOPATH/src/github.com/opencontainers/runc" \ + && git checkout -q "$RUNC_COMMIT" \ + && make BUILDTAGS="seccomp apparmor selinux" && make install + +# Install containerd +ENV CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091 +RUN set -x \ + && export GOPATH="$(mktemp -d)" \ + && git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \ + && cd "$GOPATH/src/github.com/docker/containerd" \ + && git checkout -q "$CONTAINERD_COMMIT" \ + && make && make install + # Wrap all commands in the "docker-in-docker" script to allow nested containers ENTRYPOINT ["hack/dind"] diff --git a/Dockerfile.gccgo b/Dockerfile.gccgo index 85427b7a1e..337a22da07 100644 --- a/Dockerfile.gccgo +++ b/Dockerfile.gccgo @@ -73,6 +73,24 @@ VOLUME /var/lib/docker WORKDIR /go/src/github.com/docker/docker ENV DOCKER_BUILDTAGS apparmor seccomp selinux +# Install runc +ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6 +RUN set -x \ + && export GOPATH="$(mktemp -d)" \ + && git clone git://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \ + && cd "$GOPATH/src/github.com/opencontainers/runc" \ + && git checkout -q "$RUNC_COMMIT" \ + && make BUILDTAGS="seccomp apparmor selinux" && make install + +# Install containerd +ENV 
CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091 +RUN set -x \ + && export GOPATH="$(mktemp -d)" \ + && git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \ + && cd "$GOPATH/src/github.com/docker/containerd" \ + && git checkout -q "$CONTAINERD_COMMIT" \ + && make && make install + # Wrap all commands in the "docker-in-docker" script to allow nested containers ENTRYPOINT ["hack/dind"] diff --git a/Dockerfile.ppc64le b/Dockerfile.ppc64le index 7b202ce568..3abf527bd9 100644 --- a/Dockerfile.ppc64le +++ b/Dockerfile.ppc64le @@ -197,6 +197,24 @@ RUN set -x \ && go build -v -o /usr/local/bin/rsrc github.com/akavel/rsrc \ && rm -rf "$GOPATH" +# Install runc +ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6 +RUN set -x \ + && export GOPATH="$(mktemp -d)" \ + && git clone git://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \ + && cd "$GOPATH/src/github.com/opencontainers/runc" \ + && git checkout -q "$RUNC_COMMIT" \ + && make BUILDTAGS="seccomp apparmor selinux" && make install + +# Install containerd +ENV CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091 +RUN set -x \ + && export GOPATH="$(mktemp -d)" \ + && git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \ + && cd "$GOPATH/src/github.com/docker/containerd" \ + && git checkout -q "$CONTAINERD_COMMIT" \ + && make && make install + # Wrap all commands in the "docker-in-docker" script to allow nested containers ENTRYPOINT ["hack/dind"] diff --git a/Dockerfile.s390x b/Dockerfile.s390x index 03db594f15..460a235345 100644 --- a/Dockerfile.s390x +++ b/Dockerfile.s390x @@ -176,6 +176,24 @@ RUN set -x \ && go build -v -o /usr/local/bin/rsrc github.com/akavel/rsrc \ && rm -rf "$GOPATH" +# Install runc +ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6 +RUN set -x \ + && export GOPATH="$(mktemp -d)" \ + && git clone git://github.com/opencontainers/runc.git 
"$GOPATH/src/github.com/opencontainers/runc" \ + && cd "$GOPATH/src/github.com/opencontainers/runc" \ + && git checkout -q "$RUNC_COMMIT" \ + && make BUILDTAGS="seccomp apparmor selinux" && make install + +# Install containerd +ENV CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091 +RUN set -x \ + && export GOPATH="$(mktemp -d)" \ + && git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \ + && cd "$GOPATH/src/github.com/docker/containerd" \ + && git checkout -q "$CONTAINERD_COMMIT" \ + && make && make install + # Wrap all commands in the "docker-in-docker" script to allow nested containers ENTRYPOINT ["hack/dind"] diff --git a/Dockerfile.simple b/Dockerfile.simple index 427a0717fb..67fbdbb1ed 100644 --- a/Dockerfile.simple +++ b/Dockerfile.simple @@ -29,6 +29,24 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ aufs-tools \ && rm -rf /var/lib/apt/lists/* +# Install runc +ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6 +RUN set -x \ + && export GOPATH="$(mktemp -d)" \ + && git clone git://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \ + && cd "$GOPATH/src/github.com/opencontainers/runc" \ + && git checkout -q "$RUNC_COMMIT" \ + && make BUILDTAGS="seccomp apparmor selinux" && make install + +# Install containerd +ENV CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091 +RUN set -x \ + && export GOPATH="$(mktemp -d)" \ + && git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \ + && cd "$GOPATH/src/github.com/docker/containerd" \ + && git checkout -q "$CONTAINERD_COMMIT" \ + && make && make install + ENV AUTO_GOPATH 1 WORKDIR /usr/src/docker COPY . 
/usr/src/docker diff --git a/api/client/run.go b/api/client/run.go index c75441dcc2..503cfdcd48 100644 --- a/api/client/run.go +++ b/api/client/run.go @@ -14,7 +14,6 @@ import ( "github.com/docker/docker/opts" "github.com/docker/docker/pkg/promise" "github.com/docker/docker/pkg/signal" - "github.com/docker/docker/pkg/stringid" runconfigopts "github.com/docker/docker/runconfig/opts" "github.com/docker/engine-api/types" "github.com/docker/libnetwork/resolvconf/dns" @@ -256,16 +255,6 @@ func (cli *DockerCli) CmdRun(args ...string) error { // Attached mode if *flAutoRemove { - // Warn user if they detached us - js, err := cli.client.ContainerInspect(context.Background(), createResponse.ID) - if err != nil { - return runStartContainerErr(err) - } - if js.State.Running == true || js.State.Paused == true { - fmt.Fprintf(cli.out, "Detached from %s, awaiting its termination in order to uphold \"--rm\".\n", - stringid.TruncateID(createResponse.ID)) - } - // Autoremove: wait for the container to finish, retrieve // the exit code and remove the container if status, err = cli.client.ContainerWait(context.Background(), createResponse.ID); err != nil { diff --git a/api/server/router/container/exec.go b/api/server/router/container/exec.go index bc336f6039..ee0d855ce4 100644 --- a/api/server/router/container/exec.go +++ b/api/server/router/container/exec.go @@ -112,7 +112,9 @@ func (s *containerRouter) postContainerExecStart(ctx context.Context, w http.Res if execStartCheck.Detach { return err } + stdout.Write([]byte(err.Error())) logrus.Errorf("Error running exec in container: %v\n", err) + return err } return nil } diff --git a/container/container.go b/container/container.go index 39336ab168..2c407d19de 100644 --- a/container/container.go +++ b/container/container.go @@ -17,7 +17,6 @@ import ( "github.com/Sirupsen/logrus" "github.com/docker/docker/daemon/exec" - "github.com/docker/docker/daemon/execdriver" "github.com/docker/docker/daemon/logger" 
"github.com/docker/docker/daemon/logger/jsonfilelog" "github.com/docker/docker/daemon/network" @@ -27,6 +26,7 @@ import ( "github.com/docker/docker/pkg/promise" "github.com/docker/docker/pkg/signal" "github.com/docker/docker/pkg/symlink" + "github.com/docker/docker/restartmanager" "github.com/docker/docker/runconfig" runconfigopts "github.com/docker/docker/runconfig/opts" "github.com/docker/docker/volume" @@ -74,13 +74,12 @@ type CommonContainer struct { HasBeenManuallyStopped bool // used for unless-stopped restart policy MountPoints map[string]*volume.MountPoint HostConfig *containertypes.HostConfig `json:"-"` // do not serialize the host config in the json, otherwise we'll make the container unportable - Command *execdriver.Command `json:"-"` - monitor *containerMonitor - ExecCommands *exec.Store `json:"-"` + ExecCommands *exec.Store `json:"-"` // logDriver for closing - LogDriver logger.Logger `json:"-"` - LogCopier *logger.Copier `json:"-"` - attachContext *attachContext + LogDriver logger.Logger `json:"-"` + LogCopier *logger.Copier `json:"-"` + restartManager restartmanager.RestartManager + attachContext *attachContext } // NewBaseContainer creates a new container with its @@ -276,19 +275,9 @@ func (container *Container) GetRootResourcePath(path string) (string, error) { // ExitOnNext signals to the monitor that it should not restart the container // after we send the kill signal. func (container *Container) ExitOnNext() { - container.monitor.ExitOnNext() -} - -// Resize changes the TTY of the process running inside the container -// to the given height and width. The container must be running. 
-func (container *Container) Resize(h, w int) error { - if container.Command.ProcessConfig.Terminal == nil { - return fmt.Errorf("Container %s does not have a terminal ready", container.ID) + if container.restartManager != nil { + container.restartManager.Cancel() } - if err := container.Command.ProcessConfig.Terminal.Resize(h, w); err != nil { - return err - } - return nil } // HostConfigPath returns the path to the container's JSON hostconfig @@ -897,19 +886,33 @@ func (container *Container) BuildCreateEndpointOptions(n libnetwork.Network, epC // UpdateMonitor updates monitor configure for running container func (container *Container) UpdateMonitor(restartPolicy containertypes.RestartPolicy) { - monitor := container.monitor - // No need to update monitor if container hasn't got one - // monitor will be generated correctly according to container - if monitor == nil { - return + type policySetter interface { + SetPolicy(containertypes.RestartPolicy) } - monitor.mux.Lock() - // to check whether restart policy has changed. - if restartPolicy.Name != "" && !monitor.restartPolicy.IsSame(&restartPolicy) { - monitor.restartPolicy = restartPolicy + if rm, ok := container.RestartManager(false).(policySetter); ok { + rm.SetPolicy(restartPolicy) } - monitor.mux.Unlock() +} + +// FullHostname returns hostname and optional domain appended to it. +func (container *Container) FullHostname() string { + fullHostname := container.Config.Hostname + if container.Config.Domainname != "" { + fullHostname = fmt.Sprintf("%s.%s", fullHostname, container.Config.Domainname) + } + return fullHostname +} + +// RestartManager returns the current restartmanager instace connected to container. 
+func (container *Container) RestartManager(reset bool) restartmanager.RestartManager { + if reset { + container.RestartCount = 0 + } + if container.restartManager == nil { + container.restartManager = restartmanager.New(container.HostConfig.RestartPolicy) + } + return container.restartManager } type attachContext struct { diff --git a/container/container_unix.go b/container/container_unix.go index 64ff4ee1f7..4f86a45581 100644 --- a/container/container_unix.go +++ b/container/container_unix.go @@ -11,7 +11,6 @@ import ( "syscall" "github.com/Sirupsen/logrus" - "github.com/docker/docker/daemon/execdriver" "github.com/docker/docker/pkg/chrootarchive" "github.com/docker/docker/pkg/symlink" "github.com/docker/docker/pkg/system" @@ -39,6 +38,15 @@ type Container struct { NoNewPrivileges bool } +// ExitStatus provides exit reasons for a container. +type ExitStatus struct { + // The exit code with which the container exited. + ExitCode int + + // Whether the container encountered an OOM. + OOMKilled bool +} + // CreateDaemonEnvironment returns the list of all environment variables given the list of // environment variables related to links. // Sets PATH, HOSTNAME and if container.Config.Tty is set: TERM. @@ -57,7 +65,6 @@ func (container *Container) CreateDaemonEnvironment(linkedEnv []string) []string // we need to replace the 'env' keys where they match and append anything // else. env = utils.ReplaceOrAppendEnvValues(env, container.Config.Env) - return env } @@ -103,8 +110,8 @@ func appendNetworkMounts(container *Container, volumeMounts []volume.MountPoint) } // NetworkMounts returns the list of network mounts. 
-func (container *Container) NetworkMounts() []execdriver.Mount { - var mounts []execdriver.Mount +func (container *Container) NetworkMounts() []Mount { + var mounts []Mount shared := container.HostConfig.NetworkMode.IsContainer() if container.ResolvConfPath != "" { if _, err := os.Stat(container.ResolvConfPath); err != nil { @@ -115,7 +122,7 @@ func (container *Container) NetworkMounts() []execdriver.Mount { if m, exists := container.MountPoints["/etc/resolv.conf"]; exists { writable = m.RW } - mounts = append(mounts, execdriver.Mount{ + mounts = append(mounts, Mount{ Source: container.ResolvConfPath, Destination: "/etc/resolv.conf", Writable: writable, @@ -132,7 +139,7 @@ func (container *Container) NetworkMounts() []execdriver.Mount { if m, exists := container.MountPoints["/etc/hostname"]; exists { writable = m.RW } - mounts = append(mounts, execdriver.Mount{ + mounts = append(mounts, Mount{ Source: container.HostnamePath, Destination: "/etc/hostname", Writable: writable, @@ -149,7 +156,7 @@ func (container *Container) NetworkMounts() []execdriver.Mount { if m, exists := container.MountPoints["/etc/hosts"]; exists { writable = m.RW } - mounts = append(mounts, execdriver.Mount{ + mounts = append(mounts, Mount{ Source: container.HostsPath, Destination: "/etc/hosts", Writable: writable, @@ -224,37 +231,26 @@ func (container *Container) UnmountIpcMounts(unmount func(pth string) error) { } // IpcMounts returns the list of IPC mounts -func (container *Container) IpcMounts() []execdriver.Mount { - var mounts []execdriver.Mount +func (container *Container) IpcMounts() []Mount { + var mounts []Mount if !container.HasMountFor("/dev/shm") { label.SetFileLabel(container.ShmPath, container.MountLabel) - mounts = append(mounts, execdriver.Mount{ + mounts = append(mounts, Mount{ Source: container.ShmPath, Destination: "/dev/shm", Writable: true, Propagation: volume.DefaultPropagationMode, }) } - return mounts -} -func updateCommand(c *execdriver.Command, resources 
containertypes.Resources) { - c.Resources.BlkioWeight = resources.BlkioWeight - c.Resources.CPUShares = resources.CPUShares - c.Resources.CPUPeriod = resources.CPUPeriod - c.Resources.CPUQuota = resources.CPUQuota - c.Resources.CpusetCpus = resources.CpusetCpus - c.Resources.CpusetMems = resources.CpusetMems - c.Resources.Memory = resources.Memory - c.Resources.MemorySwap = resources.MemorySwap - c.Resources.MemoryReservation = resources.MemoryReservation - c.Resources.KernelMemory = resources.KernelMemory + return mounts } // UpdateContainer updates configuration of a container. func (container *Container) UpdateContainer(hostConfig *containertypes.HostConfig) error { container.Lock() + defer container.Unlock() // update resources of container resources := hostConfig.Resources @@ -294,19 +290,8 @@ func (container *Container) UpdateContainer(hostConfig *containertypes.HostConfi if hostConfig.RestartPolicy.Name != "" { container.HostConfig.RestartPolicy = hostConfig.RestartPolicy } - container.Unlock() - // If container is not running, update hostConfig struct is enough, - // resources will be updated when the container is started again. - // If container is running (including paused), we need to update - // the command so we can update configs to the real world. 
- if container.IsRunning() { - container.Lock() - updateCommand(container.Command, *cResources) - container.Unlock() - } - - if err := container.ToDiskLocking(); err != nil { + if err := container.ToDisk(); err != nil { logrus.Errorf("Error saving updated container: %v", err) return err } @@ -400,10 +385,10 @@ func copyOwnership(source, destination string) error { } // TmpfsMounts returns the list of tmpfs mounts -func (container *Container) TmpfsMounts() []execdriver.Mount { - var mounts []execdriver.Mount +func (container *Container) TmpfsMounts() []Mount { + var mounts []Mount for dest, data := range container.HostConfig.Tmpfs { - mounts = append(mounts, execdriver.Mount{ + mounts = append(mounts, Mount{ Source: "tmpfs", Destination: dest, Data: data, diff --git a/container/memory_store.go b/container/memory_store.go index 153242fdb4..30c1f7add7 100644 --- a/container/memory_store.go +++ b/container/memory_store.go @@ -5,7 +5,7 @@ import "sync" // memoryStore implements a Store in memory. type memoryStore struct { s map[string]*Container - sync.Mutex + sync.RWMutex } // NewMemoryStore initializes a new memory store. @@ -25,9 +25,9 @@ func (c *memoryStore) Add(id string, cont *Container) { // Get returns a container from the store by id. func (c *memoryStore) Get(id string) *Container { - c.Lock() + c.RLock() res := c.s[id] - c.Unlock() + c.RUnlock() return res } @@ -42,26 +42,26 @@ func (c *memoryStore) Delete(id string) { // The containers are ordered by creation date. func (c *memoryStore) List() []*Container { containers := new(History) - c.Lock() + c.RLock() for _, cont := range c.s { containers.Add(cont) } - c.Unlock() + c.RUnlock() containers.sort() return *containers } // Size returns the number of containers in the store. func (c *memoryStore) Size() int { - c.Lock() - defer c.Unlock() + c.RLock() + defer c.RUnlock() return len(c.s) } // First returns the first container found in the store by a given filter. 
func (c *memoryStore) First(filter StoreFilter) *Container { - c.Lock() - defer c.Unlock() + c.RLock() + defer c.RUnlock() for _, cont := range c.s { if filter(cont) { return cont @@ -72,9 +72,10 @@ func (c *memoryStore) First(filter StoreFilter) *Container { // ApplyAll calls the reducer function with every container in the store. // This operation is asyncronous in the memory store. +// NOTE: Modifications to the store MUST NOT be done by the StoreReducer. func (c *memoryStore) ApplyAll(apply StoreReducer) { - c.Lock() - defer c.Unlock() + c.RLock() + defer c.RUnlock() wg := new(sync.WaitGroup) for _, cont := range c.s { diff --git a/container/monitor.go b/container/monitor.go index afea01fcc9..ba82d875b7 100644 --- a/container/monitor.go +++ b/container/monitor.go @@ -1,24 +1,13 @@ package container import ( - "fmt" - "io" - "os/exec" - "strings" - "sync" - "syscall" "time" "github.com/Sirupsen/logrus" - "github.com/docker/docker/daemon/execdriver" - "github.com/docker/docker/pkg/promise" - "github.com/docker/docker/pkg/stringid" - "github.com/docker/engine-api/types/container" ) const ( - defaultTimeIncrement = 100 - loggerCloseTimeout = 10 * time.Second + loggerCloseTimeout = 10 * time.Second ) // supervisor defines the interface that a supervisor must implement @@ -30,311 +19,13 @@ type supervisor interface { // StartLogging starts the logging driver for the container StartLogging(*Container) error // Run starts a container - Run(c *Container, pipes *execdriver.Pipes, startCallback execdriver.DriverCallback) (execdriver.ExitStatus, error) + Run(c *Container) error // IsShuttingDown tells whether the supervisor is shutting down or not IsShuttingDown() bool } -// containerMonitor monitors the execution of a container's main process. -// If a restart policy is specified for the container the monitor will ensure that the -// process is restarted based on the rules of the policy. 
When the container is finally stopped -// the monitor will reset and cleanup any of the container resources such as networking allocations -// and the rootfs -type containerMonitor struct { - mux sync.Mutex - - // supervisor keeps track of the container and the events it generates - supervisor supervisor - - // container is the container being monitored - container *Container - - // restartPolicy is the current policy being applied to the container monitor - restartPolicy container.RestartPolicy - - // failureCount is the number of times the container has failed to - // start in a row - failureCount int - - // shouldStop signals the monitor that the next time the container exits it is - // either because docker or the user asked for the container to be stopped - shouldStop bool - - // startSignal is a channel that is closes after the container initially starts - startSignal chan struct{} - - // stopChan is used to signal to the monitor whenever there is a wait for the - // next restart so that the timeIncrement is not honored and the user is not - // left waiting for nothing to happen during this time - stopChan chan struct{} - - // timeIncrement is the amount of time to wait between restarts - // this is in milliseconds - timeIncrement int - - // lastStartTime is the time which the monitor last exec'd the container's process - lastStartTime time.Time -} - -// StartMonitor initializes a containerMonitor for this container with the provided supervisor and restart policy -// and starts the container's process. 
-func (container *Container) StartMonitor(s supervisor) error { - container.monitor = &containerMonitor{ - supervisor: s, - container: container, - restartPolicy: container.HostConfig.RestartPolicy, - timeIncrement: defaultTimeIncrement, - stopChan: make(chan struct{}), - startSignal: make(chan struct{}), - } - - return container.monitor.wait() -} - -// wait starts the container and wait until -// we either receive an error from the initial start of the container's -// process or until the process is running in the container -func (m *containerMonitor) wait() error { - select { - case <-m.startSignal: - case err := <-promise.Go(m.start): - return err - } - - return nil -} - -// Stop signals to the container monitor that it should stop monitoring the container -// for exits the next time the process dies -func (m *containerMonitor) ExitOnNext() { - m.mux.Lock() - - // we need to protect having a double close of the channel when stop is called - // twice or else we will get a panic - if !m.shouldStop { - m.shouldStop = true - close(m.stopChan) - } - - m.mux.Unlock() -} - -// Close closes the container's resources such as networking allocations and -// unmounts the container's root filesystem -func (m *containerMonitor) Close() error { - // Cleanup networking and mounts - m.supervisor.Cleanup(m.container) - - if err := m.container.ToDisk(); err != nil { - logrus.Errorf("Error dumping container %s state to disk: %s", m.container.ID, err) - - return err - } - - return nil -} - -// Start starts the containers process and monitors it according to the restart policy -func (m *containerMonitor) start() error { - var ( - err error - exitStatus execdriver.ExitStatus - // this variable indicates where we in execution flow: - // before Run or after - afterRun bool - ) - - // ensure that when the monitor finally exits we release the networking and unmount the rootfs - defer func() { - if afterRun { - m.container.Lock() - defer m.container.Unlock() - 
m.container.SetStopped(&exitStatus) - } - m.Close() - }() - // reset stopped flag - if m.container.HasBeenManuallyStopped { - m.container.HasBeenManuallyStopped = false - } - - // reset the restart count - m.container.RestartCount = -1 - - for { - m.container.RestartCount++ - - if err := m.supervisor.StartLogging(m.container); err != nil { - m.resetContainer(false) - - return err - } - - pipes := execdriver.NewPipes(m.container.Stdin(), m.container.Stdout(), m.container.Stderr(), m.container.Config.OpenStdin) - - m.logEvent("start") - - m.lastStartTime = time.Now() - - if exitStatus, err = m.supervisor.Run(m.container, pipes, m.callback); err != nil { - // if we receive an internal error from the initial start of a container then lets - // return it instead of entering the restart loop - // set to 127 for container cmd not found/does not exist) - if strings.Contains(err.Error(), "executable file not found") || - strings.Contains(err.Error(), "no such file or directory") || - strings.Contains(err.Error(), "system cannot find the file specified") { - if m.container.RestartCount == 0 { - m.container.ExitCode = 127 - m.resetContainer(false) - return fmt.Errorf("Container command not found or does not exist.") - } - } - // set to 126 for container cmd can't be invoked errors - if strings.Contains(err.Error(), syscall.EACCES.Error()) { - if m.container.RestartCount == 0 { - m.container.ExitCode = 126 - m.resetContainer(false) - return fmt.Errorf("Container command could not be invoked.") - } - } - - if m.container.RestartCount == 0 { - m.container.ExitCode = -1 - m.resetContainer(false) - - return fmt.Errorf("Cannot start container %s: %v", m.container.ID, err) - } - - logrus.Errorf("Error running container: %s", err) - } - - // here container.Lock is already lost - afterRun = true - - m.resetMonitor(err == nil && exitStatus.ExitCode == 0) - - if m.shouldRestart(exitStatus.ExitCode) { - m.container.SetRestartingLocking(&exitStatus) - m.logEvent("die") - 
m.resetContainer(true) - - // sleep with a small time increment between each restart to help avoid issues cased by quickly - // restarting the container because of some types of errors ( networking cut out, etc... ) - m.waitForNextRestart() - - // we need to check this before reentering the loop because the waitForNextRestart could have - // been terminated by a request from a user - if m.shouldStop { - return err - } - continue - } - - m.logEvent("die") - m.resetContainer(true) - return err - } -} - -// resetMonitor resets the stateful fields on the containerMonitor based on the -// previous runs success or failure. Regardless of success, if the container had -// an execution time of more than 10s then reset the timer back to the default -func (m *containerMonitor) resetMonitor(successful bool) { - executionTime := time.Now().Sub(m.lastStartTime).Seconds() - - if executionTime > 10 { - m.timeIncrement = defaultTimeIncrement - } else { - // otherwise we need to increment the amount of time we wait before restarting - // the process. 
We will build up by multiplying the increment by 2 - m.timeIncrement *= 2 - } - - // the container exited successfully so we need to reset the failure counter - if successful { - m.failureCount = 0 - } else { - m.failureCount++ - } -} - -// waitForNextRestart waits with the default time increment to restart the container unless -// a user or docker asks for the container to be stopped -func (m *containerMonitor) waitForNextRestart() { - select { - case <-time.After(time.Duration(m.timeIncrement) * time.Millisecond): - case <-m.stopChan: - } -} - -// shouldRestart checks the restart policy and applies the rules to determine if -// the container's process should be restarted -func (m *containerMonitor) shouldRestart(exitCode int) bool { - m.mux.Lock() - defer m.mux.Unlock() - - // do not restart if the user or docker has requested that this container be stopped - if m.shouldStop { - m.container.HasBeenManuallyStopped = !m.supervisor.IsShuttingDown() - return false - } - - switch { - case m.restartPolicy.IsAlways(), m.restartPolicy.IsUnlessStopped(): - return true - case m.restartPolicy.IsOnFailure(): - // the default value of 0 for MaximumRetryCount means that we will not enforce a maximum count - if max := m.restartPolicy.MaximumRetryCount; max != 0 && m.failureCount > max { - logrus.Debugf("stopping restart of container %s because maximum failure could of %d has been reached", - stringid.TruncateID(m.container.ID), max) - return false - } - - return exitCode != 0 - } - - return false -} - -// callback ensures that the container's state is properly updated after we -// received ack from the execution drivers -func (m *containerMonitor) callback(processConfig *execdriver.ProcessConfig, pid int, chOOM <-chan struct{}) error { - go func() { - for range chOOM { - m.logEvent("oom") - } - }() - - if processConfig.Tty { - // The callback is called after the process start() - // so we are in the parent process. 
In TTY mode, stdin/out/err is the PtySlave - // which we close here. - if c, ok := processConfig.Stdout.(io.Closer); ok { - c.Close() - } - } - - m.container.SetRunning(pid) - - // signal that the process has started - // close channel only if not closed - select { - case <-m.startSignal: - default: - close(m.startSignal) - } - - if err := m.container.ToDiskLocking(); err != nil { - logrus.Errorf("Error saving container to disk: %v", err) - } - return nil -} - -// resetContainer resets the container's IO and ensures that the command is able to be executed again -// by copying the data into a new struct -// if lock is true, then container locked during reset -func (m *containerMonitor) resetContainer(lock bool) { - container := m.container +// Reset puts a container into a state where it can be restarted again. +func (container *Container) Reset(lock bool) { if lock { container.Lock() defer container.Unlock() @@ -344,12 +35,6 @@ func (m *containerMonitor) resetContainer(lock bool) { logrus.Errorf("%s: %s", container.ID, err) } - if container.Command != nil && container.Command.ProcessConfig.Terminal != nil { - if err := container.Command.ProcessConfig.Terminal.Close(); err != nil { - logrus.Errorf("%s: Error closing terminal: %s", container.ID, err) - } - } - // Re-create a brand new stdin pipe once the container exited if container.Config.OpenStdin { container.NewInputPipes() @@ -365,9 +50,6 @@ func (m *containerMonitor) resetContainer(lock bool) { select { case <-time.After(loggerCloseTimeout): logrus.Warnf("Logger didn't exit in time: logs may be truncated") - container.LogCopier.Close() - // always waits for the LogCopier to finished before closing - <-exit case <-exit: } } @@ -375,22 +57,4 @@ func (m *containerMonitor) resetContainer(lock bool) { container.LogCopier = nil container.LogDriver = nil } - - c := container.Command.ProcessConfig.Cmd - - container.Command.ProcessConfig.Cmd = exec.Cmd{ - Stdin: c.Stdin, - Stdout: c.Stdout, - Stderr: c.Stderr, - Path: 
c.Path, - Env: c.Env, - ExtraFiles: c.ExtraFiles, - Args: c.Args, - Dir: c.Dir, - SysProcAttr: c.SysProcAttr, - } -} - -func (m *containerMonitor) logEvent(action string) { - m.supervisor.LogContainerEvent(m.container, action) } diff --git a/container/mounts_unix.go b/container/mounts_unix.go new file mode 100644 index 0000000000..c52abed2dc --- /dev/null +++ b/container/mounts_unix.go @@ -0,0 +1,12 @@ +// +build !windows + +package container + +// Mount contains information for a mount operation. +type Mount struct { + Source string `json:"source"` + Destination string `json:"destination"` + Writable bool `json:"writable"` + Data string `json:"data"` + Propagation string `json:"mountpropagation"` +} diff --git a/container/state.go b/container/state.go index 7173c7632f..a12a193e32 100644 --- a/container/state.go +++ b/container/state.go @@ -5,7 +5,6 @@ import ( "sync" "time" - "github.com/docker/docker/daemon/execdriver" "github.com/docker/go-units" ) @@ -179,28 +178,31 @@ func (s *State) getExitCode() int { } // SetRunning sets the state of the container to "running". -func (s *State) SetRunning(pid int) { +func (s *State) SetRunning(pid int, initial bool) { s.Error = "" s.Running = true s.Paused = false s.Restarting = false s.ExitCode = 0 s.Pid = pid - s.StartedAt = time.Now().UTC() + if initial { + s.StartedAt = time.Now().UTC() + } close(s.waitChan) // fire waiters for start s.waitChan = make(chan struct{}) } // SetStoppedLocking locks the container state is sets it to "stopped". -func (s *State) SetStoppedLocking(exitStatus *execdriver.ExitStatus) { +func (s *State) SetStoppedLocking(exitStatus *ExitStatus) { s.Lock() s.SetStopped(exitStatus) s.Unlock() } // SetStopped sets the container state to "stopped" without locking. 
-func (s *State) SetStopped(exitStatus *execdriver.ExitStatus) { +func (s *State) SetStopped(exitStatus *ExitStatus) { s.Running = false + s.Paused = false s.Restarting = false s.Pid = 0 s.FinishedAt = time.Now().UTC() @@ -211,7 +213,7 @@ func (s *State) SetStopped(exitStatus *execdriver.ExitStatus) { // SetRestartingLocking is when docker handles the auto restart of containers when they are // in the middle of a stop and being restarted again -func (s *State) SetRestartingLocking(exitStatus *execdriver.ExitStatus) { +func (s *State) SetRestartingLocking(exitStatus *ExitStatus) { s.Lock() s.SetRestarting(exitStatus) s.Unlock() @@ -219,7 +221,7 @@ func (s *State) SetRestartingLocking(exitStatus *execdriver.ExitStatus) { // SetRestarting sets the container state to "restarting". // It also sets the container PID to 0. -func (s *State) SetRestarting(exitStatus *execdriver.ExitStatus) { +func (s *State) SetRestarting(exitStatus *ExitStatus) { // we should consider the container running when it is restarting because of // all the checks in docker around rm/stop/etc s.Running = true diff --git a/container/state_test.go b/container/state_test.go index 75028168d4..7b35b17820 100644 --- a/container/state_test.go +++ b/container/state_test.go @@ -4,8 +4,6 @@ import ( "sync/atomic" "testing" "time" - - "github.com/docker/docker/daemon/execdriver" ) func TestStateRunStop(t *testing.T) { @@ -19,7 +17,7 @@ func TestStateRunStop(t *testing.T) { close(started) }() s.Lock() - s.SetRunning(i + 100) + s.SetRunning(i+100, false) s.Unlock() if !s.IsRunning() { @@ -52,7 +50,7 @@ func TestStateRunStop(t *testing.T) { atomic.StoreInt64(&exit, int64(exitCode)) close(stopped) }() - s.SetStoppedLocking(&execdriver.ExitStatus{ExitCode: i}) + s.SetStoppedLocking(&ExitStatus{ExitCode: i}) if s.IsRunning() { t.Fatal("State is running") } @@ -93,7 +91,7 @@ func TestStateTimeoutWait(t *testing.T) { } s.Lock() - s.SetRunning(49) + s.SetRunning(49, false) s.Unlock() stopped := make(chan struct{}) 
diff --git a/container/state_unix.go b/container/state_unix.go index 204b968b24..8d25a23790 100644 --- a/container/state_unix.go +++ b/container/state_unix.go @@ -2,11 +2,9 @@ package container -import "github.com/docker/docker/daemon/execdriver" - // setFromExitStatus is a platform specific helper function to set the state // based on the ExitStatus structure. -func (s *State) setFromExitStatus(exitStatus *execdriver.ExitStatus) { +func (s *State) setFromExitStatus(exitStatus *ExitStatus) { s.ExitCode = exitStatus.ExitCode s.OOMKilled = exitStatus.OOMKilled } diff --git a/daemon/apparmor_default.go b/daemon/apparmor_default.go new file mode 100644 index 0000000000..e4065b4ad9 --- /dev/null +++ b/daemon/apparmor_default.go @@ -0,0 +1,30 @@ +// +build linux + +package daemon + +import ( + "github.com/Sirupsen/logrus" + aaprofile "github.com/docker/docker/profiles/apparmor" + "github.com/opencontainers/runc/libcontainer/apparmor" +) + +// Define constants for native driver +const ( + defaultApparmorProfile = "docker-default" +) + +func installDefaultAppArmorProfile() { + if apparmor.IsEnabled() { + if err := aaprofile.InstallDefault(defaultApparmorProfile); err != nil { + apparmorProfiles := []string{defaultApparmorProfile} + + // Allow daemon to run if loading failed, but are active + // (possibly through another run, manually, or via system startup) + for _, policy := range apparmorProfiles { + if err := aaprofile.IsLoaded(policy); err != nil { + logrus.Errorf("AppArmor enabled on system but the %s profile could not be loaded.", policy) + } + } + } + } +} diff --git a/daemon/apparmor_default_unsupported.go b/daemon/apparmor_default_unsupported.go new file mode 100644 index 0000000000..f186a68af9 --- /dev/null +++ b/daemon/apparmor_default_unsupported.go @@ -0,0 +1,6 @@ +// +build !linux + +package daemon + +func installDefaultAppArmorProfile() { +} diff --git a/daemon/caps/utils_unix.go b/daemon/caps/utils_unix.go new file mode 100644 index 0000000000..c99485f51d 
--- /dev/null +++ b/daemon/caps/utils_unix.go @@ -0,0 +1,131 @@ +// +build !windows + +package caps + +import ( + "fmt" + "strings" + + "github.com/docker/docker/pkg/stringutils" + "github.com/syndtr/gocapability/capability" +) + +var capabilityList Capabilities + +func init() { + last := capability.CAP_LAST_CAP + // hack for RHEL6 which has no /proc/sys/kernel/cap_last_cap + if last == capability.Cap(63) { + last = capability.CAP_BLOCK_SUSPEND + } + for _, cap := range capability.List() { + if cap > last { + continue + } + capabilityList = append(capabilityList, + &CapabilityMapping{ + Key: "CAP_" + strings.ToUpper(cap.String()), + Value: cap, + }, + ) + } +} + +type ( + // CapabilityMapping maps linux capability name to its value of capability.Cap type + // Capabilities is one of the security systems in Linux Security Module (LSM) + // framework provided by the kernel. + // For more details on capabilities, see http://man7.org/linux/man-pages/man7/capabilities.7.html + CapabilityMapping struct { + Key string `json:"key,omitempty"` + Value capability.Cap `json:"value,omitempty"` + } + // Capabilities contains all CapabilityMapping + Capabilities []*CapabilityMapping +) + +// String returns of CapabilityMapping +func (c *CapabilityMapping) String() string { + return c.Key +} + +// GetCapability returns CapabilityMapping which contains specific key +func GetCapability(key string) *CapabilityMapping { + for _, capp := range capabilityList { + if capp.Key == key { + cpy := *capp + return &cpy + } + } + return nil +} + +// GetAllCapabilities returns all of the capabilities +func GetAllCapabilities() []string { + output := make([]string, len(capabilityList)) + for i, capability := range capabilityList { + output[i] = capability.String() + } + return output +} + +// TweakCapabilities can tweak capabilities by adding or dropping capabilities +// based on the basics capabilities. 
+func TweakCapabilities(basics, adds, drops []string) ([]string, error) { + var ( + newCaps []string + allCaps = GetAllCapabilities() + ) + + // FIXME(tonistiigi): docker format is without CAP_ prefix, oci is with prefix + // Currently they are mixed in here. We should do conversion in one place. + + // look for invalid cap in the drop list + for _, cap := range drops { + if strings.ToLower(cap) == "all" { + continue + } + + if !stringutils.InSlice(allCaps, "CAP_"+cap) { + return nil, fmt.Errorf("Unknown capability drop: %q", cap) + } + } + + // handle --cap-add=all + if stringutils.InSlice(adds, "all") { + basics = allCaps + } + + if !stringutils.InSlice(drops, "all") { + for _, cap := range basics { + // skip `all` already handled above + if strings.ToLower(cap) == "all" { + continue + } + + // if we don't drop `all`, add back all the non-dropped caps + if !stringutils.InSlice(drops, cap[4:]) { + newCaps = append(newCaps, strings.ToUpper(cap)) + } + } + } + + for _, cap := range adds { + // skip `all` already handled above + if strings.ToLower(cap) == "all" { + continue + } + + cap = "CAP_" + cap + + if !stringutils.InSlice(allCaps, cap) { + return nil, fmt.Errorf("Unknown capability to add: %q", cap) + } + + // add cap if not already in the list + if !stringutils.InSlice(newCaps, cap) { + newCaps = append(newCaps, strings.ToUpper(cap)) + } + } + return newCaps, nil +} diff --git a/daemon/config.go b/daemon/config.go index d37f6488f3..a1a5f48c53 100644 --- a/daemon/config.go +++ b/daemon/config.go @@ -115,7 +115,7 @@ func (config *Config) InstallCommonFlags(cmd *flag.FlagSet, usageFn func(string) cmd.Var(opts.NewNamedListOptsRef("exec-opts", &config.ExecOptions, nil), []string{"-exec-opt"}, usageFn("Set exec driver options")) cmd.StringVar(&config.Pidfile, []string{"p", "-pidfile"}, defaultPidFile, usageFn("Path to use for daemon PID file")) cmd.StringVar(&config.Root, []string{"g", "-graph"}, defaultGraph, usageFn("Root of the Docker runtime")) - 
cmd.StringVar(&config.ExecRoot, []string{"-exec-root"}, "/var/run/docker", usageFn("Root of the Docker execdriver")) + cmd.StringVar(&config.ExecRoot, []string{"-exec-root"}, defaultExecRoot, usageFn("Root of the Docker execdriver")) cmd.BoolVar(&config.AutoRestart, []string{"#r", "#-restart"}, true, usageFn("--restart on the daemon has been deprecated in favor of --restart policies on docker run")) cmd.StringVar(&config.GraphDriver, []string{"s", "-storage-driver"}, "", usageFn("Storage driver to use")) cmd.IntVar(&config.Mtu, []string{"#mtu", "-mtu"}, 0, usageFn("Set the containers network MTU")) diff --git a/daemon/config_unix.go b/daemon/config_unix.go index 1047f00f1e..866923e74f 100644 --- a/daemon/config_unix.go +++ b/daemon/config_unix.go @@ -12,8 +12,9 @@ import ( ) var ( - defaultPidFile = "/var/run/docker.pid" - defaultGraph = "/var/lib/docker" + defaultPidFile = "/var/run/docker.pid" + defaultGraph = "/var/lib/docker" + defaultExecRoot = "/var/run/docker" ) // Config defines the configuration of a docker daemon. 
@@ -30,6 +31,7 @@ type Config struct { RemappedRoot string `json:"userns-remap,omitempty"` CgroupParent string `json:"cgroup-parent,omitempty"` Ulimits map[string]*units.Ulimit `json:"default-ulimits,omitempty"` + ContainerdAddr string `json:"containerd,omitempty"` } // bridgeConfig stores all the bridge driver specific @@ -80,6 +82,7 @@ func (config *Config) InstallFlags(cmd *flag.FlagSet, usageFn func(string) strin cmd.StringVar(&config.CorsHeaders, []string{"-api-cors-header"}, "", usageFn("Set CORS headers in the remote API")) cmd.StringVar(&config.CgroupParent, []string{"-cgroup-parent"}, "", usageFn("Set parent cgroup for all containers")) cmd.StringVar(&config.RemappedRoot, []string{"-userns-remap"}, "", usageFn("User/Group setting for user namespaces")) + cmd.StringVar(&config.ContainerdAddr, []string{"-containerd"}, "", usageFn("Path to containerD socket")) config.attachExperimentalFlags(cmd, usageFn) } diff --git a/daemon/container_operations.go b/daemon/container_operations.go index 3df74032b8..45c6c2acd6 100644 --- a/daemon/container_operations.go +++ b/daemon/container_operations.go @@ -48,11 +48,10 @@ func (daemon *Daemon) buildSandboxOptions(container *container.Container, n libn sboxOptions = append(sboxOptions, libnetwork.OptionUseDefaultSandbox()) sboxOptions = append(sboxOptions, libnetwork.OptionOriginHostsPath("/etc/hosts")) sboxOptions = append(sboxOptions, libnetwork.OptionOriginResolvConfPath("/etc/resolv.conf")) - } else if daemon.execDriver.SupportsHooks() { - // OptionUseExternalKey is mandatory for userns support. - // But optional for non-userns support - sboxOptions = append(sboxOptions, libnetwork.OptionUseExternalKey()) } + // OptionUseExternalKey is mandatory for userns support. 
+ // But optional for non-userns support + sboxOptions = append(sboxOptions, libnetwork.OptionUseExternalKey()) container.HostsPath, err = container.GetRootResourcePath("hosts") if err != nil { diff --git a/daemon/container_operations_unix.go b/daemon/container_operations_unix.go index 44454462c2..dd637f5a04 100644 --- a/daemon/container_operations_unix.go +++ b/daemon/container_operations_unix.go @@ -13,7 +13,6 @@ import ( "github.com/Sirupsen/logrus" "github.com/docker/docker/container" - "github.com/docker/docker/daemon/execdriver" "github.com/docker/docker/daemon/links" "github.com/docker/docker/pkg/fileutils" "github.com/docker/docker/pkg/idtools" @@ -22,13 +21,16 @@ import ( "github.com/docker/docker/runconfig" containertypes "github.com/docker/engine-api/types/container" networktypes "github.com/docker/engine-api/types/network" - "github.com/docker/go-units" "github.com/docker/libnetwork" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/devices" "github.com/opencontainers/runc/libcontainer/label" + "github.com/opencontainers/specs/specs-go" ) +func u32Ptr(i int64) *uint32 { u := uint32(i); return &u } +func fmPtr(i int64) *os.FileMode { fm := os.FileMode(i); return &fm } + func (daemon *Daemon) setupLinkedContainers(container *container.Container) ([]string, error) { var env []string children := daemon.children(container) @@ -64,220 +66,6 @@ func (daemon *Daemon) setupLinkedContainers(container *container.Container) ([]s return env, nil } -func (daemon *Daemon) populateCommand(c *container.Container, env []string) error { - var en *execdriver.Network - if !c.Config.NetworkDisabled { - en = &execdriver.Network{} - if !daemon.execDriver.SupportsHooks() || c.HostConfig.NetworkMode.IsHost() { - en.NamespacePath = c.NetworkSettings.SandboxKey - } - - if c.HostConfig.NetworkMode.IsContainer() { - nc, err := daemon.getNetworkedContainer(c.ID, c.HostConfig.NetworkMode.ConnectedContainer()) - if err != nil { - return 
err - } - en.ContainerID = nc.ID - } - } - - ipc := &execdriver.Ipc{} - var err error - c.ShmPath, err = c.ShmResourcePath() - if err != nil { - return err - } - - if c.HostConfig.IpcMode.IsContainer() { - ic, err := daemon.getIpcContainer(c) - if err != nil { - return err - } - ipc.ContainerID = ic.ID - c.ShmPath = ic.ShmPath - } else { - ipc.HostIpc = c.HostConfig.IpcMode.IsHost() - if ipc.HostIpc { - if _, err := os.Stat("/dev/shm"); err != nil { - return fmt.Errorf("/dev/shm is not mounted, but must be for --ipc=host") - } - c.ShmPath = "/dev/shm" - } - } - - pid := &execdriver.Pid{} - pid.HostPid = c.HostConfig.PidMode.IsHost() - - uts := &execdriver.UTS{ - HostUTS: c.HostConfig.UTSMode.IsHost(), - } - - // Build lists of devices allowed and created within the container. - var userSpecifiedDevices []*configs.Device - for _, deviceMapping := range c.HostConfig.Devices { - devs, err := getDevicesFromPath(deviceMapping) - if err != nil { - return err - } - - userSpecifiedDevices = append(userSpecifiedDevices, devs...) 
- } - - allowedDevices := mergeDevices(configs.DefaultAllowedDevices, userSpecifiedDevices) - - autoCreatedDevices := mergeDevices(configs.DefaultAutoCreatedDevices, userSpecifiedDevices) - - var rlimits []*units.Rlimit - ulimits := c.HostConfig.Ulimits - - // Merge ulimits with daemon defaults - ulIdx := make(map[string]*units.Ulimit) - for _, ul := range ulimits { - ulIdx[ul.Name] = ul - } - for name, ul := range daemon.configStore.Ulimits { - if _, exists := ulIdx[name]; !exists { - ulimits = append(ulimits, ul) - } - } - - weightDevices, err := getBlkioWeightDevices(c.HostConfig) - if err != nil { - return err - } - - readBpsDevice, err := getBlkioReadBpsDevices(c.HostConfig) - if err != nil { - return err - } - - writeBpsDevice, err := getBlkioWriteBpsDevices(c.HostConfig) - if err != nil { - return err - } - - readIOpsDevice, err := getBlkioReadIOpsDevices(c.HostConfig) - if err != nil { - return err - } - - writeIOpsDevice, err := getBlkioWriteIOpsDevices(c.HostConfig) - if err != nil { - return err - } - - for _, limit := range ulimits { - rl, err := limit.GetRlimit() - if err != nil { - return err - } - rlimits = append(rlimits, rl) - } - - resources := &execdriver.Resources{ - CommonResources: execdriver.CommonResources{ - Memory: c.HostConfig.Memory, - MemoryReservation: c.HostConfig.MemoryReservation, - CPUShares: c.HostConfig.CPUShares, - BlkioWeight: c.HostConfig.BlkioWeight, - }, - MemorySwap: c.HostConfig.MemorySwap, - KernelMemory: c.HostConfig.KernelMemory, - CpusetCpus: c.HostConfig.CpusetCpus, - CpusetMems: c.HostConfig.CpusetMems, - CPUPeriod: c.HostConfig.CPUPeriod, - CPUQuota: c.HostConfig.CPUQuota, - Rlimits: rlimits, - BlkioWeightDevice: weightDevices, - BlkioThrottleReadBpsDevice: readBpsDevice, - BlkioThrottleWriteBpsDevice: writeBpsDevice, - BlkioThrottleReadIOpsDevice: readIOpsDevice, - BlkioThrottleWriteIOpsDevice: writeIOpsDevice, - PidsLimit: c.HostConfig.PidsLimit, - MemorySwappiness: -1, - } - - if c.HostConfig.OomKillDisable != 
nil { - resources.OomKillDisable = *c.HostConfig.OomKillDisable - } - if c.HostConfig.MemorySwappiness != nil { - resources.MemorySwappiness = *c.HostConfig.MemorySwappiness - } - - processConfig := execdriver.ProcessConfig{ - CommonProcessConfig: execdriver.CommonProcessConfig{ - Entrypoint: c.Path, - Arguments: c.Args, - Tty: c.Config.Tty, - }, - Privileged: c.HostConfig.Privileged, - User: c.Config.User, - } - - processConfig.SysProcAttr = &syscall.SysProcAttr{Setsid: true} - processConfig.Env = env - - remappedRoot := &execdriver.User{} - if c.HostConfig.UsernsMode.IsPrivate() { - rootUID, rootGID := daemon.GetRemappedUIDGID() - if rootUID != 0 { - remappedRoot.UID = rootUID - remappedRoot.GID = rootGID - } - } - - uidMap, gidMap := daemon.GetUIDGIDMaps() - - if !daemon.seccompEnabled { - if c.SeccompProfile != "" && c.SeccompProfile != "unconfined" { - return fmt.Errorf("Seccomp is not enabled in your kernel, cannot run a custom seccomp profile.") - } - logrus.Warn("Seccomp is not enabled in your kernel, running container without default profile.") - c.SeccompProfile = "unconfined" - } - - defaultCgroupParent := "/docker" - if daemon.configStore.CgroupParent != "" { - defaultCgroupParent = daemon.configStore.CgroupParent - } else if daemon.usingSystemd() { - defaultCgroupParent = "system.slice" - } - c.Command = &execdriver.Command{ - CommonCommand: execdriver.CommonCommand{ - ID: c.ID, - MountLabel: c.GetMountLabel(), - Network: en, - ProcessConfig: processConfig, - ProcessLabel: c.GetProcessLabel(), - Rootfs: c.BaseFS, - Resources: resources, - WorkingDir: c.Config.WorkingDir, - }, - AllowedDevices: allowedDevices, - AppArmorProfile: c.AppArmorProfile, - AutoCreatedDevices: autoCreatedDevices, - CapAdd: c.HostConfig.CapAdd, - CapDrop: c.HostConfig.CapDrop, - CgroupParent: defaultCgroupParent, - GIDMapping: gidMap, - GroupAdd: c.HostConfig.GroupAdd, - Ipc: ipc, - OomScoreAdj: c.HostConfig.OomScoreAdj, - Pid: pid, - ReadonlyRootfs: c.HostConfig.ReadonlyRootfs, 
- RemappedRoot: remappedRoot, - SeccompProfile: c.SeccompProfile, - UIDMapping: uidMap, - UTS: uts, - NoNewPrivileges: c.NoNewPrivileges, - } - if c.HostConfig.CgroupParent != "" { - c.Command.CgroupParent = c.HostConfig.CgroupParent - } - - return nil -} - // getSize returns the real size & virtual size of the container. func (daemon *Daemon) getSize(container *container.Container) (int64, int64) { var ( @@ -395,28 +183,49 @@ func (daemon *Daemon) getIpcContainer(container *container.Container) (*containe } func (daemon *Daemon) setupIpcDirs(c *container.Container) error { - rootUID, rootGID := daemon.GetRemappedUIDGID() - if !c.HasMountFor("/dev/shm") { - shmPath, err := c.ShmResourcePath() + var err error + + c.ShmPath, err = c.ShmResourcePath() + if err != nil { + return err + } + + if c.HostConfig.IpcMode.IsContainer() { + ic, err := daemon.getIpcContainer(c) if err != nil { return err } + c.ShmPath = ic.ShmPath + } else if c.HostConfig.IpcMode.IsHost() { + if _, err := os.Stat("/dev/shm"); err != nil { + return fmt.Errorf("/dev/shm is not mounted, but must be for --ipc=host") + } + c.ShmPath = "/dev/shm" + } else { + rootUID, rootGID := daemon.GetRemappedUIDGID() + if !c.HasMountFor("/dev/shm") { + shmPath, err := c.ShmResourcePath() + if err != nil { + return err + } - if err := idtools.MkdirAllAs(shmPath, 0700, rootUID, rootGID); err != nil { - return err + if err := idtools.MkdirAllAs(shmPath, 0700, rootUID, rootGID); err != nil { + return err + } + + shmSize := container.DefaultSHMSize + if c.HostConfig.ShmSize != 0 { + shmSize = c.HostConfig.ShmSize + } + shmproperty := "mode=1777,size=" + strconv.FormatInt(shmSize, 10) + if err := syscall.Mount("shm", shmPath, "tmpfs", uintptr(syscall.MS_NOEXEC|syscall.MS_NOSUID|syscall.MS_NODEV), label.FormatMountLabel(shmproperty, c.GetMountLabel())); err != nil { + return fmt.Errorf("mounting shm tmpfs: %s", err) + } + if err := os.Chown(shmPath, rootUID, rootGID); err != nil { + return err + } } - shmSize := 
container.DefaultSHMSize - if c.HostConfig.ShmSize != 0 { - shmSize = c.HostConfig.ShmSize - } - shmproperty := "mode=1777,size=" + strconv.FormatInt(shmSize, 10) - if err := syscall.Mount("shm", shmPath, "tmpfs", uintptr(syscall.MS_NOEXEC|syscall.MS_NOSUID|syscall.MS_NODEV), label.FormatMountLabel(shmproperty, c.GetMountLabel())); err != nil { - return fmt.Errorf("mounting shm tmpfs: %s", err) - } - if err := os.Chown(shmPath, rootUID, rootGID); err != nil { - return err - } } return nil @@ -474,7 +283,19 @@ func killProcessDirectly(container *container.Container) error { return nil } -func getDevicesFromPath(deviceMapping containertypes.DeviceMapping) (devs []*configs.Device, err error) { +func specDevice(d *configs.Device) specs.Device { + return specs.Device{ + Type: string(d.Type), + Path: d.Path, + Major: d.Major, + Minor: d.Minor, + FileMode: fmPtr(int64(d.FileMode)), + UID: u32Ptr(int64(d.Uid)), + GID: u32Ptr(int64(d.Gid)), + } +} + +func getDevicesFromPath(deviceMapping containertypes.DeviceMapping) (devs []specs.Device, err error) { resolvedPathOnHost := deviceMapping.PathOnHost // check if it is a symbolic link @@ -488,7 +309,7 @@ func getDevicesFromPath(deviceMapping containertypes.DeviceMapping) (devs []*con // if there was no error, return the device if err == nil { device.Path = deviceMapping.PathInContainer - return append(devs, device), nil + return append(devs, specDevice(device)), nil } // if the device is not a device node @@ -508,7 +329,7 @@ func getDevicesFromPath(deviceMapping containertypes.DeviceMapping) (devs []*con // add the device to userSpecified devices childDevice.Path = strings.Replace(dpath, resolvedPathOnHost, deviceMapping.PathInContainer, 1) - devs = append(devs, childDevice) + devs = append(devs, specDevice(childDevice)) return nil }) diff --git a/daemon/daemon.go b/daemon/daemon.go index 8fd8edcc37..a01a8b4d9b 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -20,13 +20,12 @@ import ( "time" "github.com/Sirupsen/logrus" 
+ containerd "github.com/docker/containerd/api/grpc/types" "github.com/docker/docker/api" "github.com/docker/docker/builder" "github.com/docker/docker/container" "github.com/docker/docker/daemon/events" "github.com/docker/docker/daemon/exec" - "github.com/docker/docker/daemon/execdriver" - "github.com/docker/docker/daemon/execdriver/execdrivers" "github.com/docker/docker/errors" "github.com/docker/engine-api/types" containertypes "github.com/docker/engine-api/types/container" @@ -46,12 +45,12 @@ import ( "github.com/docker/docker/image" "github.com/docker/docker/image/tarexport" "github.com/docker/docker/layer" + "github.com/docker/docker/libcontainerd" "github.com/docker/docker/migrate/v1" "github.com/docker/docker/pkg/archive" "github.com/docker/docker/pkg/fileutils" "github.com/docker/docker/pkg/graphdb" "github.com/docker/docker/pkg/idtools" - "github.com/docker/docker/pkg/mount" "github.com/docker/docker/pkg/namesgenerator" "github.com/docker/docker/pkg/progress" "github.com/docker/docker/pkg/registrar" @@ -115,7 +114,6 @@ type Daemon struct { trustKey libtrust.PrivateKey idIndex *truncindex.TruncIndex configStore *Config - execDriver execdriver.Driver statsCollector *statsCollector defaultLogConfig containertypes.LogConfig RegistryService *registry.Service @@ -132,6 +130,8 @@ type Daemon struct { imageStore image.Store nameIndex *registrar.Registrar linkIndex *linkIndex + containerd libcontainerd.Client + defaultIsolation containertypes.Isolation // Default isolation mode on Windows } // GetContainer looks for a container using the provided information, which could be @@ -220,36 +220,16 @@ func (daemon *Daemon) registerName(container *container.Container) error { } // Register makes a container object usable by the daemon as -func (daemon *Daemon) Register(container *container.Container) error { +func (daemon *Daemon) Register(c *container.Container) error { // Attach to stdout and stderr - if container.Config.OpenStdin { - container.NewInputPipes() + if 
c.Config.OpenStdin { + c.NewInputPipes() } else { - container.NewNopInputPipe() + c.NewNopInputPipe() } - daemon.containers.Add(container.ID, container) - daemon.idIndex.Add(container.ID) - - if container.IsRunning() { - logrus.Debugf("killing old running container %s", container.ID) - // Set exit code to 128 + SIGKILL (9) to properly represent unsuccessful exit - container.SetStoppedLocking(&execdriver.ExitStatus{ExitCode: 137}) - // use the current driver and ensure that the container is dead x.x - cmd := &execdriver.Command{ - CommonCommand: execdriver.CommonCommand{ - ID: container.ID, - }, - } - daemon.execDriver.Terminate(cmd) - - container.UnmountIpcMounts(mount.Unmount) - - daemon.Unmount(container) - if err := container.ToDiskLocking(); err != nil { - logrus.Errorf("Error saving stopped state to disk: %v", err) - } - } + daemon.containers.Add(c.ID, c) + daemon.idIndex.Add(c.ID) return nil } @@ -307,17 +287,38 @@ func (daemon *Daemon) restore() error { logrus.Errorf("Failed to register container %s: %s", c.ID, err) continue } - - // get list of containers we need to restart - if daemon.configStore.AutoRestart && c.ShouldRestart() { - restartContainers[c] = make(chan struct{}) - } - - // if c.hostConfig.Links is nil (not just empty), then it is using the old sqlite links and needs to be migrated - if c.HostConfig != nil && c.HostConfig.Links == nil { - migrateLegacyLinks = true - } } + var wg sync.WaitGroup + var mapLock sync.Mutex + for _, c := range containers { + wg.Add(1) + go func(c *container.Container) { + defer wg.Done() + if c.IsRunning() || c.IsPaused() { + if err := daemon.containerd.Restore(c.ID, libcontainerd.WithRestartManager(c.RestartManager(true))); err != nil { + logrus.Errorf("Failed to restore with containerd: %q", err) + return + } + } + // fixme: only if not running + // get list of containers we need to restart + if daemon.configStore.AutoRestart && !c.IsRunning() && !c.IsPaused() && c.ShouldRestart() { + mapLock.Lock() + 
restartContainers[c] = make(chan struct{}) + mapLock.Unlock() + } else if !c.IsRunning() && !c.IsPaused() { + if mountid, err := daemon.layerStore.GetMountID(c.ID); err == nil { + daemon.cleanupMountsByID(mountid) + } + } + + // if c.hostConfig.Links is nil (not just empty), then it is using the old sqlite links and needs to be migrated + if c.HostConfig != nil && c.HostConfig.Links == nil { + migrateLegacyLinks = true + } + }(c) + } + wg.Wait() // migrate any legacy links from sqlite linkdbFile := filepath.Join(daemon.root, "linkgraph.db") @@ -599,7 +600,7 @@ func (daemon *Daemon) registerLink(parent, child *container.Container, alias str // NewDaemon sets up everything for the daemon to be able to service // requests from the webserver. -func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemon, err error) { +func NewDaemon(config *Config, registryService *registry.Service, containerdRemote libcontainerd.Remote) (daemon *Daemon, err error) { setDefaultMtu(config) // Ensure we have compatible and valid configuration options @@ -659,7 +660,7 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo } os.Setenv("TMPDIR", realTmp) - d := &Daemon{} + d := &Daemon{configStore: config} // Ensure the daemon is properly shutdown if there is a failure during // initialization defer func() { @@ -670,6 +671,11 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo } }() + // Set the default isolation mode (only applicable on Windows) + if err := d.setDefaultIsolation(); err != nil { + return nil, fmt.Errorf("error setting default isolation mode: %v", err) + } + // Verify logging driver type if config.LogConfig.Type != "none" { if _, err := logger.GetLogDriver(config.LogConfig.Type); err != nil { @@ -682,6 +688,7 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo logrus.Warnf("Failed to configure golang's threads limit: %v", err) } + installDefaultAppArmorProfile() 
daemonRepo := filepath.Join(config.Root, "containers") if err := idtools.MkdirAllAs(daemonRepo, 0700, rootUID, rootGID); err != nil && !os.IsExist(err) { return nil, err @@ -781,11 +788,6 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo return nil, fmt.Errorf("Devices cgroup isn't mounted") } - ed, err := execdrivers.NewDriver(config.ExecOptions, config.ExecRoot, config.Root, sysInfo) - if err != nil { - return nil, err - } - d.ID = trustKey.PublicKey().KeyID() d.repository = daemonRepo d.containers = container.NewMemoryStore() @@ -794,8 +796,6 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo d.distributionMetadataStore = distributionMetadataStore d.trustKey = trustKey d.idIndex = truncindex.NewTruncIndex([]string{}) - d.configStore = config - d.execDriver = ed d.statsCollector = d.newStatsCollector(1 * time.Second) d.defaultLogConfig = containertypes.LogConfig{ Type: config.LogConfig.Type, @@ -812,10 +812,12 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo d.nameIndex = registrar.NewRegistrar() d.linkIndex = newLinkIndex() - if err := d.cleanupMounts(); err != nil { + go d.execCommandGC() + + d.containerd, err = containerdRemote.Client(d) + if err != nil { return nil, err } - go d.execCommandGC() if err := d.restore(); err != nil { return nil, err @@ -877,6 +879,9 @@ func (daemon *Daemon) Shutdown() error { logrus.Errorf("Stop container error: %v", err) return } + if mountid, err := daemon.layerStore.GetMountID(c.ID); err == nil { + daemon.cleanupMountsByID(mountid) + } logrus.Debugf("container stopped %s", c.ID) }) } @@ -923,29 +928,16 @@ func (daemon *Daemon) Mount(container *container.Container) error { } // Unmount unsets the container base filesystem -func (daemon *Daemon) Unmount(container *container.Container) { +func (daemon *Daemon) Unmount(container *container.Container) error { if err := container.RWLayer.Unmount(); err != nil { logrus.Errorf("Error 
unmounting container %s: %s", container.ID, err) + return err } -} - -// Run uses the execution driver to run a given container -func (daemon *Daemon) Run(c *container.Container, pipes *execdriver.Pipes, startCallback execdriver.DriverCallback) (execdriver.ExitStatus, error) { - hooks := execdriver.Hooks{ - Start: startCallback, - } - hooks.PreStart = append(hooks.PreStart, func(processConfig *execdriver.ProcessConfig, pid int, chOOM <-chan struct{}) error { - return daemon.setNetworkNamespaceKey(c.ID, pid) - }) - return daemon.execDriver.Run(c.Command, pipes, hooks) + return nil } func (daemon *Daemon) kill(c *container.Container, sig int) error { - return daemon.execDriver.Kill(c.Command, sig) -} - -func (daemon *Daemon) stats(c *container.Container) (*execdriver.ResourceStats, error) { - return daemon.execDriver.Stats(c.ID) + return daemon.containerd.Signal(c.ID, sig) } func (daemon *Daemon) subscribeToContainerStats(c *container.Container) chan interface{} { @@ -1322,12 +1314,6 @@ func (daemon *Daemon) GraphDriverName() string { return daemon.layerStore.DriverName() } -// ExecutionDriver returns the currently used driver for creating and -// starting execs in a container. -func (daemon *Daemon) ExecutionDriver() execdriver.Driver { - return daemon.execDriver -} - // GetUIDGIDMaps returns the current daemon's user namespace settings // for the full uid and gid maps which will be applied to containers // started in this instance. 
@@ -1536,7 +1522,7 @@ func (daemon *Daemon) IsShuttingDown() bool { } // GetContainerStats collects all the stats published by a container -func (daemon *Daemon) GetContainerStats(container *container.Container) (*execdriver.ResourceStats, error) { +func (daemon *Daemon) GetContainerStats(container *container.Container) (*types.StatsJSON, error) { stats, err := daemon.stats(container) if err != nil { return nil, err @@ -1547,7 +1533,22 @@ func (daemon *Daemon) GetContainerStats(container *container.Container) (*execdr if nwStats, err = daemon.getNetworkStats(container); err != nil { return nil, err } - stats.Interfaces = nwStats + + stats.Networks = make(map[string]types.NetworkStats) + for _, iface := range nwStats { + // For API Version >= 1.21, the original data of network will + // be returned. + stats.Networks[iface.Name] = types.NetworkStats{ + RxBytes: iface.RxBytes, + RxPackets: iface.RxPackets, + RxErrors: iface.RxErrors, + RxDropped: iface.RxDropped, + TxBytes: iface.TxBytes, + TxPackets: iface.TxPackets, + TxErrors: iface.TxErrors, + TxDropped: iface.TxDropped, + } + } return stats, nil } @@ -1735,3 +1736,16 @@ func (daemon *Daemon) networkOptions(dconfig *Config) ([]nwconfig.Option, error) options = append(options, driverOptions(dconfig)...) 
return options, nil } + +func copyBlkioEntry(entries []*containerd.BlkioStatsEntry) []types.BlkioStatEntry { + out := make([]types.BlkioStatEntry, len(entries)) + for i, re := range entries { + out[i] = types.BlkioStatEntry{ + Major: re.Major, + Minor: re.Minor, + Op: re.Op, + Value: re.Value, + } + } + return out +} diff --git a/daemon/daemon_linux.go b/daemon/daemon_linux.go index 22973069ca..deb3291155 100644 --- a/daemon/daemon_linux.go +++ b/daemon/daemon_linux.go @@ -12,6 +12,64 @@ import ( "github.com/docker/docker/pkg/mount" ) +func (daemon *Daemon) cleanupMountsByID(id string) error { + logrus.Debugf("Cleaning up old mountid %s: start.", id) + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return err + } + defer f.Close() + + return daemon.cleanupMountsFromReaderByID(f, id, mount.Unmount) +} + +func (daemon *Daemon) cleanupMountsFromReaderByID(reader io.Reader, id string, unmount func(target string) error) error { + if daemon.root == "" { + return nil + } + var errors []string + mountRoot := "" + shmSuffix := "/" + id + "/shm" + mergedSuffix := "/" + id + "/merged" + sc := bufio.NewScanner(reader) + for sc.Scan() { + line := sc.Text() + fields := strings.Fields(line) + if strings.HasPrefix(fields[4], daemon.root) { + logrus.Debugf("Mount base: %v", fields[4]) + mnt := fields[4] + if strings.HasSuffix(mnt, shmSuffix) || strings.HasSuffix(mnt, mergedSuffix) { + logrus.Debugf("Unmounting %v", mnt) + if err := unmount(mnt); err != nil { + logrus.Error(err) + errors = append(errors, err.Error()) + } + } else if mountBase := filepath.Base(mnt); mountBase == id { + mountRoot = mnt + } + } + } + + if mountRoot != "" { + logrus.Debugf("Unmounting %v", mountRoot) + if err := unmount(mountRoot); err != nil { + logrus.Error(err) + errors = append(errors, err.Error()) + } + } + + if err := sc.Err(); err != nil { + return err + } + + if len(errors) > 0 { + return fmt.Errorf("Error cleaningup mounts:\n%v", strings.Join(errors, "\n")) + } + + 
logrus.Debugf("Cleaning up old container shm/mqueue/rootfs mounts: done.") + return nil +} + // cleanupMounts umounts shm/mqueue mounts for old containers func (daemon *Daemon) cleanupMounts() error { logrus.Debugf("Cleaning up old container shm/mqueue/rootfs mounts: start.") @@ -25,7 +83,7 @@ func (daemon *Daemon) cleanupMounts() error { } func (daemon *Daemon) cleanupMountsFromReader(reader io.Reader, unmount func(target string) error) error { - if daemon.repository == "" { + if daemon.root == "" { return nil } sc := bufio.NewScanner(reader) @@ -37,7 +95,7 @@ func (daemon *Daemon) cleanupMountsFromReader(reader io.Reader, unmount func(tar logrus.Debugf("Mount base: %v", fields[4]) mnt := fields[4] mountBase := filepath.Base(mnt) - if mountBase == "mqueue" || mountBase == "shm" || mountBase == "merged" { + if mountBase == "shm" || mountBase == "merged" { logrus.Debugf("Unmounting %v", mnt) if err := unmount(mnt); err != nil { logrus.Error(err) diff --git a/daemon/daemon_linux_test.go b/daemon/daemon_linux_test.go index 0439d0bcd7..672d8fc72e 100644 --- a/daemon/daemon_linux_test.go +++ b/daemon/daemon_linux_test.go @@ -7,53 +7,83 @@ import ( "testing" ) -func TestCleanupMounts(t *testing.T) { - fixture := `230 138 0:60 / / rw,relatime - overlay overlay rw,lowerdir=/var/lib/docker/overlay/0ef9f93d5d365c1385b09d54bbee6afff3d92002c16f22eccb6e1549b2ff97d8/root,upperdir=/var/lib/docker/overlay/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb/upper,workdir=/var/lib/docker/overlay/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb/work -231 230 0:56 / /proc rw,nosuid,nodev,noexec,relatime - proc proc rw -232 230 0:57 / /dev rw,nosuid - tmpfs tmpfs rw,mode=755 -233 232 0:58 / /dev/pts rw,nosuid,noexec,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=666 -234 232 0:59 / /dev/shm rw,nosuid,nodev,noexec,relatime - tmpfs shm rw,size=65536k -235 232 0:55 / /dev/mqueue rw,nosuid,nodev,noexec,relatime - mqueue mqueue rw -236 230 0:61 / /sys 
rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw -237 236 0:62 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw -238 237 0:21 /system.slice/docker.service /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,xattr,release_agent=/lib/systemd/systemd-cgroups-agent,name=systemd -239 237 0:23 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,perf_event -240 237 0:24 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpuset,clone_children -241 237 0:25 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,devices -242 237 0:26 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,freezer -243 237 0:27 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpu,cpuacct -244 237 0:28 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,blkio -245 237 0:29 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,net_cls,net_prio -246 237 0:30 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,hugetlb -247 237 0:31 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory -248 230 253:1 /var/lib/docker/volumes/510cc41ac68c48bd4eac932e3e09711673876287abf1b185312cfbfe6261a111/_data /var/lib/docker 
rw,relatime - ext4 /dev/disk/by-uuid/ba70ea0c-1a8f-4ee4-9687-cb393730e2b5 rw,errors=remount-ro,data=ordered -250 230 253:1 /var/lib/docker/containers/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb/hostname /etc/hostname rw,relatime - ext4 /dev/disk/by-uuid/ba70ea0c-1a8f-4ee4-9687-cb393730e2b5 rw,errors=remount-ro,data=ordered -251 230 253:1 /var/lib/docker/containers/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb/hosts /etc/hosts rw,relatime - ext4 /dev/disk/by-uuid/ba70ea0c-1a8f-4ee4-9687-cb393730e2b5 rw,errors=remount-ro,data=ordered -252 232 0:13 /1 /dev/console rw,nosuid,noexec,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=000 -139 236 0:11 / /sys/kernel/security rw,relatime - securityfs none rw -140 230 0:54 / /tmp rw,relatime - tmpfs none rw -145 230 0:3 / /run/docker/netns/default rw - nsfs nsfs rw -130 140 0:45 / /tmp/docker_recursive_mount_test312125472/tmpfs rw,relatime - tmpfs tmpfs rw -131 230 0:3 / /run/docker/netns/47903e2e6701 rw - nsfs nsfs rw -133 230 0:55 / /go/src/github.com/docker/docker/bundles/1.9.0-dev/test-integration-cli/d45526097/graph/containers/47903e2e67014246eba27607809d5f5c2437c3bf84c2986393448f84093cc40b/mqueue rw,nosuid,nodev,noexec,relatime - mqueue mqueue rw` +const mountsFixture = `142 78 0:38 / / rw,relatime - aufs none rw,si=573b861da0b3a05b,dio +143 142 0:60 / /proc rw,nosuid,nodev,noexec,relatime - proc proc rw +144 142 0:67 / /dev rw,nosuid - tmpfs tmpfs rw,mode=755 +145 144 0:78 / /dev/pts rw,nosuid,noexec,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=666 +146 144 0:49 / /dev/mqueue rw,nosuid,nodev,noexec,relatime - mqueue mqueue rw +147 142 0:84 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw +148 147 0:86 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw,mode=755 +149 148 0:22 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpuset +150 148 0:25 
/docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/cpu rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpu +151 148 0:27 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/cpuacct rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpuacct +152 148 0:28 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory +153 148 0:29 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,devices +154 148 0:30 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,freezer +155 148 0:31 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,blkio +156 148 0:32 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,perf_event +157 148 0:33 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,hugetlb +158 148 0:35 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime - cgroup systemd rw,name=systemd +159 142 8:4 /home/mlaventure/gopath /home/mlaventure/gopath rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered +160 142 8:4 /var/lib/docker/volumes/9a428b651ee4c538130143cad8d87f603a4bf31b928afe7ff3ecd65480692b35/_data /var/lib/docker rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered +164 142 8:4 /home/mlaventure/gopath/src/github.com/docker/docker 
/go/src/github.com/docker/docker rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered +165 142 8:4 /var/lib/docker/containers/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a/resolv.conf /etc/resolv.conf rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered +166 142 8:4 /var/lib/docker/containers/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a/hostname /etc/hostname rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered +167 142 8:4 /var/lib/docker/containers/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a/hosts /etc/hosts rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered +168 144 0:39 / /dev/shm rw,nosuid,nodev,noexec,relatime - tmpfs shm rw,size=65536k +169 144 0:12 /14 /dev/console rw,nosuid,noexec,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=000 +83 147 0:10 / /sys/kernel/security rw,relatime - securityfs none rw +89 142 0:87 / /tmp rw,relatime - tmpfs none rw +97 142 0:60 / /run/docker/netns/default rw,nosuid,nodev,noexec,relatime - proc proc rw +100 160 8:4 /var/lib/docker/volumes/9a428b651ee4c538130143cad8d87f603a4bf31b928afe7ff3ecd65480692b35/_data/aufs /var/lib/docker/aufs rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered +115 100 0:102 / /var/lib/docker/aufs/mnt/0ecda1c63e5b58b3d89ff380bf646c95cc980252cf0b52466d43619aec7c8432 rw,relatime - aufs none rw,si=573b861dbc01905b,dio +116 160 0:107 / /var/lib/docker/containers/d045dc441d2e2e1d5b3e328d47e5943811a40819fb47497c5f5a5df2d6d13c37/shm rw,nosuid,nodev,noexec,relatime - tmpfs shm rw,size=65536k +118 142 0:102 / /run/docker/libcontainerd/d045dc441d2e2e1d5b3e328d47e5943811a40819fb47497c5f5a5df2d6d13c37/rootfs rw,relatime - aufs none rw,si=573b861dbc01905b,dio +242 142 0:60 / 
/run/docker/netns/c3664df2a0f7 rw,nosuid,nodev,noexec,relatime - proc proc rw +120 100 0:122 / /var/lib/docker/aufs/mnt/03ca4b49e71f1e49a41108829f4d5c70ac95934526e2af8984a1f65f1de0715d rw,relatime - aufs none rw,si=573b861eb147805b,dio +171 142 0:122 / /run/docker/libcontainerd/e406ff6f3e18516d50e03dbca4de54767a69a403a6f7ec1edc2762812824521e/rootfs rw,relatime - aufs none rw,si=573b861eb147805b,dio +310 142 0:60 / /run/docker/netns/71a18572176b rw,nosuid,nodev,noexec,relatime - proc proc rw +` +func TestCleanupMounts(t *testing.T) { d := &Daemon{ - repository: "/go/src/github.com/docker/docker/bundles/1.9.0-dev/test-integration-cli/d45526097/graph/containers/", + root: "/var/lib/docker/", } - expected := "/go/src/github.com/docker/docker/bundles/1.9.0-dev/test-integration-cli/d45526097/graph/containers/47903e2e67014246eba27607809d5f5c2437c3bf84c2986393448f84093cc40b/mqueue" - var unmounted bool + expected := "/var/lib/docker/containers/d045dc441d2e2e1d5b3e328d47e5943811a40819fb47497c5f5a5df2d6d13c37/shm" + var unmounted int unmount := func(target string) error { if target == expected { - unmounted = true + unmounted++ } return nil } - d.cleanupMountsFromReader(strings.NewReader(fixture), unmount) + d.cleanupMountsFromReader(strings.NewReader(mountsFixture), unmount) - if !unmounted { - t.Fatalf("Expected to unmount the mqueue") + if unmounted != 1 { + t.Fatalf("Expected to unmount the shm (and the shm only)") + } +} + +func TestCleanupMountsByID(t *testing.T) { + d := &Daemon{ + root: "/var/lib/docker/", + } + + expected := "/var/lib/docker/aufs/mnt/03ca4b49e71f1e49a41108829f4d5c70ac95934526e2af8984a1f65f1de0715d" + var unmounted int + unmount := func(target string) error { + if target == expected { + unmounted++ + } + return nil + } + + d.cleanupMountsFromReaderByID(strings.NewReader(mountsFixture), "03ca4b49e71f1e49a41108829f4d5c70ac95934526e2af8984a1f65f1de0715d", unmount) + + if unmounted != 1 { + t.Fatalf("Expected to unmount the auf root (and that only)") } } 
diff --git a/daemon/daemon_unix.go b/daemon/daemon_unix.go index 7cc5aed78b..d84a9ec3f4 100644 --- a/daemon/daemon_unix.go +++ b/daemon/daemon_unix.go @@ -13,6 +13,7 @@ import ( "strconv" "strings" "syscall" + "time" "github.com/Sirupsen/logrus" "github.com/docker/docker/container" @@ -25,6 +26,7 @@ import ( "github.com/docker/docker/reference" "github.com/docker/docker/runconfig" runconfigopts "github.com/docker/docker/runconfig/opts" + "github.com/docker/engine-api/types" pblkiodev "github.com/docker/engine-api/types/blkiodev" containertypes "github.com/docker/engine-api/types/container" "github.com/docker/libnetwork" @@ -33,10 +35,10 @@ import ( "github.com/docker/libnetwork/ipamutils" "github.com/docker/libnetwork/netlabel" "github.com/docker/libnetwork/options" - "github.com/docker/libnetwork/types" - blkiodev "github.com/opencontainers/runc/libcontainer/configs" + lntypes "github.com/docker/libnetwork/types" "github.com/opencontainers/runc/libcontainer/label" "github.com/opencontainers/runc/libcontainer/user" + "github.com/opencontainers/specs/specs-go" ) const ( @@ -51,16 +53,81 @@ const ( defaultRemappedID string = "dockremap" ) -func getBlkioWeightDevices(config *containertypes.HostConfig) ([]*blkiodev.WeightDevice, error) { +func getMemoryResources(config containertypes.Resources) *specs.Memory { + memory := specs.Memory{} + + if config.Memory > 0 { + limit := uint64(config.Memory) + memory.Limit = &limit + } + + if config.MemoryReservation > 0 { + reservation := uint64(config.MemoryReservation) + memory.Reservation = &reservation + } + + if config.MemorySwap != 0 { + swap := uint64(config.MemorySwap) + memory.Swap = &swap + } + + if config.MemorySwappiness != nil { + swappiness := uint64(*config.MemorySwappiness) + memory.Swappiness = &swappiness + } + + if config.KernelMemory != 0 { + kernelMemory := uint64(config.KernelMemory) + memory.Kernel = &kernelMemory + } + + return &memory +} + +func getCPUResources(config containertypes.Resources) *specs.CPU { 
+ cpu := specs.CPU{} + + if config.CPUShares != 0 { + shares := uint64(config.CPUShares) + cpu.Shares = &shares + } + + if config.CpusetCpus != "" { + cpuset := config.CpusetCpus + cpu.Cpus = &cpuset + } + + if config.CpusetMems != "" { + cpuset := config.CpusetMems + cpu.Mems = &cpuset + } + + if config.CPUPeriod != 0 { + period := uint64(config.CPUPeriod) + cpu.Period = &period + } + + if config.CPUQuota != 0 { + quota := uint64(config.CPUQuota) + cpu.Quota = &quota + } + + return &cpu +} + +func getBlkioWeightDevices(config containertypes.Resources) ([]specs.WeightDevice, error) { var stat syscall.Stat_t - var blkioWeightDevices []*blkiodev.WeightDevice + var blkioWeightDevices []specs.WeightDevice for _, weightDevice := range config.BlkioWeightDevice { if err := syscall.Stat(weightDevice.Path, &stat); err != nil { return nil, err } - weightDevice := blkiodev.NewWeightDevice(int64(stat.Rdev/256), int64(stat.Rdev%256), weightDevice.Weight, 0) - blkioWeightDevices = append(blkioWeightDevices, weightDevice) + weight := weightDevice.Weight + d := specs.WeightDevice{Weight: &weight} + d.Major = int64(stat.Rdev / 256) + d.Minor = int64(stat.Rdev % 256) + blkioWeightDevices = append(blkioWeightDevices, d) } return blkioWeightDevices, nil @@ -99,61 +166,73 @@ func parseSecurityOpt(container *container.Container, config *containertypes.Hos return err } -func getBlkioReadIOpsDevices(config *containertypes.HostConfig) ([]*blkiodev.ThrottleDevice, error) { - var blkioReadIOpsDevice []*blkiodev.ThrottleDevice +func getBlkioReadIOpsDevices(config containertypes.Resources) ([]specs.ThrottleDevice, error) { + var blkioReadIOpsDevice []specs.ThrottleDevice var stat syscall.Stat_t for _, iopsDevice := range config.BlkioDeviceReadIOps { if err := syscall.Stat(iopsDevice.Path, &stat); err != nil { return nil, err } - readIOpsDevice := blkiodev.NewThrottleDevice(int64(stat.Rdev/256), int64(stat.Rdev%256), iopsDevice.Rate) - blkioReadIOpsDevice = append(blkioReadIOpsDevice, 
readIOpsDevice) + rate := iopsDevice.Rate + d := specs.ThrottleDevice{Rate: &rate} + d.Major = int64(stat.Rdev / 256) + d.Minor = int64(stat.Rdev % 256) + blkioReadIOpsDevice = append(blkioReadIOpsDevice, d) } return blkioReadIOpsDevice, nil } -func getBlkioWriteIOpsDevices(config *containertypes.HostConfig) ([]*blkiodev.ThrottleDevice, error) { - var blkioWriteIOpsDevice []*blkiodev.ThrottleDevice +func getBlkioWriteIOpsDevices(config containertypes.Resources) ([]specs.ThrottleDevice, error) { + var blkioWriteIOpsDevice []specs.ThrottleDevice var stat syscall.Stat_t for _, iopsDevice := range config.BlkioDeviceWriteIOps { if err := syscall.Stat(iopsDevice.Path, &stat); err != nil { return nil, err } - writeIOpsDevice := blkiodev.NewThrottleDevice(int64(stat.Rdev/256), int64(stat.Rdev%256), iopsDevice.Rate) - blkioWriteIOpsDevice = append(blkioWriteIOpsDevice, writeIOpsDevice) + rate := iopsDevice.Rate + d := specs.ThrottleDevice{Rate: &rate} + d.Major = int64(stat.Rdev / 256) + d.Minor = int64(stat.Rdev % 256) + blkioWriteIOpsDevice = append(blkioWriteIOpsDevice, d) } return blkioWriteIOpsDevice, nil } -func getBlkioReadBpsDevices(config *containertypes.HostConfig) ([]*blkiodev.ThrottleDevice, error) { - var blkioReadBpsDevice []*blkiodev.ThrottleDevice +func getBlkioReadBpsDevices(config containertypes.Resources) ([]specs.ThrottleDevice, error) { + var blkioReadBpsDevice []specs.ThrottleDevice var stat syscall.Stat_t for _, bpsDevice := range config.BlkioDeviceReadBps { if err := syscall.Stat(bpsDevice.Path, &stat); err != nil { return nil, err } - readBpsDevice := blkiodev.NewThrottleDevice(int64(stat.Rdev/256), int64(stat.Rdev%256), bpsDevice.Rate) - blkioReadBpsDevice = append(blkioReadBpsDevice, readBpsDevice) + rate := bpsDevice.Rate + d := specs.ThrottleDevice{Rate: &rate} + d.Major = int64(stat.Rdev / 256) + d.Minor = int64(stat.Rdev % 256) + blkioReadBpsDevice = append(blkioReadBpsDevice, d) } return blkioReadBpsDevice, nil } -func 
getBlkioWriteBpsDevices(config *containertypes.HostConfig) ([]*blkiodev.ThrottleDevice, error) { - var blkioWriteBpsDevice []*blkiodev.ThrottleDevice +func getBlkioWriteBpsDevices(config containertypes.Resources) ([]specs.ThrottleDevice, error) { + var blkioWriteBpsDevice []specs.ThrottleDevice var stat syscall.Stat_t for _, bpsDevice := range config.BlkioDeviceWriteBps { if err := syscall.Stat(bpsDevice.Path, &stat); err != nil { return nil, err } - writeBpsDevice := blkiodev.NewThrottleDevice(int64(stat.Rdev/256), int64(stat.Rdev%256), bpsDevice.Rate) - blkioWriteBpsDevice = append(blkioWriteBpsDevice, writeBpsDevice) + rate := bpsDevice.Rate + d := specs.ThrottleDevice{Rate: &rate} + d.Major = int64(stat.Rdev / 256) + d.Minor = int64(stat.Rdev % 256) + blkioWriteBpsDevice = append(blkioWriteBpsDevice, d) } return blkioWriteBpsDevice, nil @@ -594,8 +673,8 @@ func initBridgeDriver(controller libnetwork.NetworkController, config *Config) e nw, nw6List, err := ipamutils.ElectInterfaceAddresses(bridgeName) if err == nil { - ipamV4Conf.PreferredPool = types.GetIPNetCanonical(nw).String() - hip, _ := types.GetHostPartIP(nw.IP, nw.Mask) + ipamV4Conf.PreferredPool = lntypes.GetIPNetCanonical(nw).String() + hip, _ := lntypes.GetHostPartIP(nw.IP, nw.Mask) if hip.IsGlobalUnicast() { ipamV4Conf.Gateway = nw.IP.String() } @@ -947,11 +1026,69 @@ func (daemon *Daemon) conditionalMountOnStart(container *container.Container) er // conditionalUnmountOnCleanup is a platform specific helper function called // during the cleanup of a container to unmount. 
-func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container) { - daemon.Unmount(container) +func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container) error { + return daemon.Unmount(container) } func restoreCustomImage(is image.Store, ls layer.Store, rs reference.Store) error { // Unix has no custom images to register return nil } + +func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) { + if !c.IsRunning() { + return nil, errNotRunning{c.ID} + } + stats, err := daemon.containerd.Stats(c.ID) + if err != nil { + return nil, err + } + s := &types.StatsJSON{} + cgs := stats.CgroupStats + if cgs != nil { + s.BlkioStats = types.BlkioStats{ + IoServiceBytesRecursive: copyBlkioEntry(cgs.BlkioStats.IoServiceBytesRecursive), + IoServicedRecursive: copyBlkioEntry(cgs.BlkioStats.IoServicedRecursive), + IoQueuedRecursive: copyBlkioEntry(cgs.BlkioStats.IoQueuedRecursive), + IoServiceTimeRecursive: copyBlkioEntry(cgs.BlkioStats.IoServiceTimeRecursive), + IoWaitTimeRecursive: copyBlkioEntry(cgs.BlkioStats.IoWaitTimeRecursive), + IoMergedRecursive: copyBlkioEntry(cgs.BlkioStats.IoMergedRecursive), + IoTimeRecursive: copyBlkioEntry(cgs.BlkioStats.IoTimeRecursive), + SectorsRecursive: copyBlkioEntry(cgs.BlkioStats.SectorsRecursive), + } + cpu := cgs.CpuStats + s.CPUStats = types.CPUStats{ + CPUUsage: types.CPUUsage{ + TotalUsage: cpu.CpuUsage.TotalUsage, + PercpuUsage: cpu.CpuUsage.PercpuUsage, + UsageInKernelmode: cpu.CpuUsage.UsageInKernelmode, + UsageInUsermode: cpu.CpuUsage.UsageInUsermode, + }, + ThrottlingData: types.ThrottlingData{ + Periods: cpu.ThrottlingData.Periods, + ThrottledPeriods: cpu.ThrottlingData.ThrottledPeriods, + ThrottledTime: cpu.ThrottlingData.ThrottledTime, + }, + } + mem := cgs.MemoryStats.Usage + s.MemoryStats = types.MemoryStats{ + Usage: mem.Usage, + MaxUsage: mem.MaxUsage, + Stats: cgs.MemoryStats.Stats, + Failcnt: mem.Failcnt, + } + if cgs.PidsStats != nil { + s.PidsStats = 
types.PidsStats{ + Current: cgs.PidsStats.Current, + } + } + } + s.Read = time.Unix(int64(stats.Timestamp), 0) + return s, nil +} + +// setDefaultIsolation determine the default isolation mode for the +// daemon to run in. This is only applicable on Windows +func (daemon *Daemon) setDefaultIsolation() error { + return nil +} diff --git a/daemon/delete.go b/daemon/delete.go index 75af4c01a1..008eefaa88 100644 --- a/daemon/delete.go +++ b/daemon/delete.go @@ -129,9 +129,6 @@ func (daemon *Daemon) cleanupContainer(container *container.Container, forceRemo return fmt.Errorf("Driver %s failed to remove root filesystem %s: %s", daemon.GraphDriverName(), container.ID, err) } - if err = daemon.execDriver.Clean(container.ID); err != nil { - return fmt.Errorf("Unable to remove execdriver data for %s: %s", container.ID, err) - } return nil } diff --git a/daemon/exec.go b/daemon/exec.go index 2b5250520f..be06845c68 100644 --- a/daemon/exec.go +++ b/daemon/exec.go @@ -11,10 +11,9 @@ import ( "github.com/Sirupsen/logrus" "github.com/docker/docker/container" "github.com/docker/docker/daemon/exec" - "github.com/docker/docker/daemon/execdriver" "github.com/docker/docker/errors" + "github.com/docker/docker/libcontainerd" "github.com/docker/docker/pkg/pools" - "github.com/docker/docker/pkg/promise" "github.com/docker/docker/pkg/term" "github.com/docker/engine-api/types" "github.com/docker/engine-api/types/strslice" @@ -106,33 +105,31 @@ func (d *Daemon) ContainerExecCreate(config *types.ExecConfig) (string, error) { } } - processConfig := &execdriver.ProcessConfig{ - CommonProcessConfig: execdriver.CommonProcessConfig{ - Tty: config.Tty, - Entrypoint: entrypoint, - Arguments: args, - }, - } - setPlatformSpecificExecProcessConfig(config, container, processConfig) - execConfig := exec.NewConfig() execConfig.OpenStdin = config.AttachStdin execConfig.OpenStdout = config.AttachStdout execConfig.OpenStderr = config.AttachStderr - execConfig.ProcessConfig = processConfig 
execConfig.ContainerID = container.ID execConfig.DetachKeys = keys + execConfig.Entrypoint = entrypoint + execConfig.Args = args + execConfig.Tty = config.Tty + execConfig.Privileged = config.Privileged + execConfig.User = config.User + if len(execConfig.User) == 0 { + execConfig.User = container.Config.User + } d.registerExecCommand(container, execConfig) - d.LogContainerEvent(container, "exec_create: "+execConfig.ProcessConfig.Entrypoint+" "+strings.Join(execConfig.ProcessConfig.Arguments, " ")) + d.LogContainerEvent(container, "exec_create: "+execConfig.Entrypoint+" "+strings.Join(execConfig.Args, " ")) return execConfig.ID, nil } // ContainerExecStart starts a previously set up exec instance. The // std streams are set up. -func (d *Daemon) ContainerExecStart(name string, stdin io.ReadCloser, stdout io.Writer, stderr io.Writer) error { +func (d *Daemon) ContainerExecStart(name string, stdin io.ReadCloser, stdout io.Writer, stderr io.Writer) (err error) { var ( cStdin io.ReadCloser cStdout, cStderr io.Writer @@ -155,11 +152,18 @@ func (d *Daemon) ContainerExecStart(name string, stdin io.ReadCloser, stdout io. return fmt.Errorf("Error: Exec command %s is already running", ec.ID) } ec.Running = true + defer func() { + if err != nil { + ec.Running = false + exitCode := 126 + ec.ExitCode = &exitCode + } + }() ec.Unlock() c := d.containers.Get(ec.ContainerID) logrus.Debugf("starting exec command %s in container %s", ec.ID, c.ID) - d.LogContainerEvent(c, "exec_start: "+ec.ProcessConfig.Entrypoint+" "+strings.Join(ec.ProcessConfig.Arguments, " ")) + d.LogContainerEvent(c, "exec_start: "+ec.Entrypoint+" "+strings.Join(ec.Args, " ")) if ec.OpenStdin && stdin != nil { r, w := io.Pipe() @@ -183,56 +187,26 @@ func (d *Daemon) ContainerExecStart(name string, stdin io.ReadCloser, stdout io. 
ec.NewNopInputPipe() } - attachErr := container.AttachStreams(context.Background(), ec.StreamConfig, ec.OpenStdin, true, ec.ProcessConfig.Tty, cStdin, cStdout, cStderr, ec.DetachKeys) + p := libcontainerd.Process{ + Args: append([]string{ec.Entrypoint}, ec.Args...), + Terminal: ec.Tty, + } - execErr := make(chan error) - - // Note, the ExecConfig data will be removed when the container - // itself is deleted. This allows us to query it (for things like - // the exitStatus) even after the cmd is done running. - - go func() { - execErr <- d.containerExec(c, ec) - }() - - select { - case err := <-attachErr: - if err != nil { - return fmt.Errorf("attach failed with error: %v", err) - } + if err := execSetPlatformOpt(c, ec, &p); err != nil { return nil - case err := <-execErr: - if aErr := <-attachErr; aErr != nil && err == nil { - return fmt.Errorf("attach failed with error: %v", aErr) - } - if err == nil { - return nil - } - - // Maybe the container stopped while we were trying to exec - if !c.IsRunning() { - return fmt.Errorf("container stopped while running exec: %s", c.ID) - } - return fmt.Errorf("Cannot run exec command %s in container %s: %s", ec.ID, c.ID, err) - } -} - -// Exec calls the underlying exec driver to run -func (d *Daemon) Exec(c *container.Container, execConfig *exec.Config, pipes *execdriver.Pipes, startCallback execdriver.DriverCallback) (int, error) { - hooks := execdriver.Hooks{ - Start: startCallback, - } - exitStatus, err := d.execDriver.Exec(c.Command, execConfig.ProcessConfig, pipes, hooks) - - // On err, make sure we don't leave ExitCode at zero - if err != nil && exitStatus == 0 { - exitStatus = 128 } - execConfig.ExitCode = &exitStatus - execConfig.Running = false + attachErr := container.AttachStreams(context.Background(), ec.StreamConfig, ec.OpenStdin, true, ec.Tty, cStdin, cStdout, cStderr, ec.DetachKeys) - return exitStatus, err + if err := d.containerd.AddProcess(c.ID, name, p); err != nil { + return err + } + + err = <-attachErr + 
if err != nil { + return fmt.Errorf("attach failed with error: %v", err) + } + return nil } // execCommandGC runs a ticker to clean up the daemon references @@ -270,52 +244,3 @@ func (d *Daemon) containerExecIds() map[string]struct{} { } return ids } - -func (d *Daemon) containerExec(container *container.Container, ec *exec.Config) error { - container.Lock() - defer container.Unlock() - - callback := func(processConfig *execdriver.ProcessConfig, pid int, chOOM <-chan struct{}) error { - if processConfig.Tty { - // The callback is called after the process Start() - // so we are in the parent process. In TTY mode, stdin/out/err is the PtySlave - // which we close here. - if c, ok := processConfig.Stdout.(io.Closer); ok { - c.Close() - } - } - ec.Close() - return nil - } - - // We use a callback here instead of a goroutine and an chan for - // synchronization purposes - cErr := promise.Go(func() error { return d.monitorExec(container, ec, callback) }) - return ec.Wait(cErr) -} - -func (d *Daemon) monitorExec(container *container.Container, execConfig *exec.Config, callback execdriver.DriverCallback) error { - pipes := execdriver.NewPipes(execConfig.Stdin(), execConfig.Stdout(), execConfig.Stderr(), execConfig.OpenStdin) - exitCode, err := d.Exec(container, execConfig, pipes, callback) - if err != nil { - logrus.Errorf("Error running command in existing container %s: %s", container.ID, err) - } - logrus.Debugf("Exec task in container %s exited with code %d", container.ID, exitCode) - - if err := execConfig.CloseStreams(); err != nil { - logrus.Errorf("%s: %s", container.ID, err) - } - - if execConfig.ProcessConfig.Terminal != nil { - if err := execConfig.WaitResize(); err != nil { - logrus.Errorf("Error waiting for resize: %v", err) - } - if err := execConfig.ProcessConfig.Terminal.Close(); err != nil { - logrus.Errorf("Error closing terminal while running in container %s: %s", container.ID, err) - } - } - // remove the exec command from the container's store only and 
not the - // daemon's store so that the exec command can be inspected. - container.ExecCommands.Delete(execConfig.ID) - return err -} diff --git a/daemon/exec/exec.go b/daemon/exec/exec.go index 7d10dd9f4e..bbeb1c16a6 100644 --- a/daemon/exec/exec.go +++ b/daemon/exec/exec.go @@ -1,11 +1,8 @@ package exec import ( - "fmt" "sync" - "time" - "github.com/docker/docker/daemon/execdriver" "github.com/docker/docker/pkg/stringid" "github.com/docker/docker/runconfig" ) @@ -16,22 +13,20 @@ import ( type Config struct { sync.Mutex *runconfig.StreamConfig - ID string - Running bool - ExitCode *int - ProcessConfig *execdriver.ProcessConfig - OpenStdin bool - OpenStderr bool - OpenStdout bool - CanRemove bool - ContainerID string - DetachKeys []byte - - // waitStart will be closed immediately after the exec is really started. - waitStart chan struct{} - - // waitResize will be closed after Resize is finished. - waitResize chan struct{} + ID string + Running bool + ExitCode *int + OpenStdin bool + OpenStderr bool + OpenStdout bool + CanRemove bool + ContainerID string + DetachKeys []byte + Entrypoint string + Args []string + Tty bool + Privileged bool + User string } // NewConfig initializes the a new exec configuration @@ -39,8 +34,6 @@ func NewConfig() *Config { return &Config{ ID: stringid.GenerateNonCryptoID(), StreamConfig: runconfig.NewStreamConfig(), - waitStart: make(chan struct{}), - waitResize: make(chan struct{}), } } @@ -98,45 +91,3 @@ func (e *Store) List() []string { e.RUnlock() return IDs } - -// Wait waits until the exec process finishes or there is an error in the error channel. -func (c *Config) Wait(cErr chan error) error { - // Exec should not return until the process is actually running - select { - case <-c.waitStart: - case err := <-cErr: - return err - } - return nil -} - -// WaitResize waits until terminal resize finishes or time out. 
-func (c *Config) WaitResize() error { - select { - case <-c.waitResize: - case <-time.After(time.Second): - return fmt.Errorf("Terminal resize for exec %s time out.", c.ID) - } - return nil -} - -// Close closes the wait channel for the progress. -func (c *Config) Close() { - close(c.waitStart) -} - -// CloseResize closes the wait channel for resizing terminal. -func (c *Config) CloseResize() { - close(c.waitResize) -} - -// Resize changes the size of the terminal for the exec process. -func (c *Config) Resize(h, w int) error { - defer c.CloseResize() - select { - case <-c.waitStart: - case <-time.After(time.Second): - return fmt.Errorf("Exec %s is not running, so it can not be resized.", c.ID) - } - return c.ProcessConfig.Terminal.Resize(h, w) -} diff --git a/daemon/exec_linux.go b/daemon/exec_linux.go new file mode 100644 index 0000000000..a2c86b2868 --- /dev/null +++ b/daemon/exec_linux.go @@ -0,0 +1,26 @@ +package daemon + +import ( + "github.com/docker/docker/container" + "github.com/docker/docker/daemon/caps" + "github.com/docker/docker/daemon/exec" + "github.com/docker/docker/libcontainerd" +) + +func execSetPlatformOpt(c *container.Container, ec *exec.Config, p *libcontainerd.Process) error { + if len(ec.User) > 0 { + uid, gid, additionalGids, err := getUser(c, ec.User) + if err != nil { + return err + } + p.User = &libcontainerd.User{ + UID: uid, + GID: gid, + AdditionalGids: additionalGids, + } + } + if ec.Privileged { + p.Capabilities = caps.GetAllCapabilities() + } + return nil +} diff --git a/daemon/exec_unix.go b/daemon/exec_unix.go deleted file mode 100644 index 754f73138a..0000000000 --- a/daemon/exec_unix.go +++ /dev/null @@ -1,21 +0,0 @@ -// +build linux freebsd - -package daemon - -import ( - "github.com/docker/docker/container" - "github.com/docker/docker/daemon/execdriver" - "github.com/docker/engine-api/types" -) - -// setPlatformSpecificExecProcessConfig sets platform-specific fields in the -// ProcessConfig structure. 
-func setPlatformSpecificExecProcessConfig(config *types.ExecConfig, container *container.Container, pc *execdriver.ProcessConfig) { - user := config.User - if len(user) == 0 { - user = container.Config.User - } - - pc.User = user - pc.Privileged = config.Privileged -} diff --git a/daemon/info.go b/daemon/info.go index 057b6cf50f..062b96493d 100644 --- a/daemon/info.go +++ b/daemon/info.go @@ -84,7 +84,6 @@ func (daemon *Daemon) SystemInfo() (*types.Info, error) { NFd: fileutils.GetTotalUsedFds(), NGoroutines: runtime.NumGoroutine(), SystemTime: time.Now().Format(time.RFC3339Nano), - ExecutionDriver: daemon.ExecutionDriver().Name(), LoggingDriver: daemon.defaultLogConfig.Type, CgroupDriver: daemon.getCgroupDriver(), NEventsListener: daemon.EventsService.SubscribersCount(), diff --git a/daemon/inspect_unix.go b/daemon/inspect_unix.go index bb224c8796..6033c02dd7 100644 --- a/daemon/inspect_unix.go +++ b/daemon/inspect_unix.go @@ -82,10 +82,10 @@ func addMountPoints(container *container.Container) []types.MountPoint { func inspectExecProcessConfig(e *exec.Config) *backend.ExecProcessConfig { return &backend.ExecProcessConfig{ - Tty: e.ProcessConfig.Tty, - Entrypoint: e.ProcessConfig.Entrypoint, - Arguments: e.ProcessConfig.Arguments, - Privileged: &e.ProcessConfig.Privileged, - User: e.ProcessConfig.User, + Tty: e.Tty, + Entrypoint: e.Entrypoint, + Arguments: e.Args, + Privileged: &e.Privileged, + User: e.User, } } diff --git a/daemon/kill.go b/daemon/kill.go index ffed439ce0..69a9c5e67b 100644 --- a/daemon/kill.go +++ b/daemon/kill.go @@ -69,6 +69,10 @@ func (daemon *Daemon) killWithSignal(container *container.Container, sig int) er container.ExitOnNext() + if !daemon.IsShuttingDown() { + container.HasBeenManuallyStopped = true + } + // if the container is currently restarting we do not need to send the signal // to the process. 
Telling the monitor that it should exit on it's next event // loop is enough diff --git a/daemon/monitor.go b/daemon/monitor.go new file mode 100644 index 0000000000..0a82c5f8fd --- /dev/null +++ b/daemon/monitor.go @@ -0,0 +1,143 @@ +package daemon + +import ( + "errors" + "fmt" + "io" + "runtime" + "strconv" + + "github.com/Sirupsen/logrus" + "github.com/docker/docker/libcontainerd" + "github.com/docker/docker/runconfig" +) + +// StateChanged updates daemon state changes from containerd +func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error { + c := daemon.containers.Get(id) + if c == nil { + return fmt.Errorf("no such container: %s", id) + } + + switch e.State { + case libcontainerd.StateOOM: + // StateOOM is Linux specific and should never be hit on Windows + if runtime.GOOS == "windows" { + return errors.New("Received StateOOM from libcontainerd on Windows. This should never happen.") + } + daemon.LogContainerEvent(c, "oom") + case libcontainerd.StateExit: + c.Lock() + defer c.Unlock() + c.Wait() + c.Reset(false) + c.SetStopped(platformConstructExitStatus(e)) + attributes := map[string]string{ + "exitCode": strconv.Itoa(int(e.ExitCode)), + } + daemon.LogContainerEventWithAttributes(c, "die", attributes) + daemon.Cleanup(c) + // FIXME: here is race condition between two RUN instructions in Dockerfile + // because they share same runconfig and change image. 
Must be fixed + // in builder/builder.go + return c.ToDisk() + case libcontainerd.StateRestart: + c.Lock() + defer c.Unlock() + c.Reset(false) + c.RestartCount++ + c.SetRestarting(platformConstructExitStatus(e)) + attributes := map[string]string{ + "exitCode": strconv.Itoa(int(e.ExitCode)), + } + daemon.LogContainerEventWithAttributes(c, "die", attributes) + return c.ToDisk() + case libcontainerd.StateExitProcess: + c.Lock() + defer c.Unlock() + if execConfig := c.ExecCommands.Get(e.ProcessID); execConfig != nil { + ec := int(e.ExitCode) + execConfig.ExitCode = &ec + execConfig.Running = false + execConfig.Wait() + if err := execConfig.CloseStreams(); err != nil { + logrus.Errorf("%s: %s", c.ID, err) + } + + // remove the exec command from the container's store only and not the + // daemon's store so that the exec command can be inspected. + c.ExecCommands.Delete(execConfig.ID) + } else { + logrus.Warnf("Ignoring StateExitProcess for %v but no exec command found", e) + } + case libcontainerd.StateStart, libcontainerd.StateRestore: + c.SetRunning(int(e.Pid), e.State == libcontainerd.StateStart) + c.HasBeenManuallyStopped = false + if err := c.ToDisk(); err != nil { + c.Reset(false) + return err + } + case libcontainerd.StatePause: + c.Paused = true + daemon.LogContainerEvent(c, "pause") + case libcontainerd.StateResume: + c.Paused = false + daemon.LogContainerEvent(c, "unpause") + } + + return nil +} + +// AttachStreams is called by libcontainerd to connect the stdio. 
+func (daemon *Daemon) AttachStreams(id string, iop libcontainerd.IOPipe) error { + var s *runconfig.StreamConfig + c := daemon.containers.Get(id) + if c == nil { + ec, err := daemon.getExecConfig(id) + if err != nil { + return fmt.Errorf("no such exec/container: %s", id) + } + s = ec.StreamConfig + } else { + s = c.StreamConfig + if err := daemon.StartLogging(c); err != nil { + c.Reset(false) + return err + } + } + + if stdin := s.Stdin(); stdin != nil { + if iop.Stdin != nil { + go func() { + io.Copy(iop.Stdin, stdin) + iop.Stdin.Close() + }() + } + } else { + if c != nil && !c.Config.Tty { + // tty is enabled, so dont close containerd's iopipe stdin. + if iop.Stdin != nil { + iop.Stdin.Close() + } + } + } + + copy := func(w io.Writer, r io.Reader) { + s.Add(1) + go func() { + if _, err := io.Copy(w, r); err != nil { + logrus.Errorf("%v stream copy error: %v", id, err) + } + s.Done() + }() + } + + if iop.Stdout != nil { + copy(s.Stdout(), iop.Stdout) + } + if iop.Stderr != nil { + copy(s.Stderr(), iop.Stderr) + } + + return nil +} diff --git a/daemon/monitor_linux.go b/daemon/monitor_linux.go new file mode 100644 index 0000000000..df8b6c5dba --- /dev/null +++ b/daemon/monitor_linux.go @@ -0,0 +1,14 @@ +package daemon + +import ( + "github.com/docker/docker/container" + "github.com/docker/docker/libcontainerd" +) + +// platformConstructExitStatus returns a platform specific exit status structure +func platformConstructExitStatus(e libcontainerd.StateInfo) *container.ExitStatus { + return &container.ExitStatus{ + ExitCode: int(e.ExitCode), + OOMKilled: e.OOMKilled, + } +} diff --git a/daemon/oci_linux.go b/daemon/oci_linux.go new file mode 100644 index 0000000000..0b61f5326b --- /dev/null +++ b/daemon/oci_linux.go @@ -0,0 +1,652 @@ +package daemon + +import ( + "fmt" + "io" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/docker/docker/container" + "github.com/docker/docker/daemon/caps" + "github.com/docker/docker/libcontainerd" + 
"github.com/docker/docker/oci" + "github.com/docker/docker/pkg/idtools" + "github.com/docker/docker/pkg/mount" + "github.com/docker/docker/pkg/stringutils" + "github.com/docker/docker/pkg/symlink" + "github.com/docker/docker/volume" + containertypes "github.com/docker/engine-api/types/container" + "github.com/opencontainers/runc/libcontainer/apparmor" + "github.com/opencontainers/runc/libcontainer/devices" + "github.com/opencontainers/runc/libcontainer/user" + "github.com/opencontainers/specs/specs-go" +) + +func setResources(s *specs.Spec, r containertypes.Resources) error { + weightDevices, err := getBlkioWeightDevices(r) + if err != nil { + return err + } + readBpsDevice, err := getBlkioReadBpsDevices(r) + if err != nil { + return err + } + writeBpsDevice, err := getBlkioWriteBpsDevices(r) + if err != nil { + return err + } + readIOpsDevice, err := getBlkioReadIOpsDevices(r) + if err != nil { + return err + } + writeIOpsDevice, err := getBlkioWriteIOpsDevices(r) + if err != nil { + return err + } + + memoryRes := getMemoryResources(r) + cpuRes := getCPUResources(r) + blkioWeight := r.BlkioWeight + + specResources := &specs.Resources{ + Memory: memoryRes, + CPU: cpuRes, + BlockIO: &specs.BlockIO{ + Weight: &blkioWeight, + WeightDevice: weightDevices, + ThrottleReadBpsDevice: readBpsDevice, + ThrottleWriteBpsDevice: writeBpsDevice, + ThrottleReadIOPSDevice: readIOpsDevice, + ThrottleWriteIOPSDevice: writeIOpsDevice, + }, + DisableOOMKiller: r.OomKillDisable, + Pids: &specs.Pids{ + Limit: &r.PidsLimit, + }, + } + + if s.Linux.Resources != nil && len(s.Linux.Resources.Devices) > 0 { + specResources.Devices = s.Linux.Resources.Devices + } + + s.Linux.Resources = specResources + return nil +} + +func setDevices(s *specs.Spec, c *container.Container) error { + // Build lists of devices allowed and created within the container. 
+ var devs []specs.Device + if c.HostConfig.Privileged { + hostDevices, err := devices.HostDevices() + if err != nil { + return err + } + for _, d := range hostDevices { + devs = append(devs, specDevice(d)) + } + } else { + for _, deviceMapping := range c.HostConfig.Devices { + d, err := getDevicesFromPath(deviceMapping) + if err != nil { + return err + } + + devs = append(devs, d...) + } + } + + s.Linux.Devices = append(s.Linux.Devices, devs...) + return nil +} + +func setRlimits(daemon *Daemon, s *specs.Spec, c *container.Container) error { + var rlimits []specs.Rlimit + + ulimits := c.HostConfig.Ulimits + // Merge ulimits with daemon defaults + ulIdx := make(map[string]struct{}) + for _, ul := range ulimits { + ulIdx[ul.Name] = struct{}{} + } + for name, ul := range daemon.configStore.Ulimits { + if _, exists := ulIdx[name]; !exists { + ulimits = append(ulimits, ul) + } + } + + for _, ul := range ulimits { + rlimits = append(rlimits, specs.Rlimit{ + Type: "RLIMIT_" + strings.ToUpper(ul.Name), + Soft: uint64(ul.Soft), + Hard: uint64(ul.Hard), + }) + } + + s.Process.Rlimits = rlimits + return nil +} + +func setUser(s *specs.Spec, c *container.Container) error { + uid, gid, additionalGids, err := getUser(c, c.Config.User) + if err != nil { + return err + } + s.Process.User.UID = uid + s.Process.User.GID = gid + s.Process.User.AdditionalGids = additionalGids + return nil +} + +func readUserFile(c *container.Container, p string) (io.ReadCloser, error) { + fp, err := symlink.FollowSymlinkInScope(filepath.Join(c.BaseFS, p), c.BaseFS) + if err != nil { + return nil, err + } + return os.Open(fp) +} + +func getUser(c *container.Container, username string) (uint32, uint32, []uint32, error) { + passwdPath, err := user.GetPasswdPath() + if err != nil { + return 0, 0, nil, err + } + groupPath, err := user.GetGroupPath() + if err != nil { + return 0, 0, nil, err + } + passwdFile, err := readUserFile(c, passwdPath) + if err == nil { + defer passwdFile.Close() + } + groupFile, 
err := readUserFile(c, groupPath) + if err == nil { + defer groupFile.Close() + } + + execUser, err := user.GetExecUser(username, nil, passwdFile, groupFile) + if err != nil { + return 0, 0, nil, err + } + + // todo: fix this double read by a change to libcontainer/user pkg + groupFile, err = readUserFile(c, groupPath) + if err == nil { + defer groupFile.Close() + } + var addGroups []int + if len(c.HostConfig.GroupAdd) > 0 { + addGroups, err = user.GetAdditionalGroups(c.HostConfig.GroupAdd, groupFile) + if err != nil { + return 0, 0, nil, err + } + } + uid := uint32(execUser.Uid) + gid := uint32(execUser.Gid) + sgids := append(execUser.Sgids, addGroups...) + var additionalGids []uint32 + for _, g := range sgids { + additionalGids = append(additionalGids, uint32(g)) + } + return uid, gid, additionalGids, nil +} + +func setNamespace(s *specs.Spec, ns specs.Namespace) { + for i, n := range s.Linux.Namespaces { + if n.Type == ns.Type { + s.Linux.Namespaces[i] = ns + return + } + } + s.Linux.Namespaces = append(s.Linux.Namespaces, ns) +} + +func setCapabilities(s *specs.Spec, c *container.Container) error { + var caplist []string + var err error + if c.HostConfig.Privileged { + caplist = caps.GetAllCapabilities() + } else { + caplist, err = caps.TweakCapabilities(s.Process.Capabilities, c.HostConfig.CapAdd, c.HostConfig.CapDrop) + if err != nil { + return err + } + } + s.Process.Capabilities = caplist + return nil +} + +func delNamespace(s *specs.Spec, nsType specs.NamespaceType) { + idx := -1 + for i, n := range s.Linux.Namespaces { + if n.Type == nsType { + idx = i + } + } + if idx >= 0 { + s.Linux.Namespaces = append(s.Linux.Namespaces[:idx], s.Linux.Namespaces[idx+1:]...) 
+ } +} + +func setNamespaces(daemon *Daemon, s *specs.Spec, c *container.Container) error { + // network + if !c.Config.NetworkDisabled { + ns := specs.Namespace{Type: "network"} + parts := strings.SplitN(string(c.HostConfig.NetworkMode), ":", 2) + if parts[0] == "container" { + nc, err := daemon.getNetworkedContainer(c.ID, c.HostConfig.NetworkMode.ConnectedContainer()) + if err != nil { + return err + } + ns.Path = fmt.Sprintf("/proc/%d/ns/net", nc.State.GetPID()) + } else if c.HostConfig.NetworkMode.IsHost() { + ns.Path = c.NetworkSettings.SandboxKey + } + setNamespace(s, ns) + } + // ipc + if c.HostConfig.IpcMode.IsContainer() { + ns := specs.Namespace{Type: "ipc"} + ic, err := daemon.getIpcContainer(c) + if err != nil { + return err + } + ns.Path = fmt.Sprintf("/proc/%d/ns/ipc", ic.State.GetPID()) + setNamespace(s, ns) + } else if c.HostConfig.IpcMode.IsHost() { + delNamespace(s, specs.NamespaceType("ipc")) + } else { + ns := specs.Namespace{Type: "ipc"} + setNamespace(s, ns) + } + // pid + if c.HostConfig.PidMode.IsHost() { + delNamespace(s, specs.NamespaceType("pid")) + } + // uts + if c.HostConfig.UTSMode.IsHost() { + delNamespace(s, specs.NamespaceType("uts")) + s.Hostname = "" + } + // user + if c.HostConfig.UsernsMode.IsPrivate() { + uidMap, gidMap := daemon.GetUIDGIDMaps() + if uidMap != nil { + ns := specs.Namespace{Type: "user"} + setNamespace(s, ns) + s.Linux.UIDMappings = specMapping(uidMap) + s.Linux.GIDMappings = specMapping(gidMap) + } + } + + return nil +} + +func specMapping(s []idtools.IDMap) []specs.IDMapping { + var ids []specs.IDMapping + for _, item := range s { + ids = append(ids, specs.IDMapping{ + HostID: uint32(item.HostID), + ContainerID: uint32(item.ContainerID), + Size: uint32(item.Size), + }) + } + return ids +} + +func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info { + for _, m := range mountinfo { + if m.Mountpoint == dir { + return m + } + } + return nil +} + +// Get the source mount point of directory passed in as 
argument. Also return +// optional fields. +func getSourceMount(source string) (string, string, error) { + // Ensure any symlinks are resolved. + sourcePath, err := filepath.EvalSymlinks(source) + if err != nil { + return "", "", err + } + + mountinfos, err := mount.GetMounts() + if err != nil { + return "", "", err + } + + mountinfo := getMountInfo(mountinfos, sourcePath) + if mountinfo != nil { + return sourcePath, mountinfo.Optional, nil + } + + path := sourcePath + for { + path = filepath.Dir(path) + + mountinfo = getMountInfo(mountinfos, path) + if mountinfo != nil { + return path, mountinfo.Optional, nil + } + + if path == "/" { + break + } + } + + // If we are here, we did not find parent mount. Something is wrong. + return "", "", fmt.Errorf("Could not find source mount of %s", source) +} + +// Ensure mount point on which path is mounted, is shared. +func ensureShared(path string) error { + sharedMount := false + + sourceMount, optionalOpts, err := getSourceMount(path) + if err != nil { + return err + } + // Make sure source mount point is shared. + optsSplit := strings.Split(optionalOpts, " ") + for _, opt := range optsSplit { + if strings.HasPrefix(opt, "shared:") { + sharedMount = true + break + } + } + + if !sharedMount { + return fmt.Errorf("Path %s is mounted on %s but it is not a shared mount.", path, sourceMount) + } + return nil +} + +// Ensure mount point on which path is mounted, is either shared or slave. +func ensureSharedOrSlave(path string) error { + sharedMount := false + slaveMount := false + + sourceMount, optionalOpts, err := getSourceMount(path) + if err != nil { + return err + } + // Make sure source mount point is shared. 
+ optsSplit := strings.Split(optionalOpts, " ") + for _, opt := range optsSplit { + if strings.HasPrefix(opt, "shared:") { + sharedMount = true + break + } else if strings.HasPrefix(opt, "master:") { + slaveMount = true + break + } + } + + if !sharedMount && !slaveMount { + return fmt.Errorf("Path %s is mounted on %s but it is not a shared or slave mount.", path, sourceMount) + } + return nil +} + +var ( + mountPropagationMap = map[string]int{ + "private": mount.PRIVATE, + "rprivate": mount.RPRIVATE, + "shared": mount.SHARED, + "rshared": mount.RSHARED, + "slave": mount.SLAVE, + "rslave": mount.RSLAVE, + } + + mountPropagationReverseMap = map[int]string{ + mount.PRIVATE: "private", + mount.RPRIVATE: "rprivate", + mount.SHARED: "shared", + mount.RSHARED: "rshared", + mount.SLAVE: "slave", + mount.RSLAVE: "rslave", + } +) + +func setMounts(daemon *Daemon, s *specs.Spec, c *container.Container, mounts []container.Mount) error { + userMounts := make(map[string]struct{}) + for _, m := range mounts { + userMounts[m.Destination] = struct{}{} + } + + // Filter out mounts that are overriden by user supplied mounts + var defaultMounts []specs.Mount + _, mountDev := userMounts["/dev"] + for _, m := range s.Mounts { + if _, ok := userMounts[m.Destination]; !ok { + if mountDev && strings.HasPrefix(m.Destination, "/dev/") { + continue + } + defaultMounts = append(defaultMounts, m) + } + } + + s.Mounts = defaultMounts + for _, m := range mounts { + for _, cm := range s.Mounts { + if cm.Destination == m.Destination { + return fmt.Errorf("Duplicate mount point '%s'", m.Destination) + } + } + + if m.Source == "tmpfs" { + opt := []string{"noexec", "nosuid", "nodev", volume.DefaultPropagationMode} + if m.Data != "" { + opt = append(opt, strings.Split(m.Data, ",")...) 
+ } else { + opt = append(opt, "size=65536k") + } + + s.Mounts = append(s.Mounts, specs.Mount{Destination: m.Destination, Source: m.Source, Type: "tmpfs", Options: opt}) + continue + } + + mt := specs.Mount{Destination: m.Destination, Source: m.Source, Type: "bind"} + + // Determine property of RootPropagation based on volume + // properties. If a volume is shared, then keep root propagation + // shared. This should work for slave and private volumes too. + // + // For slave volumes, it can be either [r]shared/[r]slave. + // + // For private volumes any root propagation value should work. + pFlag := mountPropagationMap[m.Propagation] + if pFlag == mount.SHARED || pFlag == mount.RSHARED { + if err := ensureShared(m.Source); err != nil { + return err + } + rootpg := mountPropagationMap[s.Linux.RootfsPropagation] + if rootpg != mount.SHARED && rootpg != mount.RSHARED { + s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.SHARED] + } + } else if pFlag == mount.SLAVE || pFlag == mount.RSLAVE { + if err := ensureSharedOrSlave(m.Source); err != nil { + return err + } + rootpg := mountPropagationMap[s.Linux.RootfsPropagation] + if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE { + s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.RSLAVE] + } + } + + opts := []string{"rbind"} + if !m.Writable { + opts = append(opts, "ro") + } + if pFlag != 0 { + opts = append(opts, mountPropagationReverseMap[pFlag]) + } + + mt.Options = opts + s.Mounts = append(s.Mounts, mt) + } + + if s.Root.Readonly { + for i, m := range s.Mounts { + switch m.Destination { + case "/proc", "/dev/pts", "/dev/mqueue": // /dev is remounted by runc + continue + } + if _, ok := userMounts[m.Destination]; !ok { + if !stringutils.InSlice(m.Options, "ro") { + s.Mounts[i].Options = append(s.Mounts[i].Options, "ro") + } + } + } + } + + if c.HostConfig.Privileged { + if !s.Root.Readonly { + // clear readonly for /sys + for i := range s.Mounts 
{ + if s.Mounts[i].Destination == "/sys" { + clearReadOnly(&s.Mounts[i]) + } + } + } + } + + // TODO: until a kernel/mount solution exists for handling remount in a user namespace, + // we must clear the readonly flag for the cgroups mount (@mrunalp concurs) + if uidMap, _ := daemon.GetUIDGIDMaps(); uidMap != nil || c.HostConfig.Privileged { + for i, m := range s.Mounts { + if m.Type == "cgroup" { + clearReadOnly(&s.Mounts[i]) + } + } + } + + return nil +} + +func (daemon *Daemon) populateCommonSpec(s *specs.Spec, c *container.Container) error { + linkedEnv, err := daemon.setupLinkedContainers(c) + if err != nil { + return err + } + s.Root = specs.Root{ + Path: c.BaseFS, + Readonly: c.HostConfig.ReadonlyRootfs, + } + rootUID, rootGID := daemon.GetRemappedUIDGID() + if err := c.SetupWorkingDirectory(rootUID, rootGID); err != nil { + return err + } + cwd := c.Config.WorkingDir + if len(cwd) == 0 { + cwd = "/" + } + s.Process.Args = append([]string{c.Path}, c.Args...) + s.Process.Cwd = cwd + s.Process.Env = c.CreateDaemonEnvironment(linkedEnv) + s.Process.Terminal = c.Config.Tty + s.Hostname = c.FullHostname() + + return nil +} + +func (daemon *Daemon) createSpec(c *container.Container) (*libcontainerd.Spec, error) { + s := oci.DefaultSpec() + if err := daemon.populateCommonSpec(&s, c); err != nil { + return nil, err + } + + var cgroupsPath string + if c.HostConfig.CgroupParent != "" { + cgroupsPath = filepath.Join(c.HostConfig.CgroupParent, c.ID) + } else { + defaultCgroupParent := "/docker" + if daemon.configStore.CgroupParent != "" { + defaultCgroupParent = daemon.configStore.CgroupParent + } else if daemon.usingSystemd() { + defaultCgroupParent = "system.slice" + } + cgroupsPath = filepath.Join(defaultCgroupParent, c.ID) + } + s.Linux.CgroupsPath = &cgroupsPath + + if err := setResources(&s, c.HostConfig.Resources); err != nil { + return nil, fmt.Errorf("linux runtime spec resources: %v", err) + } + s.Linux.Resources.OOMScoreAdj = &c.HostConfig.OomScoreAdj + if 
err := setDevices(&s, c); err != nil { + return nil, fmt.Errorf("linux runtime spec devices: %v", err) + } + if err := setRlimits(daemon, &s, c); err != nil { + return nil, fmt.Errorf("linux runtime spec rlimits: %v", err) + } + if err := setUser(&s, c); err != nil { + return nil, fmt.Errorf("linux spec user: %v", err) + } + if err := setNamespaces(daemon, &s, c); err != nil { + return nil, fmt.Errorf("linux spec namespaces: %v", err) + } + if err := setCapabilities(&s, c); err != nil { + return nil, fmt.Errorf("linux spec capabilities: %v", err) + } + if err := setSeccomp(daemon, &s, c); err != nil { + return nil, fmt.Errorf("linux seccomp: %v", err) + } + + if err := daemon.setupIpcDirs(c); err != nil { + return nil, err + } + + mounts, err := daemon.setupMounts(c) + if err != nil { + return nil, err + } + mounts = append(mounts, c.IpcMounts()...) + mounts = append(mounts, c.TmpfsMounts()...) + if err := setMounts(daemon, &s, c, mounts); err != nil { + return nil, fmt.Errorf("linux mounts: %v", err) + } + + for _, ns := range s.Linux.Namespaces { + if ns.Type == "network" && ns.Path == "" && !c.Config.NetworkDisabled { + target, err := os.Readlink(filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe")) + if err != nil { + return nil, err + } + + s.Hooks = specs.Hooks{ + Prestart: []specs.Hook{{ + Path: target, // FIXME: cross-platform + Args: []string{"libnetwork-setkey", c.ID, daemon.netController.ID()}, + }}, + } + } + } + + if apparmor.IsEnabled() { + appArmorProfile := "docker-default" + if c.HostConfig.Privileged { + appArmorProfile = "unconfined" + } else if len(c.AppArmorProfile) > 0 { + appArmorProfile = c.AppArmorProfile + } + s.Process.ApparmorProfile = appArmorProfile + } + s.Process.SelinuxLabel = c.GetProcessLabel() + s.Process.NoNewPrivileges = c.NoNewPrivileges + + return (*libcontainerd.Spec)(&s), nil +} + +func clearReadOnly(m *specs.Mount) { + var opt []string + for _, o := range m.Options { + if o != "ro" { + opt = append(opt, o) + } + } + 
m.Options = opt +} diff --git a/daemon/pause.go b/daemon/pause.go index 2ec0df7030..dbfafbc5fd 100644 --- a/daemon/pause.go +++ b/daemon/pause.go @@ -41,10 +41,9 @@ func (daemon *Daemon) containerPause(container *container.Container) error { return errContainerIsRestarting(container.ID) } - if err := daemon.execDriver.Pause(container.Command); err != nil { + if err := daemon.containerd.Pause(container.ID); err != nil { return fmt.Errorf("Cannot pause container %s: %s", container.ID, err) } - container.Paused = true - daemon.LogContainerEvent(container, "pause") + return nil } diff --git a/daemon/resize.go b/daemon/resize.go index d7bb105b36..747353852e 100644 --- a/daemon/resize.go +++ b/daemon/resize.go @@ -1,6 +1,10 @@ package daemon -import "fmt" +import ( + "fmt" + + "github.com/docker/docker/libcontainerd" +) // ContainerResize changes the size of the TTY of the process running // in the container with the given name to the given height and width. @@ -14,7 +18,7 @@ func (daemon *Daemon) ContainerResize(name string, height, width int) error { return errNotRunning{container.ID} } - if err = container.Resize(height, width); err == nil { + if err = daemon.containerd.Resize(container.ID, libcontainerd.InitFriendlyName, width, height); err == nil { attributes := map[string]string{ "height": fmt.Sprintf("%d", height), "width": fmt.Sprintf("%d", width), @@ -28,10 +32,9 @@ func (daemon *Daemon) ContainerResize(name string, height, width int) error { // running in the exec with the given name to the given height and // width. 
func (daemon *Daemon) ContainerExecResize(name string, height, width int) error { - ExecConfig, err := daemon.getExecConfig(name) + ec, err := daemon.getExecConfig(name) if err != nil { return err } - - return ExecConfig.Resize(height, width) + return daemon.containerd.Resize(ec.ContainerID, ec.ID, width, height) } diff --git a/daemon/seccomp_default_linux.go b/daemon/seccomp_default_linux.go new file mode 100644 index 0000000000..c5121dd9a6 --- /dev/null +++ b/daemon/seccomp_default_linux.go @@ -0,0 +1,1600 @@ +// +build linux,seccomp + +package daemon + +import ( + "syscall" + + "github.com/opencontainers/specs/specs-go" + libseccomp "github.com/seccomp/libseccomp-golang" +) + +func arches() []specs.Arch { + var native, err = libseccomp.GetNativeArch() + if err != nil { + return []specs.Arch{} + } + var a = native.String() + switch a { + case "amd64": + return []specs.Arch{specs.ArchX86_64, specs.ArchX86, specs.ArchX32} + case "arm64": + return []specs.Arch{specs.ArchAARCH64, specs.ArchARM} + case "mips64": + return []specs.Arch{specs.ArchMIPS, specs.ArchMIPS64, specs.ArchMIPS64N32} + case "mips64n32": + return []specs.Arch{specs.ArchMIPS, specs.ArchMIPS64, specs.ArchMIPS64N32} + case "mipsel64": + return []specs.Arch{specs.ArchMIPSEL, specs.ArchMIPSEL64, specs.ArchMIPSEL64N32} + case "mipsel64n32": + return []specs.Arch{specs.ArchMIPSEL, specs.ArchMIPSEL64, specs.ArchMIPSEL64N32} + default: + return []specs.Arch{} + } +} + +var defaultSeccompProfile = specs.Seccomp{ + DefaultAction: specs.ActErrno, + Architectures: arches(), + Syscalls: []specs.Syscall{ + { + Name: "accept", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "accept4", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "access", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "alarm", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "arch_prctl", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "bind", + Action: 
specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "brk", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "capget", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "capset", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "chdir", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "chmod", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "chown", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "chown32", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "chroot", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "clock_getres", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "clock_gettime", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "clock_nanosleep", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "clone", + Action: specs.ActAllow, + Args: []specs.Arg{ + { + Index: 0, + Value: syscall.CLONE_NEWNS | syscall.CLONE_NEWUTS | syscall.CLONE_NEWIPC | syscall.CLONE_NEWUSER | syscall.CLONE_NEWPID | syscall.CLONE_NEWNET, + ValueTwo: 0, + Op: specs.OpMaskedEqual, + }, + }, + }, + { + Name: "close", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "connect", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "creat", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "dup", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "dup2", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "dup3", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "epoll_create", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "epoll_create1", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "epoll_ctl", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "epoll_ctl_old", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "epoll_pwait", + Action: 
specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "epoll_wait", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "epoll_wait_old", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "eventfd", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "eventfd2", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "execve", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "execveat", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "exit", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "exit_group", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "faccessat", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "fadvise64", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "fadvise64_64", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "fallocate", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "fanotify_init", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "fanotify_mark", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "fchdir", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "fchmod", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "fchmodat", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "fchown", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "fchown32", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "fchownat", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "fcntl", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "fcntl64", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "fdatasync", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "fgetxattr", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "flistxattr", + Action: specs.ActAllow, + Args: 
[]specs.Arg{}, + }, + { + Name: "flock", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "fork", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "fremovexattr", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "fsetxattr", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "fstat", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "fstat64", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "fstatat64", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "fstatfs", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "fstatfs64", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "fsync", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "ftruncate", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "ftruncate64", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "futex", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "futimesat", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getcpu", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getcwd", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getdents", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getdents64", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getegid", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getegid32", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "geteuid", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "geteuid32", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getgid", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getgid32", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getgroups", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getgroups32", + Action: 
specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getitimer", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getpeername", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getpgid", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getpgrp", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getpid", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getppid", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getpriority", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getrandom", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getresgid", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getresgid32", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getresuid", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getresuid32", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getrlimit", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "get_robust_list", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getrusage", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getsid", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getsockname", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getsockopt", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "get_thread_area", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "gettid", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "gettimeofday", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getuid", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getuid32", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "getxattr", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "inotify_add_watch", + Action: 
specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "inotify_init", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "inotify_init1", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "inotify_rm_watch", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "io_cancel", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "ioctl", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "io_destroy", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "io_getevents", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "ioprio_get", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "ioprio_set", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "io_setup", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "io_submit", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "kill", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "lchown", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "lchown32", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "lgetxattr", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "link", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "linkat", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "listen", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "listxattr", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "llistxattr", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "_llseek", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "lremovexattr", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "lseek", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "lsetxattr", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "lstat", + Action: specs.ActAllow, + Args: 
[]specs.Arg{}, + }, + { + Name: "lstat64", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "madvise", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "memfd_create", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "mincore", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "mkdir", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "mkdirat", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "mknod", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "mknodat", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "mlock", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "mlockall", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "mmap", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "mmap2", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "mprotect", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "mq_getsetattr", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "mq_notify", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "mq_open", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "mq_timedreceive", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "mq_timedsend", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "mq_unlink", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "mremap", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "msgctl", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "msgget", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "msgrcv", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "msgsnd", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "msync", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "munlock", + Action: 
specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "munlockall", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "munmap", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "nanosleep", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "newfstatat", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "_newselect", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "open", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "openat", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "pause", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "pipe", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "pipe2", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "poll", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "ppoll", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "prctl", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "pread64", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "preadv", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "prlimit64", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "pselect6", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "pwrite64", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "pwritev", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "read", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "readahead", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "readlink", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "readlinkat", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "readv", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "recv", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "recvfrom", + Action: 
specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "recvmmsg", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "recvmsg", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "remap_file_pages", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "removexattr", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "rename", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "renameat", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "renameat2", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "rmdir", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "rt_sigaction", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "rt_sigpending", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "rt_sigprocmask", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "rt_sigqueueinfo", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "rt_sigreturn", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "rt_sigsuspend", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "rt_sigtimedwait", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "rt_tgsigqueueinfo", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "sched_getaffinity", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "sched_getattr", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "sched_getparam", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "sched_get_priority_max", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "sched_get_priority_min", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "sched_getscheduler", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "sched_rr_get_interval", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "sched_setaffinity", + 
Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "sched_setattr", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "sched_setparam", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "sched_setscheduler", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "sched_yield", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "seccomp", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "select", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "semctl", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "semget", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "semop", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "semtimedop", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "send", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "sendfile", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "sendfile64", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "sendmmsg", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "sendmsg", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "sendto", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "setdomainname", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "setfsgid", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "setfsgid32", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "setfsuid", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "setfsuid32", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "setgid", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "setgid32", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "setgroups", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "setgroups32", + Action: specs.ActAllow, + 
Args: []specs.Arg{}, + }, + { + Name: "sethostname", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "setitimer", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "setpgid", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "setpriority", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "setregid", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "setregid32", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "setresgid", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "setresgid32", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "setresuid", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "setresuid32", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "setreuid", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "setreuid32", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "setrlimit", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "set_robust_list", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "setsid", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "setsockopt", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "set_thread_area", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "set_tid_address", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "setuid", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "setuid32", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "setxattr", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "shmat", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "shmctl", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "shmdt", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "shmget", + Action: specs.ActAllow, + Args: []specs.Arg{}, 
+ }, + { + Name: "shutdown", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "sigaltstack", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "signalfd", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "signalfd4", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "sigreturn", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "socket", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "socketpair", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "splice", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "stat", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "stat64", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "statfs", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "statfs64", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "symlink", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "symlinkat", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "sync", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "sync_file_range", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "syncfs", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "sysinfo", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "syslog", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "tee", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "tgkill", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "time", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "timer_create", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "timer_delete", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "timerfd_create", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "timerfd_gettime", + Action: 
specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "timerfd_settime", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "timer_getoverrun", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "timer_gettime", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "timer_settime", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "times", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "tkill", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "truncate", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "truncate64", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "ugetrlimit", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "umask", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "uname", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "unlink", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "unlinkat", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "utime", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "utimensat", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "utimes", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "vfork", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "vhangup", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "vmsplice", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "wait4", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "waitid", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "waitpid", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "write", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "writev", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + // i386 specific syscalls + { + Name: "modify_ldt", + Action: specs.ActAllow, + Args: 
[]specs.Arg{}, + }, + // arm specific syscalls + { + Name: "breakpoint", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "cacheflush", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + { + Name: "set_tls", + Action: specs.ActAllow, + Args: []specs.Arg{}, + }, + }, +} diff --git a/daemon/seccomp_disabled.go b/daemon/seccomp_disabled.go new file mode 100644 index 0000000000..620eee29bf --- /dev/null +++ b/daemon/seccomp_disabled.go @@ -0,0 +1,12 @@ +// +build !seccomp,!windows + +package daemon + +import ( + "github.com/docker/docker/container" + "github.com/opencontainers/specs/specs-go" +) + +func setSeccomp(daemon *Daemon, rs *specs.Spec, c *container.Container) error { + return nil +} diff --git a/daemon/seccomp_linux.go b/daemon/seccomp_linux.go new file mode 100644 index 0000000000..02a7650ea6 --- /dev/null +++ b/daemon/seccomp_linux.go @@ -0,0 +1,100 @@ +// +build linux,seccomp + +package daemon + +import ( + "encoding/json" + "fmt" + + "github.com/Sirupsen/logrus" + "github.com/docker/docker/container" + "github.com/docker/engine-api/types" + "github.com/opencontainers/specs/specs-go" +) + +func setSeccomp(daemon *Daemon, rs *specs.Spec, c *container.Container) error { + var seccomp *specs.Seccomp + var err error + + if c.HostConfig.Privileged { + return nil + } + + if !daemon.seccompEnabled { + if c.SeccompProfile != "" && c.SeccompProfile != "unconfined" { + return fmt.Errorf("Seccomp is not enabled in your kernel, cannot run a custom seccomp profile.") + } + logrus.Warn("Seccomp is not enabled in your kernel, running container without default profile.") + c.SeccompProfile = "unconfined" + } + if c.SeccompProfile == "unconfined" { + return nil + } + if c.SeccompProfile != "" { + seccomp, err = loadSeccompProfile(c.SeccompProfile) + if err != nil { + return err + } + } else { + seccomp = &defaultSeccompProfile + } + + rs.Linux.Seccomp = seccomp + return nil +} + +func loadSeccompProfile(body string) (*specs.Seccomp, error) { + var 
config types.Seccomp + if err := json.Unmarshal([]byte(body), &config); err != nil { + return nil, fmt.Errorf("Decoding seccomp profile failed: %v", err) + } + + return setupSeccomp(&config) +} + +func setupSeccomp(config *types.Seccomp) (newConfig *specs.Seccomp, err error) { + if config == nil { + return nil, nil + } + + // No default action specified, no syscalls listed, assume seccomp disabled + if config.DefaultAction == "" && len(config.Syscalls) == 0 { + return nil, nil + } + + newConfig = &specs.Seccomp{} + + // if config.Architectures == 0 then libseccomp will figure out the architecture to use + if len(config.Architectures) > 0 { + // newConfig.Architectures = []string{} + for _, arch := range config.Architectures { + newConfig.Architectures = append(newConfig.Architectures, specs.Arch(arch)) + } + } + + newConfig.DefaultAction = specs.Action(config.DefaultAction) + + // Loop through all syscall blocks and convert them to libcontainer format + for _, call := range config.Syscalls { + newCall := specs.Syscall{ + Name: call.Name, + Action: specs.Action(call.Action), + } + + // Loop through all the arguments of the syscall and convert them + for _, arg := range call.Args { + newArg := specs.Arg{ + Index: arg.Index, + Value: arg.Value, + ValueTwo: arg.ValueTwo, + Op: specs.Operator(arg.Op), + } + + newCall.Args = append(newCall.Args, newArg) + } + + newConfig.Syscalls = append(newConfig.Syscalls, newCall) + } + + return newConfig, nil +} diff --git a/daemon/start.go b/daemon/start.go index 8c7e5bf5aa..be1fef7737 100644 --- a/daemon/start.go +++ b/daemon/start.go @@ -4,10 +4,13 @@ import ( "fmt" "net/http" "runtime" + "strings" + "syscall" "github.com/Sirupsen/logrus" "github.com/docker/docker/container" "github.com/docker/docker/errors" + "github.com/docker/docker/libcontainerd" "github.com/docker/docker/runconfig" containertypes "github.com/docker/engine-api/types/container" ) @@ -122,44 +125,36 @@ func (daemon *Daemon) containerStart(container 
*container.Container) (err error) if err := daemon.initializeNetworking(container); err != nil { return err } - linkedEnv, err := daemon.setupLinkedContainers(container) + + spec, err := daemon.createSpec(container) if err != nil { return err } - rootUID, rootGID := daemon.GetRemappedUIDGID() - if err := container.SetupWorkingDirectory(rootUID, rootGID); err != nil { - return err - } - env := container.CreateDaemonEnvironment(linkedEnv) - if err := daemon.populateCommand(container, env); err != nil { - return err - } - if !container.HostConfig.IpcMode.IsContainer() && !container.HostConfig.IpcMode.IsHost() { - if err := daemon.setupIpcDirs(container); err != nil { - return err + defer daemon.LogContainerEvent(container, "start") // this is logged even on error + if err := daemon.containerd.Create(container.ID, *spec, libcontainerd.WithRestartManager(container.RestartManager(true))); err != nil { + // if we receive an internal error from the initial start of a container then lets + // return it instead of entering the restart loop + // set to 127 for container cmd not found/does not exist) + if strings.Contains(err.Error(), "executable file not found") || + strings.Contains(err.Error(), "no such file or directory") || + strings.Contains(err.Error(), "system cannot find the file specified") { + container.ExitCode = 127 + err = fmt.Errorf("Container command not found or does not exist.") + } + // set to 126 for container cmd can't be invoked errors + if strings.Contains(err.Error(), syscall.EACCES.Error()) { + container.ExitCode = 126 + err = fmt.Errorf("Container command could not be invoked.") } - } - mounts, err := daemon.setupMounts(container) - if err != nil { + container.Reset(false) return err } - mounts = append(mounts, container.IpcMounts()...) - mounts = append(mounts, container.TmpfsMounts()...) 
- container.Command.Mounts = mounts - if err := daemon.waitForStart(container); err != nil { - return err - } - container.HasBeenStartedBefore = true return nil } -func (daemon *Daemon) waitForStart(container *container.Container) error { - return container.StartMonitor(daemon) -} - // Cleanup releases any network resources allocated to the container along with any rules // around how containers are linked together. It also unmounts the container's root filesystem. func (daemon *Daemon) Cleanup(container *container.Container) { @@ -167,7 +162,13 @@ func (daemon *Daemon) Cleanup(container *container.Container) { container.UnmountIpcMounts(detachMounted) - daemon.conditionalUnmountOnCleanup(container) + if err := daemon.conditionalUnmountOnCleanup(container); err != nil { + // FIXME: remove once reference counting for graphdrivers has been refactored + // Ensure that all the mounts are gone + if mountid, err := daemon.layerStore.GetMountID(container.ID); err == nil { + daemon.cleanupMountsByID(mountid) + } + } for _, eConfig := range container.ExecCommands.Commands() { daemon.unregisterExecCommand(container, eConfig) diff --git a/daemon/stats.go b/daemon/stats.go index 1cbc1193bf..1942ccac3c 100644 --- a/daemon/stats.go +++ b/daemon/stats.go @@ -6,7 +6,6 @@ import ( "runtime" "github.com/docker/docker/api/types/backend" - "github.com/docker/docker/daemon/execdriver" "github.com/docker/docker/pkg/ioutils" "github.com/docker/docker/pkg/version" "github.com/docker/engine-api/types" @@ -42,12 +41,9 @@ func (daemon *Daemon) ContainerStats(prefixOrName string, config *backend.Contai var preCPUStats types.CPUStats getStatJSON := func(v interface{}) *types.StatsJSON { - update := v.(*execdriver.ResourceStats) - ss := convertStatsToAPITypes(update.Stats) + ss := v.(*types.StatsJSON) ss.PreCPUStats = preCPUStats - ss.MemoryStats.Limit = uint64(update.MemoryLimit) - ss.Read = update.Read - ss.CPUStats.SystemUsage = update.SystemUsage + // ss.MemoryStats.Limit = 
uint64(update.MemoryLimit) preCPUStats = ss.CPUStats return ss } diff --git a/daemon/stats_collector_unix.go b/daemon/stats_collector_unix.go index a8de5a2062..7c9a45c649 100644 --- a/daemon/stats_collector_unix.go +++ b/daemon/stats_collector_unix.go @@ -13,14 +13,14 @@ import ( "github.com/Sirupsen/logrus" "github.com/docker/docker/container" - "github.com/docker/docker/daemon/execdriver" "github.com/docker/docker/pkg/pubsub" + "github.com/docker/engine-api/types" "github.com/opencontainers/runc/libcontainer/system" ) type statsSupervisor interface { // GetContainerStats collects all the stats related to a container - GetContainerStats(container *container.Container) (*execdriver.ResourceStats, error) + GetContainerStats(container *container.Container) (*types.StatsJSON, error) } // newStatsCollector returns a new statsCollector that collections @@ -120,12 +120,13 @@ func (s *statsCollector) run() { for _, pair := range pairs { stats, err := s.supervisor.GetContainerStats(pair.container) if err != nil { - if err != execdriver.ErrNotRunning { + if err, ok := err.(errNotRunning); ok { logrus.Errorf("collecting stats for %s: %v", pair.container.ID, err) } continue } - stats.SystemUsage = systemUsage + // FIXME: move to containerd + stats.CPUStats.SystemUsage = systemUsage pair.publisher.Publish(stats) } diff --git a/daemon/stats_linux.go b/daemon/stats_linux.go deleted file mode 100644 index 1a907e015a..0000000000 --- a/daemon/stats_linux.go +++ /dev/null @@ -1,84 +0,0 @@ -package daemon - -import ( - "github.com/docker/engine-api/types" - "github.com/opencontainers/runc/libcontainer" - "github.com/opencontainers/runc/libcontainer/cgroups" -) - -// convertStatsToAPITypes converts the libcontainer.Stats to the api specific -// structs. This is done to preserve API compatibility and versioning. 
-func convertStatsToAPITypes(ls *libcontainer.Stats) *types.StatsJSON { - s := &types.StatsJSON{} - if ls.Interfaces != nil { - s.Networks = make(map[string]types.NetworkStats) - for _, iface := range ls.Interfaces { - // For API Version >= 1.21, the original data of network will - // be returned. - s.Networks[iface.Name] = types.NetworkStats{ - RxBytes: iface.RxBytes, - RxPackets: iface.RxPackets, - RxErrors: iface.RxErrors, - RxDropped: iface.RxDropped, - TxBytes: iface.TxBytes, - TxPackets: iface.TxPackets, - TxErrors: iface.TxErrors, - TxDropped: iface.TxDropped, - } - } - } - - cs := ls.CgroupStats - if cs != nil { - s.BlkioStats = types.BlkioStats{ - IoServiceBytesRecursive: copyBlkioEntry(cs.BlkioStats.IoServiceBytesRecursive), - IoServicedRecursive: copyBlkioEntry(cs.BlkioStats.IoServicedRecursive), - IoQueuedRecursive: copyBlkioEntry(cs.BlkioStats.IoQueuedRecursive), - IoServiceTimeRecursive: copyBlkioEntry(cs.BlkioStats.IoServiceTimeRecursive), - IoWaitTimeRecursive: copyBlkioEntry(cs.BlkioStats.IoWaitTimeRecursive), - IoMergedRecursive: copyBlkioEntry(cs.BlkioStats.IoMergedRecursive), - IoTimeRecursive: copyBlkioEntry(cs.BlkioStats.IoTimeRecursive), - SectorsRecursive: copyBlkioEntry(cs.BlkioStats.SectorsRecursive), - } - cpu := cs.CpuStats - s.CPUStats = types.CPUStats{ - CPUUsage: types.CPUUsage{ - TotalUsage: cpu.CpuUsage.TotalUsage, - PercpuUsage: cpu.CpuUsage.PercpuUsage, - UsageInKernelmode: cpu.CpuUsage.UsageInKernelmode, - UsageInUsermode: cpu.CpuUsage.UsageInUsermode, - }, - ThrottlingData: types.ThrottlingData{ - Periods: cpu.ThrottlingData.Periods, - ThrottledPeriods: cpu.ThrottlingData.ThrottledPeriods, - ThrottledTime: cpu.ThrottlingData.ThrottledTime, - }, - } - mem := cs.MemoryStats - s.MemoryStats = types.MemoryStats{ - Usage: mem.Usage.Usage, - MaxUsage: mem.Usage.MaxUsage, - Stats: mem.Stats, - Failcnt: mem.Usage.Failcnt, - } - pids := cs.PidsStats - s.PidsStats = types.PidsStats{ - Current: pids.Current, - } - } - - return s -} - -func 
copyBlkioEntry(entries []cgroups.BlkioStatEntry) []types.BlkioStatEntry { - out := make([]types.BlkioStatEntry, len(entries)) - for i, re := range entries { - out[i] = types.BlkioStatEntry{ - Major: re.Major, - Minor: re.Minor, - Op: re.Op, - Value: re.Value, - } - } - return out -} diff --git a/daemon/stats_windows.go b/daemon/stats_windows.go deleted file mode 100644 index 0f47cf09e0..0000000000 --- a/daemon/stats_windows.go +++ /dev/null @@ -1,14 +0,0 @@ -package daemon - -import ( - "github.com/docker/engine-api/types" - "github.com/opencontainers/runc/libcontainer" -) - -// convertStatsToAPITypes converts the libcontainer.Stats to the api specific -// structs. This is done to preserve API compatibility and versioning. -func convertStatsToAPITypes(ls *libcontainer.Stats) *types.StatsJSON { - // TODO Windows. Refactor accordingly to fill in stats. - s := &types.StatsJSON{} - return s -} diff --git a/daemon/top_unix.go b/daemon/top_unix.go index 1f8ab07f04..d4a9528c98 100644 --- a/daemon/top_unix.go +++ b/daemon/top_unix.go @@ -33,7 +33,8 @@ func (daemon *Daemon) ContainerTop(name string, psArgs string) (*types.Container if container.IsRestarting() { return nil, errContainerIsRestarting(container.ID) } - pids, err := daemon.ExecutionDriver().GetPidsForContainer(container.ID) + + pids, err := daemon.containerd.GetPidsForContainer(container.ID) if err != nil { return nil, err } diff --git a/daemon/unpause.go b/daemon/unpause.go index 4af6f11222..c1ab74b0bf 100644 --- a/daemon/unpause.go +++ b/daemon/unpause.go @@ -35,11 +35,9 @@ func (daemon *Daemon) containerUnpause(container *container.Container) error { return fmt.Errorf("Container %s is not paused", container.ID) } - if err := daemon.execDriver.Unpause(container.Command); err != nil { + if err := daemon.containerd.Resume(container.ID); err != nil { return fmt.Errorf("Cannot unpause container %s: %s", container.ID, err) } - container.Paused = false - daemon.LogContainerEvent(container, "unpause") return nil } 
diff --git a/daemon/update.go b/daemon/update.go index ef1f6bcfd7..fee470a39c 100644 --- a/daemon/update.go +++ b/daemon/update.go @@ -84,7 +84,7 @@ func (daemon *Daemon) update(name string, hostConfig *container.HostConfig) erro // If container is running (including paused), we need to update configs // to the real world. if container.IsRunning() && !container.IsRestarting() { - if err := daemon.execDriver.Update(container.Command); err != nil { + if err := daemon.containerd.UpdateResources(container.ID, toContainerdResources(hostConfig.Resources)); err != nil { restoreConfig = true return errCannotUpdate(container.ID, err) } diff --git a/daemon/update_linux.go b/daemon/update_linux.go new file mode 100644 index 0000000000..97ba7c09a4 --- /dev/null +++ b/daemon/update_linux.go @@ -0,0 +1,25 @@ +// +build linux + +package daemon + +import ( + "github.com/docker/docker/libcontainerd" + "github.com/docker/engine-api/types/container" +) + +func toContainerdResources(resources container.Resources) libcontainerd.Resources { + var r libcontainerd.Resources + r.BlkioWeight = uint32(resources.BlkioWeight) + r.CpuShares = uint32(resources.CPUShares) + r.CpuPeriod = uint32(resources.CPUPeriod) + r.CpuQuota = uint32(resources.CPUQuota) + r.CpusetCpus = resources.CpusetCpus + r.CpusetMems = resources.CpusetMems + r.MemoryLimit = uint32(resources.Memory) + if resources.MemorySwap > 0 { + r.MemorySwap = uint32(resources.MemorySwap) + } + r.MemoryReservation = uint32(resources.MemoryReservation) + r.KernelMemoryLimit = uint32(resources.KernelMemory) + return r +} diff --git a/daemon/volumes.go b/daemon/volumes.go index d32715997d..d1b220cd91 100644 --- a/daemon/volumes.go +++ b/daemon/volumes.go @@ -8,7 +8,6 @@ import ( "strings" "github.com/docker/docker/container" - "github.com/docker/docker/daemon/execdriver" "github.com/docker/docker/volume" "github.com/docker/engine-api/types" containertypes "github.com/docker/engine-api/types/container" @@ -21,7 +20,7 @@ var ( 
ErrVolumeReadonly = errors.New("mounted volume is marked read-only") ) -type mounts []execdriver.Mount +type mounts []container.Mount // volumeToAPIType converts a volume.Volume to the type used by the remote API func volumeToAPIType(v volume.Volume) *types.Volume { diff --git a/daemon/volumes_unix.go b/daemon/volumes_unix.go index 2668114f47..078fd10bf0 100644 --- a/daemon/volumes_unix.go +++ b/daemon/volumes_unix.go @@ -8,25 +8,24 @@ import ( "strconv" "github.com/docker/docker/container" - "github.com/docker/docker/daemon/execdriver" "github.com/docker/docker/volume" ) // setupMounts iterates through each of the mount points for a container and // calls Setup() on each. It also looks to see if is a network mount such as // /etc/resolv.conf, and if it is not, appends it to the array of mounts. -func (daemon *Daemon) setupMounts(container *container.Container) ([]execdriver.Mount, error) { - var mounts []execdriver.Mount - for _, m := range container.MountPoints { - if err := daemon.lazyInitializeVolume(container.ID, m); err != nil { +func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, error) { + var mounts []container.Mount + for _, m := range c.MountPoints { + if err := daemon.lazyInitializeVolume(c.ID, m); err != nil { return nil, err } path, err := m.Setup() if err != nil { return nil, err } - if !container.TrySetNetworkMount(m.Destination, path) { - mnt := execdriver.Mount{ + if !c.TrySetNetworkMount(m.Destination, path) { + mnt := container.Mount{ Source: path, Destination: m.Destination, Writable: m.RW, @@ -35,7 +34,7 @@ func (daemon *Daemon) setupMounts(container *container.Container) ([]execdriver. if m.Volume != nil { attributes := map[string]string{ "driver": m.Volume.DriverName(), - "container": container.ID, + "container": c.ID, "destination": m.Destination, "read/write": strconv.FormatBool(m.RW), "propagation": m.Propagation, @@ -47,7 +46,7 @@ func (daemon *Daemon) setupMounts(container *container.Container) ([]execdriver. 
} mounts = sortMounts(mounts) - netMounts := container.NetworkMounts() + netMounts := c.NetworkMounts() // if we are going to mount any of the network files from container // metadata, the ownership must be set properly for potential container // remapped root (user namespaces) @@ -63,7 +62,7 @@ func (daemon *Daemon) setupMounts(container *container.Container) ([]execdriver. // sortMounts sorts an array of mounts in lexicographic order. This ensure that // when mounting, the mounts don't shadow other mounts. For example, if mounting // /etc and /etc/resolv.conf, /etc/resolv.conf must not be mounted first. -func sortMounts(m []execdriver.Mount) []execdriver.Mount { +func sortMounts(m []container.Mount) []container.Mount { sort.Sort(mounts(m)) return m } diff --git a/distribution/xfer/download_test.go b/distribution/xfer/download_test.go index 9be9a24a3b..2e4d724cd2 100644 --- a/distribution/xfer/download_test.go +++ b/distribution/xfer/download_test.go @@ -112,12 +112,13 @@ func (ls *mockLayerStore) CreateRWLayer(string, layer.ChainID, string, layer.Mou func (ls *mockLayerStore) GetRWLayer(string) (layer.RWLayer, error) { return nil, errors.New("not implemented") - } func (ls *mockLayerStore) ReleaseRWLayer(layer.RWLayer) ([]layer.Metadata, error) { return nil, errors.New("not implemented") - +} +func (ls *mockLayerStore) GetMountID(string) (string, error) { + return "", errors.New("not implemented") } func (ls *mockLayerStore) Cleanup() error { diff --git a/docker/daemon.go b/docker/daemon.go index 9846e6280b..bee921c782 100644 --- a/docker/daemon.go +++ b/docker/daemon.go @@ -29,6 +29,7 @@ import ( "github.com/docker/docker/daemon/logger" "github.com/docker/docker/docker/listeners" "github.com/docker/docker/dockerversion" + "github.com/docker/docker/libcontainerd" "github.com/docker/docker/opts" "github.com/docker/docker/pkg/jsonlog" flag "github.com/docker/docker/pkg/mflag" @@ -264,7 +265,13 @@ func (cli *DaemonCli) CmdDaemon(args ...string) error { 
cli.TrustKeyPath = commonFlags.TrustKey registryService := registry.NewService(cli.Config.ServiceOptions) - d, err := daemon.NewDaemon(cli.Config, registryService) + + containerdRemote, err := libcontainerd.New(filepath.Join(cli.Config.ExecRoot, "libcontainerd"), cli.getPlatformRemoteOptions()...) + if err != nil { + logrus.Fatal(err) + } + + d, err := daemon.NewDaemon(cli.Config, registryService, containerdRemote) if err != nil { if pfile != nil { if err := pfile.Remove(); err != nil { @@ -279,7 +286,6 @@ func (cli *DaemonCli) CmdDaemon(args ...string) error { logrus.WithFields(logrus.Fields{ "version": dockerversion.Version, "commit": dockerversion.GitCommit, - "execdriver": d.ExecutionDriver().Name(), "graphdriver": d.GraphDriverName(), }).Info("Docker daemon") @@ -330,6 +336,7 @@ func (cli *DaemonCli) CmdDaemon(args ...string) error { // Wait for serve API to complete errAPI := <-serveAPIWait shutdownDaemon(d, 15) + containerdRemote.Cleanup() if errAPI != nil { if pfile != nil { if err := pfile.Remove(); err != nil { diff --git a/docker/daemon_unix.go b/docker/daemon_unix.go index c76700f014..775a20aa75 100644 --- a/docker/daemon_unix.go +++ b/docker/daemon_unix.go @@ -11,10 +11,9 @@ import ( "github.com/Sirupsen/logrus" apiserver "github.com/docker/docker/api/server" "github.com/docker/docker/daemon" + "github.com/docker/docker/libcontainerd" "github.com/docker/docker/pkg/mflag" "github.com/docker/docker/pkg/system" - - _ "github.com/docker/docker/daemon/execdriver/native" ) const defaultDaemonConfigFile = "/etc/docker/daemon.json" @@ -65,3 +64,15 @@ func setupConfigReloadTrap(configFile string, flags *mflag.FlagSet, reload func( } }() } + +func (cli *DaemonCli) getPlatformRemoteOptions() []libcontainerd.RemoteOption { + opts := []libcontainerd.RemoteOption{ + libcontainerd.WithDebugLog(cli.Config.Debug), + } + if cli.Config.ContainerdAddr != "" { + opts = append(opts, libcontainerd.WithRemoteAddr(cli.Config.ContainerdAddr)) + } else { + opts = append(opts, 
libcontainerd.WithStartDaemon(true)) + } + return opts +} diff --git a/integration-cli/daemon.go b/integration-cli/daemon.go index 3d28b709b6..00493ab38a 100644 --- a/integration-cli/daemon.go +++ b/integration-cli/daemon.go @@ -142,6 +142,7 @@ func (d *Daemon) StartWithLogFile(out *os.File, providedArgs ...string) error { args := append(d.GlobalFlags, d.Command, + "--containerd", "/var/run/docker/libcontainerd/containerd.sock", "--graph", d.root, "--pidfile", fmt.Sprintf("%s/docker.pid", d.folder), fmt.Sprintf("--userland-proxy=%t", d.userlandProxy), @@ -245,6 +246,29 @@ func (d *Daemon) StartWithBusybox(arg ...string) error { return d.LoadBusybox() } +// Kill will send a SIGKILL to the daemon +func (d *Daemon) Kill() error { + if d.cmd == nil || d.wait == nil { + return errors.New("daemon not started") + } + + defer func() { + d.logFile.Close() + d.cmd = nil + }() + + if err := d.cmd.Process.Kill(); err != nil { + d.c.Logf("Could not kill daemon: %v", err) + return err + } + + if err := os.Remove(fmt.Sprintf("%s/docker.pid", d.folder)); err != nil { + return err + } + + return nil +} + // Stop will send a SIGINT every second and wait for the daemon to stop. // If it timeouts, a SIGKILL is sent. // Stop will not delete the daemon directory. 
If a purged daemon is needed, @@ -300,6 +324,10 @@ out2: return err } + if err := os.Remove(fmt.Sprintf("%s/docker.pid", d.folder)); err != nil { + return err + } + return nil } diff --git a/integration-cli/docker_cli_daemon_experimental_test.go b/integration-cli/docker_cli_daemon_experimental_test.go new file mode 100644 index 0000000000..d450858c6c --- /dev/null +++ b/integration-cli/docker_cli_daemon_experimental_test.go @@ -0,0 +1,150 @@ +// +build daemon,!windows,experimental + +package main + +import ( + "os/exec" + "strings" + "time" + + "github.com/go-check/check" +) + +// TestDaemonRestartWithKilledRunningContainer requires live restore of running containers +func (s *DockerDaemonSuite) TestDaemonRestartWithKilledRunningContainer(t *check.C) { + // TODO(mlaventure): Not sure what would the exit code be on windows + testRequires(t, DaemonIsLinux) + if err := s.d.StartWithBusybox(); err != nil { + t.Fatal(err) + } + + cid, err := s.d.Cmd("run", "-d", "--name", "test", "busybox", "top") + defer s.d.Stop() + if err != nil { + t.Fatal(cid, err) + } + cid = strings.TrimSpace(cid) + + // Kill the daemon + if err := s.d.Kill(); err != nil { + t.Fatal(err) + } + + // kill the container + runCmd := exec.Command("ctr", "--address", "/var/run/docker/libcontainerd/containerd.sock", "containers", "kill", cid) + if out, ec, err := runCommandWithOutput(runCmd); err != nil { + t.Fatalf("Failed to run ctr, ExitCode: %d, err: '%v' output: '%s' cid: '%s'\n", ec, err, out, cid) + } + + // Give time to containerd to process the command if we don't + // the exit event might be received after we do the inspect + time.Sleep(3 * time.Second) + + // restart the daemon + if err := s.d.Start(); err != nil { + t.Fatal(err) + } + + // Check that we've got the correct exit code + out, err := s.d.Cmd("inspect", "-f", "{{.State.ExitCode}}", cid) + t.Assert(err, check.IsNil) + + out = strings.TrimSpace(out) + if out != "143" { + t.Fatalf("Expected exit code '%s' got '%s' for container 
'%s'\n", "143", out, cid) + } + +} + +// TestDaemonRestartWithPausedRunningContainer requires live restore of running containers +func (s *DockerDaemonSuite) TestDaemonRestartWithPausedRunningContainer(t *check.C) { + if err := s.d.StartWithBusybox(); err != nil { + t.Fatal(err) + } + + cid, err := s.d.Cmd("run", "-d", "--name", "test", "busybox", "top") + defer s.d.Stop() + if err != nil { + t.Fatal(cid, err) + } + cid = strings.TrimSpace(cid) + + // Kill the daemon + if err := s.d.Kill(); err != nil { + t.Fatal(err) + } + + // kill the container + runCmd := exec.Command("ctr", "--address", "/var/run/docker/libcontainerd/containerd.sock", "containers", "pause", cid) + if out, ec, err := runCommandWithOutput(runCmd); err != nil { + t.Fatalf("Failed to run ctr, ExitCode: %d, err: '%v' output: '%s' cid: '%s'\n", ec, err, out, cid) + } + + // Give time to containerd to process the command if we don't + // the pause event might be received after we do the inspect + time.Sleep(3 * time.Second) + + // restart the daemon + if err := s.d.Start(); err != nil { + t.Fatal(err) + } + + // Check that we've got the correct status + out, err := s.d.Cmd("inspect", "-f", "{{.State.Status}}", cid) + t.Assert(err, check.IsNil) + + out = strings.TrimSpace(out) + if out != "paused" { + t.Fatalf("Expected exit code '%s' got '%s' for container '%s'\n", "paused", out, cid) + } +} + +// TestDaemonRestartWithUnpausedRunningContainer requires live restore of running containers. 
+func (s *DockerDaemonSuite) TestDaemonRestartWithUnpausedRunningContainer(t *check.C) { + // TODO(mlaventure): Not sure what would the exit code be on windows + testRequires(t, DaemonIsLinux) + if err := s.d.StartWithBusybox(); err != nil { + t.Fatal(err) + } + + cid, err := s.d.Cmd("run", "-d", "--name", "test", "busybox", "top") + defer s.d.Stop() + if err != nil { + t.Fatal(cid, err) + } + cid = strings.TrimSpace(cid) + + // pause the container + if _, err := s.d.Cmd("pause", cid); err != nil { + t.Fatal(cid, err) + } + + // Kill the daemon + if err := s.d.Kill(); err != nil { + t.Fatal(err) + } + + // resume the container + runCmd := exec.Command("ctr", "--address", "/var/run/docker/libcontainerd/containerd.sock", "containers", "resume", cid) + if out, ec, err := runCommandWithOutput(runCmd); err != nil { + t.Fatalf("Failed to run ctr, ExitCode: %d, err: '%v' output: '%s' cid: '%s'\n", ec, err, out, cid) + } + + // Give time to containerd to process the command if we don't + // the resume event might be received after we do the inspect + time.Sleep(3 * time.Second) + + // restart the daemon + if err := s.d.Start(); err != nil { + t.Fatal(err) + } + + // Check that we've got the correct status + out, err := s.d.Cmd("inspect", "-f", "{{.State.Status}}", cid) + t.Assert(err, check.IsNil) + + out = strings.TrimSpace(out) + if out != "running" { + t.Fatalf("Expected exit code '%s' got '%s' for container '%s'\n", "running", out, cid) + } +} diff --git a/integration-cli/docker_cli_daemon_test.go b/integration-cli/docker_cli_daemon_test.go index b474867cf3..e9514c281f 100644 --- a/integration-cli/docker_cli_daemon_test.go +++ b/integration-cli/docker_cli_daemon_test.go @@ -1507,7 +1507,18 @@ func (s *DockerDaemonSuite) TestCleanupMountsAfterCrash(c *check.C) { out, err := s.d.Cmd("run", "-d", "busybox", "top") c.Assert(err, check.IsNil, check.Commentf("Output: %s", out)) id := strings.TrimSpace(out) - c.Assert(s.d.cmd.Process.Signal(os.Kill), check.IsNil) + 
c.Assert(s.d.Kill(), check.IsNil) + + // kill the container + runCmd := exec.Command("ctr", "--address", "/var/run/docker/libcontainerd/containerd.sock", "containers", "kill", id) + if out, ec, err := runCommandWithOutput(runCmd); err != nil { + c.Fatalf("Failed to run ctr, ExitCode: %d, err: '%v' output: '%s' cid: '%s'\n", ec, err, out, id) + } + + // Give time to containerd to process the command if we don't + // the exit event might be received after we do the inspect + time.Sleep(3 * time.Second) + c.Assert(s.d.Start(), check.IsNil) mountOut, err := ioutil.ReadFile("/proc/self/mountinfo") c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut)) @@ -1840,6 +1851,7 @@ func (s *DockerDaemonSuite) TestDaemonNoSpaceleftOnDeviceError(c *check.C) { // Test daemon restart with container links + auto restart func (s *DockerDaemonSuite) TestDaemonRestartContainerLinksRestart(c *check.C) { d := NewDaemon(c) + defer d.Stop() err := d.StartWithBusybox() c.Assert(err, checker.IsNil) diff --git a/integration-cli/docker_cli_exec_test.go b/integration-cli/docker_cli_exec_test.go index 4d04126350..81f36711bd 100644 --- a/integration-cli/docker_cli_exec_test.go +++ b/integration-cli/docker_cli_exec_test.go @@ -8,7 +8,6 @@ import ( "net/http" "os" "os/exec" - "path/filepath" "reflect" "sort" "strings" @@ -375,57 +374,6 @@ func (s *DockerSuite) TestLinksPingLinkedContainersOnRename(c *check.C) { dockerCmd(c, "exec", "container2", "ping", "-c", "1", "alias1", "-W", "1") } -func (s *DockerSuite) TestExecDir(c *check.C) { - // TODO Windows CI. 
This requires some work to port as it uses execDriverPath - // which is currently (and incorrectly) hard coded as a string assuming - // the daemon is running Linux :( - testRequires(c, SameHostDaemon, DaemonIsLinux) - - out, _ := runSleepingContainer(c, "-d") - id := strings.TrimSpace(out) - - execDir := filepath.Join(execDriverPath, id) - stateFile := filepath.Join(execDir, "state.json") - - { - fi, err := os.Stat(execDir) - c.Assert(err, checker.IsNil) - if !fi.IsDir() { - c.Fatalf("%q must be a directory", execDir) - } - fi, err = os.Stat(stateFile) - c.Assert(err, checker.IsNil) - } - - dockerCmd(c, "stop", id) - { - _, err := os.Stat(execDir) - c.Assert(err, checker.NotNil) - c.Assert(err, checker.NotNil, check.Commentf("Exec directory %q exists for removed container!", execDir)) - if !os.IsNotExist(err) { - c.Fatalf("Error should be about non-existing, got %s", err) - } - } - dockerCmd(c, "start", id) - { - fi, err := os.Stat(execDir) - c.Assert(err, checker.IsNil) - if !fi.IsDir() { - c.Fatalf("%q must be a directory", execDir) - } - fi, err = os.Stat(stateFile) - c.Assert(err, checker.IsNil) - } - dockerCmd(c, "rm", "-f", id) - { - _, err := os.Stat(execDir) - c.Assert(err, checker.NotNil, check.Commentf("Exec directory %q exists for removed container!", execDir)) - if !os.IsNotExist(err) { - c.Fatalf("Error should be about non-existing, got %s", err) - } - } -} - func (s *DockerSuite) TestRunMutableNetworkFiles(c *check.C) { // Not applicable on Windows to Windows CI. 
testRequires(c, SameHostDaemon, DaemonIsLinux) diff --git a/integration-cli/docker_cli_info_test.go b/integration-cli/docker_cli_info_test.go index c74f7b4eac..dd2369452d 100644 --- a/integration-cli/docker_cli_info_test.go +++ b/integration-cli/docker_cli_info_test.go @@ -22,7 +22,6 @@ func (s *DockerSuite) TestInfoEnsureSucceeds(c *check.C) { " Paused:", " Stopped:", "Images:", - "Execution Driver:", "OSType:", "Architecture:", "Logging Driver:", diff --git a/integration-cli/docker_cli_run_test.go b/integration-cli/docker_cli_run_test.go index 0d1dd34f91..849358024f 100644 --- a/integration-cli/docker_cli_run_test.go +++ b/integration-cli/docker_cli_run_test.go @@ -1109,7 +1109,7 @@ func (s *DockerSuite) TestRunProcNotWritableInNonPrivilegedContainers(c *check.C func (s *DockerSuite) TestRunProcWritableInPrivilegedContainers(c *check.C) { // Not applicable for Windows as there is no concept of --privileged testRequires(c, DaemonIsLinux, NotUserNamespace) - if _, code := dockerCmd(c, "run", "--privileged", "busybox", "touch", "/proc/sysrq-trigger"); code != 0 { + if _, code := dockerCmd(c, "run", "--privileged", "busybox", "sh", "-c", "umount /proc/sysrq-trigger && touch /proc/sysrq-trigger"); code != 0 { c.Fatalf("proc should be writable in privileged container") } } @@ -3021,7 +3021,8 @@ func (s *DockerSuite) TestRunUnshareProc(c *check.C) { out, _, err := dockerCmdWithError("run", "--name", name, "--security-opt", "seccomp:unconfined", "debian:jessie", "unshare", "-p", "-m", "-f", "-r", "mount", "-t", "proc", "none", "/proc") if err == nil || !(strings.Contains(strings.ToLower(out), "mount: cannot mount none") || - strings.Contains(strings.ToLower(out), "permission denied")) { + strings.Contains(strings.ToLower(out), "permission denied") || + strings.Contains(strings.ToLower(out), "operation not permitted")) { errChan <- fmt.Errorf("unshare and mount of /proc should have failed with 'mount: cannot mount none' or 'permission denied', got: %s, %v", out, err) } 
else { errChan <- nil @@ -3034,7 +3035,8 @@ func (s *DockerSuite) TestRunUnshareProc(c *check.C) { out, _, err := dockerCmdWithError("run", "--privileged", "--security-opt", "seccomp:unconfined", "--security-opt", "apparmor:docker-default", "--name", name, "debian:jessie", "unshare", "-p", "-m", "-f", "-r", "mount", "-t", "proc", "none", "/proc") if err == nil || !(strings.Contains(strings.ToLower(out), "mount: cannot mount none") || - strings.Contains(strings.ToLower(out), "permission denied")) { + strings.Contains(strings.ToLower(out), "permission denied") || + strings.Contains(strings.ToLower(out), "operation not permitted")) { errChan <- fmt.Errorf("privileged unshare with apparmor should have failed with 'mount: cannot mount none' or 'permission denied', got: %s, %v", out, err) } else { errChan <- nil @@ -4232,7 +4234,10 @@ func (s *DockerSuite) TestRunAttachFailedNoLeak(c *check.C) { out, _, err := dockerCmdWithError("run", "-p", "8000:8000", "busybox", "true") c.Assert(err, checker.NotNil) // check for windows error as well - c.Assert(strings.Contains(string(out), "port is already allocated") || strings.Contains(string(out), "were not connected because a duplicate name exists"), checker.Equals, true, check.Commentf("Output: %s", out)) + // TODO Windows Post TP5. 
Fix the error message string + c.Assert(strings.Contains(string(out), "port is already allocated") || + strings.Contains(string(out), "were not connected because a duplicate name exists") || + strings.Contains(string(out), "HNS failed with error : Failed to create endpoint"), checker.Equals, true, check.Commentf("Output: %s", out)) dockerCmd(c, "rm", "-f", "test") // NGoroutines is not updated right away, so we need to wait before failing diff --git a/layer/layer.go b/layer/layer.go index bdfe6e75e5..26a82440ea 100644 --- a/layer/layer.go +++ b/layer/layer.go @@ -169,6 +169,7 @@ type Store interface { CreateRWLayer(id string, parent ChainID, mountLabel string, initFunc MountInit) (RWLayer, error) GetRWLayer(id string) (RWLayer, error) + GetMountID(id string) (string, error) ReleaseRWLayer(RWLayer) ([]Metadata, error) Cleanup() error diff --git a/layer/layer_store.go b/layer/layer_store.go index 4b01ea0fc0..fa436f098b 100644 --- a/layer/layer_store.go +++ b/layer/layer_store.go @@ -480,6 +480,18 @@ func (ls *layerStore) GetRWLayer(id string) (RWLayer, error) { return mount.getReference(), nil } +func (ls *layerStore) GetMountID(id string) (string, error) { + ls.mountL.Lock() + defer ls.mountL.Unlock() + mount, ok := ls.mounts[id] + if !ok { + return "", ErrMountDoesNotExist + } + logrus.Debugf("GetRWLayer id: %s -> mountID: %s", id, mount.mountID) + + return mount.mountID, nil +} + func (ls *layerStore) ReleaseRWLayer(l RWLayer) ([]Metadata, error) { ls.mountL.Lock() defer ls.mountL.Unlock() diff --git a/libcontainerd/client.go b/libcontainerd/client.go new file mode 100644 index 0000000000..4485b75cf9 --- /dev/null +++ b/libcontainerd/client.go @@ -0,0 +1,58 @@ +package libcontainerd + +import ( + "fmt" + "sync" + + "github.com/Sirupsen/logrus" +) + +// clientCommon contains the platform agnostic fields used in the client structure +type clientCommon struct { + backend Backend + containers map[string]*container + containerMutexes map[string]*sync.Mutex // lock by 
container ID + mapMutex sync.RWMutex // protects read/write oprations from containers map + sync.Mutex // lock for containerMutexes map access +} + +func (clnt *client) lock(containerID string) { + clnt.Lock() + if _, ok := clnt.containerMutexes[containerID]; !ok { + clnt.containerMutexes[containerID] = &sync.Mutex{} + } + clnt.Unlock() + clnt.containerMutexes[containerID].Lock() +} + +func (clnt *client) unlock(containerID string) { + clnt.Lock() + if l, ok := clnt.containerMutexes[containerID]; ok { + l.Unlock() + } else { + logrus.Warnf("unlock of non-existing mutex: %s", containerID) + } + clnt.Unlock() +} + +// must hold a lock for cont.containerID +func (clnt *client) appendContainer(cont *container) { + clnt.mapMutex.Lock() + clnt.containers[cont.containerID] = cont + clnt.mapMutex.Unlock() +} +func (clnt *client) deleteContainer(friendlyName string) { + clnt.mapMutex.Lock() + delete(clnt.containers, friendlyName) + clnt.mapMutex.Unlock() +} + +func (clnt *client) getContainer(containerID string) (*container, error) { + clnt.mapMutex.RLock() + container, ok := clnt.containers[containerID] + defer clnt.mapMutex.RUnlock() + if !ok { + return nil, fmt.Errorf("invalid container: %s", containerID) // fixme: typed error + } + return container, nil +} diff --git a/libcontainerd/client_linux.go b/libcontainerd/client_linux.go new file mode 100644 index 0000000000..189345b286 --- /dev/null +++ b/libcontainerd/client_linux.go @@ -0,0 +1,394 @@ +package libcontainerd + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + "sync" + "syscall" + + "github.com/Sirupsen/logrus" + containerd "github.com/docker/containerd/api/grpc/types" + "github.com/docker/docker/pkg/idtools" + "github.com/docker/docker/pkg/mount" + "github.com/opencontainers/specs/specs-go" + "golang.org/x/net/context" +) + +type client struct { + clientCommon + + // Platform specific properties below here. 
+ remote *remote + q queue + exitNotifiers map[string]*exitNotifier +} + +func (clnt *client) AddProcess(containerID, processFriendlyName string, specp Process) error { + clnt.lock(containerID) + defer clnt.unlock(containerID) + container, err := clnt.getContainer(containerID) + if err != nil { + return err + } + + spec, err := container.spec() + if err != nil { + return err + } + sp := spec.Process + sp.Args = specp.Args + sp.Terminal = specp.Terminal + if specp.Env != nil { + sp.Env = specp.Env + } + if specp.Cwd != nil { + sp.Cwd = *specp.Cwd + } + if specp.User != nil { + sp.User = specs.User{ + UID: specp.User.UID, + GID: specp.User.GID, + AdditionalGids: specp.User.AdditionalGids, + } + } + if specp.Capabilities != nil { + sp.Capabilities = specp.Capabilities + } + + p := container.newProcess(processFriendlyName) + + r := &containerd.AddProcessRequest{ + Args: sp.Args, + Cwd: sp.Cwd, + Terminal: sp.Terminal, + Id: containerID, + Env: sp.Env, + User: &containerd.User{ + Uid: sp.User.UID, + Gid: sp.User.GID, + AdditionalGids: sp.User.AdditionalGids, + }, + Pid: processFriendlyName, + Stdin: p.fifo(syscall.Stdin), + Stdout: p.fifo(syscall.Stdout), + Stderr: p.fifo(syscall.Stderr), + Capabilities: sp.Capabilities, + ApparmorProfile: sp.ApparmorProfile, + SelinuxLabel: sp.SelinuxLabel, + NoNewPrivileges: sp.NoNewPrivileges, + } + + iopipe, err := p.openFifos(sp.Terminal) + if err != nil { + return err + } + + if _, err := clnt.remote.apiClient.AddProcess(context.Background(), r); err != nil { + p.closeFifos(iopipe) + return err + } + + container.processes[processFriendlyName] = p + + clnt.unlock(containerID) + + if err := clnt.backend.AttachStreams(processFriendlyName, *iopipe); err != nil { + return err + } + clnt.lock(containerID) + + return nil +} + +func (clnt *client) prepareBundleDir(uid, gid int) (string, error) { + root, err := filepath.Abs(clnt.remote.stateDir) + if err != nil { + return "", err + } + if uid == 0 && gid == 0 { + return root, nil + } + p 
:= string(filepath.Separator) + for _, d := range strings.Split(root, string(filepath.Separator))[1:] { + p = filepath.Join(p, d) + fi, err := os.Stat(p) + if err != nil && !os.IsNotExist(err) { + return "", err + } + if os.IsNotExist(err) || fi.Mode()&1 == 0 { + p = fmt.Sprintf("%s.%d.%d", p, uid, gid) + if err := idtools.MkdirAs(p, 0700, uid, gid); err != nil && !os.IsExist(err) { + return "", err + } + } + } + return p, nil +} + +func (clnt *client) Create(containerID string, spec Spec, options ...CreateOption) (err error) { + clnt.lock(containerID) + defer clnt.unlock(containerID) + + if ctr, err := clnt.getContainer(containerID); err == nil { + if ctr.restarting { // docker doesn't actually call start if restart is on atm, but probably should in the future + ctr.restartManager.Cancel() + ctr.clean() + } else { + return fmt.Errorf("Container %s is already active", containerID) + } + } + + uid, gid, err := getRootIDs(specs.Spec(spec)) + if err != nil { + return err + } + dir, err := clnt.prepareBundleDir(uid, gid) + if err != nil { + return err + } + + container := clnt.newContainer(filepath.Join(dir, containerID), options...) 
+ if err := container.clean(); err != nil { + return err + } + + defer func() { + if err != nil { + container.clean() + clnt.deleteContainer(containerID) + } + }() + + // uid/gid + rootfsDir := filepath.Join(container.dir, "rootfs") + if err := idtools.MkdirAllAs(rootfsDir, 0700, uid, gid); err != nil && !os.IsExist(err) { + return err + } + if err := syscall.Mount(spec.Root.Path, rootfsDir, "bind", syscall.MS_REC|syscall.MS_BIND, ""); err != nil { + return err + } + spec.Root.Path = "rootfs" + + f, err := os.Create(filepath.Join(container.dir, configFilename)) + if err != nil { + return err + } + defer f.Close() + if err := json.NewEncoder(f).Encode(spec); err != nil { + return err + } + + return container.start() +} + +func (clnt *client) Signal(containerID string, sig int) error { + clnt.lock(containerID) + defer clnt.unlock(containerID) + _, err := clnt.remote.apiClient.Signal(context.Background(), &containerd.SignalRequest{ + Id: containerID, + Pid: InitFriendlyName, + Signal: uint32(sig), + }) + return err +} + +func (clnt *client) Resize(containerID, processFriendlyName string, width, height int) error { + clnt.lock(containerID) + defer clnt.unlock(containerID) + if _, err := clnt.getContainer(containerID); err != nil { + return err + } + _, err := clnt.remote.apiClient.UpdateProcess(context.Background(), &containerd.UpdateProcessRequest{ + Id: containerID, + Pid: processFriendlyName, + Width: uint32(width), + Height: uint32(height), + }) + return err +} + +func (clnt *client) Pause(containerID string) error { + return clnt.setState(containerID, StatePause) +} + +func (clnt *client) setState(containerID, state string) error { + clnt.lock(containerID) + container, err := clnt.getContainer(containerID) + if err != nil { + clnt.unlock(containerID) + return err + } + if container.systemPid == 0 { + clnt.unlock(containerID) + return fmt.Errorf("No active process for container %s", containerID) + } + st := "running" + if state == StatePause { + st = "paused" + } + 
chstate := make(chan struct{}) + _, err = clnt.remote.apiClient.UpdateContainer(context.Background(), &containerd.UpdateContainerRequest{ + Id: containerID, + Pid: InitFriendlyName, + Status: st, + }) + if err != nil { + clnt.unlock(containerID) + return err + } + container.pauseMonitor.append(state, chstate) + clnt.unlock(containerID) + <-chstate + return nil +} + +func (clnt *client) Resume(containerID string) error { + return clnt.setState(containerID, StateResume) +} + +func (clnt *client) Stats(containerID string) (*Stats, error) { + resp, err := clnt.remote.apiClient.Stats(context.Background(), &containerd.StatsRequest{containerID}) + if err != nil { + return nil, err + } + return (*Stats)(resp), nil +} + +func (clnt *client) setExited(containerID string) error { + clnt.lock(containerID) + defer clnt.unlock(containerID) + + var exitCode uint32 + if event, ok := clnt.remote.pastEvents[containerID]; ok { + exitCode = event.Status + delete(clnt.remote.pastEvents, containerID) + } + + err := clnt.backend.StateChanged(containerID, StateInfo{ + State: StateExit, + ExitCode: exitCode, + }) + + // Unmount and delete the bundle folder + if mts, err := mount.GetMounts(); err == nil { + for _, mts := range mts { + if strings.HasSuffix(mts.Mountpoint, containerID+"/rootfs") { + if err := syscall.Unmount(mts.Mountpoint, syscall.MNT_DETACH); err == nil { + os.RemoveAll(strings.TrimSuffix(mts.Mountpoint, "/rootfs")) + } + break + } + } + } + + return err +} + +func (clnt *client) GetPidsForContainer(containerID string) ([]int, error) { + cont, err := clnt.getContainerdContainer(containerID) + if err != nil { + return nil, err + } + pids := make([]int, len(cont.Pids)) + for i, p := range cont.Pids { + pids[i] = int(p) + } + return pids, nil +} + +func (clnt *client) getContainerdContainer(containerID string) (*containerd.Container, error) { + resp, err := clnt.remote.apiClient.State(context.Background(), &containerd.StateRequest{Id: containerID}) + if err != nil { + return 
nil, err + } + for _, cont := range resp.Containers { + if cont.Id == containerID { + return cont, nil + } + } + return nil, fmt.Errorf("invalid state response") +} + +func (clnt *client) newContainer(dir string, options ...CreateOption) *container { + container := &container{ + containerCommon: containerCommon{ + process: process{ + dir: dir, + processCommon: processCommon{ + containerID: filepath.Base(dir), + client: clnt, + friendlyName: InitFriendlyName, + }, + }, + processes: make(map[string]*process), + }, + } + for _, option := range options { + if err := option.Apply(container); err != nil { + logrus.Error(err) + } + } + return container +} + +func (clnt *client) UpdateResources(containerID string, resources Resources) error { + clnt.lock(containerID) + defer clnt.unlock(containerID) + container, err := clnt.getContainer(containerID) + if err != nil { + return err + } + if container.systemPid == 0 { + return fmt.Errorf("No active process for container %s", containerID) + } + _, err = clnt.remote.apiClient.UpdateContainer(context.Background(), &containerd.UpdateContainerRequest{ + Id: containerID, + Pid: InitFriendlyName, + Resources: (*containerd.UpdateResource)(&resources), + }) + if err != nil { + return err + } + return nil +} + +func (clnt *client) getExitNotifier(containerID string) *exitNotifier { + clnt.mapMutex.RLock() + defer clnt.mapMutex.RUnlock() + return clnt.exitNotifiers[containerID] +} + +func (clnt *client) getOrCreateExitNotifier(containerID string) *exitNotifier { + clnt.mapMutex.Lock() + w, ok := clnt.exitNotifiers[containerID] + defer clnt.mapMutex.Unlock() + if !ok { + w = &exitNotifier{c: make(chan struct{}), client: clnt} + clnt.exitNotifiers[containerID] = w + } + return w +} + +type exitNotifier struct { + id string + client *client + c chan struct{} + once sync.Once +} + +func (en *exitNotifier) close() { + en.once.Do(func() { + close(en.c) + en.client.mapMutex.Lock() + if en == en.client.exitNotifiers[en.id] { + 
 delete(en.client.exitNotifiers, en.id) + } + en.client.mapMutex.Unlock() + }) +} +func (en *exitNotifier) wait() <-chan struct{} { + return en.c +} diff --git a/libcontainerd/client_liverestore_linux.go b/libcontainerd/client_liverestore_linux.go new file mode 100644 index 0000000000..1a1f7fe73c --- /dev/null +++ b/libcontainerd/client_liverestore_linux.go @@ -0,0 +1,83 @@ +// +build experimental + +package libcontainerd + +import ( + "fmt" + + "github.com/Sirupsen/logrus" + containerd "github.com/docker/containerd/api/grpc/types" +) + +func (clnt *client) restore(cont *containerd.Container, options ...CreateOption) (err error) { + clnt.lock(cont.Id) + defer clnt.unlock(cont.Id) + + logrus.Debugf("restore container %s state %s", cont.Id, cont.Status) + + containerID := cont.Id + if _, err := clnt.getContainer(containerID); err == nil { + return fmt.Errorf("container %s is already active", containerID) + } + + defer func() { + if err != nil { + clnt.deleteContainer(cont.Id) + } + }() + + container := clnt.newContainer(cont.BundlePath, options...) 
+ container.systemPid = systemPid(cont) + + var terminal bool + for _, p := range cont.Processes { + if p.Pid == InitFriendlyName { + terminal = p.Terminal + } + } + + iopipe, err := container.openFifos(terminal) + if err != nil { + return err + } + + if err := clnt.backend.AttachStreams(containerID, *iopipe); err != nil { + return err + } + + clnt.appendContainer(container) + + err = clnt.backend.StateChanged(containerID, StateInfo{ + State: StateRestore, + Pid: container.systemPid, + }) + + if err != nil { + return err + } + + if event, ok := clnt.remote.pastEvents[containerID]; ok { + // This should only be a pause or resume event + if event.Type == StatePause || event.Type == StateResume { + return clnt.backend.StateChanged(containerID, StateInfo{ + State: event.Type, + Pid: container.systemPid, + }) + } + + logrus.Warnf("unexpected backlog event: %#v", event) + } + + return nil +} + +func (clnt *client) Restore(containerID string, options ...CreateOption) error { + cont, err := clnt.getContainerdContainer(containerID) + if err == nil && cont.Status != "stopped" { + if err := clnt.restore(cont, options...); err != nil { + logrus.Errorf("error restoring %s: %v", containerID, err) + } + return nil + } + return clnt.setExited(containerID) +} diff --git a/libcontainerd/client_shutdownrestore_linux.go b/libcontainerd/client_shutdownrestore_linux.go new file mode 100644 index 0000000000..9d32b1d6ca --- /dev/null +++ b/libcontainerd/client_shutdownrestore_linux.go @@ -0,0 +1,39 @@ +// +build !experimental + +package libcontainerd + +import ( + "syscall" + "time" + + "github.com/Sirupsen/logrus" +) + +func (clnt *client) Restore(containerID string, options ...CreateOption) error { + w := clnt.getOrCreateExitNotifier(containerID) + defer w.close() + cont, err := clnt.getContainerdContainer(containerID) + if err == nil && cont.Status != "stopped" { + clnt.lock(cont.Id) + container := clnt.newContainer(cont.BundlePath) + container.systemPid = systemPid(cont) + 
clnt.appendContainer(container) + clnt.unlock(cont.Id) + + if err := clnt.Signal(containerID, int(syscall.SIGTERM)); err != nil { + logrus.Errorf("error sending sigterm to %v: %v", containerID, err) + } + select { + case <-time.After(10 * time.Second): + if err := clnt.Signal(containerID, int(syscall.SIGKILL)); err != nil { + logrus.Errorf("error sending sigkill to %v: %v", containerID, err) + } + select { + case <-time.After(2 * time.Second): + case <-w.wait(): + } + case <-w.wait(): + } + } + return clnt.setExited(containerID) +} diff --git a/libcontainerd/container.go b/libcontainerd/container.go new file mode 100644 index 0000000000..197990f2b2 --- /dev/null +++ b/libcontainerd/container.go @@ -0,0 +1,38 @@ +package libcontainerd + +import ( + "fmt" + + "github.com/docker/docker/restartmanager" +) + +const ( + // InitFriendlyName is the name given in the lookup map of processes + // for the first process started in a container. + InitFriendlyName = "init" + configFilename = "config.json" +) + +type containerCommon struct { + process + restartManager restartmanager.RestartManager + restarting bool + processes map[string]*process +} + +// WithRestartManager sets the restartmanager to be used with the container. 
+func WithRestartManager(rm restartmanager.RestartManager) CreateOption { + return restartManager{rm} +} + +type restartManager struct { + rm restartmanager.RestartManager +} + +func (rm restartManager) Apply(p interface{}) error { + if pr, ok := p.(*container); ok { + pr.restartManager = rm.rm + return nil + } + return fmt.Errorf("WithRestartManager option not supported for this client") +} diff --git a/libcontainerd/container_linux.go b/libcontainerd/container_linux.go new file mode 100644 index 0000000000..8efd06ac22 --- /dev/null +++ b/libcontainerd/container_linux.go @@ -0,0 +1,166 @@ +package libcontainerd + +import ( + "encoding/json" + "io/ioutil" + "os" + "path/filepath" + "syscall" + + "github.com/Sirupsen/logrus" + containerd "github.com/docker/containerd/api/grpc/types" + "github.com/opencontainers/specs/specs-go" + "golang.org/x/net/context" +) + +type container struct { + containerCommon + + // Platform specific fields are below here. + pauseMonitor + oom bool +} + +func (ctr *container) clean() error { + if _, err := os.Lstat(ctr.dir); err != nil { + if os.IsNotExist(err) { + return nil + } + return err + } + + syscall.Unmount(filepath.Join(ctr.dir, "rootfs"), syscall.MNT_DETACH) // ignore error + if err := os.RemoveAll(ctr.dir); err != nil { + return err + } + return nil +} + +func (ctr *container) spec() (*specs.Spec, error) { + var spec specs.Spec + dt, err := ioutil.ReadFile(filepath.Join(ctr.dir, configFilename)) + if err != nil { + return nil, err + } + if err := json.Unmarshal(dt, &spec); err != nil { + return nil, err + } + return &spec, nil +} + +func (ctr *container) start() error { + spec, err := ctr.spec() + if err != nil { + return err + } + iopipe, err := ctr.openFifos(spec.Process.Terminal) + if err != nil { + return err + } + + r := &containerd.CreateContainerRequest{ + Id: ctr.containerID, + BundlePath: ctr.dir, + Stdin: ctr.fifo(syscall.Stdin), + Stdout: ctr.fifo(syscall.Stdout), + Stderr: ctr.fifo(syscall.Stderr), + } + 
ctr.client.appendContainer(ctr) + + resp, err := ctr.client.remote.apiClient.CreateContainer(context.Background(), r) + if err != nil { + ctr.closeFifos(iopipe) + return err + } + + if err := ctr.client.backend.AttachStreams(ctr.containerID, *iopipe); err != nil { + return err + } + ctr.systemPid = systemPid(resp.Container) + + return ctr.client.backend.StateChanged(ctr.containerID, StateInfo{ + State: StateStart, + Pid: ctr.systemPid, + }) +} + +func (ctr *container) newProcess(friendlyName string) *process { + return &process{ + dir: ctr.dir, + processCommon: processCommon{ + containerID: ctr.containerID, + friendlyName: friendlyName, + client: ctr.client, + }, + } +} + +func (ctr *container) handleEvent(e *containerd.Event) error { + ctr.client.lock(ctr.containerID) + defer ctr.client.unlock(ctr.containerID) + switch e.Type { + case StateExit, StatePause, StateResume, StateOOM: + st := StateInfo{ + State: e.Type, + ExitCode: e.Status, + OOMKilled: e.Type == StateExit && ctr.oom, + } + if e.Type == StateOOM { + ctr.oom = true + } + if e.Type == StateExit && e.Pid != InitFriendlyName { + st.ProcessID = e.Pid + st.State = StateExitProcess + } + if st.State == StateExit && ctr.restartManager != nil { + restart, wait, err := ctr.restartManager.ShouldRestart(e.Status) + if err != nil { + logrus.Error(err) + } else if restart { + st.State = StateRestart + ctr.restarting = true + go func() { + err := <-wait + ctr.restarting = false + if err != nil { + st.State = StateExit + ctr.client.q.append(e.Id, func() { + if err := ctr.client.backend.StateChanged(e.Id, st); err != nil { + logrus.Error(err) + } + }) + logrus.Error(err) + } else { + ctr.start() + } + }() + } + } + + // Remove process from list if we have exited + // We need to do so here in case the Message Handler decides to restart it. 
+ if st.State == StateExit { + if os.Getenv("LIBCONTAINERD_NOCLEAN") != "1" { + ctr.clean() + } + ctr.client.deleteContainer(e.Id) + } + ctr.client.q.append(e.Id, func() { + if err := ctr.client.backend.StateChanged(e.Id, st); err != nil { + logrus.Error(err) + } + if e.Type == StatePause || e.Type == StateResume { + ctr.pauseMonitor.handle(e.Type) + } + if e.Type == StateExit { + if en := ctr.client.getExitNotifier(e.Id); en != nil { + en.close() + } + } + }) + + default: + logrus.Debugf("event unhandled: %+v", e) + } + return nil +} diff --git a/libcontainerd/pausemonitor_linux.go b/libcontainerd/pausemonitor_linux.go new file mode 100644 index 0000000000..379cbf1fcb --- /dev/null +++ b/libcontainerd/pausemonitor_linux.go @@ -0,0 +1,31 @@ +package libcontainerd + +// pauseMonitor is helper to get notifications from pause state changes. +type pauseMonitor struct { + waiters map[string][]chan struct{} +} + +func (m *pauseMonitor) handle(t string) { + if m.waiters == nil { + return + } + q, ok := m.waiters[t] + if !ok { + return + } + if len(q) > 0 { + close(q[0]) + m.waiters[t] = q[1:] + } +} + +func (m *pauseMonitor) append(t string, waiter chan struct{}) { + if m.waiters == nil { + m.waiters = make(map[string][]chan struct{}) + } + _, ok := m.waiters[t] + if !ok { + m.waiters[t] = make([]chan struct{}, 0) + } + m.waiters[t] = append(m.waiters[t], waiter) +} diff --git a/libcontainerd/process.go b/libcontainerd/process.go new file mode 100644 index 0000000000..57562c8789 --- /dev/null +++ b/libcontainerd/process.go @@ -0,0 +1,18 @@ +package libcontainerd + +// processCommon are the platform common fields as part of the process structure +// which keeps the state for the main container process, as well as any exec +// processes. 
+type processCommon struct { + client *client + + // containerID is the Container ID + containerID string + + // friendlyName is an identifier for the process (or `InitFriendlyName` + // for the first process) + friendlyName string + + // systemPid is the PID of the main container process + systemPid uint32 +} diff --git a/libcontainerd/process_linux.go b/libcontainerd/process_linux.go new file mode 100644 index 0000000000..136a6e250c --- /dev/null +++ b/libcontainerd/process_linux.go @@ -0,0 +1,107 @@ +package libcontainerd + +import ( + "fmt" + "io" + "os" + "path/filepath" + "syscall" + + containerd "github.com/docker/containerd/api/grpc/types" + "github.com/docker/docker/pkg/ioutils" + "golang.org/x/net/context" +) + +var fdNames = map[int]string{ + syscall.Stdin: "stdin", + syscall.Stdout: "stdout", + syscall.Stderr: "stderr", +} + +// process keeps the state for both main container process and exec process. +type process struct { + processCommon + + // Platform specific fields are below here. 
+ dir string +} + +func (p *process) openFifos(terminal bool) (*IOPipe, error) { + bundleDir := p.dir + if err := os.MkdirAll(bundleDir, 0700); err != nil { + return nil, err + } + + for i := 0; i < 3; i++ { + f := p.fifo(i) + if err := syscall.Mkfifo(f, 0700); err != nil && !os.IsExist(err) { + return nil, fmt.Errorf("mkfifo: %s %v", f, err) + } + } + + io := &IOPipe{} + stdinf, err := os.OpenFile(p.fifo(syscall.Stdin), syscall.O_RDWR, 0) + if err != nil { + return nil, err + } + + io.Stdout = openReaderFromFifo(p.fifo(syscall.Stdout)) + if !terminal { + io.Stderr = openReaderFromFifo(p.fifo(syscall.Stderr)) + } else { + io.Stderr = emptyReader{} + } + + io.Stdin = ioutils.NewWriteCloserWrapper(stdinf, func() error { + stdinf.Close() + _, err := p.client.remote.apiClient.UpdateProcess(context.Background(), &containerd.UpdateProcessRequest{ + Id: p.containerID, + Pid: p.friendlyName, + CloseStdin: true, + }) + return err + }) + + return io, nil +} + +func (p *process) closeFifos(io *IOPipe) { + io.Stdin.Close() + closeReaderFifo(p.fifo(syscall.Stdout)) + closeReaderFifo(p.fifo(syscall.Stderr)) +} + +type emptyReader struct{} + +func (r emptyReader) Read(b []byte) (int, error) { + return 0, io.EOF +} + +func openReaderFromFifo(fn string) io.Reader { + r, w := io.Pipe() + go func() { + stdoutf, err := os.OpenFile(fn, syscall.O_RDONLY, 0) + if err != nil { + r.CloseWithError(err) + } + if _, err := io.Copy(w, stdoutf); err != nil { + r.CloseWithError(err) + } + w.Close() + stdoutf.Close() + }() + return r +} + +// closeReaderFifo closes fifo that may be blocked on open by opening the write side. 
+func closeReaderFifo(fn string) { + f, err := os.OpenFile(fn, syscall.O_WRONLY|syscall.O_NONBLOCK, 0) + if err != nil { + return + } + f.Close() +} + +func (p *process) fifo(index int) string { + return filepath.Join(p.dir, p.friendlyName+"-"+fdNames[index]) +} diff --git a/libcontainerd/queue_linux.go b/libcontainerd/queue_linux.go new file mode 100644 index 0000000000..34bc81d24e --- /dev/null +++ b/libcontainerd/queue_linux.go @@ -0,0 +1,29 @@ +package libcontainerd + +import "sync" + +type queue struct { + sync.Mutex + fns map[string]chan struct{} +} + +func (q *queue) append(id string, f func()) { + q.Lock() + defer q.Unlock() + + if q.fns == nil { + q.fns = make(map[string]chan struct{}) + } + + done := make(chan struct{}) + + fn, ok := q.fns[id] + q.fns[id] = done + go func() { + if ok { + <-fn + } + f() + close(done) + }() +} diff --git a/libcontainerd/remote.go b/libcontainerd/remote.go new file mode 100644 index 0000000000..a679edcfdc --- /dev/null +++ b/libcontainerd/remote.go @@ -0,0 +1,18 @@ +package libcontainerd + +// Remote on Linux defines the access point to the containerd grpc API. +// Remote on Windows is largely an unimplemented interface as there is +// no remote containerd. +type Remote interface { + // Client returns a new Client instance connected with given Backend. + Client(Backend) (Client, error) + // Cleanup stops containerd if it was started by libcontainerd. + // Note this is not used on Windows as there is no remote containerd. + Cleanup() +} + +// RemoteOption allows to configure parameters of remotes. +// This is unused on Windows. 
+type RemoteOption interface { + Apply(Remote) error +} diff --git a/libcontainerd/remote_linux.go b/libcontainerd/remote_linux.go new file mode 100644 index 0000000000..8c817e4c59 --- /dev/null +++ b/libcontainerd/remote_linux.go @@ -0,0 +1,401 @@ +package libcontainerd + +import ( + "fmt" + "io" + "net" + "os" + "os/exec" + "path/filepath" + "strconv" + "sync" + "syscall" + "time" + + "github.com/Sirupsen/logrus" + containerd "github.com/docker/containerd/api/grpc/types" + sysinfo "github.com/docker/docker/pkg/system" + "github.com/docker/docker/utils" + "golang.org/x/net/context" + "google.golang.org/grpc" +) + +const ( + maxConnectionRetryCount = 3 + connectionRetryDelay = 3 * time.Second + containerdShutdownTimeout = 15 * time.Second + containerdBinary = "containerd" + containerdPidFilename = "containerd.pid" + containerdSockFilename = "containerd.sock" + eventTimestampFilename = "event.ts" +) + +type remote struct { + sync.RWMutex + apiClient containerd.APIClient + daemonPid int + stateDir string + rpcAddr string + startDaemon bool + debugLog bool + rpcConn *grpc.ClientConn + clients []*client + eventTsPath string + pastEvents map[string]*containerd.Event +} + +// New creates a fresh instance of libcontainerd remote. +func New(stateDir string, options ...RemoteOption) (_ Remote, err error) { + defer func() { + if err != nil { + err = fmt.Errorf("Failed to connect to containerd. Please make sure containerd is installed in your PATH or you have specified the correct address. 
Got error: %v", err) + } + }() + r := &remote{ + stateDir: stateDir, + daemonPid: -1, + eventTsPath: filepath.Join(stateDir, eventTimestampFilename), + pastEvents: make(map[string]*containerd.Event), + } + for _, option := range options { + if err := option.Apply(r); err != nil { + return nil, err + } + } + + if err := sysinfo.MkdirAll(stateDir, 0700); err != nil { + return nil, err + } + + if r.rpcAddr == "" { + r.rpcAddr = filepath.Join(stateDir, containerdSockFilename) + } + + if r.startDaemon { + if err := r.runContainerdDaemon(); err != nil { + return nil, err + } + } + + dialOpts := append([]grpc.DialOption{grpc.WithInsecure()}, + grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) { + return net.DialTimeout("unix", addr, timeout) + }), + ) + conn, err := grpc.Dial(r.rpcAddr, dialOpts...) + if err != nil { + return nil, fmt.Errorf("error connecting to containerd: %v", err) + } + + r.rpcConn = conn + r.apiClient = containerd.NewAPIClient(conn) + + go r.handleConnectionChange() + + if err := r.startEventsMonitor(); err != nil { + return nil, err + } + + return r, nil +} + +func (r *remote) handleConnectionChange() { + var transientFailureCount = 0 + state := grpc.Idle + for { + s, err := r.rpcConn.WaitForStateChange(context.Background(), state) + if err != nil { + break + } + state = s + logrus.Debugf("containerd connection state change: %v", s) + + if r.daemonPid != -1 { + switch state { + case grpc.TransientFailure: + // Reset state to be notified of next failure + transientFailureCount++ + if transientFailureCount >= maxConnectionRetryCount { + transientFailureCount = 0 + if utils.IsProcessAlive(r.daemonPid) { + utils.KillProcess(r.daemonPid) + } + if err := r.runContainerdDaemon(); err != nil { //FIXME: Handle error + logrus.Errorf("error restarting containerd: %v", err) + } + } else { + state = grpc.Idle + time.Sleep(connectionRetryDelay) + } + case grpc.Shutdown: + // Well, we asked for it to stop, just return + return + } + } + } 
+} + +func (r *remote) Cleanup() { + if r.daemonPid == -1 { + return + } + r.rpcConn.Close() + // Ask the daemon to quit + syscall.Kill(r.daemonPid, syscall.SIGTERM) + + // Wait up to 15secs for it to stop + for i := time.Duration(0); i < containerdShutdownTimeout; i += time.Second { + if !utils.IsProcessAlive(r.daemonPid) { + break + } + time.Sleep(time.Second) + } + + if utils.IsProcessAlive(r.daemonPid) { + logrus.Warnf("libcontainerd: containerd (%d) didn't stop within 15 secs, killing it\n", r.daemonPid) + syscall.Kill(r.daemonPid, syscall.SIGKILL) + } + + // cleanup some files + os.Remove(filepath.Join(r.stateDir, containerdPidFilename)) + os.Remove(filepath.Join(r.stateDir, containerdSockFilename)) +} + +func (r *remote) Client(b Backend) (Client, error) { + c := &client{ + clientCommon: clientCommon{ + backend: b, + containerMutexes: make(map[string]*sync.Mutex), + containers: make(map[string]*container), + }, + remote: r, + exitNotifiers: make(map[string]*exitNotifier), + } + + r.Lock() + r.clients = append(r.clients, c) + r.Unlock() + return c, nil +} + +func (r *remote) updateEventTimestamp(t time.Time) { + f, err := os.OpenFile(r.eventTsPath, syscall.O_CREAT|syscall.O_WRONLY|syscall.O_TRUNC, 0600) + defer f.Close() + if err != nil { + logrus.Warnf("libcontainerd: failed to open event timestamp file: %v", err) + return + } + + b, err := t.MarshalText() + if err != nil { + logrus.Warnf("libcontainerd: failed to encode timestamp: %v", err) + return + } + + n, err := f.Write(b) + if err != nil || n != len(b) { + logrus.Warnf("libcontainerd: failed to update event timestamp file: %v", err) + f.Truncate(0) + return + } + +} + +func (r *remote) getLastEventTimestamp() int64 { + t := time.Now() + + fi, err := os.Stat(r.eventTsPath) + if os.IsNotExist(err) { + return t.Unix() + } + + f, err := os.Open(r.eventTsPath) + defer f.Close() + if err != nil { + logrus.Warnf("libcontainerd: Unable to access last event ts: %v", err) + return t.Unix() + } + + b := 
make([]byte, fi.Size()) + n, err := f.Read(b) + if err != nil || n != len(b) { + logrus.Warn("libcontainerd: Unable to read last event ts: %v", err) + return t.Unix() + } + + t.UnmarshalText(b) + + return t.Unix() +} + +func (r *remote) startEventsMonitor() error { + // First, get past events + er := &containerd.EventsRequest{ + Timestamp: uint64(r.getLastEventTimestamp()), + } + events, err := r.apiClient.Events(context.Background(), er) + if err != nil { + return err + } + go r.handleEventStream(events) + return nil +} + +func (r *remote) handleEventStream(events containerd.API_EventsClient) { + live := false + for { + e, err := events.Recv() + if err != nil { + logrus.Errorf("failed to receive event from containerd: %v", err) + go r.startEventsMonitor() + return + } + + if live == false { + logrus.Debugf("received past containerd event: %#v", e) + + // Pause/Resume events should never happens after exit one + switch e.Type { + case StateExit: + r.pastEvents[e.Id] = e + case StatePause: + r.pastEvents[e.Id] = e + case StateResume: + r.pastEvents[e.Id] = e + case stateLive: + live = true + r.updateEventTimestamp(time.Unix(int64(e.Timestamp), 0)) + } + } else { + logrus.Debugf("received containerd event: %#v", e) + + var container *container + var c *client + r.RLock() + for _, c = range r.clients { + container, err = c.getContainer(e.Id) + if err == nil { + break + } + } + r.RUnlock() + if container == nil { + logrus.Errorf("no state for container: %q", err) + continue + } + + if err := container.handleEvent(e); err != nil { + logrus.Errorf("error processing state change for %s: %v", e.Id, err) + } + + r.updateEventTimestamp(time.Unix(int64(e.Timestamp), 0)) + } + } +} + +func (r *remote) runContainerdDaemon() error { + pidFilename := filepath.Join(r.stateDir, containerdPidFilename) + f, err := os.OpenFile(pidFilename, os.O_RDWR|os.O_CREATE, 0600) + defer f.Close() + if err != nil { + return err + } + + // File exist, check if the daemon is alive + b := 
make([]byte, 8) + n, err := f.Read(b) + if err != nil && err != io.EOF { + return err + } + + if n > 0 { + pid, err := strconv.ParseUint(string(b[:n]), 10, 64) + if err != nil { + return err + } + if utils.IsProcessAlive(int(pid)) { + logrus.Infof("previous instance of containerd still alive (%d)", pid) + r.daemonPid = int(pid) + return nil + } + } + + // rewind the file + _, err = f.Seek(0, os.SEEK_SET) + if err != nil { + return err + } + + // Truncate it + err = f.Truncate(0) + if err != nil { + return err + } + + // Start a new instance + args := []string{"-l", r.rpcAddr} + if r.debugLog { + args = append(args, "--debug", "true") + } + cmd := exec.Command(containerdBinary, args...) + // TODO: store logs? + cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true} + if err := cmd.Start(); err != nil { + return err + } + logrus.Infof("New containerd process, pid: %d\n", cmd.Process.Pid) + + if _, err := f.WriteString(fmt.Sprintf("%d", cmd.Process.Pid)); err != nil { + utils.KillProcess(cmd.Process.Pid) + return err + } + + go cmd.Wait() // Reap our child when needed + r.daemonPid = cmd.Process.Pid + return nil +} + +// WithRemoteAddr sets the external containerd socket to connect to. +func WithRemoteAddr(addr string) RemoteOption { + return rpcAddr(addr) +} + +type rpcAddr string + +func (a rpcAddr) Apply(r Remote) error { + if remote, ok := r.(*remote); ok { + remote.rpcAddr = string(a) + return nil + } + return fmt.Errorf("WithRemoteAddr option not supported for this remote") +} + +// WithStartDaemon defines if libcontainerd should also run containerd daemon. +func WithStartDaemon(start bool) RemoteOption { + return startDaemon(start) +} + +type startDaemon bool + +func (s startDaemon) Apply(r Remote) error { + if remote, ok := r.(*remote); ok { + remote.startDaemon = bool(s) + return nil + } + return fmt.Errorf("WithStartDaemon option not supported for this remote") +} + +// WithDebugLog defines if containerd debug logs will be enabled for daemon. 
+func WithDebugLog(debug bool) RemoteOption { + return debugLog(debug) +} + +type debugLog bool + +func (d debugLog) Apply(r Remote) error { + if remote, ok := r.(*remote); ok { + remote.debugLog = bool(d) + return nil + } + return fmt.Errorf("WithDebugLog option not supported for this remote") +} diff --git a/libcontainerd/types.go b/libcontainerd/types.go new file mode 100644 index 0000000000..85b9a30218 --- /dev/null +++ b/libcontainerd/types.go @@ -0,0 +1,59 @@ +package libcontainerd + +import "io" + +// State constants used in state change reporting. +const ( + StateStart = "start-container" + StatePause = "pause" + StateResume = "resume" + StateExit = "exit" + StateRestart = "restart" + StateRestore = "restore" + StateStartProcess = "start-process" + StateExitProcess = "exit-process" + StateOOM = "oom" // fake state + stateLive = "live" +) + +// StateInfo contains description about the new state container has entered. +type StateInfo struct { // FIXME: event? + State string + Pid uint32 + ExitCode uint32 + ProcessID string + OOMKilled bool // TODO Windows containerd factor out +} + +// Backend defines callbacks that the client of the library needs to implement. +type Backend interface { + StateChanged(containerID string, state StateInfo) error + AttachStreams(processFriendlyName string, io IOPipe) error +} + +// Client provides access to containerd features. 
+type Client interface {
+	Create(containerID string, spec Spec, options ...CreateOption) error
+	Signal(containerID string, sig int) error
+	AddProcess(containerID, processFriendlyName string, process Process) error
+	Resize(containerID, processFriendlyName string, width, height int) error
+	Pause(containerID string) error
+	Resume(containerID string) error
+	Restore(containerID string, options ...CreateOption) error
+	Stats(containerID string) (*Stats, error)
+	GetPidsForContainer(containerID string) ([]int, error)
+	UpdateResources(containerID string, resources Resources) error
+}
+
+// CreateOption allows to configure parameters of container creation.
+type CreateOption interface {
+	Apply(interface{}) error
+}
+
+// IOPipe contains the stdio streams.
+type IOPipe struct {
+	Stdin    io.WriteCloser
+	Stdout   io.Reader
+	Stderr   io.Reader
+	Terminal bool // Whether stderr is connected on Windows
+}
diff --git a/libcontainerd/types_linux.go b/libcontainerd/types_linux.go
new file mode 100644
index 0000000000..7cbfe79d6f
--- /dev/null
+++ b/libcontainerd/types_linux.go
+package libcontainerd
+
+import (
+	containerd "github.com/docker/containerd/api/grpc/types"
+	"github.com/opencontainers/specs/specs-go"
+)
+
+// Spec is the base configuration for the container. It specifies platform
+// independent configuration. This information must be included when the
+// bundle is packaged for distribution.
+type Spec specs.Spec
+
+// Process contains information to start a specific application inside the container.
+type Process struct {
+	// Terminal creates an interactive terminal for the container.
+	Terminal bool `json:"terminal"`
+	// User specifies user information for the process.
+	User *User `json:"user"`
+	// Args specifies the binary and arguments for the application to execute.
+	Args []string `json:"args"`
+	// Env populates the process environment for the process.
+	Env []string `json:"env,omitempty"`
+	// Cwd is the current working directory for the process and must be
+	// relative to the container's root.
+	Cwd *string `json:"cwd"`
+	// Capabilities are linux capabilities that are kept for the container.
+	Capabilities []string `json:"capabilities,omitempty"`
+	// Rlimits specifies rlimit options to apply to the process.
+	Rlimits []specs.Rlimit `json:"rlimits,omitempty"`
+	// ApparmorProfile specified the apparmor profile for the container.
+	ApparmorProfile *string `json:"apparmorProfile,omitempty"`
+	// SelinuxProcessLabel specifies the selinux context that the container process is run as.
+	SelinuxLabel *string `json:"selinuxLabel,omitempty"`
+}
+
+// Stats contains a stats properties from containerd.
+type Stats containerd.StatsResponse
+
+// User specifies linux specific user and group information for the container's
+// main process.
+type User specs.User
+
+// Resources defines updatable container resource values.
+type Resources containerd.UpdateResource
diff --git a/libcontainerd/utils_linux.go b/libcontainerd/utils_linux.go
new file mode 100644
index 0000000000..cf0f2e0d31
--- /dev/null
+++ b/libcontainerd/utils_linux.go
+package libcontainerd
+
+import (
+	containerd "github.com/docker/containerd/api/grpc/types"
+	"github.com/opencontainers/specs/specs-go"
+)
+
+func getRootIDs(s specs.Spec) (int, int, error) {
+	var hasUserns bool
+	for _, ns := range s.Linux.Namespaces {
+		if ns.Type == specs.UserNamespace {
+			hasUserns = true
+			break
+		}
+	}
+	if !hasUserns {
+		return 0, 0, nil
+	}
+	uid := hostIDFromMap(0, s.Linux.UIDMappings)
+	gid := hostIDFromMap(0, s.Linux.GIDMappings)
+	return uid, gid, nil
+}
+
+func hostIDFromMap(id uint32, mp []specs.IDMapping) int {
+	for _, m := range mp {
+		if id >= m.ContainerID && id <= m.ContainerID+m.Size-1 {
+			return int(m.HostID + id - m.ContainerID)
+		}
+	}
+	return 0
+}
+
+func systemPid(ctr *containerd.Container) uint32 {
+	var pid uint32
+	for _, p := range ctr.Processes {
+		if p.Pid == InitFriendlyName {
+			pid = p.SystemPid
+		}
+	}
+	return pid
+}
diff --git a/oci/defaults_linux.go b/oci/defaults_linux.go
new file mode 100644
index 0000000000..4159500cc5
--- /dev/null
+++ b/oci/defaults_linux.go
+package oci
+
+import (
+	"os"
+	"runtime"
+
+	"github.com/opencontainers/specs/specs-go"
+)
+
+func sPtr(s string) *string      { return &s }
+func rPtr(r rune) *rune          { return &r }
+func iPtr(i int64) *int64        { return &i }
+func u32Ptr(i int64) *uint32     { u := uint32(i); return &u }
+func fmPtr(i int64) *os.FileMode { fm := os.FileMode(i); return &fm }
+
+// DefaultSpec returns default oci spec used by docker.
+func DefaultSpec() specs.Spec {
+	s := specs.Spec{
+		Version: specs.Version,
+		Platform: specs.Platform{
+			OS:   runtime.GOOS,
+			Arch: runtime.GOARCH,
+		},
+	}
+	s.Mounts = []specs.Mount{
+		{
+			Destination: "/proc",
+			Type:        "proc",
+			Source:      "proc",
+			Options:     []string{"nosuid", "noexec", "nodev"},
+		},
+		{
+			Destination: "/dev",
+			Type:        "tmpfs",
+			Source:      "tmpfs",
+			Options:     []string{"nosuid", "strictatime", "mode=755"},
+		},
+		{
+			Destination: "/dev/pts",
+			Type:        "devpts",
+			Source:      "devpts",
+			Options:     []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5"},
+		},
+		{
+			Destination: "/sys",
+			Type:        "sysfs",
+			Source:      "sysfs",
+			Options:     []string{"nosuid", "noexec", "nodev", "ro"},
+		},
+		{
+			Destination: "/sys/fs/cgroup",
+			Type:        "cgroup",
+			Source:      "cgroup",
+			Options:     []string{"ro", "nosuid", "noexec", "nodev"},
+		},
+		{
+			Destination: "/dev/mqueue",
+			Type:        "mqueue",
+			Source:      "mqueue",
+			Options:     []string{"nosuid", "noexec", "nodev"},
+		},
+	}
+
+	s.Process.Capabilities = []string{
+		"CAP_CHOWN",
+		"CAP_DAC_OVERRIDE",
+		"CAP_FSETID",
+		"CAP_FOWNER",
+		"CAP_MKNOD",
+		"CAP_NET_RAW",
+		"CAP_SETGID",
+		"CAP_SETUID",
+		"CAP_SETFCAP",
+		"CAP_SETPCAP",
+		"CAP_NET_BIND_SERVICE",
+		"CAP_SYS_CHROOT",
+		"CAP_KILL",
+		"CAP_AUDIT_WRITE",
+	}
+
+	s.Linux = specs.Linux{
+		Namespaces: []specs.Namespace{
+			{Type: "mount"},
+			{Type: "network"},
+			{Type: "uts"},
+			{Type: "pid"},
+			{Type: "ipc"},
+		},
+		Devices: []specs.Device{
+			{
+				Type:     "c",
+				Path:     "/dev/zero",
+				Major:    1,
+				Minor:    5,
+				FileMode: fmPtr(0666),
+				UID:      u32Ptr(0),
+				GID:      u32Ptr(0),
+			},
+			{
+				Type:     "c",
+				Path:     "/dev/null",
+				Major:    1,
+				Minor:    3,
+				FileMode: fmPtr(0666),
+				UID:      u32Ptr(0),
+				GID:      u32Ptr(0),
+			},
+			{
+				Type:     "c",
+				Path:     "/dev/urandom",
+				Major:    1,
+				Minor:    9,
+				FileMode: fmPtr(0666),
+				UID:      u32Ptr(0),
+				GID:      u32Ptr(0),
+			},
+			{
+				Type:     "c",
+				Path:     "/dev/random",
+				Major:    1,
+				Minor:    8,
+				FileMode: fmPtr(0666),
+				UID:      u32Ptr(0),
+				GID:      u32Ptr(0),
+			},
+			// {
+			// 	Type:     "c",
+			// 	Path:     "/dev/tty",
+			// 	Major:    5,
+			// 	Minor:    0,
+			// 	FileMode: fmPtr(0666),
+			// 	UID:      u32Ptr(0),
+			// 	GID:      u32Ptr(0),
+			// },
+			// {
+			// 	Type:     "c",
+			// 	Path:     "/dev/console",
+			// 	Major:    5,
+			// 	Minor:    1,
+			// 	FileMode: fmPtr(0666),
+			// 	UID:      u32Ptr(0),
+			// 	GID:      u32Ptr(0),
+			// },
+			{
+				Type:     "c",
+				Path:     "/dev/fuse",
+				Major:    10,
+				Minor:    229,
+				FileMode: fmPtr(0666),
+				UID:      u32Ptr(0),
+				GID:      u32Ptr(0),
+			},
+		},
+		Resources: &specs.Resources{
+			Devices: []specs.DeviceCgroup{
+				{
+					Allow:  false,
+					Access: sPtr("rwm"),
+				},
+				{
+					Allow:  true,
+					Type:   sPtr("c"),
+					Major:  iPtr(1),
+					Minor:  iPtr(5),
+					Access: sPtr("rwm"),
+				},
+				{
+					Allow:  true,
+					Type:   sPtr("c"),
+					Major:  iPtr(1),
+					Minor:  iPtr(3),
+					Access: sPtr("rwm"),
+				},
+				{
+					Allow:  true,
+					Type:   sPtr("c"),
+					Major:  iPtr(1),
+					Minor:  iPtr(9),
+					Access: sPtr("rwm"),
+				},
+				{
+					Allow:  true,
+					Type:   sPtr("c"),
+					Major:  iPtr(1),
+					Minor:  iPtr(8),
+					Access: sPtr("rwm"),
+				},
+				{
+					Allow:  true,
+					Type:   sPtr("c"),
+					Major:  iPtr(5),
+					Minor:  iPtr(0),
+					Access: sPtr("rwm"),
+				},
+				{
+					Allow:  true,
+					Type:   sPtr("c"),
+					Major:  iPtr(5),
+					Minor:  iPtr(1),
+					Access: sPtr("rwm"),
+				},
+				{
+					Allow:  false,
+					Type:   sPtr("c"),
+					Major:  iPtr(10),
+					Minor:  iPtr(229),
+					Access: sPtr("rwm"),
+				},
+			},
+		},
+	}
+
+	return s
+}
diff --git a/pkg/system/syscall_unix.go b/pkg/system/syscall_unix.go
index f1497c587e..3ae9128468 100644
--- a/pkg/system/syscall_unix.go
+++ b/pkg/system/syscall_unix.go
@@ -9,3 +9,9 @@ import "syscall"
 func Unmount(dest string) error {
 	return syscall.Unmount(dest, 0)
 }
+
+// CommandLineToArgv should not be used on Unix.
+// It simply returns commandLine in the only element in the returned array.
+func CommandLineToArgv(commandLine string) ([]string, error) {
+	return []string{commandLine}, nil
+}
diff --git a/pkg/system/syscall_windows.go b/pkg/system/syscall_windows.go
index 273aa234bb..061e220f79 100644
--- a/pkg/system/syscall_windows.go
+++ b/pkg/system/syscall_windows.go
@@ -3,6 +3,7 @@ package system
 import (
 	"fmt"
 	"syscall"
+	"unsafe"
 )
 
 // OSVersion is a wrapper for Windows version information
@@ -34,3 +35,26 @@ func GetOSVersion() (OSVersion, error) {
 func Unmount(dest string) error {
 	return nil
 }
+
+// CommandLineToArgv wraps the Windows syscall to turn a commandline into an argument array.
+func CommandLineToArgv(commandLine string) ([]string, error) {
+	var argc int32
+
+	argsPtr, err := syscall.UTF16PtrFromString(commandLine)
+	if err != nil {
+		return nil, err
+	}
+
+	argv, err := syscall.CommandLineToArgv(argsPtr, &argc)
+	if err != nil {
+		return nil, err
+	}
+	defer syscall.LocalFree(syscall.Handle(uintptr(unsafe.Pointer(argv))))
+
+	newArgs := make([]string, argc)
+	for i, v := range (*argv)[:argc] {
+		newArgs[i] = string(syscall.UTF16ToString((*v)[:]))
+	}
+
+	return newArgs, nil
+}
diff --git a/restartmanager/restartmanager.go b/restartmanager/restartmanager.go
new file mode 100644
index 0000000000..e534b2cf73
--- /dev/null
+++ b/restartmanager/restartmanager.go
+package restartmanager
+
+import (
+	"fmt"
+	"sync"
+	"time"
+
+	"github.com/docker/engine-api/types/container"
+)
+
+const (
+	backoffMultiplier = 2
+	defaultTimeout    = 100 * time.Millisecond
+)
+
+// RestartManager defines object that controls container restarting rules.
+type RestartManager interface {
+	Cancel() error
+	ShouldRestart(exitCode uint32) (bool, chan error, error)
+}
+
+type restartManager struct {
+	sync.Mutex
+	sync.Once
+	policy       container.RestartPolicy
+	failureCount int
+	timeout      time.Duration
+	active       bool
+	cancel       chan struct{}
+	canceled     bool
+}
+
+// New returns a new restartmanager based on a policy.
+func New(policy container.RestartPolicy) RestartManager {
+	return &restartManager{policy: policy, cancel: make(chan struct{})}
+}
+
+func (rm *restartManager) SetPolicy(policy container.RestartPolicy) {
+	rm.Lock()
+	rm.policy = policy
+	rm.Unlock()
+}
+
+func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, error) {
+	rm.Lock()
+	unlockOnExit := true
+	defer func() {
+		if unlockOnExit {
+			rm.Unlock()
+		}
+	}()
+
+	if rm.canceled {
+		return false, nil, nil
+	}
+
+	if rm.active {
+		return false, nil, fmt.Errorf("invalid call on active restartmanager")
+	}
+
+	if exitCode != 0 {
+		rm.failureCount++
+	} else {
+		rm.failureCount = 0
+	}
+
+	if rm.timeout == 0 {
+		rm.timeout = defaultTimeout
+	} else {
+		rm.timeout *= backoffMultiplier
+	}
+
+	var restart bool
+	switch {
+	case rm.policy.IsAlways(), rm.policy.IsUnlessStopped():
+		restart = true
+	case rm.policy.IsOnFailure():
+		// the default value of 0 for MaximumRetryCount means that we will not enforce a maximum count
+		if max := rm.policy.MaximumRetryCount; max == 0 || rm.failureCount <= max {
+			restart = exitCode != 0
+		}
+	}
+
+	if !restart {
+		rm.active = false
+		return false, nil, nil
+	}
+
+	unlockOnExit = false
+	rm.active = true
+	rm.Unlock()
+
+	ch := make(chan error)
+	go func() {
+		select {
+		case <-rm.cancel:
+			ch <- fmt.Errorf("restartmanager canceled")
+			close(ch)
+		case <-time.After(rm.timeout):
+			rm.Lock()
+			close(ch)
+			rm.active = false
+			rm.Unlock()
+		}
+	}()
+
+	return true, ch, nil
+}
+
+func (rm *restartManager) Cancel() error {
+	rm.Do(func() {
+		rm.Lock()
+		rm.canceled = true
+		close(rm.cancel)
+		rm.Unlock()
+	})
+	return nil
+}
diff --git a/restartmanager/restartmanager_test.go b/restartmanager/restartmanager_test.go
new file mode 100644
index 0000000000..22f6a0c20e
--- /dev/null
+++ b/restartmanager/restartmanager_test.go
+package restartmanager
+
+// FIXME
diff --git a/runconfig/streams.go b/runconfig/streams.go
index 7a35dd7d31..548c7826ee 100644
--- a/runconfig/streams.go
+++ b/runconfig/streams.go
@@ -5,6 +5,7 @@ import (
 	"io"
 	"io/ioutil"
 	"strings"
+	"sync"
 
 	"github.com/docker/docker/pkg/broadcaster"
 	"github.com/docker/docker/pkg/ioutils"
@@ -20,6 +21,7 @@ import (
 // copied and delivered to all StdoutPipe and StderrPipe consumers, using
 // a kind of "broadcaster".
 type StreamConfig struct {
+	sync.WaitGroup
 	stdout *broadcaster.Unbuffered
 	stderr *broadcaster.Unbuffered
 	stdin  io.ReadCloser
diff --git a/utils/process_unix.go b/utils/process_unix.go
new file mode 100644
index 0000000000..bdb1b46b3d
--- /dev/null
+++ b/utils/process_unix.go
+// +build linux freebsd
+
+package utils
+
+import (
+	"syscall"
+)
+
+// IsProcessAlive returns true if process with a given pid is running.
+func IsProcessAlive(pid int) bool {
+	err := syscall.Kill(pid, syscall.Signal(0))
+	if err == nil || err == syscall.EPERM {
+		return true
+	}
+
+	return false
+}
+
+// KillProcess force-stops a process.
+func KillProcess(pid int) {
+	syscall.Kill(pid, syscall.SIGKILL)
+}
diff --git a/utils/process_windows.go b/utils/process_windows.go
new file mode 100644
index 0000000000..03cb855197
--- /dev/null
+++ b/utils/process_windows.go
+package utils
+
+// IsProcessAlive returns true if process with a given pid is running.
+func IsProcessAlive(pid int) bool {
+	// TODO Windows containerd. Not sure this is needed
+	// p, err := os.FindProcess(pid)
+	// if err == nil {
+	// 	return true
+	// }
+	return false
+}
+
+// KillProcess force-stops a process.
+func KillProcess(pid int) {
+	// TODO Windows containerd. Not sure this is needed
+	// p, err := os.FindProcess(pid)
+	// if err == nil {
+	// 	p.Kill()
+	// }
+}