2018-02-05 16:05:59 -05:00
|
|
|
package daemon // import "github.com/docker/docker/daemon"
|
2016-03-18 14:50:19 -04:00
|
|
|
|
|
|
|
import (
|
2017-09-22 09:52:41 -04:00
|
|
|
"context"
|
2016-03-18 14:50:19 -04:00
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"runtime"
|
|
|
|
"strconv"
|
2016-10-05 16:29:56 -04:00
|
|
|
"time"
|
2016-03-18 14:50:19 -04:00
|
|
|
|
2016-09-06 14:18:12 -04:00
|
|
|
"github.com/docker/docker/api/types"
|
2017-02-09 21:57:35 -05:00
|
|
|
"github.com/docker/docker/container"
|
2016-03-18 14:50:19 -04:00
|
|
|
"github.com/docker/docker/libcontainerd"
|
2016-10-05 16:29:56 -04:00
|
|
|
"github.com/docker/docker/restartmanager"
|
2017-07-26 17:42:13 -04:00
|
|
|
"github.com/sirupsen/logrus"
|
2016-03-18 14:50:19 -04:00
|
|
|
)
|
|
|
|
|
2017-02-09 21:57:35 -05:00
|
|
|
func (daemon *Daemon) setStateCounter(c *container.Container) {
|
|
|
|
switch c.StateString() {
|
|
|
|
case "paused":
|
|
|
|
stateCtr.set(c.ID, "paused")
|
|
|
|
case "running":
|
|
|
|
stateCtr.set(c.ID, "running")
|
|
|
|
default:
|
|
|
|
stateCtr.set(c.ID, "stopped")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-09-22 09:52:41 -04:00
|
|
|
// ProcessEvent is called by libcontainerd whenever an event occurs
|
|
|
|
func (daemon *Daemon) ProcessEvent(id string, e libcontainerd.EventType, ei libcontainerd.EventInfo) error {
|
|
|
|
c, err := daemon.GetContainer(id)
|
|
|
|
if c == nil || err != nil {
|
2016-03-18 14:50:19 -04:00
|
|
|
return fmt.Errorf("no such container: %s", id)
|
|
|
|
}
|
|
|
|
|
2017-09-22 09:52:41 -04:00
|
|
|
switch e {
|
|
|
|
case libcontainerd.EventOOM:
|
2016-03-18 14:50:19 -04:00
|
|
|
// StateOOM is Linux specific and should never be hit on Windows
|
|
|
|
if runtime.GOOS == "windows" {
|
2017-08-17 15:16:30 -04:00
|
|
|
return errors.New("received StateOOM from libcontainerd on Windows. This should never happen")
|
2016-03-18 14:50:19 -04:00
|
|
|
}
|
2017-11-14 20:59:40 -05:00
|
|
|
|
|
|
|
c.Lock()
|
|
|
|
defer c.Unlock()
|
2016-04-18 05:48:13 -04:00
|
|
|
daemon.updateHealthMonitor(c)
|
2017-04-06 17:42:10 -04:00
|
|
|
if err := c.CheckpointTo(daemon.containersReplica); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2017-11-14 20:59:40 -05:00
|
|
|
|
2016-03-18 14:50:19 -04:00
|
|
|
daemon.LogContainerEvent(c, "oom")
|
2017-09-22 09:52:41 -04:00
|
|
|
case libcontainerd.EventExit:
|
|
|
|
if int(ei.Pid) == c.Pid {
|
2017-11-15 22:19:26 -05:00
|
|
|
c.Lock()
|
2017-09-22 09:52:41 -04:00
|
|
|
_, _, err := daemon.containerd.DeleteTask(context.Background(), c.ID)
|
|
|
|
if err != nil {
|
|
|
|
logrus.WithError(err).Warnf("failed to delete container %s from containerd", c.ID)
|
|
|
|
}
|
2016-10-05 16:29:56 -04:00
|
|
|
|
2017-09-22 09:52:41 -04:00
|
|
|
c.StreamConfig.Wait()
|
|
|
|
c.Reset(false)
|
2016-10-05 16:29:56 -04:00
|
|
|
|
2017-09-22 09:52:41 -04:00
|
|
|
exitStatus := container.ExitStatus{
|
|
|
|
ExitCode: int(ei.ExitCode),
|
|
|
|
ExitedAt: ei.ExitedAt,
|
|
|
|
OOMKilled: ei.OOMKilled,
|
|
|
|
}
|
|
|
|
restart, wait, err := c.RestartManager().ShouldRestart(ei.ExitCode, daemon.IsShuttingDown() || c.HasBeenManuallyStopped, time.Since(c.StartedAt))
|
|
|
|
if err == nil && restart {
|
|
|
|
c.RestartCount++
|
|
|
|
c.SetRestarting(&exitStatus)
|
|
|
|
} else {
|
Windows: Pass back system errors on container exit
Signed-off-by: John Howard <jhoward@microsoft.com>
While debugging #32838, it was found (https://github.com/moby/moby/issues/32838#issuecomment-356005845) that the utility VM in some circumstances was crashing. Unfortunately, this was silently thrown away, and as far as the build step (also applies to docker run) was concerned, the exit code was zero and the error was thrown away. Windows containers operate differently to containers on Linux, and there can be legitimate system errors during container shutdown after the init process exits. This PR handles this and passes the error all the way back to the client, and correctly causes a build step running a container which hits a system error to fail, rather than blindly trying to keep going, assuming all is good, and get a subsequent failure on a commit.
With this change, assuming an error occurs, here's an example of a failure which previous was reported as a commit error:
```
The command 'powershell -Command $ErrorActionPreference = 'Stop'; $ProgressPreference = 'SilentlyContinue'; Install-WindowsFeature -Name Web-App-Dev ; Install-WindowsFeature -Name ADLDS; Install-WindowsFeature -Name Web-Mgmt-Compat; Install-WindowsFeature -Name Web-Mgmt-Service; Install-WindowsFeature -Name Web-Metabase; Install-WindowsFeature -Name Web-Lgcy-Scripting; Install-WindowsFeature -Name Web-WMI; Install-WindowsFeature -Name Web-WHC; Install-WindowsFeature -Name Web-Scripting-Tools; Install-WindowsFeature -Name Web-Net-Ext45; Install-WindowsFeature -Name Web-ASP; Install-WindowsFeature -Name Web-ISAPI-Ext; Install-WindowsFeature -Name Web-ISAPI-Filter; Install-WindowsFeature -Name Web-Default-Doc; Install-WindowsFeature -Name Web-Dir-Browsing; Install-WindowsFeature -Name Web-Http-Errors; Install-WindowsFeature -Name Web-Static-Content; Install-WindowsFeature -Name Web-Http-Redirect; Install-WindowsFeature -Name Web-DAV-Publishing; Install-WindowsFeature -Name Web-Health; Install-WindowsFeature -Name Web-Http-Logging; Install-WindowsFeature -Name Web-Custom-Logging; Install-WindowsFeature -Name Web-Log-Libraries; Install-WindowsFeature -Name Web-Request-Monitor; Install-WindowsFeature -Name Web-Http-Tracing; Install-WindowsFeature -Name Web-Stat-Compression; Install-WindowsFeature -Name Web-Dyn-Compression; Install-WindowsFeature -Name Web-Security; Install-WindowsFeature -Name Web-Windows-Auth; Install-WindowsFeature -Name Web-Basic-Auth; Install-WindowsFeature -Name Web-Url-Auth; Install-WindowsFeature -Name Web-WebSockets; Install-WindowsFeature -Name Web-AppInit; Install-WindowsFeature -Name NET-WCF-HTTP-Activation45; Install-WindowsFeature -Name NET-WCF-Pipe-Activation45; Install-WindowsFeature -Name NET-WCF-TCP-Activation45;' returned a non-zero code: 4294967295: container shutdown failed: container ba9c65054d42d4830fb25ef55e4ab3287550345aa1a2bb265df4e5bfcd79c78a encountered an error during WaitTimeout: failure in a Windows system call: The compute system exited unexpectedly. (0xc0370106)
```
Without this change, it would be incorrectly reported such as in this comment: https://github.com/moby/moby/issues/32838#issuecomment-309621097
```
Step 3/8 : ADD buildtools C:/buildtools
re-exec error: exit status 1: output: time="2017-06-20T11:37:38+10:00" level=error msg="hcsshim::ImportLayer failed in Win32: The system cannot find the path specified. (0x3) layerId=\\\\?\\C:\\ProgramData\\docker\\windowsfilter\\b41d28c95f98368b73fc192cb9205700e21
6691495c1f9ac79b9b04ec4923ea2 flavour=1 folder=C:\\Windows\\TEMP\\hcs232661915"
hcsshim::ImportLayer failed in Win32: The system cannot find the path specified. (0x3) layerId=\\?\C:\ProgramData\docker\windowsfilter\b41d28c95f98368b73fc192cb9205700e216691495c1f9ac79b9b04ec4923ea2 flavour=1 folder=C:\Windows\TEMP\hcs232661915
```
2018-01-09 14:46:29 -05:00
|
|
|
if ei.Error != nil {
|
|
|
|
c.SetError(ei.Error)
|
|
|
|
}
|
2017-09-22 09:52:41 -04:00
|
|
|
c.SetStopped(&exitStatus)
|
|
|
|
defer daemon.autoRemove(c)
|
|
|
|
}
|
2017-11-15 22:19:26 -05:00
|
|
|
defer c.Unlock() // needs to be called before autoRemove
|
2017-09-22 09:52:41 -04:00
|
|
|
|
|
|
|
// cancel healthcheck here, they will be automatically
|
|
|
|
// restarted if/when the container is started again
|
|
|
|
daemon.stopHealthchecks(c)
|
|
|
|
attributes := map[string]string{
|
|
|
|
"exitCode": strconv.Itoa(int(ei.ExitCode)),
|
|
|
|
}
|
|
|
|
daemon.LogContainerEventWithAttributes(c, "die", attributes)
|
|
|
|
daemon.Cleanup(c)
|
Fix stopped containers with restart-policy showing as "restarting"
When manually stopping a container with a restart-policy, the container
would show as "restarting" in `docker ps` whereas its actual state
is "exited".
Stopping a container with a restart policy shows the container as "restarting"
docker run -d --name test --restart unless-stopped busybox false
docker stop test
docker ps
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
7e07409fa1d3 busybox "false" 5 minutes ago Restarting (1) 4 minutes ago test
However, inspecting the same container shows that it's exited:
docker inspect test --format '{{ json .State }}'
{
"Status": "exited",
"Running": false,
"Paused": false,
"Restarting": false,
"OOMKilled": false,
"Dead": false,
"Pid": 0,
"ExitCode": 1,
"Error": "",
"StartedAt": "2019-02-14T13:26:27.6091648Z",
"FinishedAt": "2019-02-14T13:26:27.689427Z"
}
And killing the container confirms this;
docker kill test
Error response from daemon: Cannot kill container: test: Container 7e07409fa1d36dc8d8cb8f25cf12ee1168ad9040183b85fafa73ee2c1fcf9361 is not running
docker run -d --name test --restart unless-stopped busybox false
docker stop test
docker ps
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
d0595237054a busybox "false" 5 minutes ago Restarting (1) 4 minutes ago exit
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2019-02-14 12:59:55 -05:00
|
|
|
daemon.setStateCounter(c)
|
|
|
|
cpErr := c.CheckpointTo(daemon.containersReplica)
|
2017-09-22 09:52:41 -04:00
|
|
|
|
|
|
|
if err == nil && restart {
|
|
|
|
go func() {
|
|
|
|
err := <-wait
|
|
|
|
if err == nil {
|
|
|
|
// daemon.netController is initialized when daemon is restoring containers.
|
|
|
|
// But containerStart will use daemon.netController segment.
|
|
|
|
// So to avoid panic at startup process, here must wait util daemon restore done.
|
|
|
|
daemon.waitForStartupDone()
|
|
|
|
if err = daemon.containerStart(c, "", "", false); err != nil {
|
|
|
|
logrus.Debugf("failed to restart container: %+v", err)
|
|
|
|
}
|
2016-10-05 16:29:56 -04:00
|
|
|
}
|
2017-09-22 09:52:41 -04:00
|
|
|
if err != nil {
|
2017-11-15 22:19:26 -05:00
|
|
|
c.Lock()
|
2017-09-22 09:52:41 -04:00
|
|
|
c.SetStopped(&exitStatus)
|
Fix stopped containers with restart-policy showing as "restarting"
When manually stopping a container with a restart-policy, the container
would show as "restarting" in `docker ps` whereas its actual state
is "exited".
Stopping a container with a restart policy shows the container as "restarting"
docker run -d --name test --restart unless-stopped busybox false
docker stop test
docker ps
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
7e07409fa1d3 busybox "false" 5 minutes ago Restarting (1) 4 minutes ago test
However, inspecting the same container shows that it's exited:
docker inspect test --format '{{ json .State }}'
{
"Status": "exited",
"Running": false,
"Paused": false,
"Restarting": false,
"OOMKilled": false,
"Dead": false,
"Pid": 0,
"ExitCode": 1,
"Error": "",
"StartedAt": "2019-02-14T13:26:27.6091648Z",
"FinishedAt": "2019-02-14T13:26:27.689427Z"
}
And killing the container confirms this;
docker kill test
Error response from daemon: Cannot kill container: test: Container 7e07409fa1d36dc8d8cb8f25cf12ee1168ad9040183b85fafa73ee2c1fcf9361 is not running
docker run -d --name test --restart unless-stopped busybox false
docker stop test
docker ps
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
d0595237054a busybox "false" 5 minutes ago Restarting (1) 4 minutes ago exit
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2019-02-14 12:59:55 -05:00
|
|
|
daemon.setStateCounter(c)
|
|
|
|
c.CheckpointTo(daemon.containersReplica)
|
2017-11-15 22:19:26 -05:00
|
|
|
c.Unlock()
|
2017-09-22 09:52:41 -04:00
|
|
|
defer daemon.autoRemove(c)
|
|
|
|
if err != restartmanager.ErrRestartCanceled {
|
|
|
|
logrus.Errorf("restartmanger wait error: %+v", err)
|
|
|
|
}
|
2016-10-05 16:29:56 -04:00
|
|
|
}
|
2017-09-22 09:52:41 -04:00
|
|
|
}()
|
|
|
|
}
|
2017-02-09 21:57:35 -05:00
|
|
|
|
Fix stopped containers with restart-policy showing as "restarting"
When manually stopping a container with a restart-policy, the container
would show as "restarting" in `docker ps` whereas its actual state
is "exited".
Stopping a container with a restart policy shows the container as "restarting"
docker run -d --name test --restart unless-stopped busybox false
docker stop test
docker ps
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
7e07409fa1d3 busybox "false" 5 minutes ago Restarting (1) 4 minutes ago test
However, inspecting the same container shows that it's exited:
docker inspect test --format '{{ json .State }}'
{
"Status": "exited",
"Running": false,
"Paused": false,
"Restarting": false,
"OOMKilled": false,
"Dead": false,
"Pid": 0,
"ExitCode": 1,
"Error": "",
"StartedAt": "2019-02-14T13:26:27.6091648Z",
"FinishedAt": "2019-02-14T13:26:27.689427Z"
}
And killing the container confirms this;
docker kill test
Error response from daemon: Cannot kill container: test: Container 7e07409fa1d36dc8d8cb8f25cf12ee1168ad9040183b85fafa73ee2c1fcf9361 is not running
docker run -d --name test --restart unless-stopped busybox false
docker stop test
docker ps
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
d0595237054a busybox "false" 5 minutes ago Restarting (1) 4 minutes ago exit
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2019-02-14 12:59:55 -05:00
|
|
|
return cpErr
|
2016-04-01 20:02:38 -04:00
|
|
|
}
|
2017-09-22 09:52:41 -04:00
|
|
|
|
2017-11-27 10:53:16 -05:00
|
|
|
if execConfig := c.ExecCommands.Get(ei.ProcessID); execConfig != nil {
|
2017-09-22 09:52:41 -04:00
|
|
|
ec := int(ei.ExitCode)
|
2016-10-12 19:56:52 -04:00
|
|
|
execConfig.Lock()
|
|
|
|
defer execConfig.Unlock()
|
2016-03-18 14:50:19 -04:00
|
|
|
execConfig.ExitCode = &ec
|
|
|
|
execConfig.Running = false
|
2016-11-14 15:15:09 -05:00
|
|
|
execConfig.StreamConfig.Wait()
|
2016-03-18 14:50:19 -04:00
|
|
|
if err := execConfig.CloseStreams(); err != nil {
|
2017-01-21 06:35:54 -05:00
|
|
|
logrus.Errorf("failed to cleanup exec %s streams: %s", c.ID, err)
|
2016-03-18 14:50:19 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// remove the exec command from the container's store only and not the
|
|
|
|
// daemon's store so that the exec command can be inspected.
|
2017-09-22 09:52:41 -04:00
|
|
|
c.ExecCommands.Delete(execConfig.ID, execConfig.Pid)
|
2017-12-08 03:01:34 -05:00
|
|
|
attributes := map[string]string{
|
|
|
|
"execID": execConfig.ID,
|
|
|
|
"exitCode": strconv.Itoa(ec),
|
|
|
|
}
|
|
|
|
daemon.LogContainerEventWithAttributes(c, "exec_die", attributes)
|
2016-03-18 14:50:19 -04:00
|
|
|
} else {
|
2017-09-22 09:52:41 -04:00
|
|
|
logrus.WithFields(logrus.Fields{
|
|
|
|
"container": c.ID,
|
2017-11-27 10:53:16 -05:00
|
|
|
"exec-id": ei.ProcessID,
|
2017-09-22 09:52:41 -04:00
|
|
|
"exec-pid": ei.Pid,
|
2018-07-11 09:51:51 -04:00
|
|
|
}).Warn("Ignoring Exit Event, no such exec command found")
|
2016-03-18 14:50:19 -04:00
|
|
|
}
|
2017-09-22 09:52:41 -04:00
|
|
|
case libcontainerd.EventStart:
|
|
|
|
c.Lock()
|
|
|
|
defer c.Unlock()
|
|
|
|
|
|
|
|
// This is here to handle start not generated by docker
|
|
|
|
if !c.Running {
|
|
|
|
c.SetRunning(int(ei.Pid), false)
|
|
|
|
c.HasBeenManuallyStopped = false
|
|
|
|
c.HasBeenStartedBefore = true
|
|
|
|
daemon.setStateCounter(c)
|
|
|
|
|
|
|
|
daemon.initHealthMonitor(c)
|
|
|
|
|
|
|
|
if err := c.CheckpointTo(daemon.containersReplica); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
daemon.LogContainerEvent(c, "start")
|
2016-03-18 14:50:19 -04:00
|
|
|
}
|
2017-02-09 21:57:35 -05:00
|
|
|
|
2017-09-22 09:52:41 -04:00
|
|
|
case libcontainerd.EventPaused:
|
|
|
|
c.Lock()
|
|
|
|
defer c.Unlock()
|
|
|
|
|
|
|
|
if !c.Paused {
|
|
|
|
c.Paused = true
|
|
|
|
daemon.setStateCounter(c)
|
|
|
|
daemon.updateHealthMonitor(c)
|
|
|
|
if err := c.CheckpointTo(daemon.containersReplica); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
daemon.LogContainerEvent(c, "pause")
|
2016-08-19 05:12:01 -04:00
|
|
|
}
|
2017-09-22 09:52:41 -04:00
|
|
|
case libcontainerd.EventResumed:
|
|
|
|
c.Lock()
|
|
|
|
defer c.Unlock()
|
|
|
|
|
|
|
|
if c.Paused {
|
|
|
|
c.Paused = false
|
|
|
|
daemon.setStateCounter(c)
|
|
|
|
daemon.updateHealthMonitor(c)
|
|
|
|
|
|
|
|
if err := c.CheckpointTo(daemon.containersReplica); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
daemon.LogContainerEvent(c, "unpause")
|
2016-08-19 05:12:01 -04:00
|
|
|
}
|
2016-03-18 14:50:19 -04:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
2017-02-14 13:35:20 -05:00
|
|
|
|
|
|
|
func (daemon *Daemon) autoRemove(c *container.Container) {
|
|
|
|
c.Lock()
|
|
|
|
ar := c.HostConfig.AutoRemove
|
|
|
|
c.Unlock()
|
|
|
|
if !ar {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
var err error
|
|
|
|
if err = daemon.ContainerRm(c.ID, &types.ContainerRmConfig{ForceRemove: true, RemoveVolume: true}); err == nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if c := daemon.containers.Get(c.ID); c == nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
logrus.WithError(err).WithField("container", c.ID).Error("error removing container")
|
|
|
|
}
|
|
|
|
}
|