Windows: Pass back system errors on container exit

Signed-off-by: John Howard <jhoward@microsoft.com>

While debugging #32838, it was found (https://github.com/moby/moby/issues/32838#issuecomment-356005845) that the utility VM in some circumstances was crashing. Unfortunately, this was silently thrown away, and as far as the build step (also applies to docker run) was concerned, the exit code was zero and the error was thrown away. Windows containers operate differently to containers on Linux, and there can be legitimate system errors during container shutdown after the init process exits. This PR handles this and passes the error all the way back to the client, and correctly causes a build step running a container which hits a system error to fail, rather than blindly trying to keep going, assuming all is good, and get a subsequent failure on a commit.

With this change, assuming an error occurs, here's an example of a failure which previous was reported as a commit error:

```
The command 'powershell -Command $ErrorActionPreference = 'Stop'; $ProgressPreference = 'SilentlyContinue'; Install-WindowsFeature -Name Web-App-Dev ;   Install-WindowsFeature -Name ADLDS;   Install-WindowsFeature -Name Web-Mgmt-Compat;   Install-WindowsFeature -Name Web-Mgmt-Service;   Install-WindowsFeature -Name Web-Metabase;   Install-WindowsFeature -Name Web-Lgcy-Scripting;   Install-WindowsFeature -Name Web-WMI;   Install-WindowsFeature -Name Web-WHC;   Install-WindowsFeature -Name Web-Scripting-Tools;   Install-WindowsFeature -Name Web-Net-Ext45;   Install-WindowsFeature -Name Web-ASP;   Install-WindowsFeature -Name Web-ISAPI-Ext;   Install-WindowsFeature -Name Web-ISAPI-Filter;   Install-WindowsFeature -Name Web-Default-Doc;   Install-WindowsFeature -Name Web-Dir-Browsing;   Install-WindowsFeature -Name Web-Http-Errors;   Install-WindowsFeature -Name Web-Static-Content;   Install-WindowsFeature -Name Web-Http-Redirect;   Install-WindowsFeature -Name Web-DAV-Publishing;   Install-WindowsFeature -Name Web-Health;   Install-WindowsFeature -Name Web-Http-Logging;   Install-WindowsFeature -Name Web-Custom-Logging;   Install-WindowsFeature -Name Web-Log-Libraries;   Install-WindowsFeature -Name Web-Request-Monitor;   Install-WindowsFeature -Name Web-Http-Tracing;   Install-WindowsFeature -Name Web-Stat-Compression;   Install-WindowsFeature -Name Web-Dyn-Compression;   Install-WindowsFeature -Name Web-Security;   Install-WindowsFeature -Name Web-Windows-Auth;   Install-WindowsFeature -Name Web-Basic-Auth;   Install-WindowsFeature -Name Web-Url-Auth;   Install-WindowsFeature -Name Web-WebSockets;   Install-WindowsFeature -Name Web-AppInit;   Install-WindowsFeature -Name NET-WCF-HTTP-Activation45;   Install-WindowsFeature -Name NET-WCF-Pipe-Activation45;   Install-WindowsFeature -Name NET-WCF-TCP-Activation45;' returned a non-zero code: 4294967295: container shutdown failed: container ba9c65054d42d4830fb25ef55e4ab3287550345aa1a2bb265df4e5bfcd79c78a encountered an error during WaitTimeout: failure in a Windows system call: The compute system exited unexpectedly. (0xc0370106)
```

Without this change, it would be incorrectly reported such as in this comment: https://github.com/moby/moby/issues/32838#issuecomment-309621097

```
Step 3/8 : ADD buildtools C:/buildtools
re-exec error: exit status 1: output: time="2017-06-20T11:37:38+10:00" level=error msg="hcsshim::ImportLayer failed in Win32: The system cannot find the path specified. (0x3) layerId=\\\\?\\C:\\ProgramData\\docker\\windowsfilter\\b41d28c95f98368b73fc192cb9205700e21
6691495c1f9ac79b9b04ec4923ea2 flavour=1 folder=C:\\Windows\\TEMP\\hcs232661915"
hcsshim::ImportLayer failed in Win32: The system cannot find the path specified. (0x3) layerId=\\?\C:\ProgramData\docker\windowsfilter\b41d28c95f98368b73fc192cb9205700e216691495c1f9ac79b9b04ec4923ea2 flavour=1 folder=C:\Windows\TEMP\hcs232661915
```
This commit is contained in:
John Howard 2018-01-09 11:46:29 -08:00
parent 15001f83bd
commit 8c52560ea4
6 changed files with 43 additions and 7 deletions

View File

@ -93,7 +93,7 @@ func (c *containerManager) Run(ctx context.Context, cID string, stdout, stderr i
close(finished)
logCancellationError(cancelErrCh,
fmt.Sprintf("a non-zero code from ContainerWait: %d", status.ExitCode()))
return &statusCodeError{code: status.ExitCode(), err: err}
return &statusCodeError{code: status.ExitCode(), err: status.Err()}
}
close(finished)
@ -112,6 +112,9 @@ type statusCodeError struct {
}
func (e *statusCodeError) Error() string {
if e.err == nil {
return ""
}
return e.err.Error()
}

View File

@ -348,11 +348,15 @@ func dispatchRun(d dispatchRequest, c *instructions.RunCommand) error {
if err := d.builder.containerManager.Run(d.builder.clientCtx, cID, d.builder.Stdout, d.builder.Stderr); err != nil {
if err, ok := err.(*statusCodeError); ok {
// TODO: change error type, because jsonmessage.JSONError assumes HTTP
msg := fmt.Sprintf(
"The command '%s' returned a non-zero code: %d",
strings.Join(runConfig.Cmd, " "), err.StatusCode())
if err.Error() != "" {
msg = fmt.Sprintf("%s: %s", msg, err.Error())
}
return &jsonmessage.JSONError{
Message: fmt.Sprintf(
"The command '%s' returned a non-zero code: %d",
strings.Join(runConfig.Cmd, " "), err.StatusCode()),
Code: err.StatusCode(),
Message: msg,
Code: err.StatusCode(),
}
}
return err

View File

@ -29,7 +29,7 @@ type State struct {
Dead bool
Pid int
ExitCodeValue int `json:"ExitCode"`
ErrorMsg string `json:"Error"` // contains last known error during container start or remove
ErrorMsg string `json:"Error"` // contains last known error during container start, stop, or remove
StartedAt time.Time
FinishedAt time.Time
Health *Health

View File

@ -69,6 +69,9 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerd.EventType, ei libc
c.RestartCount++
c.SetRestarting(&exitStatus)
} else {
if ei.Error != nil {
c.SetError(ei.Error)
}
c.SetStopped(&exitStatus)
defer daemon.autoRemove(c)
}

View File

@ -1203,7 +1203,13 @@ func (c *client) shutdownContainer(ctr *container) error {
if err != nil {
c.logger.WithError(err).WithField("container", ctr.id).
Debug("failed to shutdown container, terminating it")
return c.terminateContainer(ctr)
terminateErr := c.terminateContainer(ctr)
if terminateErr != nil {
c.logger.WithError(terminateErr).WithField("container", ctr.id).
Error("failed to shutdown container, and subsequent terminate also failed")
return fmt.Errorf("%s: subsequent terminate failed %s", err, terminateErr)
}
return err
}
return nil
@ -1234,6 +1240,8 @@ func (c *client) reapProcess(ctr *container, p *process) int {
"process": p.id,
})
var eventErr error
// Block indefinitely for the process to exit.
if err := p.hcsProcess.Wait(); err != nil {
if herr, ok := err.(*hcsshim.ProcessError); ok && herr.Err != windows.ERROR_BROKEN_PIPE {
@ -1263,6 +1271,8 @@ func (c *client) reapProcess(ctr *container, p *process) int {
if err := p.hcsProcess.Close(); err != nil {
logger.WithError(err).Warnf("failed to cleanup hcs process resources")
exitCode = -1
eventErr = fmt.Errorf("hcsProcess.Close() failed %s", err)
}
var pendingUpdates bool
@ -1286,13 +1296,27 @@ func (c *client) reapProcess(ctr *container, p *process) int {
}
if err := c.shutdownContainer(ctr); err != nil {
exitCode = -1
logger.WithError(err).Warn("failed to shutdown container")
thisErr := fmt.Errorf("failed to shutdown container: %s", err)
if eventErr != nil {
eventErr = fmt.Errorf("%s: %s", eventErr, thisErr)
} else {
eventErr = thisErr
}
} else {
logger.Debug("completed container shutdown")
}
if err := ctr.hcsContainer.Close(); err != nil {
exitCode = -1
logger.WithError(err).Error("failed to clean hcs container resources")
thisErr := fmt.Errorf("failed to terminate container: %s", err)
if eventErr != nil {
eventErr = fmt.Errorf("%s: %s", eventErr, thisErr)
} else {
eventErr = thisErr
}
}
}
@ -1305,6 +1329,7 @@ func (c *client) reapProcess(ctr *container, p *process) int {
ExitCode: uint32(exitCode),
ExitedAt: exitedAt,
UpdatePending: pendingUpdates,
Error: eventErr,
}
c.logger.WithFields(logrus.Fields{
"container": ctr.id,

View File

@ -73,6 +73,7 @@ type EventInfo struct {
OOMKilled bool
// Windows Only field
UpdatePending bool
Error error
}
// Backend defines callbacks that the client of the library needs to implement.