From f963500c544daa3c158c0ca3d2985295c875cb6b Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 27 Sep 2017 11:49:22 -0700 Subject: [PATCH] ContainerWait on remove: don't stuck on rm fail Currently, if a container removal has failed for some reason, any client waiting for removal (e.g. `docker run --rm`) is stuck, waiting for removal to succeed while it has failed already. For more details and the reproducer, please check https://github.com/moby/moby/issues/34945 This commit addresses that by allowing `ContainerWait()` with `container.WaitCondition == "removed"` argument to return an error in case of removal failure. The `ContainerWaitOKBody` stucture returned to a client is amended with a pointer to `struct Error`, containing an error message string, and the `Client.ContainerWait()` is modified to return the error, if any, to the client. Note that this feature is only available for API version >= 1.34. In order for the old clients to be unstuck, we just close the connection without writing anything -- this causes client's error. Now, docker-cli would need a separate commit to bump the API to 1.34 and to show an error returned, if any. [v2: recreate the waitRemove channel after closing] [v3: document; keep legacy behavior for older clients] [v4: convert Error from string to pointer to a struct] [v5: don't emulate old behavior, send empty response in error case] [v6: rename legacy* vars to include version suffix] Signed-off-by: Kir Kolyshkin --- .../router/container/container_routes.go | 22 ++++++++++++++++--- api/swagger.yaml | 7 ++++++ api/types/container/container_wait.go | 12 ++++++++++ container/state.go | 17 ++++++++++++-- daemon/delete.go | 8 +++++-- docs/api/version-history.md | 9 +++++++- 6 files changed, 67 insertions(+), 8 deletions(-) diff --git a/api/server/router/container/container_routes.go b/api/server/router/container/container_routes.go index 95bbe0e533..106a7087cd 100644 --- a/api/server/router/container/container_routes.go +++ b/api/server/router/container/container_routes.go @@ -280,11 +280,12 @@ func (s *containerRouter) postContainersWait(ctx context.Context, w http.Respons // Behavior changed in version 1.30 to handle wait condition and to // return headers immediately. version := httputils.VersionFromContext(ctx) - legacyBehavior := versions.LessThan(version, "1.30") + legacyBehaviorPre130 := versions.LessThan(version, "1.30") + legacyRemovalWaitPre134 := false // The wait condition defaults to "not-running". waitCondition := containerpkg.WaitConditionNotRunning - if !legacyBehavior { + if !legacyBehaviorPre130 { if err := httputils.ParseForm(r); err != nil { return err } @@ -293,6 +294,7 @@ func (s *containerRouter) postContainersWait(ctx context.Context, w http.Respons waitCondition = containerpkg.WaitConditionNextExit case container.WaitConditionRemoved: waitCondition = containerpkg.WaitConditionRemoved + legacyRemovalWaitPre134 = versions.LessThan(version, "1.34") } } @@ -306,7 +308,7 @@ func (s *containerRouter) postContainersWait(ctx context.Context, w http.Respons w.Header().Set("Content-Type", "application/json") - if !legacyBehavior { + if !legacyBehaviorPre130 { // Write response header immediately. w.WriteHeader(http.StatusOK) if flusher, ok := w.(http.Flusher); ok { @@ -317,8 +319,22 @@ func (s *containerRouter) postContainersWait(ctx context.Context, w http.Respons // Block on the result of the wait operation. status := <-waitC + // With API < 1.34, wait on WaitConditionRemoved did not return + // in case container removal failed. The only way to report an + // error back to the client is to not write anything (i.e. send + // an empty response which will be treated as an error). + if legacyRemovalWaitPre134 && status.Err() != nil { + return nil + } + + var waitError *container.ContainerWaitOKBodyError + if status.Err() != nil { + waitError = &container.ContainerWaitOKBodyError{Message: status.Err().Error()} + } + return json.NewEncoder(w).Encode(&container.ContainerWaitOKBody{ StatusCode: int64(status.ExitCode()), + Error: waitError, }) } diff --git a/api/swagger.yaml b/api/swagger.yaml index c3b9c29244..b0c0575fc0 100644 --- a/api/swagger.yaml +++ b/api/swagger.yaml @@ -5723,6 +5723,13 @@ paths: description: "Exit code of the container" type: "integer" x-nullable: false + Error: + description: "container waiting error, if any" + type: "object" + properties: + Message: + description: "Details of an error" + type: "string" 404: description: "no such container" schema: diff --git a/api/types/container/container_wait.go b/api/types/container/container_wait.go index 77ecdbaf7a..47fb17578a 100644 --- a/api/types/container/container_wait.go +++ b/api/types/container/container_wait.go @@ -7,10 +7,22 @@ package container // See hack/generate-swagger-api.sh // ---------------------------------------------------------------------------- +// ContainerWaitOKBodyError container waiting error, if any +// swagger:model ContainerWaitOKBodyError +type ContainerWaitOKBodyError struct { + + // Details of an error + Message string `json:"Message,omitempty"` +} + // ContainerWaitOKBody container wait o k body // swagger:model ContainerWaitOKBody type ContainerWaitOKBody struct { + // error + // Required: true + Error *ContainerWaitOKBodyError `json:"Error"` + // Exit code of the container // Required: true StatusCode int64 `json:"StatusCode"` diff --git a/container/state.go b/container/state.go index cdf51d37d2..b624fe1dee 100644 --- a/container/state.go +++ b/container/state.go @@ -29,7 +29,7 @@ type State struct { Dead bool Pid int ExitCodeValue int `json:"ExitCode"` - ErrorMsg string `json:"Error"` // contains last known error when starting the container + ErrorMsg string `json:"Error"` // contains last known error during container start or remove StartedAt time.Time FinishedAt time.Time Health *Health @@ -319,7 +319,10 @@ func (s *State) SetRestarting(exitStatus *ExitStatus) { // know the error that occurred when container transits to another state // when inspecting it func (s *State) SetError(err error) { - s.ErrorMsg = err.Error() + s.ErrorMsg = "" + if err != nil { + s.ErrorMsg = err.Error() + } } // IsPaused returns whether the container is paused or not. @@ -385,8 +388,18 @@ func (s *State) IsDead() bool { // closes the internal waitRemove channel to unblock callers waiting for a // container to be removed. func (s *State) SetRemoved() { + s.SetRemovalError(nil) +} + +// SetRemovalError is to be called in case a container remove failed. +// It sets an error and closes the internal waitRemove channel to unblock +// callers waiting for the container to be removed. +func (s *State) SetRemovalError(err error) { + s.SetError(err) s.Lock() close(s.waitRemove) // Unblock those waiting on remove. + // Recreate the channel so next ContainerWait will work + s.waitRemove = make(chan struct{}) s.Unlock() } diff --git a/daemon/delete.go b/daemon/delete.go index 3009400c09..b5dd258a66 100644 --- a/daemon/delete.go +++ b/daemon/delete.go @@ -120,12 +120,16 @@ func (daemon *Daemon) cleanupContainer(container *container.Container, forceRemo metadata, err := daemon.stores[container.OS].layerStore.ReleaseRWLayer(container.RWLayer) layer.LogReleaseMetadata(metadata) if err != nil && err != layer.ErrMountDoesNotExist && !os.IsNotExist(errors.Cause(err)) { - return errors.Wrapf(err, "driver %q failed to remove root filesystem for %s", daemon.GraphDriverName(container.OS), container.ID) + e := errors.Wrapf(err, "driver %q failed to remove root filesystem for %s", daemon.GraphDriverName(container.OS), container.ID) + container.SetRemovalError(e) + return e } } if err := system.EnsureRemoveAll(container.Root); err != nil { - return errors.Wrapf(err, "unable to remove filesystem for %s", container.ID) + e := errors.Wrapf(err, "unable to remove filesystem for %s", container.ID) + container.SetRemovalError(e) + return e } linkNames := daemon.linkIndex.delete(container) diff --git a/docs/api/version-history.md b/docs/api/version-history.md index 9ac31dcaaf..77b8545bcc 100644 --- a/docs/api/version-history.md +++ b/docs/api/version-history.md @@ -17,12 +17,19 @@ keywords: "API, Docker, rcli, REST, documentation" [Docker Engine API v1.34](https://docs.docker.com/engine/api/v1.34/) documentation +* `POST /containers/(name)/wait?condition=removed` now also also returns + in case of container removal failure. A pointer to a structure named + `Error` added to the response JSON in order to indicate a failure. + If `Error` is `null`, container removal has succeeded, otherwise + the test of an error message indicating why container removal has failed + is available from `Error.Message` field. + ## v1.33 API changes [Docker Engine API v1.33](https://docs.docker.com/engine/api/v1.33/) documentation * `GET /events` now supports filtering 4 more kinds of events: `config`, `node`, -`secret` and `service`. +`secret` and `service`. ## v1.32 API changes