mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
ContainerWait on remove: don't get stuck on rm fail
Currently, if a container removal has failed for some reason, any client waiting for removal (e.g. `docker run --rm`) is stuck, waiting for removal to succeed while it has failed already. For more details and the reproducer, please check https://github.com/moby/moby/issues/34945. This commit addresses that by allowing `ContainerWait()` with `container.WaitCondition == "removed"` argument to return an error in case of removal failure. The `ContainerWaitOKBody` structure returned to a client is amended with a pointer to `struct Error`, containing an error message string, and the `Client.ContainerWait()` is modified to return the error, if any, to the client. Note that this feature is only available for API version >= 1.34. In order for the old clients to be unstuck, we just close the connection without writing anything -- this causes an error on the client side. Now, docker-cli would need a separate commit to bump the API to 1.34 and to show an error returned, if any. [v2: recreate the waitRemove channel after closing] [v3: document; keep legacy behavior for older clients] [v4: convert Error from string to pointer to a struct] [v5: don't emulate old behavior, send empty response in error case] [v6: rename legacy* vars to include version suffix] Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
This commit is contained in:
parent
239d61f04b
commit
f963500c54
6 changed files with 67 additions and 8 deletions
|
@ -280,11 +280,12 @@ func (s *containerRouter) postContainersWait(ctx context.Context, w http.Respons
|
||||||
// Behavior changed in version 1.30 to handle wait condition and to
|
// Behavior changed in version 1.30 to handle wait condition and to
|
||||||
// return headers immediately.
|
// return headers immediately.
|
||||||
version := httputils.VersionFromContext(ctx)
|
version := httputils.VersionFromContext(ctx)
|
||||||
legacyBehavior := versions.LessThan(version, "1.30")
|
legacyBehaviorPre130 := versions.LessThan(version, "1.30")
|
||||||
|
legacyRemovalWaitPre134 := false
|
||||||
|
|
||||||
// The wait condition defaults to "not-running".
|
// The wait condition defaults to "not-running".
|
||||||
waitCondition := containerpkg.WaitConditionNotRunning
|
waitCondition := containerpkg.WaitConditionNotRunning
|
||||||
if !legacyBehavior {
|
if !legacyBehaviorPre130 {
|
||||||
if err := httputils.ParseForm(r); err != nil {
|
if err := httputils.ParseForm(r); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -293,6 +294,7 @@ func (s *containerRouter) postContainersWait(ctx context.Context, w http.Respons
|
||||||
waitCondition = containerpkg.WaitConditionNextExit
|
waitCondition = containerpkg.WaitConditionNextExit
|
||||||
case container.WaitConditionRemoved:
|
case container.WaitConditionRemoved:
|
||||||
waitCondition = containerpkg.WaitConditionRemoved
|
waitCondition = containerpkg.WaitConditionRemoved
|
||||||
|
legacyRemovalWaitPre134 = versions.LessThan(version, "1.34")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -306,7 +308,7 @@ func (s *containerRouter) postContainersWait(ctx context.Context, w http.Respons
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
|
||||||
if !legacyBehavior {
|
if !legacyBehaviorPre130 {
|
||||||
// Write response header immediately.
|
// Write response header immediately.
|
||||||
w.WriteHeader(http.StatusOK)
|
w.WriteHeader(http.StatusOK)
|
||||||
if flusher, ok := w.(http.Flusher); ok {
|
if flusher, ok := w.(http.Flusher); ok {
|
||||||
|
@ -317,8 +319,22 @@ func (s *containerRouter) postContainersWait(ctx context.Context, w http.Respons
|
||||||
// Block on the result of the wait operation.
|
// Block on the result of the wait operation.
|
||||||
status := <-waitC
|
status := <-waitC
|
||||||
|
|
||||||
|
// With API < 1.34, wait on WaitConditionRemoved did not return
|
||||||
|
// in case container removal failed. The only way to report an
|
||||||
|
// error back to the client is to not write anything (i.e. send
|
||||||
|
// an empty response which will be treated as an error).
|
||||||
|
if legacyRemovalWaitPre134 && status.Err() != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var waitError *container.ContainerWaitOKBodyError
|
||||||
|
if status.Err() != nil {
|
||||||
|
waitError = &container.ContainerWaitOKBodyError{Message: status.Err().Error()}
|
||||||
|
}
|
||||||
|
|
||||||
return json.NewEncoder(w).Encode(&container.ContainerWaitOKBody{
|
return json.NewEncoder(w).Encode(&container.ContainerWaitOKBody{
|
||||||
StatusCode: int64(status.ExitCode()),
|
StatusCode: int64(status.ExitCode()),
|
||||||
|
Error: waitError,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5723,6 +5723,13 @@ paths:
|
||||||
description: "Exit code of the container"
|
description: "Exit code of the container"
|
||||||
type: "integer"
|
type: "integer"
|
||||||
x-nullable: false
|
x-nullable: false
|
||||||
|
Error:
|
||||||
|
description: "container waiting error, if any"
|
||||||
|
type: "object"
|
||||||
|
properties:
|
||||||
|
Message:
|
||||||
|
description: "Details of an error"
|
||||||
|
type: "string"
|
||||||
404:
|
404:
|
||||||
description: "no such container"
|
description: "no such container"
|
||||||
schema:
|
schema:
|
||||||
|
|
|
@ -7,10 +7,22 @@ package container
|
||||||
// See hack/generate-swagger-api.sh
|
// See hack/generate-swagger-api.sh
|
||||||
// ----------------------------------------------------------------------------
|
// ----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// ContainerWaitOKBodyError container waiting error, if any
|
||||||
|
// swagger:model ContainerWaitOKBodyError
|
||||||
|
type ContainerWaitOKBodyError struct {
|
||||||
|
|
||||||
|
// Details of an error
|
||||||
|
Message string `json:"Message,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
// ContainerWaitOKBody container wait o k body
|
// ContainerWaitOKBody container wait o k body
|
||||||
// swagger:model ContainerWaitOKBody
|
// swagger:model ContainerWaitOKBody
|
||||||
type ContainerWaitOKBody struct {
|
type ContainerWaitOKBody struct {
|
||||||
|
|
||||||
|
// error
|
||||||
|
// Required: true
|
||||||
|
Error *ContainerWaitOKBodyError `json:"Error"`
|
||||||
|
|
||||||
// Exit code of the container
|
// Exit code of the container
|
||||||
// Required: true
|
// Required: true
|
||||||
StatusCode int64 `json:"StatusCode"`
|
StatusCode int64 `json:"StatusCode"`
|
||||||
|
|
|
@ -29,7 +29,7 @@ type State struct {
|
||||||
Dead bool
|
Dead bool
|
||||||
Pid int
|
Pid int
|
||||||
ExitCodeValue int `json:"ExitCode"`
|
ExitCodeValue int `json:"ExitCode"`
|
||||||
ErrorMsg string `json:"Error"` // contains last known error when starting the container
|
ErrorMsg string `json:"Error"` // contains last known error during container start or remove
|
||||||
StartedAt time.Time
|
StartedAt time.Time
|
||||||
FinishedAt time.Time
|
FinishedAt time.Time
|
||||||
Health *Health
|
Health *Health
|
||||||
|
@ -319,7 +319,10 @@ func (s *State) SetRestarting(exitStatus *ExitStatus) {
|
||||||
// know the error that occurred when container transits to another state
|
// know the error that occurred when container transits to another state
|
||||||
// when inspecting it
|
// when inspecting it
|
||||||
func (s *State) SetError(err error) {
|
func (s *State) SetError(err error) {
|
||||||
s.ErrorMsg = err.Error()
|
s.ErrorMsg = ""
|
||||||
|
if err != nil {
|
||||||
|
s.ErrorMsg = err.Error()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsPaused returns whether the container is paused or not.
|
// IsPaused returns whether the container is paused or not.
|
||||||
|
@ -385,8 +388,18 @@ func (s *State) IsDead() bool {
|
||||||
// closes the internal waitRemove channel to unblock callers waiting for a
|
// closes the internal waitRemove channel to unblock callers waiting for a
|
||||||
// container to be removed.
|
// container to be removed.
|
||||||
func (s *State) SetRemoved() {
|
func (s *State) SetRemoved() {
|
||||||
|
s.SetRemovalError(nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetRemovalError is to be called in case a container remove failed.
|
||||||
|
// It sets an error and closes the internal waitRemove channel to unblock
|
||||||
|
// callers waiting for the container to be removed.
|
||||||
|
func (s *State) SetRemovalError(err error) {
|
||||||
|
s.SetError(err)
|
||||||
s.Lock()
|
s.Lock()
|
||||||
close(s.waitRemove) // Unblock those waiting on remove.
|
close(s.waitRemove) // Unblock those waiting on remove.
|
||||||
|
// Recreate the channel so next ContainerWait will work
|
||||||
|
s.waitRemove = make(chan struct{})
|
||||||
s.Unlock()
|
s.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -120,12 +120,16 @@ func (daemon *Daemon) cleanupContainer(container *container.Container, forceRemo
|
||||||
metadata, err := daemon.stores[container.OS].layerStore.ReleaseRWLayer(container.RWLayer)
|
metadata, err := daemon.stores[container.OS].layerStore.ReleaseRWLayer(container.RWLayer)
|
||||||
layer.LogReleaseMetadata(metadata)
|
layer.LogReleaseMetadata(metadata)
|
||||||
if err != nil && err != layer.ErrMountDoesNotExist && !os.IsNotExist(errors.Cause(err)) {
|
if err != nil && err != layer.ErrMountDoesNotExist && !os.IsNotExist(errors.Cause(err)) {
|
||||||
return errors.Wrapf(err, "driver %q failed to remove root filesystem for %s", daemon.GraphDriverName(container.OS), container.ID)
|
e := errors.Wrapf(err, "driver %q failed to remove root filesystem for %s", daemon.GraphDriverName(container.OS), container.ID)
|
||||||
|
container.SetRemovalError(e)
|
||||||
|
return e
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := system.EnsureRemoveAll(container.Root); err != nil {
|
if err := system.EnsureRemoveAll(container.Root); err != nil {
|
||||||
return errors.Wrapf(err, "unable to remove filesystem for %s", container.ID)
|
e := errors.Wrapf(err, "unable to remove filesystem for %s", container.ID)
|
||||||
|
container.SetRemovalError(e)
|
||||||
|
return e
|
||||||
}
|
}
|
||||||
|
|
||||||
linkNames := daemon.linkIndex.delete(container)
|
linkNames := daemon.linkIndex.delete(container)
|
||||||
|
|
|
@ -17,12 +17,19 @@ keywords: "API, Docker, rcli, REST, documentation"
|
||||||
|
|
||||||
[Docker Engine API v1.34](https://docs.docker.com/engine/api/v1.34/) documentation
|
[Docker Engine API v1.34](https://docs.docker.com/engine/api/v1.34/) documentation
|
||||||
|
|
||||||
|
* `POST /containers/(name)/wait?condition=removed` now also returns
|
||||||
|
in case of container removal failure. A pointer to a structure named
|
||||||
|
`Error` is added to the response JSON in order to indicate a failure.
|
||||||
|
If `Error` is `null`, container removal has succeeded, otherwise
|
||||||
|
the text of an error message indicating why container removal has failed
|
||||||
|
is available from `Error.Message` field.
|
||||||
|
|
||||||
## v1.33 API changes
|
## v1.33 API changes
|
||||||
|
|
||||||
[Docker Engine API v1.33](https://docs.docker.com/engine/api/v1.33/) documentation
|
[Docker Engine API v1.33](https://docs.docker.com/engine/api/v1.33/) documentation
|
||||||
|
|
||||||
* `GET /events` now supports filtering 4 more kinds of events: `config`, `node`,
|
* `GET /events` now supports filtering 4 more kinds of events: `config`, `node`,
|
||||||
`secret` and `service`.
|
`secret` and `service`.
|
||||||
|
|
||||||
## v1.32 API changes
|
## v1.32 API changes
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue