From dcfe99278db113e79dc037a6f854f4cea9eebe22 Mon Sep 17 00:00:00 2001 From: Tonis Tiigi Date: Tue, 14 Jun 2016 11:11:43 -0700 Subject: [PATCH] Fix race on force deleting container created by task Signed-off-by: Tonis Tiigi --- container/container.go | 2 +- container/state.go | 78 +++++++++++-------- container/state_solaris.go | 2 +- container/state_test.go | 8 +- container/state_unix.go | 2 +- container/state_windows.go | 2 +- daemon/cluster/executor/backend.go | 2 +- daemon/cluster/executor/container/adapter.go | 2 +- .../cluster/executor/container/controller.go | 33 +++----- daemon/inspect.go | 4 +- daemon/list.go | 2 +- daemon/monitor_windows.go | 2 +- daemon/start.go | 8 +- daemon/wait.go | 6 +- 14 files changed, 77 insertions(+), 76 deletions(-) diff --git a/container/container.go b/container/container.go index 1300d96d9c..85064a358f 100644 --- a/container/container.go +++ b/container/container.go @@ -539,7 +539,7 @@ func copyEscapable(dst io.Writer, src io.ReadCloser, keys []byte) (written int64 // ShouldRestart decides whether the daemon should restart the container or not. // This is based on the container's restart policy. func (container *Container) ShouldRestart() bool { - shouldRestart, _, _ := container.restartManager.ShouldRestart(uint32(container.ExitCode), container.HasBeenManuallyStopped, container.FinishedAt.Sub(container.StartedAt)) + shouldRestart, _, _ := container.restartManager.ShouldRestart(uint32(container.ExitCode()), container.HasBeenManuallyStopped, container.FinishedAt.Sub(container.StartedAt)) return shouldRestart } diff --git a/container/state.go b/container/state.go index b01bb3806c..3922897165 100644 --- a/container/state.go +++ b/container/state.go @@ -24,8 +24,8 @@ type State struct { RemovalInProgress bool // Not need for this to be persistent on disk. Dead bool Pid int - ExitCode int - Error string // contains last known error when starting the container + exitCode int + error string // contains last known error when starting the container StartedAt time.Time FinishedAt time.Time waitChan chan struct{} @@ -46,7 +46,7 @@ func (s *State) String() string { return fmt.Sprintf("Up %s (Paused)", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt))) } if s.Restarting { - return fmt.Sprintf("Restarting (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt))) + return fmt.Sprintf("Restarting (%d) %s ago", s.exitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt))) } if h := s.Health; h != nil { @@ -71,7 +71,7 @@ func (s *State) String() string { return "" } - return fmt.Sprintf("Exited (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt))) + return fmt.Sprintf("Exited (%d) %s ago", s.exitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt))) } // StateString returns a single string to describe state @@ -129,7 +129,7 @@ func wait(waitChan <-chan struct{}, timeout time.Duration) error { func (s *State) WaitStop(timeout time.Duration) (int, error) { s.Lock() if !s.Running { - exitCode := s.ExitCode + exitCode := s.exitCode s.Unlock() return exitCode, nil } @@ -138,33 +138,38 @@ func (s *State) WaitStop(timeout time.Duration) (int, error) { if err := wait(waitChan, timeout); err != nil { return -1, err } - return s.getExitCode(), nil + s.Lock() + defer s.Unlock() + return s.ExitCode(), nil } // WaitWithContext waits for the container to stop. Optional context can be // passed for canceling the request. -func (s *State) WaitWithContext(ctx context.Context) <-chan int { +func (s *State) WaitWithContext(ctx context.Context) error { // todo(tonistiigi): make other wait functions use this - c := make(chan int) - go func() { + s.Lock() + if !s.Running { + state := *s + defer s.Unlock() + if state.exitCode == 0 { + return nil + } + return &state + } + waitChan := s.waitChan + s.Unlock() + select { + case <-waitChan: s.Lock() - if !s.Running { - exitCode := s.ExitCode - s.Unlock() - c <- exitCode - close(c) - return - } - waitChan := s.waitChan + state := *s s.Unlock() - select { - case <-waitChan: - c <- s.getExitCode() - case <-ctx.Done(): + if state.exitCode == 0 { + return nil } - close(c) - }() - return c + return &state + case <-ctx.Done(): + return ctx.Err() + } } // IsRunning returns whether the running flag is set. Used by Container to check whether a container is running. @@ -183,20 +188,26 @@ func (s *State) GetPID() int { return res } -func (s *State) getExitCode() int { - s.Lock() - res := s.ExitCode - s.Unlock() +// ExitCode returns current exitcode for the state. Take lock before if state +// may be shared. +func (s *State) ExitCode() int { + res := s.exitCode return res } +// SetExitCode set current exitcode for the state. Take lock before if state +// may be shared. +func (s *State) SetExitCode(ec int) { + s.exitCode = ec +} + // SetRunning sets the state of the container to "running". func (s *State) SetRunning(pid int, initial bool) { - s.Error = "" + s.error = "" s.Running = true s.Paused = false s.Restarting = false - s.ExitCode = 0 + s.exitCode = 0 s.Pid = pid if initial { s.StartedAt = time.Now().UTC() @@ -248,7 +259,7 @@ func (s *State) SetRestarting(exitStatus *ExitStatus) { // know the error that occurred when container transits to another state // when inspecting it func (s *State) SetError(err error) { - s.Error = err.Error() + s.error = err.Error() } // IsPaused returns whether the container is paused or not. @@ -292,3 +303,8 @@ func (s *State) SetDead() { s.Dead = true s.Unlock() } + +// Error returns current error for the state. +func (s *State) Error() string { + return s.error +} diff --git a/container/state_solaris.go b/container/state_solaris.go index 02802a02a4..9aef1d518e 100644 --- a/container/state_solaris.go +++ b/container/state_solaris.go @@ -3,5 +3,5 @@ package container // setFromExitStatus is a platform specific helper function to set the state // based on the ExitStatus structure. func (s *State) setFromExitStatus(exitStatus *ExitStatus) { - s.ExitCode = exitStatus.ExitCode + s.exitCode = exitStatus.ExitCode } diff --git a/container/state_test.go b/container/state_test.go index 0d0acab385..83ff1efcb6 100644 --- a/container/state_test.go +++ b/container/state_test.go @@ -19,8 +19,8 @@ func TestStateRunStop(t *testing.T) { if s.Pid != i+100 { t.Fatalf("Pid %v, expected %v", s.Pid, i+100) } - if s.ExitCode != 0 { - t.Fatalf("ExitCode %v, expected 0", s.ExitCode) + if s.ExitCode() != 0 { + t.Fatalf("ExitCode %v, expected 0", s.ExitCode()) } stopped := make(chan struct{}) @@ -34,8 +34,8 @@ func TestStateRunStop(t *testing.T) { if s.IsRunning() { t.Fatal("State is running") } - if s.ExitCode != i { - t.Fatalf("ExitCode %v, expected %v", s.ExitCode, i) + if s.ExitCode() != i { + t.Fatalf("ExitCode %v, expected %v", s.ExitCode(), i) } if s.Pid != 0 { t.Fatalf("Pid %v, expected 0", s.Pid) diff --git a/container/state_unix.go b/container/state_unix.go index 8d25a23790..f09d015e0b 100644 --- a/container/state_unix.go +++ b/container/state_unix.go @@ -5,6 +5,6 @@ package container // setFromExitStatus is a platform specific helper function to set the state // based on the ExitStatus structure. func (s *State) setFromExitStatus(exitStatus *ExitStatus) { - s.ExitCode = exitStatus.ExitCode + s.exitCode = exitStatus.ExitCode s.OOMKilled = exitStatus.OOMKilled } diff --git a/container/state_windows.go b/container/state_windows.go index 02802a02a4..9aef1d518e 100644 --- a/container/state_windows.go +++ b/container/state_windows.go @@ -3,5 +3,5 @@ package container // setFromExitStatus is a platform specific helper function to set the state // based on the ExitStatus structure. func (s *State) setFromExitStatus(exitStatus *ExitStatus) { - s.ExitCode = exitStatus.ExitCode + s.exitCode = exitStatus.ExitCode } diff --git a/daemon/cluster/executor/backend.go b/daemon/cluster/executor/backend.go index 6b0d0e5a48..2840e4d689 100644 --- a/daemon/cluster/executor/backend.go +++ b/daemon/cluster/executor/backend.go @@ -24,7 +24,7 @@ type Backend interface { ConnectContainerToNetwork(containerName, networkName string, endpointConfig *network.EndpointSettings) error UpdateContainerServiceConfig(containerName string, serviceConfig *clustertypes.ServiceConfig) error ContainerInspectCurrent(name string, size bool) (*types.ContainerJSON, error) - ContainerWaitWithContext(ctx context.Context, name string) (<-chan int, error) + ContainerWaitWithContext(ctx context.Context, name string) error ContainerRm(name string, config *types.ContainerRmConfig) error ContainerKill(name string, sig uint64) error SystemInfo() (*types.Info, error) diff --git a/daemon/cluster/executor/container/adapter.go b/daemon/cluster/executor/container/adapter.go index ffe1ce6538..f24d91bb8e 100644 --- a/daemon/cluster/executor/container/adapter.go +++ b/daemon/cluster/executor/container/adapter.go @@ -160,7 +160,7 @@ func (c *containerAdapter) inspect(ctx context.Context) (types.ContainerJSON, er // // A chan struct{} is returned that will be closed if the event procressing // fails and needs to be restarted. -func (c *containerAdapter) wait(ctx context.Context) (<-chan int, error) { +func (c *containerAdapter) wait(ctx context.Context) error { return c.backend.ContainerWaitWithContext(ctx, c.container.name()) } diff --git a/daemon/cluster/executor/container/controller.go b/daemon/cluster/executor/container/controller.go index 17aa454093..7cb11132ea 100644 --- a/daemon/cluster/executor/container/controller.go +++ b/daemon/cluster/executor/container/controller.go @@ -1,7 +1,6 @@ package container import ( - "errors" "fmt" "strings" @@ -151,33 +150,20 @@ func (r *controller) Wait(pctx context.Context) error { ctx, cancel := context.WithCancel(pctx) defer cancel() - c, err := r.adapter.wait(ctx) + err := r.adapter.wait(ctx) if err != nil { return err } - - <-c if ctx.Err() != nil { return ctx.Err() } - ctnr, err := r.adapter.inspect(ctx) if err != nil { - // TODO(stevvooe): Need to handle missing container here. It is likely - // that a Wait call with a not found error should result in no waiting - // and no error at all. - return err - } - - if ctnr.State.ExitCode != 0 { - var cause error - if ctnr.State.Error != "" { - cause = errors.New(ctnr.State.Error) + ee := &exitError{} + if err.Error() != "" { + ee.cause = err } - cstatus, _ := parseContainerStatus(ctnr) - return &exitError{ - code: ctnr.State.ExitCode, - cause: cause, - containerStatus: cstatus, + if ec, ok := err.(exec.ExitCoder); ok { + ee.code = ec.ExitCode() } } return nil @@ -283,9 +269,8 @@ func parseContainerStatus(ctnr types.ContainerJSON) (*api.ContainerStatus, error } type exitError struct { - code int - cause error - containerStatus *api.ContainerStatus + code int + cause error } func (e *exitError) Error() string { @@ -297,7 +282,7 @@ func (e *exitError) Error() string { } func (e *exitError) ExitCode() int { - return int(e.containerStatus.ExitCode) + return int(e.code) } func (e *exitError) Cause() error { diff --git a/daemon/inspect.go b/daemon/inspect.go index ba9f6ecb2b..6499fb89c6 100644 --- a/daemon/inspect.go +++ b/daemon/inspect.go @@ -127,8 +127,8 @@ func (daemon *Daemon) getInspectData(container *container.Container, size bool) OOMKilled: container.State.OOMKilled, Dead: container.State.Dead, Pid: container.State.Pid, - ExitCode: container.State.ExitCode, - Error: container.State.Error, + ExitCode: container.State.ExitCode(), + Error: container.State.Error(), StartedAt: container.State.StartedAt.Format(time.RFC3339Nano), FinishedAt: container.State.FinishedAt.Format(time.RFC3339Nano), Health: containerHealth, diff --git a/daemon/list.go b/daemon/list.go index 48323d730c..5d45de6964 100644 --- a/daemon/list.go +++ b/daemon/list.go @@ -337,7 +337,7 @@ func includeContainerInList(container *container.Container, ctx *listContext) it if len(ctx.exitAllowed) > 0 { shouldSkip := true for _, code := range ctx.exitAllowed { - if code == container.ExitCode && !container.Running && !container.StartedAt.IsZero() { + if code == container.ExitCode() && !container.Running && !container.StartedAt.IsZero() { shouldSkip = false break } diff --git a/daemon/monitor_windows.go b/daemon/monitor_windows.go index db6360badb..b500ee60b9 100644 --- a/daemon/monitor_windows.go +++ b/daemon/monitor_windows.go @@ -29,7 +29,7 @@ func (daemon *Daemon) postRunProcessing(container *container.Container, e libcon // Create a new servicing container, which will start, complete the update, and merge back the // results if it succeeded, all as part of the below function call. if err := daemon.containerd.Create((container.ID + "_servicing"), *spec, servicingOption); err != nil { - container.ExitCode = -1 + container.SetExitCode(-1) return fmt.Errorf("Post-run update servicing failed: %s", err) } } diff --git a/daemon/start.go b/daemon/start.go index 7ced9e1440..8def3ccd1b 100644 --- a/daemon/start.go +++ b/daemon/start.go @@ -107,8 +107,8 @@ func (daemon *Daemon) containerStart(container *container.Container) (err error) if err != nil { container.SetError(err) // if no one else has set it, make sure we don't leave it at zero - if container.ExitCode == 0 { - container.ExitCode = 128 + if container.ExitCode() == 0 { + container.SetExitCode(128) } container.ToDisk() daemon.Cleanup(container) @@ -151,11 +151,11 @@ func (daemon *Daemon) containerStart(container *container.Container) (err error) (strings.Contains(errDesc, "executable file not found") || strings.Contains(errDesc, "no such file or directory") || strings.Contains(errDesc, "system cannot find the file specified")) { - container.ExitCode = 127 + container.SetExitCode(127) } // set to 126 for container cmd can't be invoked errors if strings.Contains(errDesc, syscall.EACCES.Error()) { - container.ExitCode = 126 + container.SetExitCode(126) } container.Reset(false) diff --git a/daemon/wait.go b/daemon/wait.go index bf7e2c7149..2dab22e991 100644 --- a/daemon/wait.go +++ b/daemon/wait.go @@ -22,11 +22,11 @@ func (daemon *Daemon) ContainerWait(name string, timeout time.Duration) (int, er // ContainerWaitWithContext returns a channel where exit code is sent // when container stops. Channel can be cancelled with a context. -func (daemon *Daemon) ContainerWaitWithContext(ctx context.Context, name string) (<-chan int, error) { +func (daemon *Daemon) ContainerWaitWithContext(ctx context.Context, name string) error { container, err := daemon.GetContainer(name) if err != nil { - return nil, err + return err } - return container.WaitWithContext(ctx), nil + return container.WaitWithContext(ctx) }