Fix race on force deleting container created by task

Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com>
This commit is contained in:
Tonis Tiigi 2016-06-14 11:11:43 -07:00
parent 1704914d7c
commit dcfe99278d
14 changed files with 77 additions and 76 deletions

View File

@ -539,7 +539,7 @@ func copyEscapable(dst io.Writer, src io.ReadCloser, keys []byte) (written int64
// ShouldRestart decides whether the daemon should restart the container or not.
// This is based on the container's restart policy.
func (container *Container) ShouldRestart() bool {
shouldRestart, _, _ := container.restartManager.ShouldRestart(uint32(container.ExitCode), container.HasBeenManuallyStopped, container.FinishedAt.Sub(container.StartedAt))
shouldRestart, _, _ := container.restartManager.ShouldRestart(uint32(container.ExitCode()), container.HasBeenManuallyStopped, container.FinishedAt.Sub(container.StartedAt))
return shouldRestart
}

View File

@ -24,8 +24,8 @@ type State struct {
RemovalInProgress bool // Not need for this to be persistent on disk.
Dead bool
Pid int
ExitCode int
Error string // contains last known error when starting the container
exitCode int
error string // contains last known error when starting the container
StartedAt time.Time
FinishedAt time.Time
waitChan chan struct{}
@ -46,7 +46,7 @@ func (s *State) String() string {
return fmt.Sprintf("Up %s (Paused)", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
}
if s.Restarting {
return fmt.Sprintf("Restarting (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
return fmt.Sprintf("Restarting (%d) %s ago", s.exitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
}
if h := s.Health; h != nil {
@ -71,7 +71,7 @@ func (s *State) String() string {
return ""
}
return fmt.Sprintf("Exited (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
return fmt.Sprintf("Exited (%d) %s ago", s.exitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
}
// StateString returns a single string to describe state
@ -129,7 +129,7 @@ func wait(waitChan <-chan struct{}, timeout time.Duration) error {
func (s *State) WaitStop(timeout time.Duration) (int, error) {
s.Lock()
if !s.Running {
exitCode := s.ExitCode
exitCode := s.exitCode
s.Unlock()
return exitCode, nil
}
@ -138,33 +138,38 @@ func (s *State) WaitStop(timeout time.Duration) (int, error) {
if err := wait(waitChan, timeout); err != nil {
return -1, err
}
return s.getExitCode(), nil
s.Lock()
defer s.Unlock()
return s.ExitCode(), nil
}
// WaitWithContext waits for the container to stop. Optional context can be
// passed for canceling the request.
func (s *State) WaitWithContext(ctx context.Context) <-chan int {
func (s *State) WaitWithContext(ctx context.Context) error {
// todo(tonistiigi): make other wait functions use this
c := make(chan int)
go func() {
s.Lock()
if !s.Running {
state := *s
defer s.Unlock()
if state.exitCode == 0 {
return nil
}
return &state
}
waitChan := s.waitChan
s.Unlock()
select {
case <-waitChan:
s.Lock()
if !s.Running {
exitCode := s.ExitCode
s.Unlock()
c <- exitCode
close(c)
return
}
waitChan := s.waitChan
state := *s
s.Unlock()
select {
case <-waitChan:
c <- s.getExitCode()
case <-ctx.Done():
if state.exitCode == 0 {
return nil
}
close(c)
}()
return c
return &state
case <-ctx.Done():
return ctx.Err()
}
}
// IsRunning returns whether the running flag is set. Used by Container to check whether a container is running.
@ -183,20 +188,26 @@ func (s *State) GetPID() int {
return res
}
func (s *State) getExitCode() int {
s.Lock()
res := s.ExitCode
s.Unlock()
// ExitCode returns current exitcode for the state. Take lock before if state
// may be shared.
func (s *State) ExitCode() int {
res := s.exitCode
return res
}
// SetExitCode set current exitcode for the state. Take lock before if state
// may be shared.
func (s *State) SetExitCode(ec int) {
s.exitCode = ec
}
// SetRunning sets the state of the container to "running".
func (s *State) SetRunning(pid int, initial bool) {
s.Error = ""
s.error = ""
s.Running = true
s.Paused = false
s.Restarting = false
s.ExitCode = 0
s.exitCode = 0
s.Pid = pid
if initial {
s.StartedAt = time.Now().UTC()
@ -248,7 +259,7 @@ func (s *State) SetRestarting(exitStatus *ExitStatus) {
// know the error that occurred when container transits to another state
// when inspecting it
func (s *State) SetError(err error) {
s.Error = err.Error()
s.error = err.Error()
}
// IsPaused returns whether the container is paused or not.
@ -292,3 +303,8 @@ func (s *State) SetDead() {
s.Dead = true
s.Unlock()
}
// Error returns current error for the state.
func (s *State) Error() string {
return s.error
}

View File

@ -3,5 +3,5 @@ package container
// setFromExitStatus is a platform specific helper function to set the state
// based on the ExitStatus structure.
func (s *State) setFromExitStatus(exitStatus *ExitStatus) {
s.ExitCode = exitStatus.ExitCode
s.exitCode = exitStatus.ExitCode
}

View File

@ -19,8 +19,8 @@ func TestStateRunStop(t *testing.T) {
if s.Pid != i+100 {
t.Fatalf("Pid %v, expected %v", s.Pid, i+100)
}
if s.ExitCode != 0 {
t.Fatalf("ExitCode %v, expected 0", s.ExitCode)
if s.ExitCode() != 0 {
t.Fatalf("ExitCode %v, expected 0", s.ExitCode())
}
stopped := make(chan struct{})
@ -34,8 +34,8 @@ func TestStateRunStop(t *testing.T) {
if s.IsRunning() {
t.Fatal("State is running")
}
if s.ExitCode != i {
t.Fatalf("ExitCode %v, expected %v", s.ExitCode, i)
if s.ExitCode() != i {
t.Fatalf("ExitCode %v, expected %v", s.ExitCode(), i)
}
if s.Pid != 0 {
t.Fatalf("Pid %v, expected 0", s.Pid)

View File

@ -5,6 +5,6 @@ package container
// setFromExitStatus is a platform specific helper function to set the state
// based on the ExitStatus structure.
func (s *State) setFromExitStatus(exitStatus *ExitStatus) {
s.ExitCode = exitStatus.ExitCode
s.exitCode = exitStatus.ExitCode
s.OOMKilled = exitStatus.OOMKilled
}

View File

@ -3,5 +3,5 @@ package container
// setFromExitStatus is a platform specific helper function to set the state
// based on the ExitStatus structure.
func (s *State) setFromExitStatus(exitStatus *ExitStatus) {
s.ExitCode = exitStatus.ExitCode
s.exitCode = exitStatus.ExitCode
}

View File

@ -24,7 +24,7 @@ type Backend interface {
ConnectContainerToNetwork(containerName, networkName string, endpointConfig *network.EndpointSettings) error
UpdateContainerServiceConfig(containerName string, serviceConfig *clustertypes.ServiceConfig) error
ContainerInspectCurrent(name string, size bool) (*types.ContainerJSON, error)
ContainerWaitWithContext(ctx context.Context, name string) (<-chan int, error)
ContainerWaitWithContext(ctx context.Context, name string) error
ContainerRm(name string, config *types.ContainerRmConfig) error
ContainerKill(name string, sig uint64) error
SystemInfo() (*types.Info, error)

View File

@ -160,7 +160,7 @@ func (c *containerAdapter) inspect(ctx context.Context) (types.ContainerJSON, er
//
// A chan struct{} is returned that will be closed if the event procressing
// fails and needs to be restarted.
func (c *containerAdapter) wait(ctx context.Context) (<-chan int, error) {
func (c *containerAdapter) wait(ctx context.Context) error {
return c.backend.ContainerWaitWithContext(ctx, c.container.name())
}

View File

@ -1,7 +1,6 @@
package container
import (
"errors"
"fmt"
"strings"
@ -151,33 +150,20 @@ func (r *controller) Wait(pctx context.Context) error {
ctx, cancel := context.WithCancel(pctx)
defer cancel()
c, err := r.adapter.wait(ctx)
err := r.adapter.wait(ctx)
if err != nil {
return err
}
<-c
if ctx.Err() != nil {
return ctx.Err()
}
ctnr, err := r.adapter.inspect(ctx)
if err != nil {
// TODO(stevvooe): Need to handle missing container here. It is likely
// that a Wait call with a not found error should result in no waiting
// and no error at all.
return err
}
if ctnr.State.ExitCode != 0 {
var cause error
if ctnr.State.Error != "" {
cause = errors.New(ctnr.State.Error)
ee := &exitError{}
if err.Error() != "" {
ee.cause = err
}
cstatus, _ := parseContainerStatus(ctnr)
return &exitError{
code: ctnr.State.ExitCode,
cause: cause,
containerStatus: cstatus,
if ec, ok := err.(exec.ExitCoder); ok {
ee.code = ec.ExitCode()
}
}
return nil
@ -283,9 +269,8 @@ func parseContainerStatus(ctnr types.ContainerJSON) (*api.ContainerStatus, error
}
type exitError struct {
code int
cause error
containerStatus *api.ContainerStatus
code int
cause error
}
func (e *exitError) Error() string {
@ -297,7 +282,7 @@ func (e *exitError) Error() string {
}
func (e *exitError) ExitCode() int {
return int(e.containerStatus.ExitCode)
return int(e.code)
}
func (e *exitError) Cause() error {

View File

@ -127,8 +127,8 @@ func (daemon *Daemon) getInspectData(container *container.Container, size bool)
OOMKilled: container.State.OOMKilled,
Dead: container.State.Dead,
Pid: container.State.Pid,
ExitCode: container.State.ExitCode,
Error: container.State.Error,
ExitCode: container.State.ExitCode(),
Error: container.State.Error(),
StartedAt: container.State.StartedAt.Format(time.RFC3339Nano),
FinishedAt: container.State.FinishedAt.Format(time.RFC3339Nano),
Health: containerHealth,

View File

@ -337,7 +337,7 @@ func includeContainerInList(container *container.Container, ctx *listContext) it
if len(ctx.exitAllowed) > 0 {
shouldSkip := true
for _, code := range ctx.exitAllowed {
if code == container.ExitCode && !container.Running && !container.StartedAt.IsZero() {
if code == container.ExitCode() && !container.Running && !container.StartedAt.IsZero() {
shouldSkip = false
break
}

View File

@ -29,7 +29,7 @@ func (daemon *Daemon) postRunProcessing(container *container.Container, e libcon
// Create a new servicing container, which will start, complete the update, and merge back the
// results if it succeeded, all as part of the below function call.
if err := daemon.containerd.Create((container.ID + "_servicing"), *spec, servicingOption); err != nil {
container.ExitCode = -1
container.SetExitCode(-1)
return fmt.Errorf("Post-run update servicing failed: %s", err)
}
}

View File

@ -107,8 +107,8 @@ func (daemon *Daemon) containerStart(container *container.Container) (err error)
if err != nil {
container.SetError(err)
// if no one else has set it, make sure we don't leave it at zero
if container.ExitCode == 0 {
container.ExitCode = 128
if container.ExitCode() == 0 {
container.SetExitCode(128)
}
container.ToDisk()
daemon.Cleanup(container)
@ -151,11 +151,11 @@ func (daemon *Daemon) containerStart(container *container.Container) (err error)
(strings.Contains(errDesc, "executable file not found") ||
strings.Contains(errDesc, "no such file or directory") ||
strings.Contains(errDesc, "system cannot find the file specified")) {
container.ExitCode = 127
container.SetExitCode(127)
}
// set to 126 for container cmd can't be invoked errors
if strings.Contains(errDesc, syscall.EACCES.Error()) {
container.ExitCode = 126
container.SetExitCode(126)
}
container.Reset(false)

View File

@ -22,11 +22,11 @@ func (daemon *Daemon) ContainerWait(name string, timeout time.Duration) (int, er
// ContainerWaitWithContext returns a channel where exit code is sent
// when container stops. Channel can be cancelled with a context.
func (daemon *Daemon) ContainerWaitWithContext(ctx context.Context, name string) (<-chan int, error) {
func (daemon *Daemon) ContainerWaitWithContext(ctx context.Context, name string) error {
container, err := daemon.GetContainer(name)
if err != nil {
return nil, err
return err
}
return container.WaitWithContext(ctx), nil
return container.WaitWithContext(ctx)
}