From 1326f0cba5f933674e23769de1385d3b0841e758 Mon Sep 17 00:00:00 2001 From: Zhang Wei Date: Thu, 17 Dec 2015 11:20:30 +0800 Subject: [PATCH] Break big lock into some tiny locks Don't involve code waiting for blocking channel in locked critical section because it has potential risk of hanging forever. Signed-off-by: Zhang Wei --- container/monitor.go | 19 ++++++++++++++++--- container/state.go | 9 ++++++++- daemon/start.go | 6 ++++++ 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/container/monitor.go b/container/monitor.go index 8098f23bae..60473d9c04 100644 --- a/container/monitor.go +++ b/container/monitor.go @@ -80,6 +80,7 @@ type containerMonitor struct { // StartMonitor initializes a containerMonitor for this container with the provided supervisor and restart policy // and starts the container's process. func (container *Container) StartMonitor(s supervisor, policy container.RestartPolicy) error { + container.Lock() container.monitor = &containerMonitor{ supervisor: s, container: container, @@ -88,6 +89,7 @@ func (container *Container) StartMonitor(s supervisor, policy container.RestartP stopChan: make(chan struct{}), startSignal: make(chan struct{}), } + container.Unlock() return container.monitor.wait() } @@ -157,6 +159,8 @@ func (m *containerMonitor) start() error { } m.Close() }() + + m.container.Lock() // reset stopped flag if m.container.HasBeenManuallyStopped { m.container.HasBeenManuallyStopped = false @@ -171,16 +175,20 @@ func (m *containerMonitor) start() error { if err := m.supervisor.StartLogging(m.container); err != nil { m.resetContainer(false) + m.container.Unlock() return err } pipes := execdriver.NewPipes(m.container.Stdin(), m.container.Stdout(), m.container.Stderr(), m.container.Config.OpenStdin) + m.container.Unlock() m.logEvent("start") m.lastStartTime = time.Now() + // don't lock Run because m.callback has own lock if exitStatus, err = m.supervisor.Run(m.container, pipes, m.callback); err != nil { + m.container.Lock() // if we receive an internal error from the initial start of a container then lets // return it instead of entering the restart loop // set to 127 for container cmd not found/does not exist) @@ -190,6 +198,7 @@ func (m *containerMonitor) start() error { if m.container.RestartCount == 0 { m.container.ExitCode = 127 m.resetContainer(false) + m.container.Unlock() return derr.ErrorCodeCmdNotFound } } @@ -198,6 +207,7 @@ func (m *containerMonitor) start() error { if m.container.RestartCount == 0 { m.container.ExitCode = 126 m.resetContainer(false) + m.container.Unlock() return derr.ErrorCodeCmdCouldNotBeInvoked } } @@ -206,11 +216,13 @@ func (m *containerMonitor) start() error { m.container.ExitCode = -1 m.resetContainer(false) + m.container.Unlock() return derr.ErrorCodeCantStart.WithArgs(m.container.ID, utils.GetErrorMessage(err)) } + m.container.Unlock() logrus.Errorf("Error running container: %s", err) - } + } // end if // here container.Lock is already lost afterRun = true @@ -231,13 +243,14 @@ func (m *containerMonitor) start() error { if m.shouldStop { return err } + m.container.Lock() continue } m.logEvent("die") m.resetContainer(true) return err - } + } // end for } // resetMonitor resets the stateful fields on the containerMonitor based on the @@ -319,7 +332,7 @@ func (m *containerMonitor) callback(processConfig *execdriver.ProcessConfig, pid } } - m.container.SetRunning(pid) + m.container.SetRunningLocking(pid) // signal that the process has started // close channel only if not closed diff --git a/container/state.go b/container/state.go index 138d79874f..d36ade956f 100644 --- a/container/state.go +++ b/container/state.go @@ -179,6 +179,13 @@ func (s *State) getExitCode() int { return res } +// SetRunningLocking locks container and sets it to "running" +func (s *State) SetRunningLocking(pid int) { + s.Lock() + s.SetRunning(pid) + s.Unlock() +} + // SetRunning sets the state of the container to "running". func (s *State) SetRunning(pid int) { s.Error = "" @@ -192,7 +199,7 @@ func (s *State) SetRunning(pid int) { s.waitChan = make(chan struct{}) } -// SetStoppedLocking locks the container state is sets it to "stopped". +// SetStoppedLocking locks the container state and sets it to "stopped". func (s *State) SetStoppedLocking(exitStatus *execdriver.ExitStatus) { s.Lock() s.SetStopped(exitStatus) diff --git a/daemon/start.go b/daemon/start.go index 27279d6b37..4b6b9aa537 100644 --- a/daemon/start.go +++ b/daemon/start.go @@ -129,9 +129,15 @@ func (daemon *Daemon) containerStart(container *container.Container) (err error) mounts = append(mounts, container.TmpfsMounts()...) container.Command.Mounts = mounts + container.Unlock() + + // don't lock waitForStart because it has potential risk of blocking + // which will lead to dead lock, forever. if err := daemon.waitForStart(container); err != nil { + container.Lock() return err } + container.Lock() container.HasBeenStartedBefore = true return nil }