container: protect the health status with mutex
Adds a mutex to protect the status, as well. When running the race detector with the unit test, we can see that the Status field is written without holding this lock. Adding a mutex to read and set status addresses the issue.

Signed-off-by: Stephen J Day <stephen.day@docker.com>
This commit is contained in:
parent 7c53e73253
commit 7db30ab0cd

4 changed files with 47 additions and 23 deletions
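The pattern the diff below introduces is small enough to demonstrate standalone. The following sketch is illustrative, not code from the commit: the health type and the main driver are invented for the example, while the real container.Health embeds types.Health and guards it with its mu sync.Mutex field. Run it with go run -race: through the accessors the detector stays quiet, whereas swapping in direct field access reproduces the class of race the commit message describes.

package main

import (
	"fmt"
	"sync"
)

// health mirrors the locking pattern of container.Health: the status
// string is only read or written while holding mu.
type health struct {
	mu     sync.Mutex
	status string
}

// Status returns the current status under the lock.
func (h *health) Status() string {
	h.mu.Lock()
	defer h.mu.Unlock()
	return h.status
}

// SetStatus writes the status under the same lock.
func (h *health) SetStatus(s string) {
	h.mu.Lock()
	defer h.mu.Unlock()
	h.status = s
}

func main() {
	h := &health{status: "starting"}

	var wg sync.WaitGroup
	wg.Add(2)
	// A writer and a reader racing on the same status, much as the
	// health monitor and API handlers do in the daemon.
	go func() { defer wg.Done(); h.SetStatus("healthy") }()
	go func() { defer wg.Done(); fmt.Println(h.Status()) }()
	wg.Wait()
}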
--- a/container/health.go
+++ b/container/health.go
@@ -16,19 +16,42 @@ type Health struct {
 
 // String returns a human-readable description of the health-check state
 func (s *Health) String() string {
-	// This happens when the monitor has yet to be setup.
-	if s.Status == "" {
-		return types.Unhealthy
-	}
+	status := s.Status()
 
-	switch s.Status {
+	switch status {
 	case types.Starting:
 		return "health: starting"
 	default: // Healthy and Unhealthy are clear on their own
-		return s.Status
+		return s.Health.Status
 	}
 }
 
+// Status returns the current health status.
+//
+// Note that this takes a lock and the value may change after being read.
+func (s *Health) Status() string {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	// This happens when the monitor has yet to be setup.
+	if s.Health.Status == "" {
+		return types.Unhealthy
+	}
+
+	return s.Health.Status
+}
+
+// SetStatus writes the current status to the underlying health structure,
+// obeying the locking semantics.
+//
+// Status may be set directly if another lock is used.
+func (s *Health) SetStatus(new string) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	s.Health.Status = new
+}
+
 // OpenMonitorChannel creates and returns a new monitor channel. If there
 // already is one, it returns nil.
 func (s *Health) OpenMonitorChannel() chan struct{} {
@@ -53,7 +76,7 @@ func (s *Health) CloseMonitorChannel() {
 		close(s.stop)
 		s.stop = nil
 		// unhealthy when the monitor has stopped for compatibility reasons
-		s.Status = types.Unhealthy
+		s.Health.Status = types.Unhealthy
 		logrus.Debug("CloseMonitorChannel done")
 	}
 }
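Two details in this hunk are easy to miss. The switch from s.Status to s.Health.Status is forced by Go's selector rules: once the type gains a Status() method, the promoted field can only be reached through the explicit s.Health selector, and assigning to s.Status would no longer compile. And SetStatus's doc comment deliberately leaves direct writes legal for callers that already serialize access some other way, which is presumably why CloseMonitorChannel assigns s.Health.Status instead of calling SetStatus.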
--- a/daemon/health.go
+++ b/daemon/health.go
@@ -129,7 +129,7 @@ func handleProbeResult(d *Daemon, c *container.Container, result *types.Healthch
 	}
 
 	h := c.State.Health
-	oldStatus := h.Status
+	oldStatus := h.Status()
 
 	if len(h.Log) >= maxLogEntries {
 		h.Log = append(h.Log[len(h.Log)+1-maxLogEntries:], result)
@@ -139,14 +139,14 @@ func handleProbeResult(d *Daemon, c *container.Container, result *types.Healthch
 
 	if result.ExitCode == exitStatusHealthy {
 		h.FailingStreak = 0
-		h.Status = types.Healthy
+		h.SetStatus(types.Healthy)
 	} else { // Failure (including invalid exit code)
 		shouldIncrementStreak := true
 
 		// If the container is starting (i.e. we never had a successful health check)
 		// then we check if we are within the start period of the container in which
 		// case we do not increment the failure streak.
-		if h.Status == types.Starting {
+		if h.Status() == types.Starting {
 			startPeriod := timeoutWithDefault(c.Config.Healthcheck.StartPeriod, defaultStartPeriod)
 			timeSinceStart := result.Start.Sub(c.State.StartedAt)
 
@@ -160,7 +160,7 @@ func handleProbeResult(d *Daemon, c *container.Container, result *types.Healthch
 			h.FailingStreak++
 
 			if h.FailingStreak >= retries {
-				h.Status = types.Unhealthy
+				h.SetStatus(types.Unhealthy)
 			}
 		}
 		// Else we're starting or healthy. Stay in that state.
@@ -173,8 +173,9 @@ func handleProbeResult(d *Daemon, c *container.Container, result *types.Healthch
 		logrus.Errorf("Error replicating health state for container %s: %v", c.ID, err)
 	}
 
-	if oldStatus != h.Status {
-		d.LogContainerEvent(c, "health_status: "+h.Status)
+	current := h.Status()
+	if oldStatus != current {
+		d.LogContainerEvent(c, "health_status: "+current)
 	}
 }
 
@@ -293,11 +294,11 @@ func (d *Daemon) initHealthMonitor(c *container.Container) {
 	d.stopHealthchecks(c)
 
 	if h := c.State.Health; h != nil {
-		h.Status = types.Starting
+		h.SetStatus(types.Starting)
 		h.FailingStreak = 0
 	} else {
 		h := &container.Health{}
-		h.Status = types.Starting
+		h.SetStatus(types.Starting)
 		c.State.Health = h
 	}
 
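The introduction of current := h.Status() is more than style. The old code read h.Status twice, once in the comparison and once when formatting the event, so a concurrent write landing between the two reads could emit a health_status event carrying a status that never passed the comparison. Snapshotting the value once closes that window.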
--- a/daemon/health_test.go
+++ b/daemon/health_test.go
@@ -14,7 +14,7 @@ import (
 func reset(c *container.Container) {
 	c.State = &container.State{}
 	c.State.Health = &container.Health{}
-	c.State.Health.Status = types.Starting
+	c.State.Health.SetStatus(types.Starting)
 }
 
 func TestNoneHealthcheck(t *testing.T) {
@@ -111,8 +111,8 @@ func TestHealthStates(t *testing.T) {
 
 	handleResult(c.State.StartedAt.Add(20*time.Second), 1)
 	handleResult(c.State.StartedAt.Add(40*time.Second), 1)
-	if c.State.Health.Status != types.Starting {
-		t.Errorf("Expecting starting, but got %#v\n", c.State.Health.Status)
+	if status := c.State.Health.Status(); status != types.Starting {
+		t.Errorf("Expecting starting, but got %#v\n", status)
 	}
 	if c.State.Health.FailingStreak != 2 {
 		t.Errorf("Expecting FailingStreak=2, but got %d\n", c.State.Health.FailingStreak)
@@ -133,15 +133,15 @@ func TestHealthStates(t *testing.T) {
 	c.Config.Healthcheck.StartPeriod = 30 * time.Second
 
 	handleResult(c.State.StartedAt.Add(20*time.Second), 1)
-	if c.State.Health.Status != types.Starting {
-		t.Errorf("Expecting starting, but got %#v\n", c.State.Health.Status)
+	if status := c.State.Health.Status(); status != types.Starting {
+		t.Errorf("Expecting starting, but got %#v\n", status)
 	}
 	if c.State.Health.FailingStreak != 0 {
 		t.Errorf("Expecting FailingStreak=0, but got %d\n", c.State.Health.FailingStreak)
 	}
 	handleResult(c.State.StartedAt.Add(50*time.Second), 1)
-	if c.State.Health.Status != types.Starting {
-		t.Errorf("Expecting starting, but got %#v\n", c.State.Health.Status)
+	if status := c.State.Health.Status(); status != types.Starting {
+		t.Errorf("Expecting starting, but got %#v\n", status)
 	}
 	if c.State.Health.FailingStreak != 1 {
 		t.Errorf("Expecting FailingStreak=1, but got %d\n", c.State.Health.FailingStreak)
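These are the unit tests the commit message refers to; running them under the race detector is what exposed the unsynchronized writes to Status. A typical invocation (assumed here, not shown on this page) would be:

	go test -race -run TestHealthStates ./daemon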
--- a/daemon/inspect.go
+++ b/daemon/inspect.go
@@ -139,7 +139,7 @@ func (daemon *Daemon) getInspectData(container *container.Container) (*types.Con
 	var containerHealth *types.Health
 	if container.State.Health != nil {
 		containerHealth = &types.Health{
-			Status:        container.State.Health.Status,
+			Status:        container.State.Health.Status(),
 			FailingStreak: container.State.Health.FailingStreak,
 			Log:           append([]*types.HealthcheckResult{}, container.State.Health.Log...),
 		}
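In the inspect path only the Status read needed to change: the health data was already copied field by field into the API type, and the Log slice is duplicated via append so the inspect response does not alias the live log that the monitor keeps appending to.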