daemon/restart: Don't mutate AutoRemove when restarting

This caused a race condition where AutoRemove could be restored before
container was considered for restart and made autoremove containers
impossible to restart.

```
$ make DOCKER_GRAPHDRIVER=vfs BIND_DIR=. TEST_FILTER='TestContainerWithAutoRemoveCanBeRestarted' TESTFLAGS='-test.count 1' test-integration
...
=== RUN   TestContainerWithAutoRemoveCanBeRestarted
=== RUN   TestContainerWithAutoRemoveCanBeRestarted/kill
=== RUN   TestContainerWithAutoRemoveCanBeRestarted/stop
--- PASS: TestContainerWithAutoRemoveCanBeRestarted (1.61s)
    --- PASS: TestContainerWithAutoRemoveCanBeRestarted/kill (0.70s)
    --- PASS: TestContainerWithAutoRemoveCanBeRestarted/stop (0.86s)
PASS

DONE 3 tests in 3.062s
```

Signed-off-by: Paweł Gronowski <pawel.gronowski@docker.com>
This commit is contained in:
Paweł Gronowski 2022-07-19 12:17:08 +02:00
parent 44fde1bdb7
commit e2bd8edb0d
3 changed files with 28 additions and 16 deletions

View File

@ -52,7 +52,20 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine
}
}
restart, wait, err := c.RestartManager().ShouldRestart(ec, daemon.IsShuttingDown() || c.HasBeenManuallyStopped, time.Since(c.StartedAt))
daemonShutdown := daemon.IsShuttingDown()
execDuration := time.Since(c.StartedAt)
restart, wait, err := c.RestartManager().ShouldRestart(ec, daemonShutdown || c.HasBeenManuallyStopped, execDuration)
if err != nil {
logrus.WithError(err).
WithField("container", c.ID).
WithField("restartCount", c.RestartCount).
WithField("exitStatus", exitStatus).
WithField("daemonShuttingDown", daemonShutdown).
WithField("hasBeenManuallyStopped", c.HasBeenManuallyStopped).
WithField("execDuration", execDuration).
Warn("ShouldRestart failed, container will not be restarted")
restart = false
}
// cancel healthcheck here, they will be automatically
// restarted if/when the container is started again
@ -62,12 +75,19 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine
}
daemon.Cleanup(c)
if err == nil && restart {
if restart {
c.RestartCount++
logrus.WithField("container", c.ID).
WithField("restartCount", c.RestartCount).
WithField("exitStatus", exitStatus).
WithField("manualRestart", c.HasBeenManuallyRestarted).
Debug("Restarting container")
c.SetRestarting(&exitStatus)
} else {
c.SetStopped(&exitStatus)
defer daemon.autoRemove(c)
if !c.HasBeenManuallyRestarted {
defer daemon.autoRemove(c)
}
}
defer c.Unlock() // needs to be called before autoRemove
@ -76,7 +96,7 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine
daemon.LogContainerEventWithAttributes(c, "die", attributes)
if err == nil && restart {
if restart {
go func() {
err := <-wait
if err == nil {

View File

@ -6,7 +6,6 @@ import (
containertypes "github.com/docker/docker/api/types/container"
"github.com/docker/docker/container"
"github.com/sirupsen/logrus"
)
// ContainerRestart stops and starts a container. It attempts to
@ -52,19 +51,11 @@ func (daemon *Daemon) containerRestart(ctx context.Context, container *container
}
if container.IsRunning() {
// set AutoRemove flag to false before stop so the container won't be
// removed during restart process
autoRemove := container.HostConfig.AutoRemove
container.Lock()
container.HasBeenManuallyRestarted = true
container.Unlock()
container.HostConfig.AutoRemove = false
err := daemon.containerStop(ctx, container, options)
// restore AutoRemove irrespective of whether the stop worked or not
container.HostConfig.AutoRemove = autoRemove
// containerStop will write HostConfig to disk, we shall restore AutoRemove
// in disk too
if toDiskErr := daemon.checkpointAndSave(container); toDiskErr != nil {
logrus.Errorf("Write container to disk error: %v", toDiskErr)
}
if err != nil {
return err

View File

@ -206,6 +206,7 @@ func (daemon *Daemon) containerStart(container *container.Container, checkpoint
return translateContainerdStartErr(container.Path, container.SetExitCode, err)
}
container.HasBeenManuallyRestarted = false
container.SetRunning(pid, true)
container.HasBeenStartedBefore = true
daemon.setStateCounter(container)