1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00

Merge pull request #41227 from cpuguy83/work_around_missing_shim_event

Work around missing shim event
This commit is contained in:
Sebastiaan van Stijn 2020-07-30 20:42:41 +02:00 committed by GitHub
commit 07746cc972
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 78 additions and 66 deletions

View file

@ -99,6 +99,7 @@ func (daemon *Daemon) killWithSignal(container *containerpkg.Container, sig int)
if errdefs.IsNotFound(err) {
unpause = false
logrus.WithError(err).WithField("container", container.ID).WithField("action", "kill").Debug("container kill failed because of 'container not found' or 'no such process'")
go daemon.handleContainerExit(container, nil)
} else {
return errors.Wrapf(err, "Cannot kill container %s", container.ID)
}

View file

@ -24,6 +24,82 @@ func (daemon *Daemon) setStateCounter(c *container.Container) {
}
}
func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontainerdtypes.EventInfo) error {
c.Lock()
ec, et, err := daemon.containerd.DeleteTask(context.Background(), c.ID)
if err != nil {
logrus.WithError(err).Warnf("failed to delete container %s from containerd", c.ID)
}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
c.StreamConfig.Wait(ctx)
cancel()
c.Reset(false)
exitStatus := container.ExitStatus{
ExitCode: int(ec),
ExitedAt: et,
}
if e != nil {
exitStatus.ExitCode = int(e.ExitCode)
exitStatus.ExitedAt = e.ExitedAt
exitStatus.OOMKilled = e.OOMKilled
if e.Error != nil {
c.SetError(e.Error)
}
}
restart, wait, err := c.RestartManager().ShouldRestart(ec, daemon.IsShuttingDown() || c.HasBeenManuallyStopped, time.Since(c.StartedAt))
if err == nil && restart {
c.RestartCount++
c.SetRestarting(&exitStatus)
} else {
c.SetStopped(&exitStatus)
defer daemon.autoRemove(c)
}
defer c.Unlock() // needs to be called before autoRemove
// cancel healthcheck here, they will be automatically
// restarted if/when the container is started again
daemon.stopHealthchecks(c)
attributes := map[string]string{
"exitCode": strconv.Itoa(int(ec)),
}
daemon.LogContainerEventWithAttributes(c, "die", attributes)
daemon.Cleanup(c)
daemon.setStateCounter(c)
cpErr := c.CheckpointTo(daemon.containersReplica)
if err == nil && restart {
go func() {
err := <-wait
if err == nil {
// daemon.netController is initialized when daemon is restoring containers.
// But containerStart will use daemon.netController segment.
// So to avoid panic at startup process, here must wait util daemon restore done.
daemon.waitForStartupDone()
if err = daemon.containerStart(c, "", "", false); err != nil {
logrus.Debugf("failed to restart container: %+v", err)
}
}
if err != nil {
c.Lock()
c.SetStopped(&exitStatus)
daemon.setStateCounter(c)
c.CheckpointTo(daemon.containersReplica)
c.Unlock()
defer daemon.autoRemove(c)
if err != restartmanager.ErrRestartCanceled {
logrus.Errorf("restartmanger wait error: %+v", err)
}
}
}()
}
return cpErr
}
// ProcessEvent is called by libcontainerd whenever an event occurs
func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei libcontainerdtypes.EventInfo) error {
c, err := daemon.GetContainer(id)
@ -48,72 +124,7 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei
daemon.LogContainerEvent(c, "oom")
case libcontainerdtypes.EventExit:
if int(ei.Pid) == c.Pid {
c.Lock()
_, _, err := daemon.containerd.DeleteTask(context.Background(), c.ID)
if err != nil {
logrus.WithError(err).Warnf("failed to delete container %s from containerd", c.ID)
}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
c.StreamConfig.Wait(ctx)
cancel()
c.Reset(false)
exitStatus := container.ExitStatus{
ExitCode: int(ei.ExitCode),
ExitedAt: ei.ExitedAt,
OOMKilled: ei.OOMKilled,
}
restart, wait, err := c.RestartManager().ShouldRestart(ei.ExitCode, daemon.IsShuttingDown() || c.HasBeenManuallyStopped, time.Since(c.StartedAt))
if err == nil && restart {
c.RestartCount++
c.SetRestarting(&exitStatus)
} else {
if ei.Error != nil {
c.SetError(ei.Error)
}
c.SetStopped(&exitStatus)
defer daemon.autoRemove(c)
}
defer c.Unlock() // needs to be called before autoRemove
// cancel healthcheck here, they will be automatically
// restarted if/when the container is started again
daemon.stopHealthchecks(c)
attributes := map[string]string{
"exitCode": strconv.Itoa(int(ei.ExitCode)),
}
daemon.LogContainerEventWithAttributes(c, "die", attributes)
daemon.Cleanup(c)
daemon.setStateCounter(c)
cpErr := c.CheckpointTo(daemon.containersReplica)
if err == nil && restart {
go func() {
err := <-wait
if err == nil {
// daemon.netController is initialized when daemon is restoring containers.
// But containerStart will use daemon.netController segment.
// So to avoid panic at startup process, here must wait util daemon restore done.
daemon.waitForStartupDone()
if err = daemon.containerStart(c, "", "", false); err != nil {
logrus.Debugf("failed to restart container: %+v", err)
}
}
if err != nil {
c.Lock()
c.SetStopped(&exitStatus)
daemon.setStateCounter(c)
c.CheckpointTo(daemon.containersReplica)
c.Unlock()
defer daemon.autoRemove(c)
if err != restartmanager.ErrRestartCanceled {
logrus.Errorf("restartmanger wait error: %+v", err)
}
}
}()
}
return cpErr
return daemon.handleContainerExit(c, &ei)
}
exitCode := 127