Merge pull request #43994 from corhere/backport-22.06/healthcheck_timeout

[v22.06 backport] don't use canceled context to send KILL signal to healthcheck process

commit 2dc3e510d4
3 changed files with 52 additions and 18 deletions
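In short: when an exec such as a health-check probe was cancelled, ContainerExecStart went on to deliver the signal using the very context that had just been canceled, so the containerd call failed immediately with context.Canceled and the probe process was never actually killed. The fix drops the TERM-then-KILL escalation (and the termProcessTimeout constant) and sends a single KILL on a fresh context bounded by its own timeout. The sketch below is not part of the diff; it restates the failure mode under simplified assumptions, with signalProcess as a hypothetical stand-in for daemon.containerd.SignalProcess:

package main

import (
    "context"
    "errors"
    "fmt"
    "time"
)

// signalProcess mimics daemon.containerd.SignalProcess: like any
// context-aware RPC, it fails up front if ctx is already canceled.
func signalProcess(ctx context.Context, sig string) error {
    if err := ctx.Err(); err != nil {
        return err // the signal is never delivered
    }
    fmt.Println("delivered", sig)
    return nil
}

func main() {
    ctx, cancel := context.WithCancel(context.Background())
    cancel() // the probe timed out: the request context is already canceled

    // Before the fix: reusing the canceled request context.
    if err := signalProcess(ctx, "KILL"); errors.Is(err, context.Canceled) {
        fmt.Println("bug: KILL not sent:", err)
    }

    // After the fix: a fresh context with its own 30-second bound.
    sigCtx, cancelSig := context.WithTimeout(context.Background(), 30*time.Second)
    defer cancelSig()
    if err := signalProcess(sigCtx, "KILL"); err == nil {
        fmt.Println("fixed: KILL sent on a fresh context")
    }
}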
daemon/exec.go
@@ -23,9 +23,6 @@ import (
 	"github.com/sirupsen/logrus"
 )
 
-// Seconds to wait after sending TERM before trying KILL
-const termProcessTimeout = 10 * time.Second
-
 func (daemon *Daemon) registerExecCommand(container *container.Container, config *exec.Config) {
 	// Storing execs in container in order to kill them gracefully whenever the container is stopped or removed.
 	container.ExecCommands.Add(config.ID, config)
@@ -272,7 +269,10 @@ func (daemon *Daemon) ContainerExecStart(ctx context.Context, name string, optio
 		CloseStdin: true,
 	}
 	ec.StreamConfig.AttachStreams(&attachConfig)
-	attachErr := ec.StreamConfig.CopyStreams(ctx, &attachConfig)
+	// using context.Background() so that attachErr does not race ctx.Done().
+	copyCtx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+	attachErr := ec.StreamConfig.CopyStreams(copyCtx, &attachConfig)
 
 	// Synchronize with libcontainerd event loop
 	ec.Lock()
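Why CopyStreams moves off the request context: when the copier shared ctx, cancellation made ctx.Done() and attachErr ready at essentially the same moment, and Go's select chooses among ready cases pseudo-randomly, so the branch that sends the signal could be skipped. A toy demonstration of that race, not from the diff; the channel only mimics the real stream plumbing:

package main

import (
    "context"
    "fmt"
    "time"
)

func main() {
    ctx, cancel := context.WithCancel(context.Background())
    attachErr := make(chan error, 1)

    // A copier tied to the same ctx "finishes" the instant ctx is canceled.
    go func() {
        <-ctx.Done()
        attachErr <- ctx.Err()
    }()

    cancel()
    time.Sleep(10 * time.Millisecond) // let the copier observe cancellation

    // Both cases are now ready; select picks one pseudo-randomly, so the
    // signal-sending branch is not guaranteed to run.
    select {
    case <-ctx.Done():
        fmt.Println("took the ctx.Done() (signal) branch")
    case err := <-attachErr:
        fmt.Println("took the attachErr branch:", err)
    }
}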
@@ -292,18 +292,15 @@ func (daemon *Daemon) ContainerExecStart(ctx context.Context, name string, optio
 
 	select {
 	case <-ctx.Done():
-		logrus.Debugf("Sending TERM signal to process %v in container %v", name, c.ID)
-		daemon.containerd.SignalProcess(ctx, c.ID, name, signal.SignalMap["TERM"])
-
-		timeout := time.NewTimer(termProcessTimeout)
-		defer timeout.Stop()
-
-		select {
-		case <-timeout.C:
-			logrus.Infof("Container %v, process %v failed to exit within %v of signal TERM - using the force", c.ID, name, termProcessTimeout)
-			daemon.containerd.SignalProcess(ctx, c.ID, name, signal.SignalMap["KILL"])
-		case <-attachErr:
-			// TERM signal worked
+		log := logrus.
+			WithField("container", c.ID).
+			WithField("exec", name)
+		log.Debug("Sending KILL signal to container process")
+		sigCtx, cancelFunc := context.WithTimeout(context.Background(), 30*time.Second)
+		defer cancelFunc()
+		err := daemon.containerd.SignalProcess(sigCtx, c.ID, name, signal.SignalMap["KILL"])
+		if err != nil {
+			log.WithError(err).Error("Could not send KILL signal to container process")
 		}
 		return ctx.Err()
 	case err := <-attachErr:
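As an aside, on Go 1.21 and later the same decoupling can be expressed with context.WithoutCancel, which keeps the parent's values but ignores its cancellation; this backport targets older toolchains, so it builds the signal context from context.Background() instead. A sketch of the newer form, assuming Go 1.21+:

package main

import (
    "context"
    "fmt"
    "time"
)

func main() {
    // A request-scoped context that has already been canceled.
    req, cancel := context.WithCancel(context.Background())
    cancel()

    // Go 1.21+: inherit req's values but not its cancellation, and give
    // the cleanup call its own 30-second deadline.
    sigCtx, stop := context.WithTimeout(context.WithoutCancel(req), 30*time.Second)
    defer stop()

    fmt.Println(req.Err())    // context.Canceled
    fmt.Println(sigCtx.Err()) // <nil>: still usable for the KILL call
}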
daemon/health.go
@@ -133,8 +133,8 @@ func (p *cmdProbe) run(ctx context.Context, d *Daemon, cntr *container.Container
 		case <-tm.C:
 			cancelProbe()
 			logrus.WithContext(ctx).Debugf("Health check for container %s taking too long", cntr.ID)
-			// Wait for probe to exit (it might take a while to respond to the TERM
-			// signal and we don't want dying probes to pile up).
+			// Wait for probe to exit (it might take some time to call containerd to kill
+			// the process and we don't want dying probes to pile up).
 			<-execErr
 			return &types.HealthcheckResult{
 				ExitCode: -1,
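The comment it replaces described waiting out a TERM grace period; the wait now covers the containerd KILL round-trip, but the underlying pattern is unchanged: block on the probe's error channel before returning so timed-out probes cannot accumulate. A standalone sketch of that drain-before-return pattern (the channel names only echo the real code):

package main

import (
    "errors"
    "fmt"
    "time"
)

func main() {
    execErr := make(chan error, 1)
    go func() {
        time.Sleep(50 * time.Millisecond) // the probe being torn down
        execErr <- errors.New("exec: killed")
    }()

    tm := time.NewTimer(10 * time.Millisecond)
    defer tm.Stop()
    select {
    case err := <-execErr:
        fmt.Println("probe finished in time:", err)
    case <-tm.C:
        fmt.Println("probe timed out; draining before returning")
        <-execErr // block until the probe goroutine is done, so dying probes don't pile up
    }
}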
integration/container/health_test.go
@@ -2,6 +2,7 @@ package container // import "github.com/docker/docker/integration/container"
 
 import (
 	"context"
+	"fmt"
 	"testing"
 	"time"
 
@@ -93,6 +94,42 @@ while true; do sleep 1; done
 	poll.WaitOn(t, pollForHealthStatus(ctxPoll, client, id, "healthy"), poll.WithDelay(100*time.Millisecond))
 }
 
+// TestHealthCheckProcessKilled verifies that health-checks exec get killed on time-out.
+func TestHealthCheckProcessKilled(t *testing.T) {
+	skip.If(t, testEnv.RuntimeIsWindowsContainerd(), "FIXME: Broken on Windows + containerd combination")
+	defer setupTest(t)()
+	ctx := context.Background()
+	apiClient := testEnv.APIClient()
+
+	cID := container.Run(ctx, t, apiClient, func(c *container.TestContainerConfig) {
+		c.Config.Healthcheck = &containertypes.HealthConfig{
+			Test:     []string{"CMD", "sh", "-c", "sleep 60"},
+			Interval: 100 * time.Millisecond,
+			Timeout:  50 * time.Millisecond,
+			Retries:  1,
+		}
+	})
+	poll.WaitOn(t, pollForHealthCheckLog(ctx, apiClient, cID, "Health check exceeded timeout (50ms)"))
+}
+
+func pollForHealthCheckLog(ctx context.Context, client client.APIClient, containerID string, expected string) func(log poll.LogT) poll.Result {
+	return func(log poll.LogT) poll.Result {
+		inspect, err := client.ContainerInspect(ctx, containerID)
+		if err != nil {
+			return poll.Error(err)
+		}
+		healthChecksTotal := len(inspect.State.Health.Log)
+		if healthChecksTotal > 0 {
+			output := inspect.State.Health.Log[healthChecksTotal-1].Output
+			if output == expected {
+				return poll.Success()
+			}
+			return poll.Error(fmt.Errorf("expected %q, got %q", expected, output))
+		}
+		return poll.Continue("waiting for container healthcheck logs")
+	}
+}
+
 func pollForHealthStatus(ctx context.Context, client client.APIClient, containerID string, healthStatus string) func(log poll.LogT) poll.Result {
 	return func(log poll.LogT) poll.Result {
 		inspect, err := client.ContainerInspect(ctx, containerID)
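For reference, the new pollForHealthCheckLog helper follows the gotest.tools/v3/poll contract: a check returns poll.Continue to retry after the delay, poll.Error to stop with a failure, and poll.Success to finish. A toy test, not part of the PR, showing that contract in isolation:

package example

import (
    "testing"
    "time"

    "gotest.tools/v3/poll"
)

// TestPollContract demonstrates the three poll.Check return values.
func TestPollContract(t *testing.T) {
    start := time.Now()
    check := func(poll.LogT) poll.Result {
        if time.Since(start) < 200*time.Millisecond {
            return poll.Continue("condition not met yet") // retry after the delay
        }
        return poll.Success() // stop polling, test passes
    }
    poll.WaitOn(t, check, poll.WithDelay(50*time.Millisecond), poll.WithTimeout(5*time.Second))
}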