mirror of https://github.com/moby/moby.git
Merge pull request #43994 from corhere/backport-22.06/healthcheck_timeout
[v22.06 backport] don't use canceled context to send KILL signal to healthcheck process
commit 2dc3e510d4
3 changed files with 52 additions and 18 deletions
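In outline, the fix is the same idea in both hunk sets below: once the caller's context has been cancelled (for example because a health-check probe timed out), that context can no longer be used to ask containerd to signal the process, so the daemon derives a short-lived context from context.Background() just for delivering the KILL. The following sketch only illustrates that pattern; signalProcess is a hypothetical stand-in for daemon.containerd.SignalProcess and is assumed to fail fast on an already-cancelled context, as a gRPC call would.

package main

import (
	"context"
	"fmt"
	"time"
)

// signalProcess is a stand-in (not the real containerd client): it refuses to
// do any work if the context it is given has already been cancelled.
func signalProcess(ctx context.Context, sig string) error {
	if err := ctx.Err(); err != nil {
		return err
	}
	fmt.Println("delivered", sig)
	return nil
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	cancel() // the request/health-check context has already been cancelled

	// Before the fix: reusing the cancelled ctx means KILL is never delivered.
	fmt.Println("with cancelled ctx:", signalProcess(ctx, "KILL"))

	// After the fix: derive a fresh, bounded context just for the signal.
	sigCtx, cancelFunc := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancelFunc()
	fmt.Println("with fresh ctx:", signalProcess(sigCtx, "KILL"))
}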
@@ -23,9 +23,6 @@ import (
 	"github.com/sirupsen/logrus"
 )
 
-// Seconds to wait after sending TERM before trying KILL
-const termProcessTimeout = 10 * time.Second
-
 func (daemon *Daemon) registerExecCommand(container *container.Container, config *exec.Config) {
 	// Storing execs in container in order to kill them gracefully whenever the container is stopped or removed.
 	container.ExecCommands.Add(config.ID, config)
@@ -272,7 +269,10 @@ func (daemon *Daemon) ContainerExecStart(ctx context.Context, name string, optio
 		CloseStdin: true,
 	}
 	ec.StreamConfig.AttachStreams(&attachConfig)
-	attachErr := ec.StreamConfig.CopyStreams(ctx, &attachConfig)
+	// using context.Background() so that attachErr does not race ctx.Done().
+	copyCtx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+	attachErr := ec.StreamConfig.CopyStreams(copyCtx, &attachConfig)
 
 	// Synchronize with libcontainerd event loop
 	ec.Lock()
@@ -292,18 +292,15 @@ func (daemon *Daemon) ContainerExecStart(ctx context.Context, name string, optio
 
 	select {
 	case <-ctx.Done():
-		logrus.Debugf("Sending TERM signal to process %v in container %v", name, c.ID)
-		daemon.containerd.SignalProcess(ctx, c.ID, name, signal.SignalMap["TERM"])
-
-		timeout := time.NewTimer(termProcessTimeout)
-		defer timeout.Stop()
-
-		select {
-		case <-timeout.C:
-			logrus.Infof("Container %v, process %v failed to exit within %v of signal TERM - using the force", c.ID, name, termProcessTimeout)
-			daemon.containerd.SignalProcess(ctx, c.ID, name, signal.SignalMap["KILL"])
-		case <-attachErr:
-			// TERM signal worked
+		log := logrus.
+			WithField("container", c.ID).
+			WithField("exec", name)
+		log.Debug("Sending KILL signal to container process")
+		sigCtx, cancelFunc := context.WithTimeout(context.Background(), 30*time.Second)
+		defer cancelFunc()
+		err := daemon.containerd.SignalProcess(sigCtx, c.ID, name, signal.SignalMap["KILL"])
+		if err != nil {
+			log.WithError(err).Error("Could not send KILL signal to container process")
 		}
 		return ctx.Err()
 	case err := <-attachErr:
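A second detail in the hunks above is the new copyCtx: the stream copy is now driven by a context derived from context.Background() rather than by the request ctx, "so that attachErr does not race ctx.Done()". The toy program below (hypothetical channels, not daemon code) shows the race that would otherwise exist: cancelling ctx makes attachErr ready at essentially the same moment as ctx.Done(), and because Go's select picks among ready cases pseudo-randomly, the branch that sends the KILL could be skipped.

package main

import (
	"context"
	"fmt"
)

func main() {
	ctx, cancel := context.WithCancel(context.Background())

	// Pretend the stream copy is driven by the request ctx: the moment ctx is
	// cancelled the copy aborts, so attachErr becomes ready alongside ctx.Done().
	attachErr := make(chan error, 1)
	cancel()
	attachErr <- ctx.Err()

	skipped := 0
	for i := 0; i < 1000; i++ {
		select {
		case <-ctx.Done():
			// the branch that would send the KILL signal
		case err := <-attachErr:
			skipped++        // KILL would never be sent on this path
			attachErr <- err // keep the channel ready for the next iteration
		}
	}
	fmt.Printf("the KILL branch lost the race %d of 1000 times\n", skipped)
}

With the copy detached from the request context, attachErr only becomes ready when the streams genuinely finish, so cancellation is always observed on the ctx.Done() branch and the KILL (itself bounded by a fresh 30-second context) is always attempted.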
@@ -133,8 +133,8 @@ func (p *cmdProbe) run(ctx context.Context, d *Daemon, cntr *container.Container
 	case <-tm.C:
 		cancelProbe()
 		logrus.WithContext(ctx).Debugf("Health check for container %s taking too long", cntr.ID)
-		// Wait for probe to exit (it might take a while to respond to the TERM
-		// signal and we don't want dying probes to pile up).
+		// Wait for probe to exit (it might take some time to call containerd to kill
+		// the process and we don't want dying probes to pile up).
 		<-execErr
 		return &types.HealthcheckResult{
 			ExitCode: -1,
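The rewritten comment above documents a wait-after-cancel pattern that is easy to get wrong, so here is a minimal, self-contained sketch of it (runProbe and all durations are illustrative, not the daemon's code): when the timeout fires, cancelling the probe's context is not enough on its own; the runner must also wait on execErr so that probes which are slow to die do not pile up.

package main

import (
	"context"
	"fmt"
	"time"
)

// runProbe stands in for the health-check exec: it honours cancellation, but
// may still take a moment to actually exit once it has been told to die.
func runProbe(ctx context.Context) error {
	select {
	case <-time.After(5 * time.Second): // the probe command itself
		return nil
	case <-ctx.Done():
		time.Sleep(200 * time.Millisecond) // killing the process takes a moment
		return ctx.Err()
	}
}

func main() {
	ctx, cancelProbe := context.WithCancel(context.Background())

	execErr := make(chan error, 1)
	go func() { execErr <- runProbe(ctx) }()

	timeout := time.NewTimer(50 * time.Millisecond)
	defer timeout.Stop()

	select {
	case err := <-execErr:
		fmt.Println("probe finished:", err)
	case <-timeout.C:
		cancelProbe()
		// Wait for the probe to exit so dying probes don't pile up.
		fmt.Println("probe timed out; probe exited with:", <-execErr)
	}
}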
@@ -2,6 +2,7 @@ package container // import "github.com/docker/docker/integration/container"
 
 import (
 	"context"
+	"fmt"
 	"testing"
 	"time"
 
@@ -93,6 +94,42 @@ while true; do sleep 1; done
 	poll.WaitOn(t, pollForHealthStatus(ctxPoll, client, id, "healthy"), poll.WithDelay(100*time.Millisecond))
 }
 
+// TestHealthCheckProcessKilled verifies that health-checks exec get killed on time-out.
+func TestHealthCheckProcessKilled(t *testing.T) {
+	skip.If(t, testEnv.RuntimeIsWindowsContainerd(), "FIXME: Broken on Windows + containerd combination")
+	defer setupTest(t)()
+	ctx := context.Background()
+	apiClient := testEnv.APIClient()
+
+	cID := container.Run(ctx, t, apiClient, func(c *container.TestContainerConfig) {
+		c.Config.Healthcheck = &containertypes.HealthConfig{
+			Test:     []string{"CMD", "sh", "-c", "sleep 60"},
+			Interval: 100 * time.Millisecond,
+			Timeout:  50 * time.Millisecond,
+			Retries:  1,
+		}
+	})
+	poll.WaitOn(t, pollForHealthCheckLog(ctx, apiClient, cID, "Health check exceeded timeout (50ms)"))
+}
+
+func pollForHealthCheckLog(ctx context.Context, client client.APIClient, containerID string, expected string) func(log poll.LogT) poll.Result {
+	return func(log poll.LogT) poll.Result {
+		inspect, err := client.ContainerInspect(ctx, containerID)
+		if err != nil {
+			return poll.Error(err)
+		}
+		healthChecksTotal := len(inspect.State.Health.Log)
+		if healthChecksTotal > 0 {
+			output := inspect.State.Health.Log[healthChecksTotal-1].Output
+			if output == expected {
+				return poll.Success()
+			}
+			return poll.Error(fmt.Errorf("expected %q, got %q", expected, output))
+		}
+		return poll.Continue("waiting for container healthcheck logs")
+	}
+}
+
 func pollForHealthStatus(ctx context.Context, client client.APIClient, containerID string, healthStatus string) func(log poll.LogT) poll.Result {
 	return func(log poll.LogT) poll.Result {
 		inspect, err := client.ContainerInspect(ctx, containerID)