From 671c12204cb469d868f646da1474d5bad6541770 Mon Sep 17 00:00:00 2001 From: Peter Waller Date: Tue, 10 Mar 2015 22:10:00 +0000 Subject: [PATCH] Implement build cancellation Add the capability to cancel the build by disconnecting the client. This adds a `cancelled` channel which is used to signal that a build should halt. The build is halted by sending a Kill signal and noticing that the cancellation channel is closed. This first pass implementation does not allow cancellation during a pull, but that will come in a subsequent PR. * Add documentation of cancellation to cli and API * Protect job cancellation with sync.Once * Add TestBuildCancelationKillsSleep * Add test case for build cancellation of RUN statements. Signed-off-by: Peter Waller --- api/server/server.go | 14 ++ builder/evaluator.go | 10 ++ builder/internals.go | 11 ++ builder/job.go | 1 + .../reference/api/docker_remote_api.md | 5 + .../reference/api/docker_remote_api_v1.18.md | 3 + docs/sources/reference/commandline/cli.md | 6 + engine/engine.go | 2 + engine/job.go | 19 +++ integration-cli/docker_cli_build_test.go | 128 ++++++++++++++++++ integration-cli/utils.go | 12 ++ 11 files changed, 211 insertions(+) diff --git a/api/server/server.go b/api/server/server.go index d244d2a0ce..4a86442454 100644 --- a/api/server/server.go +++ b/api/server/server.go @@ -1087,6 +1087,20 @@ func postBuild(eng *engine.Engine, version version.Version, w http.ResponseWrite job.Setenv("cpusetcpus", r.FormValue("cpusetcpus")) job.Setenv("cpushares", r.FormValue("cpushares")) + // Job cancellation. Note: not all job types support this. + if closeNotifier, ok := w.(http.CloseNotifier); ok { + finished := make(chan struct{}) + defer close(finished) + go func() { + select { + case <-finished: + case <-closeNotifier.CloseNotify(): + log.Infof("Client disconnected, cancelling job: %v", job) + job.Cancel() + } + }() + } + if err := job.Run(); err != nil { if !job.Stdout.Used() { return err diff --git a/builder/evaluator.go b/builder/evaluator.go index 985656f16a..79ca4d934a 100644 --- a/builder/evaluator.go +++ b/builder/evaluator.go @@ -131,6 +131,8 @@ type Builder struct { cpuShares int64 memory int64 memorySwap int64 + + cancelled <-chan struct{} // When closed, job was cancelled. } // Run the builder with the context. This is the lynchpin of this package. This @@ -166,6 +168,14 @@ func (b *Builder) Run(context io.Reader) (string, error) { b.TmpContainers = map[string]struct{}{} for i, n := range b.dockerfile.Children { + select { + case <-b.cancelled: + log.Debug("Builder: build cancelled!") + fmt.Fprintf(b.OutStream, "Build cancelled") + return "", fmt.Errorf("Build cancelled") + default: + // Not cancelled yet, keep going... + } if err := b.dispatch(i, n); err != nil { if b.ForceRemove { b.clearTmp() diff --git a/builder/internals.go b/builder/internals.go index 67650f75bc..7c22b47b2b 100644 --- a/builder/internals.go +++ b/builder/internals.go @@ -581,6 +581,17 @@ func (b *Builder) run(c *daemon.Container) error { return err } + finished := make(chan struct{}) + defer close(finished) + go func() { + select { + case <-b.cancelled: + log.Debugln("Build cancelled, killing container:", c.ID) + c.Kill() + case <-finished: + } + }() + if b.Verbose { // Block on reading output from container, stop on err or chan closed if err := <-errCh; err != nil { diff --git a/builder/job.go b/builder/job.go index 27591129cd..59df87e8c6 100644 --- a/builder/job.go +++ b/builder/job.go @@ -153,6 +153,7 @@ func (b *BuilderJob) CmdBuild(job *engine.Job) engine.Status { cpuSetCpus: cpuSetCpus, memory: memory, memorySwap: memorySwap, + cancelled: job.WaitCancelled(), } id, err := builder.Run(context) diff --git a/docs/sources/reference/api/docker_remote_api.md b/docs/sources/reference/api/docker_remote_api.md index 122546cf75..3da4cc82d5 100644 --- a/docs/sources/reference/api/docker_remote_api.md +++ b/docs/sources/reference/api/docker_remote_api.md @@ -76,6 +76,11 @@ Builds can now set resource constraints for all containers created for the build (`CgroupParent`) can be passed in the host config to setup container cgroups under a specific cgroup. +`POST /build` + +**New!** +Closing the HTTP request will now cause the build to be canceled. + ## v1.17 ### Full Documentation diff --git a/docs/sources/reference/api/docker_remote_api_v1.18.md b/docs/sources/reference/api/docker_remote_api_v1.18.md index 3ebddb7d13..b5d9c0e5f4 100644 --- a/docs/sources/reference/api/docker_remote_api_v1.18.md +++ b/docs/sources/reference/api/docker_remote_api_v1.18.md @@ -1144,6 +1144,9 @@ The archive may include any number of other files, which will be accessible in the build context (See the [*ADD build command*](/reference/builder/#dockerbuilder)). +The build will also be canceled if the client drops the connection by quitting +or being killed. + Query Parameters: - **dockerfile** - path within the build context to the Dockerfile. This is diff --git a/docs/sources/reference/commandline/cli.md b/docs/sources/reference/commandline/cli.md index 322f5f401e..6de1dae787 100644 --- a/docs/sources/reference/commandline/cli.md +++ b/docs/sources/reference/commandline/cli.md @@ -599,6 +599,12 @@ in cases where the same set of files are used for multiple builds. The path must be to a file within the build context. If a relative path is specified then it must to be relative to the current directory. +If the Docker client loses connection to the daemon, the build is canceled. +This happens if you interrupt the Docker client with `ctrl-c` or if the Docker +client is killed for any reason. + +> **Note:** Currently only the "run" phase of the build can be canceled until +> pull cancelation is implemented). See also: diff --git a/engine/engine.go b/engine/engine.go index e8286d89f7..83b6efbb80 100644 --- a/engine/engine.go +++ b/engine/engine.go @@ -123,6 +123,8 @@ func (eng *Engine) Job(name string, args ...string) *Job { Stderr: NewOutput(), env: &Env{}, closeIO: true, + + cancelled: make(chan struct{}), } if eng.Logging { job.Stderr.Add(ioutils.NopWriteCloser(eng.Stderr)) diff --git a/engine/job.go b/engine/job.go index 4b2befb425..ecb68c3eb7 100644 --- a/engine/job.go +++ b/engine/job.go @@ -5,6 +5,7 @@ import ( "fmt" "io" "strings" + "sync" "time" log "github.com/Sirupsen/logrus" @@ -34,6 +35,12 @@ type Job struct { status Status end time.Time closeIO bool + + // When closed, the job has been cancelled. + // Note: not all jobs implement cancellation. + // See Job.Cancel() and Job.WaitCancelled() + cancelled chan struct{} + cancelOnce sync.Once } type Status int @@ -248,3 +255,15 @@ func (job *Job) StatusCode() int { func (job *Job) SetCloseIO(val bool) { job.closeIO = val } + +// When called, causes the Job.WaitCancelled channel to unblock. +func (job *Job) Cancel() { + job.cancelOnce.Do(func() { + close(job.cancelled) + }) +} + +// Returns a channel which is closed ("never blocks") when the job is cancelled. +func (job *Job) WaitCancelled() <-chan struct{} { + return job.cancelled +} diff --git a/integration-cli/docker_cli_build_test.go b/integration-cli/docker_cli_build_test.go index 6a83595746..cb084255f7 100644 --- a/integration-cli/docker_cli_build_test.go +++ b/integration-cli/docker_cli_build_test.go @@ -2,6 +2,7 @@ package main import ( "archive/tar" + "bufio" "bytes" "encoding/json" "fmt" @@ -14,6 +15,7 @@ import ( "runtime" "strconv" "strings" + "sync" "testing" "text/template" "time" @@ -1924,6 +1926,132 @@ func TestBuildForceRm(t *testing.T) { logDone("build - ensure --force-rm doesn't leave containers behind") } +// Test that an infinite sleep during a build is killed if the client disconnects. +// This test is fairly hairy because there are lots of ways to race. +// Strategy: +// * Monitor the output of docker events starting from before +// * Run a 1-year-long sleep from a docker build. +// * When docker events sees container start, close the "docker build" command +// * Wait for docker events to emit a dying event. +func TestBuildCancelationKillsSleep(t *testing.T) { + // TODO(jfrazelle): Make this work on Windows. + testRequires(t, SameHostDaemon) + + name := "testbuildcancelation" + defer deleteImages(name) + + // (Note: one year, will never finish) + ctx, err := fakeContext("FROM busybox\nRUN sleep 31536000", nil) + if err != nil { + t.Fatal(err) + } + defer ctx.Close() + + var wg sync.WaitGroup + defer wg.Wait() + + finish := make(chan struct{}) + defer close(finish) + + eventStart := make(chan struct{}) + eventDie := make(chan struct{}) + + // Start one second ago, to avoid rounding problems + startEpoch := time.Now().Add(-1 * time.Second) + + // Goroutine responsible for watching start/die events from `docker events` + wg.Add(1) + go func() { + defer wg.Done() + + // Watch for events since epoch. + eventsCmd := exec.Command(dockerBinary, "events", + "-since", fmt.Sprint(startEpoch.Unix())) + stdout, err := eventsCmd.StdoutPipe() + err = eventsCmd.Start() + if err != nil { + t.Fatalf("failed to start 'docker events': %s", err) + } + + go func() { + <-finish + eventsCmd.Process.Kill() + }() + + var started, died bool + matchStart := regexp.MustCompile(" \\(from busybox\\:latest\\) start$") + matchDie := regexp.MustCompile(" \\(from busybox\\:latest\\) die$") + + // + // Read lines of `docker events` looking for container start and stop. + // + scanner := bufio.NewScanner(stdout) + for scanner.Scan() { + if ok := matchStart.MatchString(scanner.Text()); ok { + if started { + t.Fatal("assertion fail: more than one container started") + } + close(eventStart) + started = true + } + if ok := matchDie.MatchString(scanner.Text()); ok { + if died { + t.Fatal("assertion fail: more than one container died") + } + close(eventDie) + died = true + } + } + + err = eventsCmd.Wait() + if err != nil && !IsKilled(err) { + t.Fatalf("docker events had bad exit status: %s", err) + } + }() + + buildCmd := exec.Command(dockerBinary, "build", "-t", name, ".") + buildCmd.Dir = ctx.Dir + buildCmd.Stdout = os.Stdout + + err = buildCmd.Start() + if err != nil { + t.Fatalf("failed to run build: %s", err) + } + + select { + case <-time.After(30 * time.Second): + t.Fatal("failed to observe build container start in timely fashion") + case <-eventStart: + // Proceeds from here when we see the container fly past in the + // output of "docker events". + // Now we know the container is running. + } + + // Send a kill to the `docker build` command. + // Causes the underlying build to be cancelled due to socket close. + err = buildCmd.Process.Kill() + if err != nil { + t.Fatalf("error killing build command: %s", err) + } + + // Get the exit status of `docker build`, check it exited because killed. + err = buildCmd.Wait() + if err != nil && !IsKilled(err) { + t.Fatalf("wait failed during build run: %T %s", err, err) + } + + select { + case <-time.After(30 * time.Second): + // If we don't get here in a timely fashion, it wasn't killed. + t.Fatal("container cancel did not succeed") + case <-eventDie: + // We saw the container shut down in the `docker events` stream, + // as expected. + } + + logDone("build - ensure canceled job finishes immediately") +} + func TestBuildRm(t *testing.T) { name := "testbuildrm" defer deleteImages(name) diff --git a/integration-cli/utils.go b/integration-cli/utils.go index 691402f35e..159cfb1d13 100644 --- a/integration-cli/utils.go +++ b/integration-cli/utils.go @@ -42,6 +42,18 @@ func processExitCode(err error) (exitCode int) { return } +func IsKilled(err error) bool { + if exitErr, ok := err.(*exec.ExitError); ok { + sys := exitErr.ProcessState.Sys() + status, ok := sys.(syscall.WaitStatus) + if !ok { + return false + } + return status.Signaled() && status.Signal() == os.Kill + } + return false +} + func runCommandWithOutput(cmd *exec.Cmd) (output string, exitCode int, err error) { exitCode = 0 out, err := cmd.CombinedOutput()