From a422813113b3b9a14184766f7f0977ccaaeb661b Mon Sep 17 00:00:00 2001 From: Zhang Wei Date: Tue, 9 Aug 2016 17:33:36 +0800 Subject: [PATCH 1/2] Bugfix: client hangs after run container New codes introduced a new method to wait container's exit code and removal via Events API, but it specified a "since" filter based on client side clock, which sometimes isn't synced up with daemon's clock, hence leads the client to hang on waiting container's exit status. This commit brings the Events call before start, and removes the time filter, so that it can catch daemon events correctly without care of the clock issue. Signed-off-by: Zhang Wei --- api/client/container/run.go | 21 +++++------ api/client/container/utils.go | 65 +++++++++++++++++++++-------------- 2 files changed, 48 insertions(+), 38 deletions(-) diff --git a/api/client/container/run.go b/api/client/container/run.go index 5d82ff79ab..ee62b60867 100644 --- a/api/client/container/run.go +++ b/api/client/container/run.go @@ -8,7 +8,6 @@ import ( "runtime" "strings" "syscall" - "time" "golang.org/x/net/context" @@ -144,7 +143,6 @@ func runRun(dockerCli *client.DockerCli, flags *pflag.FlagSet, opts *runOptions, hostConfig.ConsoleSize[0], hostConfig.ConsoleSize[1] = dockerCli.GetTtySize() } - startTime := time.Now() ctx, cancelFun := context.WithCancel(context.Background()) createResponse, err := createContainer(ctx, dockerCli, config, hostConfig, networkingConfig, hostConfig.ContainerIDFile, opts.name) @@ -221,6 +219,11 @@ func runRun(dockerCli *client.DockerCli, flags *pflag.FlagSet, opts *runOptions, }) } + statusChan, err := waitExitOrRemoved(dockerCli, context.Background(), createResponse.ID, hostConfig.AutoRemove) + if err != nil { + return fmt.Errorf("Error waiting container's exit code: %v", err) + } + //start the container if err := client.ContainerStart(ctx, createResponse.ID, types.ContainerStartOptions{}); err != nil { // If we have holdHijackedConnection, we should notify @@ -233,9 +236,8 @@ func runRun(dockerCli *client.DockerCli, flags *pflag.FlagSet, opts *runOptions, reportError(stderr, cmdPath, err.Error(), false) if hostConfig.AutoRemove { - if _, errWait := waitExitOrRemoved(dockerCli, context.Background(), createResponse.ID, hostConfig.AutoRemove, startTime); errWait != nil { - logrus.Debugf("Error waiting container's removal: %v", errWait) - } + // wait container to be removed + <-statusChan } return runStartContainerErr(err) } @@ -260,14 +262,7 @@ func runRun(dockerCli *client.DockerCli, flags *pflag.FlagSet, opts *runOptions, return nil } - var status int - - // Attached mode - status, err = waitExitOrRemoved(dockerCli, ctx, createResponse.ID, hostConfig.AutoRemove, startTime) - if err != nil { - return fmt.Errorf("Error waiting container to exit: %v", err) - } - + status := <-statusChan if status != 0 { return cli.StatusError{StatusCode: status} } diff --git a/api/client/container/utils.go b/api/client/container/utils.go index 3693ff5e86..59d4ab6cd2 100644 --- a/api/client/container/utils.go +++ b/api/client/container/utils.go @@ -3,7 +3,6 @@ package container import ( "fmt" "strconv" - "time" "golang.org/x/net/context" @@ -16,16 +15,18 @@ import ( "github.com/docker/engine-api/types/filters" ) -func waitExitOrRemoved(dockerCli *client.DockerCli, ctx context.Context, containerID string, waitRemove bool, since time.Time) (int, error) { +func waitExitOrRemoved(dockerCli *client.DockerCli, ctx context.Context, containerID string, waitRemove bool) (chan int, error) { if len(containerID) == 0 { // containerID can never be empty panic("Internal Error: waitExitOrRemoved needs a containerID as parameter") } var exitCode int + statusChan := make(chan int) exitChan := make(chan struct{}) detachChan := make(chan struct{}) destroyChan := make(chan struct{}) + eventsErr := make(chan error) // Start watch events eh := system.InitEventHandler() @@ -54,45 +55,59 @@ func waitExitOrRemoved(dockerCli *client.DockerCli, ctx context.Context, contain eventChan := make(chan events.Message) go eh.Watch(eventChan) - defer close(eventChan) // Get events via Events API f := filters.NewArgs() f.Add("type", "container") f.Add("container", containerID) options := types.EventsOptions{ - Since: fmt.Sprintf("%d", since.Unix()), Filters: f, } resBody, err := dockerCli.Client().Events(ctx, options) if err != nil { - return -1, fmt.Errorf("can't get events from daemon: %v", err) + return nil, fmt.Errorf("can't get events from daemon: %v", err) } - defer resBody.Close() - go system.DecodeEvents(resBody, func(event events.Message, err error) error { - if err != nil { + go func() { + eventsErr <- system.DecodeEvents(resBody, func(event events.Message, err error) error { + if err != nil { + return fmt.Errorf("decode events error: %v", err) + } + eventChan <- event return nil - } - eventChan <- event - return nil - }) + }) + close(eventChan) + }() - if waitRemove { - select { - case <-destroyChan: - return exitCode, nil - case <-detachChan: - return 0, nil + go func() { + var waitErr error + if waitRemove { + select { + case <-destroyChan: + // keep exitcode and return + case <-detachChan: + exitCode = 0 + case waitErr = <-eventsErr: + exitCode = 125 + } + } else { + select { + case <-exitChan: + // keep exitcode and return + case <-detachChan: + exitCode = 0 + case waitErr = <-eventsErr: + exitCode = 125 + } } - } else { - select { - case <-exitChan: - return exitCode, nil - case <-detachChan: - return 0, nil + if waitErr != nil { + logrus.Errorf("%v", waitErr) } - } + statusChan <- exitCode + + resBody.Close() + }() + return statusChan, nil } // getExitCode performs an inspect on the container. It returns From eadcb996192e0bded2eaadef842dd86e9013cc27 Mon Sep 17 00:00:00 2001 From: Zhang Wei Date: Fri, 12 Aug 2016 00:44:18 +0800 Subject: [PATCH 2/2] Refactor waitExitOrRemoved by mlaventure Reduce complexity of waitExitOrRemoved. Signed-off-by: mlaventure Signed-off-by: Zhang Wei --- api/client/container/utils.go | 97 ++++++++++++----------------------- 1 file changed, 32 insertions(+), 65 deletions(-) diff --git a/api/client/container/utils.go b/api/client/container/utils.go index 59d4ab6cd2..5a589ff680 100644 --- a/api/client/container/utils.go +++ b/api/client/container/utils.go @@ -21,40 +21,44 @@ func waitExitOrRemoved(dockerCli *client.DockerCli, ctx context.Context, contain panic("Internal Error: waitExitOrRemoved needs a containerID as parameter") } - var exitCode int statusChan := make(chan int) - exitChan := make(chan struct{}) - detachChan := make(chan struct{}) - destroyChan := make(chan struct{}) - eventsErr := make(chan error) + exitCode := 125 - // Start watch events - eh := system.InitEventHandler() - eh.Handle("die", func(e events.Message) { - if len(e.Actor.Attributes) > 0 { - for k, v := range e.Actor.Attributes { - if k == "exitCode" { - var err error - if exitCode, err = strconv.Atoi(v); err != nil { - logrus.Errorf("Can't convert %q to int: %v", v, err) - } - close(exitChan) - break + eventProcessor := func(e events.Message, err error) error { + if err != nil { + statusChan <- exitCode + return fmt.Errorf("failed to decode event: %v", err) + } + + stopProcessing := false + switch e.Status { + case "die": + if v, ok := e.Actor.Attributes["exitCode"]; ok { + code, cerr := strconv.Atoi(v) + if cerr != nil { + logrus.Errorf("failed to convert exitcode '%q' to int: %v", v, cerr) + } else { + exitCode = code } } + if !waitRemove { + stopProcessing = true + } + case "detach": + exitCode = 0 + stopProcessing = true + case "destroy": + stopProcessing = true } - }) - eh.Handle("detach", func(e events.Message) { - exitCode = 0 - close(detachChan) - }) - eh.Handle("destroy", func(e events.Message) { - close(destroyChan) - }) + if stopProcessing { + statusChan <- exitCode + // stop the loop processing + return fmt.Errorf("done") + } - eventChan := make(chan events.Message) - go eh.Watch(eventChan) + return nil + } // Get events via Events API f := filters.NewArgs() @@ -68,45 +72,8 @@ func waitExitOrRemoved(dockerCli *client.DockerCli, ctx context.Context, contain return nil, fmt.Errorf("can't get events from daemon: %v", err) } - go func() { - eventsErr <- system.DecodeEvents(resBody, func(event events.Message, err error) error { - if err != nil { - return fmt.Errorf("decode events error: %v", err) - } - eventChan <- event - return nil - }) - close(eventChan) - }() + go system.DecodeEvents(resBody, eventProcessor) - go func() { - var waitErr error - if waitRemove { - select { - case <-destroyChan: - // keep exitcode and return - case <-detachChan: - exitCode = 0 - case waitErr = <-eventsErr: - exitCode = 125 - } - } else { - select { - case <-exitChan: - // keep exitcode and return - case <-detachChan: - exitCode = 0 - case waitErr = <-eventsErr: - exitCode = 125 - } - } - if waitErr != nil { - logrus.Errorf("%v", waitErr) - } - statusChan <- exitCode - - resBody.Close() - }() return statusChan, nil }