From a422813113b3b9a14184766f7f0977ccaaeb661b Mon Sep 17 00:00:00 2001 From: Zhang Wei Date: Tue, 9 Aug 2016 17:33:36 +0800 Subject: [PATCH] Bugfix: client hangs after run container New codes introduced a new method to wait container's exit code and removal via Events API, but it specified a "since" filter based on client side clock, which sometimes isn't synced up with daemon's clock, hence leads the client to hang on waiting container's exit status. This commit brings the Events call before start, and removes the time filter, so that it can catch daemon events correctly without care of the clock issue. Signed-off-by: Zhang Wei --- api/client/container/run.go | 21 +++++------ api/client/container/utils.go | 65 +++++++++++++++++++++-------------- 2 files changed, 48 insertions(+), 38 deletions(-) diff --git a/api/client/container/run.go b/api/client/container/run.go index 5d82ff79ab..ee62b60867 100644 --- a/api/client/container/run.go +++ b/api/client/container/run.go @@ -8,7 +8,6 @@ import ( "runtime" "strings" "syscall" - "time" "golang.org/x/net/context" @@ -144,7 +143,6 @@ func runRun(dockerCli *client.DockerCli, flags *pflag.FlagSet, opts *runOptions, hostConfig.ConsoleSize[0], hostConfig.ConsoleSize[1] = dockerCli.GetTtySize() } - startTime := time.Now() ctx, cancelFun := context.WithCancel(context.Background()) createResponse, err := createContainer(ctx, dockerCli, config, hostConfig, networkingConfig, hostConfig.ContainerIDFile, opts.name) @@ -221,6 +219,11 @@ func runRun(dockerCli *client.DockerCli, flags *pflag.FlagSet, opts *runOptions, }) } + statusChan, err := waitExitOrRemoved(dockerCli, context.Background(), createResponse.ID, hostConfig.AutoRemove) + if err != nil { + return fmt.Errorf("Error waiting container's exit code: %v", err) + } + //start the container if err := client.ContainerStart(ctx, createResponse.ID, types.ContainerStartOptions{}); err != nil { // If we have holdHijackedConnection, we should notify @@ -233,9 +236,8 @@ func runRun(dockerCli *client.DockerCli, flags *pflag.FlagSet, opts *runOptions, reportError(stderr, cmdPath, err.Error(), false) if hostConfig.AutoRemove { - if _, errWait := waitExitOrRemoved(dockerCli, context.Background(), createResponse.ID, hostConfig.AutoRemove, startTime); errWait != nil { - logrus.Debugf("Error waiting container's removal: %v", errWait) - } + // wait container to be removed + <-statusChan } return runStartContainerErr(err) } @@ -260,14 +262,7 @@ func runRun(dockerCli *client.DockerCli, flags *pflag.FlagSet, opts *runOptions, return nil } - var status int - - // Attached mode - status, err = waitExitOrRemoved(dockerCli, ctx, createResponse.ID, hostConfig.AutoRemove, startTime) - if err != nil { - return fmt.Errorf("Error waiting container to exit: %v", err) - } - + status := <-statusChan if status != 0 { return cli.StatusError{StatusCode: status} } diff --git a/api/client/container/utils.go b/api/client/container/utils.go index 3693ff5e86..59d4ab6cd2 100644 --- a/api/client/container/utils.go +++ b/api/client/container/utils.go @@ -3,7 +3,6 @@ package container import ( "fmt" "strconv" - "time" "golang.org/x/net/context" @@ -16,16 +15,18 @@ import ( "github.com/docker/engine-api/types/filters" ) -func waitExitOrRemoved(dockerCli *client.DockerCli, ctx context.Context, containerID string, waitRemove bool, since time.Time) (int, error) { +func waitExitOrRemoved(dockerCli *client.DockerCli, ctx context.Context, containerID string, waitRemove bool) (chan int, error) { if len(containerID) == 0 { // containerID can never be empty panic("Internal Error: waitExitOrRemoved needs a containerID as parameter") } var exitCode int + statusChan := make(chan int) exitChan := make(chan struct{}) detachChan := make(chan struct{}) destroyChan := make(chan struct{}) + eventsErr := make(chan error) // Start watch events eh := system.InitEventHandler() @@ -54,45 +55,59 @@ func waitExitOrRemoved(dockerCli *client.DockerCli, ctx context.Context, contain eventChan := make(chan events.Message) go eh.Watch(eventChan) - defer close(eventChan) // Get events via Events API f := filters.NewArgs() f.Add("type", "container") f.Add("container", containerID) options := types.EventsOptions{ - Since: fmt.Sprintf("%d", since.Unix()), Filters: f, } resBody, err := dockerCli.Client().Events(ctx, options) if err != nil { - return -1, fmt.Errorf("can't get events from daemon: %v", err) + return nil, fmt.Errorf("can't get events from daemon: %v", err) } - defer resBody.Close() - go system.DecodeEvents(resBody, func(event events.Message, err error) error { - if err != nil { + go func() { + eventsErr <- system.DecodeEvents(resBody, func(event events.Message, err error) error { + if err != nil { + return fmt.Errorf("decode events error: %v", err) + } + eventChan <- event return nil - } - eventChan <- event - return nil - }) + }) + close(eventChan) + }() - if waitRemove { - select { - case <-destroyChan: - return exitCode, nil - case <-detachChan: - return 0, nil + go func() { + var waitErr error + if waitRemove { + select { + case <-destroyChan: + // keep exitcode and return + case <-detachChan: + exitCode = 0 + case waitErr = <-eventsErr: + exitCode = 125 + } + } else { + select { + case <-exitChan: + // keep exitcode and return + case <-detachChan: + exitCode = 0 + case waitErr = <-eventsErr: + exitCode = 125 + } } - } else { - select { - case <-exitChan: - return exitCode, nil - case <-detachChan: - return 0, nil + if waitErr != nil { + logrus.Errorf("%v", waitErr) } - } + statusChan <- exitCode + + resBody.Close() + }() + return statusChan, nil } // getExitCode performs an inspect on the container. It returns