diff --git a/container/container.go b/container/container.go index 5b73672070..5ec598cb0d 100644 --- a/container/container.go +++ b/container/container.go @@ -292,9 +292,7 @@ func (container *Container) GetRootResourcePath(path string) (string, error) { // ExitOnNext signals to the monitor that it should not restart the container // after we send the kill signal. func (container *Container) ExitOnNext() { - if container.restartManager != nil { - container.restartManager.Cancel() - } + container.RestartManager().Cancel() } // HostConfigPath returns the path to the container's JSON hostconfig @@ -545,7 +543,7 @@ func copyEscapable(dst io.Writer, src io.ReadCloser, keys []byte) (written int64 // ShouldRestart decides whether the daemon should restart the container or not. // This is based on the container's restart policy. func (container *Container) ShouldRestart() bool { - shouldRestart, _, _ := container.restartManager.ShouldRestart(uint32(container.ExitCode()), container.HasBeenManuallyStopped, container.FinishedAt.Sub(container.StartedAt)) + shouldRestart, _, _ := container.RestartManager().ShouldRestart(uint32(container.ExitCode()), container.HasBeenManuallyStopped, container.FinishedAt.Sub(container.StartedAt)) return shouldRestart } @@ -941,7 +939,7 @@ func (container *Container) UpdateMonitor(restartPolicy containertypes.RestartPo SetPolicy(containertypes.RestartPolicy) } - if rm, ok := container.RestartManager(false).(policySetter); ok { + if rm, ok := container.RestartManager().(policySetter); ok { rm.SetPolicy(restartPolicy) } } @@ -956,18 +954,24 @@ func (container *Container) FullHostname() string { } // RestartManager returns the current restartmanager instance connected to container. -func (container *Container) RestartManager(reset bool) restartmanager.RestartManager { - if reset { - container.RestartCount = 0 - container.restartManager = nil - } +func (container *Container) RestartManager() restartmanager.RestartManager { if container.restartManager == nil { container.restartManager = restartmanager.New(container.HostConfig.RestartPolicy, container.RestartCount) } - return container.restartManager } +// ResetRestartManager initializes new restartmanager based on container config +func (container *Container) ResetRestartManager(resetCount bool) { + if container.restartManager != nil { + container.restartManager.Cancel() + } + if resetCount { + container.RestartCount = 0 + } + container.restartManager = nil +} + type attachContext struct { ctx context.Context cancel context.CancelFunc diff --git a/daemon/daemon.go b/daemon/daemon.go index 2d294ada62..5aaef55987 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -189,12 +189,13 @@ func (daemon *Daemon) restore() error { logrus.Errorf("Failed to migrate old mounts to use new spec format") } - rm := c.RestartManager(false) if c.IsRunning() || c.IsPaused() { - if err := daemon.containerd.Restore(c.ID, libcontainerd.WithRestartManager(rm)); err != nil { + c.RestartManager().Cancel() // manually start containers because some need to wait for swarm networking + if err := daemon.containerd.Restore(c.ID); err != nil { logrus.Errorf("Failed to restore %s with containerd: %s", c.ID, err) return } + c.ResetRestartManager(false) if !c.HostConfig.NetworkMode.IsContainer() && c.IsRunning() { options, err := daemon.buildSandboxOptions(c) if err != nil { @@ -300,7 +301,7 @@ func (daemon *Daemon) restore() error { // Make sure networks are available before starting daemon.waitForNetworks(c) - if err := daemon.containerStart(c, ""); err != nil { + if err := daemon.containerStart(c, "", true); err != nil { logrus.Errorf("Failed to start container %s: %s", c.ID, err) } close(chNotify) @@ -372,7 +373,7 @@ func (daemon *Daemon) RestartSwarmContainers() { group.Add(1) go func(c *container.Container) { defer group.Done() - if err := daemon.containerStart(c, ""); err != nil { + if err := daemon.containerStart(c, "", true); err != nil { logrus.Error(err) } }(c) diff --git a/daemon/monitor.go b/daemon/monitor.go index 8d1b6a3601..a5e779d976 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -6,11 +6,13 @@ import ( "io" "runtime" "strconv" + "time" "github.com/Sirupsen/logrus" "github.com/docker/docker/api/types" "github.com/docker/docker/daemon/exec" "github.com/docker/docker/libcontainerd" + "github.com/docker/docker/restartmanager" "github.com/docker/docker/runconfig" ) @@ -31,43 +33,57 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error { daemon.LogContainerEvent(c, "oom") case libcontainerd.StateExit: // if container's AutoRemove flag is set, remove it after clean up - if c.HostConfig.AutoRemove { - defer func() { + autoRemove := func() { + if c.HostConfig.AutoRemove { if err := daemon.ContainerRm(c.ID, &types.ContainerRmConfig{ForceRemove: true, RemoveVolume: true}); err != nil { logrus.Errorf("can't remove container %s: %v", c.ID, err) } - }() + } } + c.Lock() - defer c.Unlock() c.Wait() c.Reset(false) - c.SetStopped(platformConstructExitStatus(e)) + + restart, wait, err := c.RestartManager().ShouldRestart(e.ExitCode, false, time.Since(c.StartedAt)) + if err == nil && restart { + c.RestartCount++ + c.SetRestarting(platformConstructExitStatus(e)) + } else { + c.SetStopped(platformConstructExitStatus(e)) + defer autoRemove() + } + + daemon.updateHealthMonitor(c) attributes := map[string]string{ "exitCode": strconv.Itoa(int(e.ExitCode)), } - daemon.updateHealthMonitor(c) daemon.LogContainerEventWithAttributes(c, "die", attributes) daemon.Cleanup(c) - // FIXME: here is race condition between two RUN instructions in Dockerfile - // because they share same runconfig and change image. Must be fixed - // in builder/builder.go + + if err == nil && restart { + go func() { + err := <-wait + if err == nil { + if err = daemon.containerStart(c, "", false); err != nil { + logrus.Debugf("failed to restart contianer: %+v", err) + } + } + if err != nil { + c.SetStopped(platformConstructExitStatus(e)) + defer autoRemove() + if err != restartmanager.ErrRestartCanceled { + logrus.Errorf("restartmanger wait error: %+v", err) + } + } + }() + } + + defer c.Unlock() if err := c.ToDisk(); err != nil { return err } return daemon.postRunProcessing(c, e) - case libcontainerd.StateRestart: - c.Lock() - defer c.Unlock() - c.Reset(false) - c.RestartCount++ - c.SetRestarting(platformConstructExitStatus(e)) - attributes := map[string]string{ - "exitCode": strconv.Itoa(int(e.ExitCode)), - } - daemon.LogContainerEventWithAttributes(c, "die", attributes) - daemon.updateHealthMonitor(c) - return c.ToDisk() case libcontainerd.StateExitProcess: c.Lock() defer c.Unlock() diff --git a/daemon/monitor_windows.go b/daemon/monitor_windows.go index f4857d7f8a..c5319e8876 100644 --- a/daemon/monitor_windows.go +++ b/daemon/monitor_windows.go @@ -32,7 +32,7 @@ func (daemon *Daemon) postRunProcessing(container *container.Container, e libcon } if copts != nil { - newOpts = append(newOpts, *copts...) + newOpts = append(newOpts, copts...) } // Create a new servicing container, which will start, complete the update, and merge back the diff --git a/daemon/restart.go b/daemon/restart.go index a34e731c41..1172f6c82e 100644 --- a/daemon/restart.go +++ b/daemon/restart.go @@ -56,7 +56,7 @@ func (daemon *Daemon) containerRestart(container *container.Container, seconds i } } - if err := daemon.containerStart(container, ""); err != nil { + if err := daemon.containerStart(container, "", true); err != nil { return err } diff --git a/daemon/start.go b/daemon/start.go index 7914b1882a..c38cddfb3a 100644 --- a/daemon/start.go +++ b/daemon/start.go @@ -14,7 +14,6 @@ import ( "github.com/docker/docker/api/types" containertypes "github.com/docker/docker/api/types/container" "github.com/docker/docker/container" - "github.com/docker/docker/libcontainerd" "github.com/docker/docker/runconfig" ) @@ -78,23 +77,23 @@ func (daemon *Daemon) ContainerStart(name string, hostConfig *containertypes.Hos return err } - return daemon.containerStart(container, checkpoint) + return daemon.containerStart(container, checkpoint, true) } // Start starts a container func (daemon *Daemon) Start(container *container.Container) error { - return daemon.containerStart(container, "") + return daemon.containerStart(container, "", true) } // containerStart prepares the container to run by setting up everything the // container needs, such as storage and networking, as well as links // between containers. The container is left waiting for a signal to // begin running. -func (daemon *Daemon) containerStart(container *container.Container, checkpoint string) (err error) { +func (daemon *Daemon) containerStart(container *container.Container, checkpoint string, resetRestartManager bool) (err error) { container.Lock() defer container.Unlock() - if container.Running { + if resetRestartManager && container.Running { // skip this check if already in restarting step and resetRestartManager==false return nil } @@ -141,13 +140,13 @@ func (daemon *Daemon) containerStart(container *container.Container, checkpoint return err } - createOptions := []libcontainerd.CreateOption{libcontainerd.WithRestartManager(container.RestartManager(true))} - copts, err := daemon.getLibcontainerdCreateOptions(container) + createOptions, err := daemon.getLibcontainerdCreateOptions(container) if err != nil { return err } - if copts != nil { - createOptions = append(createOptions, *copts...) + + if resetRestartManager { + container.ResetRestartManager(true) } if err := daemon.containerd.Create(container.ID, checkpoint, container.CheckpointDir(), *spec, createOptions...); err != nil { diff --git a/daemon/start_linux.go b/daemon/start_linux.go index c509f178c8..e354d4003a 100644 --- a/daemon/start_linux.go +++ b/daemon/start_linux.go @@ -7,7 +7,7 @@ import ( "github.com/docker/docker/libcontainerd" ) -func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Container) (*[]libcontainerd.CreateOption, error) { +func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Container) ([]libcontainerd.CreateOption, error) { createOptions := []libcontainerd.CreateOption{} // Ensure a runtime has been assigned to this container @@ -25,5 +25,5 @@ func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Contain } createOptions = append(createOptions, libcontainerd.WithRuntime(rt.Path, rt.Args)) - return &createOptions, nil + return createOptions, nil } diff --git a/daemon/start_windows.go b/daemon/start_windows.go index dc73f09692..c792d35809 100644 --- a/daemon/start_windows.go +++ b/daemon/start_windows.go @@ -17,7 +17,7 @@ const ( credentialSpecFileLocation = "CredentialSpecs" ) -func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Container) (*[]libcontainerd.CreateOption, error) { +func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Container) ([]libcontainerd.CreateOption, error) { createOptions := []libcontainerd.CreateOption{} // Are we going to run as a Hyper-V container? @@ -139,7 +139,7 @@ func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Contain createOptions = append(createOptions, &libcontainerd.NetworkEndpointsOption{Endpoints: epList, AllowUnqualifiedDNSQuery: AllowUnqualifiedDNSQuery}) } - return &createOptions, nil + return createOptions, nil } // getCredentialSpec is a helper function to get the value of a credential spec supplied diff --git a/libcontainerd/client_linux.go b/libcontainerd/client_linux.go index b180e00205..c46a48a07a 100644 --- a/libcontainerd/client_linux.go +++ b/libcontainerd/client_linux.go @@ -138,13 +138,8 @@ func (clnt *client) Create(containerID string, checkpoint string, checkpointDir clnt.lock(containerID) defer clnt.unlock(containerID) - if ctr, err := clnt.getContainer(containerID); err == nil { - if ctr.restarting { - ctr.restartManager.Cancel() - ctr.clean() - } else { - return fmt.Errorf("Container %s is already active", containerID) - } + if _, err := clnt.getContainer(containerID); err == nil { + return fmt.Errorf("Container %s is already active", containerID) } uid, gid, err := getRootIDs(specs.Spec(spec)) diff --git a/libcontainerd/container.go b/libcontainerd/container.go index 30bc95028c..b40321389a 100644 --- a/libcontainerd/container.go +++ b/libcontainerd/container.go @@ -1,12 +1,5 @@ package libcontainerd -import ( - "fmt" - "time" - - "github.com/docker/docker/restartmanager" -) - const ( // InitFriendlyName is the name given in the lookup map of processes // for the first process started in a container. @@ -16,25 +9,5 @@ const ( type containerCommon struct { process - restartManager restartmanager.RestartManager - restarting bool - processes map[string]*process - startedAt time.Time -} - -// WithRestartManager sets the restartmanager to be used with the container. -func WithRestartManager(rm restartmanager.RestartManager) CreateOption { - return restartManager{rm} -} - -type restartManager struct { - rm restartmanager.RestartManager -} - -func (rm restartManager) Apply(p interface{}) error { - if pr, ok := p.(*container); ok { - pr.restartManager = rm.rm - return nil - } - return fmt.Errorf("WithRestartManager option not supported for this client") + processes map[string]*process } diff --git a/libcontainerd/container_linux.go b/libcontainerd/container_linux.go index 25e2cefa5e..67db34d7a4 100644 --- a/libcontainerd/container_linux.go +++ b/libcontainerd/container_linux.go @@ -7,12 +7,10 @@ import ( "os" "path/filepath" "syscall" - "time" "github.com/Sirupsen/logrus" containerd "github.com/docker/containerd/api/grpc/types" "github.com/docker/docker/pkg/ioutils" - "github.com/docker/docker/restartmanager" "github.com/opencontainers/runtime-spec/specs-go" "golang.org/x/net/context" ) @@ -137,7 +135,6 @@ func (ctr *container) start(checkpoint string, checkpointDir string) error { ctr.closeFifos(iopipe) return err } - ctr.startedAt = time.Now() ctr.systemPid = systemPid(resp.Container) close(createChan) @@ -164,7 +161,6 @@ func (ctr *container) handleEvent(e *containerd.Event) error { defer ctr.client.unlock(ctr.containerID) switch e.Type { case StateExit, StatePause, StateResume, StateOOM: - var waitRestart chan error st := StateInfo{ CommonStateInfo: CommonStateInfo{ State: e.Type, @@ -179,20 +175,8 @@ func (ctr *container) handleEvent(e *containerd.Event) error { st.ProcessID = e.Pid st.State = StateExitProcess } - if st.State == StateExit && ctr.restartManager != nil { - restart, wait, err := ctr.restartManager.ShouldRestart(e.Status, false, time.Since(ctr.startedAt)) - if err != nil { - logrus.Warnf("libcontainerd: container %s %v", ctr.containerID, err) - } else if restart { - st.State = StateRestart - ctr.restarting = true - ctr.client.deleteContainer(e.Id) - waitRestart = wait - } - } // Remove process from list if we have exited - // We need to do so here in case the Message Handler decides to restart it. switch st.State { case StateExit: ctr.clean() @@ -204,32 +188,6 @@ func (ctr *container) handleEvent(e *containerd.Event) error { if err := ctr.client.backend.StateChanged(e.Id, st); err != nil { logrus.Errorf("libcontainerd: backend.StateChanged(): %v", err) } - if st.State == StateRestart { - go func() { - err := <-waitRestart - ctr.client.lock(ctr.containerID) - defer ctr.client.unlock(ctr.containerID) - ctr.restarting = false - if err == nil { - if err = ctr.start("", ""); err != nil { - logrus.Errorf("libcontainerd: error restarting %v", err) - } - } - if err != nil { - st.State = StateExit - ctr.clean() - ctr.client.q.append(e.Id, func() { - if err := ctr.client.backend.StateChanged(e.Id, st); err != nil { - logrus.Errorf("libcontainerd: %v", err) - } - }) - if err != restartmanager.ErrRestartCanceled { - logrus.Errorf("libcontainerd: %v", err) - } - } - }() - } - if e.Type == StatePause || e.Type == StateResume { ctr.pauseMonitor.handle(e.Type) } diff --git a/libcontainerd/container_windows.go b/libcontainerd/container_windows.go index ba6c7fda08..28928e9f7c 100644 --- a/libcontainerd/container_windows.go +++ b/libcontainerd/container_windows.go @@ -91,7 +91,6 @@ func (ctr *container) start() error { } return err } - ctr.startedAt = time.Now() pid := newProcess.Pid() @@ -194,7 +193,6 @@ func (ctr *container) waitProcessExitCode(process *process) int { // equivalent to (in the linux containerd world) where events come in for // state change notifications from containerd. func (ctr *container) waitExit(process *process, isFirstProcessToStart bool) error { - var waitRestart chan error logrus.Debugln("libcontainerd: waitExit() on pid", process.systemPid) exitCode := ctr.waitProcessExitCode(process) @@ -234,20 +232,7 @@ func (ctr *container) waitExit(process *process, isFirstProcessToStart bool) err logrus.Error(err) } - if !ctr.manualStopRequested && ctr.restartManager != nil { - restart, wait, err := ctr.restartManager.ShouldRestart(uint32(exitCode), false, time.Since(ctr.startedAt)) - if err != nil { - logrus.Error(err) - } else if restart { - si.State = StateRestart - ctr.restarting = true - ctr.client.deleteContainer(ctr.containerID) - waitRestart = wait - } - } - // Remove process from list if we have exited - // We need to do so here in case the Message Handler decides to restart it. if si.State == StateExit { ctr.client.deleteContainer(ctr.containerID) } @@ -268,24 +253,6 @@ func (ctr *container) waitExit(process *process, isFirstProcessToStart bool) err logrus.Debugf("libcontainerd: waitExit() completed OK, %+v", si) - if si.State == StateRestart { - go func() { - err := <-waitRestart - ctr.restarting = false - if err == nil { - if err = ctr.client.Create(ctr.containerID, "", "", ctr.ociSpec, ctr.options...); err != nil { - logrus.Errorf("libcontainerd: error restarting %v", err) - } - } - if err != nil { - si.State = StateExit - if err := ctr.client.backend.StateChanged(ctr.containerID, si); err != nil { - logrus.Error(err) - } - } - }() - } - return nil } diff --git a/libcontainerd/types.go b/libcontainerd/types.go index 8eea36384b..98c1ad50d6 100644 --- a/libcontainerd/types.go +++ b/libcontainerd/types.go @@ -13,7 +13,6 @@ const ( StatePause = "pause" StateResume = "resume" StateExit = "exit" - StateRestart = "restart" StateRestore = "restore" StateStartProcess = "start-process" StateExitProcess = "exit-process"