diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go
index 73842f729f..c07c45de3c 100644
--- a/pkg/libcontainer/nsinit/exec.go
+++ b/pkg/libcontainer/nsinit/exec.go
@@ -50,8 +50,13 @@ func (ns *linuxNs) Exec(container *libcontainer.Container, term Terminal, args [
 	if err := command.Start(); err != nil {
 		return -1, err
 	}
+
+	started, err := system.GetProcessStartTime(command.Process.Pid)
+	if err != nil {
+		return -1, err
+	}
 	ns.logger.Printf("writting pid %d to file\n", command.Process.Pid)
-	if err := ns.stateWriter.WritePid(command.Process.Pid); err != nil {
+	if err := ns.stateWriter.WritePid(command.Process.Pid, started); err != nil {
 		command.Process.Kill()
 		return -1, err
 	}
diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go
index 85182326ee..c7c2addb18 100644
--- a/pkg/libcontainer/nsinit/init.go
+++ b/pkg/libcontainer/nsinit/init.go
@@ -54,11 +54,6 @@ func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consol
 			return fmt.Errorf("setctty %s", err)
 		}
 	}
-	// this is our best effort to let the process know that the parent has died and that it
-	// should it should act on it how it sees fit
-	if err := system.ParentDeathSignal(uintptr(syscall.SIGTERM)); err != nil {
-		return fmt.Errorf("parent death signal %s", err)
-	}
 	if err := setupNetwork(container, context); err != nil {
 		return fmt.Errorf("setup networking %s", err)
 	}
diff --git a/pkg/libcontainer/nsinit/state.go b/pkg/libcontainer/nsinit/state.go
index af38008c03..26d7fa4230 100644
--- a/pkg/libcontainer/nsinit/state.go
+++ b/pkg/libcontainer/nsinit/state.go
@@ -10,7 +10,7 @@ import (
 // StateWriter handles writing and deleting the pid file
 // on disk
 type StateWriter interface {
-	WritePid(pid int) error
+	WritePid(pid int, startTime string) error
 	DeletePid() error
 }
 
@@ -19,10 +19,18 @@ type DefaultStateWriter struct {
 }
 
 // writePidFile writes the namespaced processes pid to pid in the rootfs for the container
-func (d *DefaultStateWriter) WritePid(pid int) error {
-	return ioutil.WriteFile(filepath.Join(d.Root, "pid"), []byte(fmt.Sprint(pid)), 0655)
+func (d *DefaultStateWriter) WritePid(pid int, startTime string) error {
+	err := ioutil.WriteFile(filepath.Join(d.Root, "pid"), []byte(fmt.Sprint(pid)), 0655)
+	if err != nil {
+		return err
+	}
+	return ioutil.WriteFile(filepath.Join(d.Root, "start"), []byte(startTime), 0655)
 }
 
 func (d *DefaultStateWriter) DeletePid() error {
-	return os.Remove(filepath.Join(d.Root, "pid"))
+	err := os.Remove(filepath.Join(d.Root, "pid"))
+	if serr := os.Remove(filepath.Join(d.Root, "start")); err == nil {
+		err = serr
+	}
+	return err
 }
diff --git a/pkg/system/proc.go b/pkg/system/proc.go
new file mode 100644
index 0000000000..a492346c7f
--- /dev/null
+++ b/pkg/system/proc.go
@@ -0,0 +1,49 @@
+package system
+
+import (
+	"fmt"
+	"io/ioutil"
+	"path/filepath"
+	"strconv"
+	"strings"
+)
+
+// GetProcessStartTime looks in /proc to find the start time of the process
+// identified by pid so that callers can verify that a pid still refers to
+// the same process that they started.
+//
+// The value is returned verbatim as the string found in /proc/<pid>/stat.
+// An error is returned if the stat file cannot be read or cannot be parsed.
+func GetProcessStartTime(pid int) (string, error) {
+	data, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
+	if err != nil {
+		return "", err
+	}
+	return parseStartTime(string(data))
+}
+
+// parseStartTime extracts the starttime field from the contents of a
+// /proc/<pid>/stat file.
+func parseStartTime(stat string) (string, error) {
+	// the starttime is located at pos 22
+	// from the man page
+	//
+	// starttime %llu (was %lu before Linux 2.6)
+	// (22) The time the process started after system boot. In kernels before Linux 2.6, this
+	// value was expressed in jiffies. Since Linux 2.6, the value is expressed in clock ticks
+	// (divide by sysconf(_SC_CLK_TCK)).
+	//
+	// the comm field (2) is wrapped in parentheses and may itself contain
+	// spaces or parentheses, so skip past the last ')' before splitting
+	// instead of counting space separated fields from the start of the line
+	end := strings.LastIndex(stat, ")")
+	if end < 0 {
+		return "", fmt.Errorf("invalid stat data: %q", stat)
+	}
+	// parts[0] is the state field (3) so the starttime is at 22-3
+	parts := strings.Fields(stat[end+1:])
+	if len(parts) <= 22-3 {
+		return "", fmt.Errorf("invalid stat data: %q", stat)
+	}
+	return parts[22-3], nil
+}
diff --git a/runtime/container.go b/runtime/container.go
index ed68fd0844..bd4a6f2bea 100644
--- a/runtime/container.go
+++ b/runtime/container.go
@@ -915,7 +915,6 @@ func (container *Container) Stop(seconds int) error {
 
 	// 1. Send a SIGTERM
 	if err := container.KillSig(15); err != nil {
-		utils.Debugf("Error sending kill SIGTERM: %s", err)
 		log.Print("Failed to send SIGTERM to the process, force killing")
 		if err := container.KillSig(9); err != nil {
 			return err
diff --git a/runtime/execdriver/driver.go b/runtime/execdriver/driver.go
index d067973419..27a575cb3a 100644
--- a/runtime/execdriver/driver.go
+++ b/runtime/execdriver/driver.go
@@ -84,6 +84,7 @@ type Driver interface {
 	Name() string                                 // Driver name
 	Info(id string) Info                          // "temporary" hack (until we move state from core to plugins)
 	GetPidsForContainer(id string) ([]int, error) // Returns a list of pids for the given container.
+	Terminate(c *Command) error                   // kill it with fire
 }
 
 // Network settings of the container
diff --git a/runtime/execdriver/lxc/driver.go b/runtime/execdriver/lxc/driver.go
index 896f215366..ef16dcc380 100644
--- a/runtime/execdriver/lxc/driver.go
+++ b/runtime/execdriver/lxc/driver.go
@@ -204,6 +204,10 @@ func (d *driver) Kill(c *execdriver.Command, sig int) error {
 	return KillLxc(c.ID, sig)
 }
 
+func (d *driver) Terminate(c *execdriver.Command) error {
+	return KillLxc(c.ID, 9)
+}
+
 func (d *driver) version() string {
 	var (
 		version string
diff --git a/runtime/execdriver/native/driver.go b/runtime/execdriver/native/driver.go
index 4acc4b388c..c5a3837615 100644
--- a/runtime/execdriver/native/driver.go
+++ b/runtime/execdriver/native/driver.go
@@ -117,9 +117,39 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
 }
 
 func (d *driver) Kill(p *execdriver.Command, sig int) error {
-	err := syscall.Kill(p.Process.Pid, syscall.Signal(sig))
+	return syscall.Kill(p.Process.Pid, syscall.Signal(sig))
+}
+
+func (d *driver) Terminate(p *execdriver.Command) error {
+	// lets check the start time for the process
+	started, err := d.readStartTime(p)
+	if err != nil {
+		// if we don't have the data on disk then we can assume the process is gone
+		// because this is only removed after we know the process has stopped
+		if os.IsNotExist(err) {
+			return nil
+		}
+		return err
+	}
+
+	currentStartTime, err := system.GetProcessStartTime(p.Process.Pid)
+	if err != nil {
+		return err
+	}
+	if started == currentStartTime {
+		err = syscall.Kill(p.Process.Pid, 9)
+	}
 	d.removeContainerRoot(p.ID)
 	return err
+
+}
+
+func (d *driver) readStartTime(p *execdriver.Command) (string, error) {
+	data, err := ioutil.ReadFile(filepath.Join(d.root, p.ID, "start"))
+	if err != nil {
+		return "", err
+	}
+	return string(data), nil
 }
 
 func (d *driver) Info(id string) execdriver.Info {
@@ -235,9 +265,9 @@ type dockerStateWriter struct {
 	callback execdriver.StartCallback
 }
 
-func (d *dockerStateWriter) WritePid(pid int) error {
+func (d *dockerStateWriter) WritePid(pid int, started string) error {
 	d.c.ContainerPid = pid
-	err := d.dsw.WritePid(pid)
+	err := d.dsw.WritePid(pid, started)
 	if d.callback != nil {
 		d.callback(d.c)
 	}
diff --git a/runtime/runtime.go b/runtime/runtime.go
index b035f5df9f..9e8323279e 100644
--- a/runtime/runtime.go
+++ b/runtime/runtime.go
@@ -174,6 +174,7 @@ func (runtime *Runtime) Register(container *Container) error {
 		if container.State.IsGhost() {
 			utils.Debugf("killing ghost %s", container.ID)
 
+			existingPid := container.State.Pid
 			container.State.SetGhost(false)
 			container.State.SetStopped(0)
 
@@ -181,9 +182,23 @@ func (runtime *Runtime) Register(container *Container) error {
 			// no ghost processes are left when docker dies
 			if container.ExecDriver == "" || strings.Contains(container.ExecDriver, "lxc") {
 				lxc.KillLxc(container.ID, 9)
-				if err := container.Unmount(); err != nil {
-					utils.Debugf("ghost unmount error %s", err)
+			} else {
+				// use the current driver and ensure that the container is dead x.x
+				cmd := &execdriver.Command{
+					ID: container.ID,
 				}
+				var err error
+				cmd.Process, err = os.FindProcess(existingPid)
+				if err != nil {
+					utils.Debugf("cannot find existing process for %d", existingPid)
+				}
+				runtime.execDriver.Terminate(cmd)
+			}
+			if err := container.Unmount(); err != nil {
+				utils.Debugf("ghost unmount error %s", err)
+			}
+			if err := container.ToDisk(); err != nil {
+				utils.Debugf("saving ghost state to disk %s", err)
 			}
 		}
 
@@ -778,8 +793,36 @@ func NewRuntimeFromDirectory(config *daemonconfig.Config, eng *engine.Engine) (*
 	return runtime, nil
 }
 
+func (runtime *Runtime) shutdown() error {
+	group := sync.WaitGroup{}
+	utils.Debugf("starting clean shutdown of all containers...")
+	for _, container := range runtime.List() {
+		c := container
+		if c.State.IsRunning() {
+			utils.Debugf("stopping %s", c.ID)
+			group.Add(1)
+
+			go func() {
+				defer group.Done()
+				if err := c.KillSig(15); err != nil {
+					utils.Debugf("kill 15 error for %s - %s", c.ID, err)
+				}
+				c.Wait()
+				utils.Debugf("container stopped %s", c.ID)
+			}()
+		}
+	}
+	group.Wait()
+
+	return nil
+}
+
 func (runtime *Runtime) Close() error {
 	errorsStrings := []string{}
+	if err := runtime.shutdown(); err != nil {
+		utils.Errorf("runtime.shutdown(): %s", err)
+		errorsStrings = append(errorsStrings, err.Error())
+	}
 	if err := portallocator.ReleaseAll(); err != nil {
 		utils.Errorf("portallocator.ReleaseAll(): %s", err)
 		errorsStrings = append(errorsStrings, err.Error())
diff --git a/server/server.go b/server/server.go
index 278cab2b2a..65dbcca47b 100644
--- a/server/server.go
+++ b/server/server.go
@@ -54,7 +54,7 @@ func InitServer(job *engine.Job) engine.Status {
 	gosignal.Notify(c, os.Interrupt, syscall.SIGTERM, syscall.SIGQUIT)
 	go func() {
 		sig := <-c
-		log.Printf("Received signal '%v', exiting\n", sig)
+		log.Printf("Received signal '%v', starting shutdown of docker...\n", sig)
 		utils.RemovePidFile(srv.runtime.Config().Pidfile)
 		srv.Close()
 		os.Exit(0)