This patch adds the ability for Docker to detect out-of-memory (OOM) conditions in containers.

Since containers can handle kernel OOM kills gracefully, Docker only reports the out-of-memory
condition as additional metadata in the container status.
Docker-DCO-1.1-Signed-off-by: Vishnu Kannan <vishnuk@google.com> (github: vishh)
Vishnu Kannan 2014-10-08 17:03:57 +00:00
parent 349f67632f
commit f96e04ffc7
8 changed files with 119 additions and 54 deletions
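
The diff below threads a new execdriver.ExitStatus value (exit code plus an OOMKilled flag) from the exec drivers up into the container's State. As a rough illustration of the intended effect on container status, here is a minimal, self-contained Go sketch that is not part of this commit; the ExitStatus type mirrors the one added below and the describeExit helper is purely illustrative:

```go
package main

import "fmt"

// ExitStatus mirrors the struct this commit introduces in the execdriver
// package: the exit code plus a flag recording whether the kernel OOM
// killer terminated the container.
type ExitStatus struct {
	ExitCode  int
	OOMKilled bool
}

// describeExit is a hypothetical helper showing the intended effect of the
// State.String() change further down: an exited container that was OOM-killed
// is annotated as "possibly due to lack of memory".
func describeExit(s ExitStatus) string {
	if s.OOMKilled {
		return fmt.Sprintf("Exited (%d) possibly due to lack of memory", s.ExitCode)
	}
	return fmt.Sprintf("Exited (%d)", s.ExitCode)
}

func main() {
	fmt.Println(describeExit(ExitStatus{ExitCode: 137, OOMKilled: true}))
	fmt.Println(describeExit(ExitStatus{ExitCode: 0, OOMKilled: false}))
}
```

The actual status strings come from the State.String() change in the state.go hunk further down, which appends "possibly due to lack of memory" when OOMKilled is set.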

View File

@ -231,7 +231,7 @@ func (daemon *Daemon) register(container *Container, updateSuffixarray bool) err
log.Debugf("killing old running container %s", container.ID)
existingPid := container.Pid
container.SetStopped(0)
container.SetStopped(&execdriver.ExitStatus{0, false})
// We only have to handle this for lxc because the other drivers will ensure that
// no processes are left when docker dies
@ -263,7 +263,7 @@ func (daemon *Daemon) register(container *Container, updateSuffixarray bool) err
log.Debugf("Marking as stopped")
container.SetStopped(-127)
container.SetStopped(&execdriver.ExitStatus{-127, false})
if err := container.ToDisk(); err != nil {
return err
}
@ -991,7 +991,7 @@ func (daemon *Daemon) Diff(container *Container) (archive.Archive, error) {
return daemon.driver.Diff(container.ID, initID)
}
func (daemon *Daemon) Run(c *Container, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
func (daemon *Daemon) Run(c *Container, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (*execdriver.ExitStatus, error) {
return daemon.execDriver.Run(c.command, pipes, startCallback)
}

View File

@ -40,9 +40,18 @@ type TtyTerminal interface {
Master() *os.File
}
// ExitStatus provides exit reasons for a container.
type ExitStatus struct {
// The exit code with which the container exited.
ExitCode int
// Whether the container encountered an OOM.
OOMKilled bool
}
type Driver interface {
Run(c *Command, pipes *Pipes, startCallback StartCallback) (int, error) // Run executes the process and blocks until the process exits and returns the exit code
Run(c *Command, pipes *Pipes, startCallback StartCallback) (*ExitStatus, error) // Run executes the process and blocks until the process exits and returns the exit code
// Exec executes the process in a running container, blocks until the process exits and returns the exit code
// Exec executes the process in an existing container, blocks until the process exits and returns the exit code
Exec(c *Command, processConfig *ProcessConfig, pipes *Pipes, startCallback StartCallback) (int, error)
Kill(c *Command, sig int) error
Pause(c *Command) error

View File

@ -55,7 +55,7 @@ func (d *driver) Name() string {
return fmt.Sprintf("%s-%s", DriverName, version)
}
func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (*execdriver.ExitStatus, error) {
var (
term execdriver.Terminal
err error
@ -76,11 +76,11 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
})
if err := d.generateEnvConfig(c); err != nil {
return -1, err
return nil, err
}
configPath, err := d.generateLXCConfig(c)
if err != nil {
return -1, err
return nil, err
}
params := []string{
"lxc-start",
@ -155,11 +155,11 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
c.ProcessConfig.Args = append([]string{name}, arg...)
if err := nodes.CreateDeviceNodes(c.Rootfs, c.AutoCreatedDevices); err != nil {
return -1, err
return nil, err
}
if err := c.ProcessConfig.Start(); err != nil {
return -1, err
return nil, err
}
var (
@ -183,7 +183,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
c.ProcessConfig.Process.Kill()
c.ProcessConfig.Wait()
}
return -1, err
return nil, err
}
c.ContainerPid = pid
@ -194,7 +194,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
<-waitLock
return getExitCode(c), waitErr
return &execdriver.ExitStatus{getExitCode(c), false}, waitErr
}
/// Return the exit code of the process

View File

@ -14,6 +14,7 @@ import (
"sync"
"syscall"
log "github.com/Sirupsen/logrus"
"github.com/docker/docker/daemon/execdriver"
"github.com/docker/docker/pkg/term"
"github.com/docker/libcontainer"
@ -60,11 +61,20 @@ func NewDriver(root, initPath string) (*driver, error) {
}, nil
}
func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
func (d *driver) notifyOnOOM(config *libcontainer.Config) (<-chan struct{}, error) {
return fs.NotifyOnOOM(config.Cgroups)
}
type execOutput struct {
exitCode int
err error
}
func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (*execdriver.ExitStatus, error) {
// take the Command and populate the libcontainer.Config from it
container, err := d.createContainer(c)
if err != nil {
return -1, err
return nil, err
}
var term execdriver.Terminal
@ -75,7 +85,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
term, err = execdriver.NewStdConsole(&c.ProcessConfig, pipes)
}
if err != nil {
return -1, err
return nil, err
}
c.ProcessConfig.Terminal = term
@ -92,40 +102,70 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
)
if err := d.createContainerRoot(c.ID); err != nil {
return -1, err
return nil, err
}
defer d.cleanContainer(c.ID)
if err := d.writeContainerFile(container, c.ID); err != nil {
return -1, err
return nil, err
}
return namespaces.Exec(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, dataPath, args, func(container *libcontainer.Config, console, dataPath, init string, child *os.File, args []string) *exec.Cmd {
c.ProcessConfig.Path = d.initPath
c.ProcessConfig.Args = append([]string{
DriverName,
"-console", console,
"-pipe", "3",
"-root", filepath.Join(d.root, c.ID),
"--",
}, args...)
execOutputChan := make(chan execOutput, 0)
waitForStart := make(chan struct{}, 0)
// set this to nil so that when we set the clone flags anything else is reset
c.ProcessConfig.SysProcAttr = &syscall.SysProcAttr{
Cloneflags: uintptr(namespaces.GetNamespaceFlags(container.Namespaces)),
go func() {
exitCode, err := namespaces.Exec(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, dataPath, args, func(container *libcontainer.Config, console, dataPath, init string, child *os.File, args []string) *exec.Cmd {
c.ProcessConfig.Path = d.initPath
c.ProcessConfig.Args = append([]string{
DriverName,
"-console", console,
"-pipe", "3",
"-root", filepath.Join(d.root, c.ID),
"--",
}, args...)
// set this to nil so that when we set the clone flags anything else is reset
c.ProcessConfig.SysProcAttr = &syscall.SysProcAttr{
Cloneflags: uintptr(namespaces.GetNamespaceFlags(container.Namespaces)),
}
c.ProcessConfig.ExtraFiles = []*os.File{child}
c.ProcessConfig.Env = container.Env
c.ProcessConfig.Dir = container.RootFs
return &c.ProcessConfig.Cmd
}, func() {
close(waitForStart)
if startCallback != nil {
c.ContainerPid = c.ProcessConfig.Process.Pid
startCallback(&c.ProcessConfig, c.ContainerPid)
}
})
execOutputChan <- execOutput{exitCode, err}
}()
select {
case execOutput := <-execOutputChan:
return &execdriver.ExitStatus{execOutput.exitCode, false}, execOutput.err
case <-waitForStart:
break
}
oomKill := false
go func() {
oomKillNotification, err := d.notifyOnOOM(container)
if err == nil {
if _, ok := <-oomKillNotification; ok {
oomKill = true
}
} else {
log.Infof("WARNING: Your kernel does not support OOM notifications: %s", err)
}
c.ProcessConfig.ExtraFiles = []*os.File{child}
}()
// wait for the container to exit.
execOutput := <-execOutputChan
c.ProcessConfig.Env = container.Env
c.ProcessConfig.Dir = container.RootFs
return &c.ProcessConfig.Cmd
}, func() {
if startCallback != nil {
c.ContainerPid = c.ProcessConfig.Process.Pid
startCallback(&c.ProcessConfig, c.ContainerPid)
}
})
return &execdriver.ExitStatus{execOutput.exitCode, oomKill}, execOutput.err
}
func (d *driver) Kill(p *execdriver.Command, sig int) error {

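The native driver above learns about OOM kills through libcontainer's fs.NotifyOnOOM (wrapped by the new notifyOnOOM method). For context, here is a rough, self-contained sketch of the cgroup-v1 notification mechanism that call is built on; it is not part of this commit, and the notifyOnOOM helper and the cgroup path used in main are illustrative assumptions:

```go
package main

import (
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"syscall"
)

// notifyOnOOM registers an eventfd against memory.oom_control through
// cgroup.event_control, then reads the eventfd once per OOM event.
// cgroupDir is an assumed path to the container's memory cgroup directory.
func notifyOnOOM(cgroupDir string) (<-chan struct{}, error) {
	oomControl, err := os.Open(filepath.Join(cgroupDir, "memory.oom_control"))
	if err != nil {
		return nil, err
	}

	// eventfd(2) gives us a file descriptor the kernel will signal on OOM.
	efd, _, errno := syscall.Syscall(syscall.SYS_EVENTFD2, 0, syscall.O_CLOEXEC, 0)
	if errno != 0 {
		oomControl.Close()
		return nil, errno
	}
	eventfd := os.NewFile(efd, "eventfd")

	// Writing "<eventfd> <oom_control fd>" to cgroup.event_control wires the
	// eventfd to OOM events in this cgroup.
	wiring := fmt.Sprintf("%d %d", eventfd.Fd(), oomControl.Fd())
	eventControl := filepath.Join(cgroupDir, "cgroup.event_control")
	if err := ioutil.WriteFile(eventControl, []byte(wiring), 0700); err != nil {
		eventfd.Close()
		oomControl.Close()
		return nil, err
	}

	ch := make(chan struct{})
	go func() {
		defer func() {
			close(ch)
			eventfd.Close()
			oomControl.Close()
		}()
		buf := make([]byte, 8)
		for {
			// Each successful read means at least one OOM kill occurred.
			if _, err := eventfd.Read(buf); err != nil {
				return
			}
			ch <- struct{}{}
		}
	}()
	return ch, nil
}

func main() {
	// Hypothetical usage against an assumed cgroup path.
	oom, err := notifyOnOOM("/sys/fs/cgroup/memory/docker/<container-id>")
	if err != nil {
		fmt.Println("OOM notifications unavailable:", err)
		return
	}
	<-oom
	fmt.Println("container was OOM killed")
}
```

When registration fails (for example, on kernels without memory-cgroup OOM control), the driver above only logs a warning and reports OOMKilled as false.
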
View File

@ -100,7 +100,7 @@ func (m *containerMonitor) Close() error {
func (m *containerMonitor) Start() error {
var (
err error
exitStatus int
exitStatus *execdriver.ExitStatus
// this variable indicates where we are in the execution flow:
// before Run or after
afterRun bool
@ -150,9 +150,9 @@ func (m *containerMonitor) Start() error {
// here container.Lock is already lost
afterRun = true
m.resetMonitor(err == nil && exitStatus == 0)
m.resetMonitor(err == nil && exitStatus.ExitCode == 0)
if m.shouldRestart(exitStatus) {
if m.shouldRestart(exitStatus.ExitCode) {
m.container.SetRestarting(exitStatus)
m.container.LogEvent("die")
m.resetContainer(true)
@ -209,7 +209,7 @@ func (m *containerMonitor) waitForNextRestart() {
// shouldRestart checks the restart policy and applies the rules to determine if
// the container's process should be restarted
func (m *containerMonitor) shouldRestart(exitStatus int) bool {
func (m *containerMonitor) shouldRestart(exitCode int) bool {
m.mux.Lock()
defer m.mux.Unlock()
@ -228,7 +228,7 @@ func (m *containerMonitor) shouldRestart(exitStatus int) bool {
return false
}
return exitStatus != 0
return exitCode != 0
}
return false

View File

@ -5,6 +5,7 @@ import (
"sync"
"time"
"github.com/docker/docker/daemon/execdriver"
"github.com/docker/docker/pkg/units"
)
@ -13,6 +14,7 @@ type State struct {
Running bool
Paused bool
Restarting bool
OOMKilled bool
Pid int
ExitCode int
Error string // contains last known error when starting the container
@ -29,12 +31,16 @@ func NewState() *State {
// String returns a human-readable description of the state
func (s *State) String() string {
oomInfo := ""
if s.OOMKilled {
oomInfo = "possibly due to lack of memory"
}
if s.Running {
if s.Paused {
return fmt.Sprintf("Up %s (Paused)", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
}
if s.Restarting {
return fmt.Sprintf("Restarting (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
return fmt.Sprintf("Restarting (%d) %s ago %s", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)), oomInfo)
}
return fmt.Sprintf("Up %s", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
@ -44,7 +50,7 @@ func (s *State) String() string {
return ""
}
return fmt.Sprintf("Exited (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
return fmt.Sprintf("Exited (%d) %s ago %s", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)), oomInfo)
}
// StateString returns a single string to describe state
@ -149,25 +155,29 @@ func (s *State) setRunning(pid int) {
s.waitChan = make(chan struct{})
}
func (s *State) SetStopped(exitCode int) {
func (s *State) SetStopped(exitStatus *execdriver.ExitStatus) {
s.Lock()
s.setStopped(exitCode)
s.setStopped(exitStatus)
s.Unlock()
}
func (s *State) setStopped(exitCode int) {
func (s *State) setStopped(exitStatus *execdriver.ExitStatus) {
s.Running = false
s.Restarting = false
s.Pid = 0
s.FinishedAt = time.Now().UTC()
s.ExitCode = exitCode
s.ExitCode = exitStatus.ExitCode
s.OOMKilled = false
if exitStatus.OOMKilled {
s.OOMKilled = true
}
close(s.waitChan) // fire waiters for stop
s.waitChan = make(chan struct{})
}
// SetRestarting is when docker handles the auto restart of containers when they are
// in the middle of a stop and being restarted again
func (s *State) SetRestarting(exitCode int) {
func (s *State) SetRestarting(exitStatus *execdriver.ExitStatus) {
s.Lock()
// we should consider the container running when it is restarting because of
// all the checks in docker around rm/stop/etc
@ -175,7 +185,10 @@ func (s *State) SetRestarting(exitCode int) {
s.Restarting = true
s.Pid = 0
s.FinishedAt = time.Now().UTC()
s.ExitCode = exitCode
s.ExitCode = exitStatus.ExitCode
if exitStatus.OOMKilled {
s.OOMKilled = true
}
close(s.waitChan) // fire waiters for stop
s.waitChan = make(chan struct{})
s.Unlock()

View File

@ -4,6 +4,8 @@ import (
"sync/atomic"
"testing"
"time"
"github.com/docker/docker/daemon/execdriver"
)
func TestStateRunStop(t *testing.T) {
@ -47,7 +49,7 @@ func TestStateRunStop(t *testing.T) {
atomic.StoreInt64(&exit, int64(exitCode))
close(stopped)
}()
s.SetStopped(i)
s.SetStopped(&execdriver.ExitStatus{i, false})
if s.IsRunning() {
t.Fatal("State is running")
}

View File

@ -18,6 +18,7 @@ import (
log "github.com/Sirupsen/logrus"
"github.com/docker/docker/daemon"
"github.com/docker/docker/daemon/execdriver"
"github.com/docker/docker/engine"
"github.com/docker/docker/image"
"github.com/docker/docker/nat"
@ -652,7 +653,7 @@ func TestRestore(t *testing.T) {
if err := container3.Run(); err != nil {
t.Fatal(err)
}
container2.SetStopped(0)
container2.SetStopped(&execdriver.ExitStatus{0, false})
}
func TestDefaultContainerName(t *testing.T) {