1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00

This patch adds the ability for docker to detect out-of-memory conditions in containers.

Since containers can handle out-of-memory kernel kills gracefully, docker
will only provide out-of-memory information as additional metadata as part of
the container status.
Docker-DCO-1.1-Signed-off-by: Vishnu Kannan <vishnuk@google.com> (github: vishh)
This commit is contained in:
Vishnu Kannan 2014-10-08 17:03:57 +00:00
parent 349f67632f
commit f96e04ffc7
8 changed files with 119 additions and 54 deletions

View file

@ -231,7 +231,7 @@ func (daemon *Daemon) register(container *Container, updateSuffixarray bool) err
log.Debugf("killing old running container %s", container.ID) log.Debugf("killing old running container %s", container.ID)
existingPid := container.Pid existingPid := container.Pid
container.SetStopped(0) container.SetStopped(&execdriver.ExitStatus{0, false})
// We only have to handle this for lxc because the other drivers will ensure that // We only have to handle this for lxc because the other drivers will ensure that
// no processes are left when docker dies // no processes are left when docker dies
@ -263,7 +263,7 @@ func (daemon *Daemon) register(container *Container, updateSuffixarray bool) err
log.Debugf("Marking as stopped") log.Debugf("Marking as stopped")
container.SetStopped(-127) container.SetStopped(&execdriver.ExitStatus{-127, false})
if err := container.ToDisk(); err != nil { if err := container.ToDisk(); err != nil {
return err return err
} }
@ -991,7 +991,7 @@ func (daemon *Daemon) Diff(container *Container) (archive.Archive, error) {
return daemon.driver.Diff(container.ID, initID) return daemon.driver.Diff(container.ID, initID)
} }
func (daemon *Daemon) Run(c *Container, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) { func (daemon *Daemon) Run(c *Container, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (*execdriver.ExitStatus, error) {
return daemon.execDriver.Run(c.command, pipes, startCallback) return daemon.execDriver.Run(c.command, pipes, startCallback)
} }

View file

@ -40,9 +40,18 @@ type TtyTerminal interface {
Master() *os.File Master() *os.File
} }
// ExitStatus provides exit reasons for a container: the exit code of its
// process together with whether an out-of-memory condition was observed.
type ExitStatus struct {
// ExitCode is the exit code with which the container exited.
ExitCode int
// OOMKilled reports whether the container encountered an OOM.
OOMKilled bool
}
type Driver interface { type Driver interface {
Run(c *Command, pipes *Pipes, startCallback StartCallback) (int, error) // Run executes the process and blocks until the process exits and returns the exit code Run(c *Command, pipes *Pipes, startCallback StartCallback) (int, error) // Run executes the process and blocks until the process exits and returns the exit code
// Exec executes the process in a running container, blocks until the process exits and returns the exit code // Exec executes the process in an existing container, blocks until the process exits and returns the exit code
Exec(c *Command, processConfig *ProcessConfig, pipes *Pipes, startCallback StartCallback) (int, error) Exec(c *Command, processConfig *ProcessConfig, pipes *Pipes, startCallback StartCallback) (int, error)
Kill(c *Command, sig int) error Kill(c *Command, sig int) error
Pause(c *Command) error Pause(c *Command) error

View file

@ -55,7 +55,7 @@ func (d *driver) Name() string {
return fmt.Sprintf("%s-%s", DriverName, version) return fmt.Sprintf("%s-%s", DriverName, version)
} }
func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) { func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (*execdriver.ExitStatus, error) {
var ( var (
term execdriver.Terminal term execdriver.Terminal
err error err error
@ -76,11 +76,11 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
}) })
if err := d.generateEnvConfig(c); err != nil { if err := d.generateEnvConfig(c); err != nil {
return -1, err return nil, err
} }
configPath, err := d.generateLXCConfig(c) configPath, err := d.generateLXCConfig(c)
if err != nil { if err != nil {
return -1, err return nil, err
} }
params := []string{ params := []string{
"lxc-start", "lxc-start",
@ -155,11 +155,11 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
c.ProcessConfig.Args = append([]string{name}, arg...) c.ProcessConfig.Args = append([]string{name}, arg...)
if err := nodes.CreateDeviceNodes(c.Rootfs, c.AutoCreatedDevices); err != nil { if err := nodes.CreateDeviceNodes(c.Rootfs, c.AutoCreatedDevices); err != nil {
return -1, err return nil, err
} }
if err := c.ProcessConfig.Start(); err != nil { if err := c.ProcessConfig.Start(); err != nil {
return -1, err return nil, err
} }
var ( var (
@ -183,7 +183,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
c.ProcessConfig.Process.Kill() c.ProcessConfig.Process.Kill()
c.ProcessConfig.Wait() c.ProcessConfig.Wait()
} }
return -1, err return nil, err
} }
c.ContainerPid = pid c.ContainerPid = pid
@ -194,7 +194,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
<-waitLock <-waitLock
return getExitCode(c), waitErr return &execdriver.ExitStatus{getExitCode(c), false}, waitErr
} }
/// Return the exit code of the process /// Return the exit code of the process

View file

@ -14,6 +14,7 @@ import (
"sync" "sync"
"syscall" "syscall"
log "github.com/Sirupsen/logrus"
"github.com/docker/docker/daemon/execdriver" "github.com/docker/docker/daemon/execdriver"
"github.com/docker/docker/pkg/term" "github.com/docker/docker/pkg/term"
"github.com/docker/libcontainer" "github.com/docker/libcontainer"
@ -60,11 +61,20 @@ func NewDriver(root, initPath string) (*driver, error) {
}, nil }, nil
} }
func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) { func (d *driver) notifyOnOOM(config *libcontainer.Config) (<-chan struct{}, error) {
return fs.NotifyOnOOM(config.Cgroups)
}
// execOutput bundles the two results of namespaces.Exec so that the goroutine
// running it can hand them back to Run over a single channel.
type execOutput struct {
// exitCode is the exit code returned by namespaces.Exec.
exitCode int
// err is the error returned by namespaces.Exec, if any.
err error
}
func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (*execdriver.ExitStatus, error) {
// take the Command and populate the libcontainer.Config from it // take the Command and populate the libcontainer.Config from it
container, err := d.createContainer(c) container, err := d.createContainer(c)
if err != nil { if err != nil {
return -1, err return nil, err
} }
var term execdriver.Terminal var term execdriver.Terminal
@ -75,7 +85,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
term, err = execdriver.NewStdConsole(&c.ProcessConfig, pipes) term, err = execdriver.NewStdConsole(&c.ProcessConfig, pipes)
} }
if err != nil { if err != nil {
return -1, err return nil, err
} }
c.ProcessConfig.Terminal = term c.ProcessConfig.Terminal = term
@ -92,40 +102,70 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
) )
if err := d.createContainerRoot(c.ID); err != nil { if err := d.createContainerRoot(c.ID); err != nil {
return -1, err return nil, err
} }
defer d.cleanContainer(c.ID) defer d.cleanContainer(c.ID)
if err := d.writeContainerFile(container, c.ID); err != nil { if err := d.writeContainerFile(container, c.ID); err != nil {
return -1, err return nil, err
} }
return namespaces.Exec(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, dataPath, args, func(container *libcontainer.Config, console, dataPath, init string, child *os.File, args []string) *exec.Cmd { execOutputChan := make(chan execOutput, 0)
c.ProcessConfig.Path = d.initPath waitForStart := make(chan struct{}, 0)
c.ProcessConfig.Args = append([]string{
DriverName,
"-console", console,
"-pipe", "3",
"-root", filepath.Join(d.root, c.ID),
"--",
}, args...)
// set this to nil so that when we set the clone flags anything else is reset go func() {
c.ProcessConfig.SysProcAttr = &syscall.SysProcAttr{ exitCode, err := namespaces.Exec(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, dataPath, args, func(container *libcontainer.Config, console, dataPath, init string, child *os.File, args []string) *exec.Cmd {
Cloneflags: uintptr(namespaces.GetNamespaceFlags(container.Namespaces)), c.ProcessConfig.Path = d.initPath
c.ProcessConfig.Args = append([]string{
DriverName,
"-console", console,
"-pipe", "3",
"-root", filepath.Join(d.root, c.ID),
"--",
}, args...)
// set this to nil so that when we set the clone flags anything else is reset
c.ProcessConfig.SysProcAttr = &syscall.SysProcAttr{
Cloneflags: uintptr(namespaces.GetNamespaceFlags(container.Namespaces)),
}
c.ProcessConfig.ExtraFiles = []*os.File{child}
c.ProcessConfig.Env = container.Env
c.ProcessConfig.Dir = container.RootFs
return &c.ProcessConfig.Cmd
}, func() {
close(waitForStart)
if startCallback != nil {
c.ContainerPid = c.ProcessConfig.Process.Pid
startCallback(&c.ProcessConfig, c.ContainerPid)
}
})
execOutputChan <- execOutput{exitCode, err}
}()
select {
case execOutput := <-execOutputChan:
return &execdriver.ExitStatus{execOutput.exitCode, false}, execOutput.err
case <-waitForStart:
break
}
oomKill := false
go func() {
oomKillNotification, err := d.notifyOnOOM(container)
if err == nil {
if _, ok := <-oomKillNotification; ok {
oomKill = true
}
} else {
log.Infof("WARNING: Your kernel does not support OOM notifications: %s", err)
} }
c.ProcessConfig.ExtraFiles = []*os.File{child} }()
// wait for the container to exit.
execOutput := <-execOutputChan
c.ProcessConfig.Env = container.Env return &execdriver.ExitStatus{execOutput.exitCode, oomKill}, execOutput.err
c.ProcessConfig.Dir = container.RootFs
return &c.ProcessConfig.Cmd
}, func() {
if startCallback != nil {
c.ContainerPid = c.ProcessConfig.Process.Pid
startCallback(&c.ProcessConfig, c.ContainerPid)
}
})
} }
func (d *driver) Kill(p *execdriver.Command, sig int) error { func (d *driver) Kill(p *execdriver.Command, sig int) error {

View file

@ -100,7 +100,7 @@ func (m *containerMonitor) Close() error {
func (m *containerMonitor) Start() error { func (m *containerMonitor) Start() error {
var ( var (
err error err error
exitStatus int exitStatus *execdriver.ExitStatus
// this variable indicates where we in execution flow: // this variable indicates where we in execution flow:
// before Run or after // before Run or after
afterRun bool afterRun bool
@ -150,9 +150,9 @@ func (m *containerMonitor) Start() error {
// here container.Lock is already lost // here container.Lock is already lost
afterRun = true afterRun = true
m.resetMonitor(err == nil && exitStatus == 0) m.resetMonitor(err == nil && exitStatus.ExitCode == 0)
if m.shouldRestart(exitStatus) { if m.shouldRestart(exitStatus.ExitCode) {
m.container.SetRestarting(exitStatus) m.container.SetRestarting(exitStatus)
m.container.LogEvent("die") m.container.LogEvent("die")
m.resetContainer(true) m.resetContainer(true)
@ -209,7 +209,7 @@ func (m *containerMonitor) waitForNextRestart() {
// shouldRestart checks the restart policy and applies the rules to determine if // shouldRestart checks the restart policy and applies the rules to determine if
// the container's process should be restarted // the container's process should be restarted
func (m *containerMonitor) shouldRestart(exitStatus int) bool { func (m *containerMonitor) shouldRestart(exitCode int) bool {
m.mux.Lock() m.mux.Lock()
defer m.mux.Unlock() defer m.mux.Unlock()
@ -228,7 +228,7 @@ func (m *containerMonitor) shouldRestart(exitStatus int) bool {
return false return false
} }
return exitStatus != 0 return exitCode != 0
} }
return false return false

View file

@ -5,6 +5,7 @@ import (
"sync" "sync"
"time" "time"
"github.com/docker/docker/daemon/execdriver"
"github.com/docker/docker/pkg/units" "github.com/docker/docker/pkg/units"
) )
@ -13,6 +14,7 @@ type State struct {
Running bool Running bool
Paused bool Paused bool
Restarting bool Restarting bool
OOMKilled bool
Pid int Pid int
ExitCode int ExitCode int
Error string // contains last known error when starting the container Error string // contains last known error when starting the container
@ -29,12 +31,16 @@ func NewState() *State {
// String returns a human-readable description of the state // String returns a human-readable description of the state
func (s *State) String() string { func (s *State) String() string {
oomInfo := ""
if s.OOMKilled {
oomInfo = "possibly due to lack of memory"
}
if s.Running { if s.Running {
if s.Paused { if s.Paused {
return fmt.Sprintf("Up %s (Paused)", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt))) return fmt.Sprintf("Up %s (Paused)", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
} }
if s.Restarting { if s.Restarting {
return fmt.Sprintf("Restarting (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt))) return fmt.Sprintf("Restarting (%d) %s ago %s", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)), oomInfo)
} }
return fmt.Sprintf("Up %s", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt))) return fmt.Sprintf("Up %s", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
@ -44,7 +50,7 @@ func (s *State) String() string {
return "" return ""
} }
return fmt.Sprintf("Exited (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt))) return fmt.Sprintf("Exited (%d) %s ago %s", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)), oomInfo)
} }
// StateString returns a single string to describe state // StateString returns a single string to describe state
@ -149,25 +155,29 @@ func (s *State) setRunning(pid int) {
s.waitChan = make(chan struct{}) s.waitChan = make(chan struct{})
} }
func (s *State) SetStopped(exitCode int) { func (s *State) SetStopped(exitStatus *execdriver.ExitStatus) {
s.Lock() s.Lock()
s.setStopped(exitCode) s.setStopped(exitStatus)
s.Unlock() s.Unlock()
} }
func (s *State) setStopped(exitCode int) { func (s *State) setStopped(exitStatus *execdriver.ExitStatus) {
s.Running = false s.Running = false
s.Restarting = false s.Restarting = false
s.Pid = 0 s.Pid = 0
s.FinishedAt = time.Now().UTC() s.FinishedAt = time.Now().UTC()
s.ExitCode = exitCode s.ExitCode = exitStatus.ExitCode
s.OOMKilled = false
if exitStatus.OOMKilled {
s.OOMKilled = true
}
close(s.waitChan) // fire waiters for stop close(s.waitChan) // fire waiters for stop
s.waitChan = make(chan struct{}) s.waitChan = make(chan struct{})
} }
// SetRestarting is when docker handles the auto restart of containers when they are // SetRestarting is when docker handles the auto restart of containers when they are
// in the middle of a stop and being restarted again // in the middle of a stop and being restarted again
func (s *State) SetRestarting(exitCode int) { func (s *State) SetRestarting(exitStatus *execdriver.ExitStatus) {
s.Lock() s.Lock()
// we should consider the container running when it is restarting because of // we should consider the container running when it is restarting because of
// all the checks in docker around rm/stop/etc // all the checks in docker around rm/stop/etc
@ -175,7 +185,10 @@ func (s *State) SetRestarting(exitCode int) {
s.Restarting = true s.Restarting = true
s.Pid = 0 s.Pid = 0
s.FinishedAt = time.Now().UTC() s.FinishedAt = time.Now().UTC()
s.ExitCode = exitCode s.ExitCode = exitStatus.ExitCode
if exitStatus.OOMKilled {
s.OOMKilled = true
}
close(s.waitChan) // fire waiters for stop close(s.waitChan) // fire waiters for stop
s.waitChan = make(chan struct{}) s.waitChan = make(chan struct{})
s.Unlock() s.Unlock()

View file

@ -4,6 +4,8 @@ import (
"sync/atomic" "sync/atomic"
"testing" "testing"
"time" "time"
"github.com/docker/docker/daemon/execdriver"
) )
func TestStateRunStop(t *testing.T) { func TestStateRunStop(t *testing.T) {
@ -47,7 +49,7 @@ func TestStateRunStop(t *testing.T) {
atomic.StoreInt64(&exit, int64(exitCode)) atomic.StoreInt64(&exit, int64(exitCode))
close(stopped) close(stopped)
}() }()
s.SetStopped(i) s.SetStopped(&execdriver.ExitStatus{i, false})
if s.IsRunning() { if s.IsRunning() {
t.Fatal("State is running") t.Fatal("State is running")
} }

View file

@ -18,6 +18,7 @@ import (
log "github.com/Sirupsen/logrus" log "github.com/Sirupsen/logrus"
"github.com/docker/docker/daemon" "github.com/docker/docker/daemon"
"github.com/docker/docker/daemon/execdriver"
"github.com/docker/docker/engine" "github.com/docker/docker/engine"
"github.com/docker/docker/image" "github.com/docker/docker/image"
"github.com/docker/docker/nat" "github.com/docker/docker/nat"
@ -652,7 +653,7 @@ func TestRestore(t *testing.T) {
if err := container3.Run(); err != nil { if err := container3.Run(); err != nil {
t.Fatal(err) t.Fatal(err)
} }
container2.SetStopped(0) container2.SetStopped(&execdriver.ExitStatus{0, false})
} }
func TestDefaultContainerName(t *testing.T) { func TestDefaultContainerName(t *testing.T) {