mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
This patch adds the ability for Docker to detect out-of-memory conditions in containers.
Since containers can handle out-of-memory kernel kills gracefully, Docker will only provide out-of-memory information as additional metadata in the container status. Docker-DCO-1.1-Signed-off-by: Vishnu Kannan <vishnuk@google.com> (github: vishh)
This commit is contained in:
parent
349f67632f
commit
f96e04ffc7
8 changed files with 119 additions and 54 deletions
|
@ -231,7 +231,7 @@ func (daemon *Daemon) register(container *Container, updateSuffixarray bool) err
|
|||
log.Debugf("killing old running container %s", container.ID)
|
||||
|
||||
existingPid := container.Pid
|
||||
container.SetStopped(0)
|
||||
container.SetStopped(&execdriver.ExitStatus{0, false})
|
||||
|
||||
// We only have to handle this for lxc because the other drivers will ensure that
|
||||
// no processes are left when docker dies
|
||||
|
@ -263,7 +263,7 @@ func (daemon *Daemon) register(container *Container, updateSuffixarray bool) err
|
|||
|
||||
log.Debugf("Marking as stopped")
|
||||
|
||||
container.SetStopped(-127)
|
||||
container.SetStopped(&execdriver.ExitStatus{-127, false})
|
||||
if err := container.ToDisk(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -991,7 +991,7 @@ func (daemon *Daemon) Diff(container *Container) (archive.Archive, error) {
|
|||
return daemon.driver.Diff(container.ID, initID)
|
||||
}
|
||||
|
||||
func (daemon *Daemon) Run(c *Container, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
|
||||
func (daemon *Daemon) Run(c *Container, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (*execdriver.ExitStatus, error) {
|
||||
return daemon.execDriver.Run(c.command, pipes, startCallback)
|
||||
}
|
||||
|
||||
|
|
|
@ -40,9 +40,18 @@ type TtyTerminal interface {
|
|||
Master() *os.File
|
||||
}
|
||||
|
||||
// ExitStatus provides exit reasons for a container. It pairs the exit code
// with a flag recording whether an out-of-memory condition was encountered.
|
||||
type ExitStatus struct {
|
||||
// ExitCode is the exit code with which the container exited.
|
||||
ExitCode int
|
||||
|
||||
// OOMKilled reports whether the container encountered an OOM.
|
||||
OOMKilled bool
|
||||
}
|
||||
|
||||
type Driver interface {
|
||||
Run(c *Command, pipes *Pipes, startCallback StartCallback) (int, error) // Run executes the process and blocks until the process exits and returns the exit code
|
||||
// Exec executes the process in a running container, blocks until the process exits and returns the exit code
|
||||
// Exec executes the process in an existing container, blocks until the process exits and returns the exit code
|
||||
Exec(c *Command, processConfig *ProcessConfig, pipes *Pipes, startCallback StartCallback) (int, error)
|
||||
Kill(c *Command, sig int) error
|
||||
Pause(c *Command) error
|
||||
|
|
|
@ -55,7 +55,7 @@ func (d *driver) Name() string {
|
|||
return fmt.Sprintf("%s-%s", DriverName, version)
|
||||
}
|
||||
|
||||
func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
|
||||
func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (*execdriver.ExitStatus, error) {
|
||||
var (
|
||||
term execdriver.Terminal
|
||||
err error
|
||||
|
@ -76,11 +76,11 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
|
|||
})
|
||||
|
||||
if err := d.generateEnvConfig(c); err != nil {
|
||||
return -1, err
|
||||
return nil, err
|
||||
}
|
||||
configPath, err := d.generateLXCConfig(c)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
return nil, err
|
||||
}
|
||||
params := []string{
|
||||
"lxc-start",
|
||||
|
@ -155,11 +155,11 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
|
|||
c.ProcessConfig.Args = append([]string{name}, arg...)
|
||||
|
||||
if err := nodes.CreateDeviceNodes(c.Rootfs, c.AutoCreatedDevices); err != nil {
|
||||
return -1, err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := c.ProcessConfig.Start(); err != nil {
|
||||
return -1, err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var (
|
||||
|
@ -183,7 +183,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
|
|||
c.ProcessConfig.Process.Kill()
|
||||
c.ProcessConfig.Wait()
|
||||
}
|
||||
return -1, err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
c.ContainerPid = pid
|
||||
|
@ -194,7 +194,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
|
|||
|
||||
<-waitLock
|
||||
|
||||
return getExitCode(c), waitErr
|
||||
return &execdriver.ExitStatus{getExitCode(c), false}, waitErr
|
||||
}
|
||||
|
||||
/// Return the exit code of the process
|
||||
|
|
|
@ -14,6 +14,7 @@ import (
|
|||
"sync"
|
||||
"syscall"
|
||||
|
||||
log "github.com/Sirupsen/logrus"
|
||||
"github.com/docker/docker/daemon/execdriver"
|
||||
"github.com/docker/docker/pkg/term"
|
||||
"github.com/docker/libcontainer"
|
||||
|
@ -60,11 +61,20 @@ func NewDriver(root, initPath string) (*driver, error) {
|
|||
}, nil
|
||||
}
|
||||
|
||||
func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
|
||||
// notifyOnOOM asks fs.NotifyOnOOM for an OOM notification channel for the
// cgroups described by config.Cgroups and returns its result unchanged: the
// receive-only channel and any error.
func (d *driver) notifyOnOOM(config *libcontainer.Config) (<-chan struct{}, error) {
|
||||
return fs.NotifyOnOOM(config.Cgroups)
|
||||
}
|
||||
|
||||
// execOutput bundles an exit code and an error into a single value so that
// both can be passed together, e.g. over a channel.
type execOutput struct {
|
||||
// exitCode is the exit code being reported.
exitCode int
|
||||
// err is the error being reported alongside the exit code, if any.
err error
|
||||
}
|
||||
|
||||
func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (*execdriver.ExitStatus, error) {
|
||||
// take the Command and populate the libcontainer.Config from it
|
||||
container, err := d.createContainer(c)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var term execdriver.Terminal
|
||||
|
@ -75,7 +85,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
|
|||
term, err = execdriver.NewStdConsole(&c.ProcessConfig, pipes)
|
||||
}
|
||||
if err != nil {
|
||||
return -1, err
|
||||
return nil, err
|
||||
}
|
||||
c.ProcessConfig.Terminal = term
|
||||
|
||||
|
@ -92,40 +102,70 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
|
|||
)
|
||||
|
||||
if err := d.createContainerRoot(c.ID); err != nil {
|
||||
return -1, err
|
||||
return nil, err
|
||||
}
|
||||
defer d.cleanContainer(c.ID)
|
||||
|
||||
if err := d.writeContainerFile(container, c.ID); err != nil {
|
||||
return -1, err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return namespaces.Exec(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, dataPath, args, func(container *libcontainer.Config, console, dataPath, init string, child *os.File, args []string) *exec.Cmd {
|
||||
c.ProcessConfig.Path = d.initPath
|
||||
c.ProcessConfig.Args = append([]string{
|
||||
DriverName,
|
||||
"-console", console,
|
||||
"-pipe", "3",
|
||||
"-root", filepath.Join(d.root, c.ID),
|
||||
"--",
|
||||
}, args...)
|
||||
execOutputChan := make(chan execOutput, 0)
|
||||
waitForStart := make(chan struct{}, 0)
|
||||
|
||||
// set this to nil so that when we set the clone flags anything else is reset
|
||||
c.ProcessConfig.SysProcAttr = &syscall.SysProcAttr{
|
||||
Cloneflags: uintptr(namespaces.GetNamespaceFlags(container.Namespaces)),
|
||||
go func() {
|
||||
exitCode, err := namespaces.Exec(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, dataPath, args, func(container *libcontainer.Config, console, dataPath, init string, child *os.File, args []string) *exec.Cmd {
|
||||
c.ProcessConfig.Path = d.initPath
|
||||
c.ProcessConfig.Args = append([]string{
|
||||
DriverName,
|
||||
"-console", console,
|
||||
"-pipe", "3",
|
||||
"-root", filepath.Join(d.root, c.ID),
|
||||
"--",
|
||||
}, args...)
|
||||
|
||||
// set this to nil so that when we set the clone flags anything else is reset
|
||||
c.ProcessConfig.SysProcAttr = &syscall.SysProcAttr{
|
||||
Cloneflags: uintptr(namespaces.GetNamespaceFlags(container.Namespaces)),
|
||||
}
|
||||
c.ProcessConfig.ExtraFiles = []*os.File{child}
|
||||
|
||||
c.ProcessConfig.Env = container.Env
|
||||
c.ProcessConfig.Dir = container.RootFs
|
||||
|
||||
return &c.ProcessConfig.Cmd
|
||||
}, func() {
|
||||
close(waitForStart)
|
||||
if startCallback != nil {
|
||||
c.ContainerPid = c.ProcessConfig.Process.Pid
|
||||
startCallback(&c.ProcessConfig, c.ContainerPid)
|
||||
}
|
||||
})
|
||||
execOutputChan <- execOutput{exitCode, err}
|
||||
}()
|
||||
|
||||
select {
|
||||
case execOutput := <-execOutputChan:
|
||||
return &execdriver.ExitStatus{execOutput.exitCode, false}, execOutput.err
|
||||
case <-waitForStart:
|
||||
break
|
||||
}
|
||||
|
||||
oomKill := false
|
||||
go func() {
|
||||
oomKillNotification, err := d.notifyOnOOM(container)
|
||||
if err == nil {
|
||||
if _, ok := <-oomKillNotification; ok {
|
||||
oomKill = true
|
||||
}
|
||||
} else {
|
||||
log.Infof("WARNING: Your kernel does not support OOM notifications: %s", err)
|
||||
}
|
||||
c.ProcessConfig.ExtraFiles = []*os.File{child}
|
||||
}()
|
||||
// wait for the container to exit.
|
||||
execOutput := <-execOutputChan
|
||||
|
||||
c.ProcessConfig.Env = container.Env
|
||||
c.ProcessConfig.Dir = container.RootFs
|
||||
|
||||
return &c.ProcessConfig.Cmd
|
||||
}, func() {
|
||||
if startCallback != nil {
|
||||
c.ContainerPid = c.ProcessConfig.Process.Pid
|
||||
startCallback(&c.ProcessConfig, c.ContainerPid)
|
||||
}
|
||||
})
|
||||
return &execdriver.ExitStatus{execOutput.exitCode, oomKill}, execOutput.err
|
||||
}
|
||||
|
||||
func (d *driver) Kill(p *execdriver.Command, sig int) error {
|
||||
|
|
|
@ -100,7 +100,7 @@ func (m *containerMonitor) Close() error {
|
|||
func (m *containerMonitor) Start() error {
|
||||
var (
|
||||
err error
|
||||
exitStatus int
|
||||
exitStatus *execdriver.ExitStatus
|
||||
// this variable indicates where we in execution flow:
|
||||
// before Run or after
|
||||
afterRun bool
|
||||
|
@ -150,9 +150,9 @@ func (m *containerMonitor) Start() error {
|
|||
// here container.Lock is already lost
|
||||
afterRun = true
|
||||
|
||||
m.resetMonitor(err == nil && exitStatus == 0)
|
||||
m.resetMonitor(err == nil && exitStatus.ExitCode == 0)
|
||||
|
||||
if m.shouldRestart(exitStatus) {
|
||||
if m.shouldRestart(exitStatus.ExitCode) {
|
||||
m.container.SetRestarting(exitStatus)
|
||||
m.container.LogEvent("die")
|
||||
m.resetContainer(true)
|
||||
|
@ -209,7 +209,7 @@ func (m *containerMonitor) waitForNextRestart() {
|
|||
|
||||
// shouldRestart checks the restart policy and applies the rules to determine if
|
||||
// the container's process should be restarted
|
||||
func (m *containerMonitor) shouldRestart(exitStatus int) bool {
|
||||
func (m *containerMonitor) shouldRestart(exitCode int) bool {
|
||||
m.mux.Lock()
|
||||
defer m.mux.Unlock()
|
||||
|
||||
|
@ -228,7 +228,7 @@ func (m *containerMonitor) shouldRestart(exitStatus int) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
return exitStatus != 0
|
||||
return exitCode != 0
|
||||
}
|
||||
|
||||
return false
|
||||
|
|
|
@ -5,6 +5,7 @@ import (
|
|||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/docker/docker/daemon/execdriver"
|
||||
"github.com/docker/docker/pkg/units"
|
||||
)
|
||||
|
||||
|
@ -13,6 +14,7 @@ type State struct {
|
|||
Running bool
|
||||
Paused bool
|
||||
Restarting bool
|
||||
OOMKilled bool
|
||||
Pid int
|
||||
ExitCode int
|
||||
Error string // contains last known error when starting the container
|
||||
|
@ -29,12 +31,16 @@ func NewState() *State {
|
|||
|
||||
// String returns a human-readable description of the state
|
||||
func (s *State) String() string {
|
||||
oomInfo := ""
|
||||
if s.OOMKilled {
|
||||
oomInfo = "possibly due to lack of memory"
|
||||
}
|
||||
if s.Running {
|
||||
if s.Paused {
|
||||
return fmt.Sprintf("Up %s (Paused)", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
|
||||
}
|
||||
if s.Restarting {
|
||||
return fmt.Sprintf("Restarting (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
|
||||
return fmt.Sprintf("Restarting (%d) %s ago %s", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)), oomInfo)
|
||||
}
|
||||
|
||||
return fmt.Sprintf("Up %s", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
|
||||
|
@ -44,7 +50,7 @@ func (s *State) String() string {
|
|||
return ""
|
||||
}
|
||||
|
||||
return fmt.Sprintf("Exited (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
|
||||
return fmt.Sprintf("Exited (%d) %s ago %s", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)), oomInfo)
|
||||
}
|
||||
|
||||
// StateString returns a single string to describe state
|
||||
|
@ -149,25 +155,29 @@ func (s *State) setRunning(pid int) {
|
|||
s.waitChan = make(chan struct{})
|
||||
}
|
||||
|
||||
func (s *State) SetStopped(exitCode int) {
|
||||
func (s *State) SetStopped(exitStatus *execdriver.ExitStatus) {
|
||||
s.Lock()
|
||||
s.setStopped(exitCode)
|
||||
s.setStopped(exitStatus)
|
||||
s.Unlock()
|
||||
}
|
||||
|
||||
func (s *State) setStopped(exitCode int) {
|
||||
func (s *State) setStopped(exitStatus *execdriver.ExitStatus) {
|
||||
s.Running = false
|
||||
s.Restarting = false
|
||||
s.Pid = 0
|
||||
s.FinishedAt = time.Now().UTC()
|
||||
s.ExitCode = exitCode
|
||||
s.ExitCode = exitStatus.ExitCode
|
||||
s.OOMKilled = false
|
||||
if exitStatus.OOMKilled {
|
||||
s.OOMKilled = true
|
||||
}
|
||||
close(s.waitChan) // fire waiters for stop
|
||||
s.waitChan = make(chan struct{})
|
||||
}
|
||||
|
||||
// SetRestarting is when docker handles the auto restart of containers when they are
|
||||
// in the middle of a stop and being restarted again
|
||||
func (s *State) SetRestarting(exitCode int) {
|
||||
func (s *State) SetRestarting(exitStatus *execdriver.ExitStatus) {
|
||||
s.Lock()
|
||||
// we should consider the container running when it is restarting because of
|
||||
// all the checks in docker around rm/stop/etc
|
||||
|
@ -175,7 +185,10 @@ func (s *State) SetRestarting(exitCode int) {
|
|||
s.Restarting = true
|
||||
s.Pid = 0
|
||||
s.FinishedAt = time.Now().UTC()
|
||||
s.ExitCode = exitCode
|
||||
s.ExitCode = exitStatus.ExitCode
|
||||
if exitStatus.OOMKilled {
|
||||
s.OOMKilled = true
|
||||
}
|
||||
close(s.waitChan) // fire waiters for stop
|
||||
s.waitChan = make(chan struct{})
|
||||
s.Unlock()
|
||||
|
|
|
@ -4,6 +4,8 @@ import (
|
|||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/docker/docker/daemon/execdriver"
|
||||
)
|
||||
|
||||
func TestStateRunStop(t *testing.T) {
|
||||
|
@ -47,7 +49,7 @@ func TestStateRunStop(t *testing.T) {
|
|||
atomic.StoreInt64(&exit, int64(exitCode))
|
||||
close(stopped)
|
||||
}()
|
||||
s.SetStopped(i)
|
||||
s.SetStopped(&execdriver.ExitStatus{i, false})
|
||||
if s.IsRunning() {
|
||||
t.Fatal("State is running")
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@ import (
|
|||
|
||||
log "github.com/Sirupsen/logrus"
|
||||
"github.com/docker/docker/daemon"
|
||||
"github.com/docker/docker/daemon/execdriver"
|
||||
"github.com/docker/docker/engine"
|
||||
"github.com/docker/docker/image"
|
||||
"github.com/docker/docker/nat"
|
||||
|
@ -652,7 +653,7 @@ func TestRestore(t *testing.T) {
|
|||
if err := container3.Run(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
container2.SetStopped(0)
|
||||
container2.SetStopped(&execdriver.ExitStatus{0, false})
|
||||
}
|
||||
|
||||
func TestDefaultContainerName(t *testing.T) {
|
||||
|
|
Loading…
Reference in a new issue