mirror of https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00

b3e527dfd2
It will tar up contents of child directory onto tmpfs if mounted over.

This patch will use the new PreMount and PostMount hooks to "tar" up the contents of the base image on top of tmpfs mount points.

Signed-off-by: Dan Walsh <dwalsh@redhat.com>
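As a rough illustration of the mechanism this commit describes, a pre-mount step can archive a directory's current contents and a post-mount step can unpack them onto the freshly mounted (and therefore empty) tmpfs. This is a minimal sketch only: the helper names below are hypothetical, do not reflect the actual execdriver hook API, and assume a system tar binary is available.

// Hypothetical sketch only: not the real PreMount/PostMount hook signatures.
package main

import (
	"bytes"
	"os/exec"
)

// preMount archives the current contents of dir into an in-memory buffer
// before a tmpfs is mounted over it.
func preMount(dir string) (*bytes.Buffer, error) {
	buf := new(bytes.Buffer)
	// "-C dir ." archives the directory's contents relative to dir itself.
	cmd := exec.Command("tar", "-c", "-C", dir, ".")
	cmd.Stdout = buf
	if err := cmd.Run(); err != nil {
		return nil, err
	}
	return buf, nil
}

// postMount unpacks the saved archive onto dir after the tmpfs mount,
// restoring the base image's files on top of the new mount point.
func postMount(dir string, archive *bytes.Buffer) error {
	cmd := exec.Command("tar", "-x", "-C", dir)
	cmd.Stdin = archive
	return cmd.Run()
}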
550 lines · 13 KiB · Go
// +build linux,cgo

package native

import (
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/Sirupsen/logrus"
	"github.com/docker/docker/daemon/execdriver"
	"github.com/docker/docker/pkg/parsers"
	"github.com/docker/docker/pkg/pools"
	"github.com/docker/docker/pkg/reexec"
	sysinfo "github.com/docker/docker/pkg/system"
	"github.com/docker/docker/pkg/term"
	"github.com/opencontainers/runc/libcontainer"
	"github.com/opencontainers/runc/libcontainer/apparmor"
	"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
	"github.com/opencontainers/runc/libcontainer/configs"
	"github.com/opencontainers/runc/libcontainer/system"
	"github.com/opencontainers/runc/libcontainer/utils"
)

// Define constants for native driver
const (
	DriverName = "native"
	Version    = "0.2"
)

// Driver contains all information for native driver,
// it implements execdriver.Driver.
type Driver struct {
	root             string
	activeContainers map[string]libcontainer.Container
	machineMemory    int64
	factory          libcontainer.Factory
	sync.Mutex
}

// NewDriver returns a new native driver, called from NewDriver of execdriver.
func NewDriver(root string, options []string) (*Driver, error) {
	meminfo, err := sysinfo.ReadMemInfo()
	if err != nil {
		return nil, err
	}

	if err := sysinfo.MkdirAll(root, 0700); err != nil {
		return nil, err
	}

	if apparmor.IsEnabled() {
		if err := installAppArmorProfile(); err != nil {
			apparmorProfiles := []string{"docker-default"}

			// Allow the daemon to run if loading failed but the profiles are
			// already active (possibly loaded by another run, manually, or
			// via system startup).
			for _, policy := range apparmorProfiles {
				if err := hasAppArmorProfileLoaded(policy); err != nil {
					return nil, fmt.Errorf("AppArmor enabled on system but the %s profile could not be loaded.", policy)
				}
			}
		}
	}

	// Choose the cgroup manager. This makes sure there are no breaking changes
	// for people who upgrade from versions without the native.cgroupdriver opt.
	cgm := libcontainer.Cgroupfs

	// parse the options
	for _, option := range options {
		key, val, err := parsers.ParseKeyValueOpt(option)
		if err != nil {
			return nil, err
		}
		key = strings.ToLower(key)
		switch key {
		case "native.cgroupdriver":
			// override the default if they set options
			switch val {
			case "systemd":
				if systemd.UseSystemd() {
					cgm = libcontainer.SystemdCgroups
				} else {
					// warn them that they chose the wrong driver
					logrus.Warn("You cannot use systemd as native.cgroupdriver, using cgroupfs instead")
				}
			case "cgroupfs":
				cgm = libcontainer.Cgroupfs
			default:
				return nil, fmt.Errorf("Unknown native.cgroupdriver given %q. try cgroupfs or systemd", val)
			}
		default:
			return nil, fmt.Errorf("Unknown option %s\n", key)
		}
	}

	f, err := libcontainer.New(
		root,
		cgm,
		libcontainer.InitPath(reexec.Self(), DriverName),
	)
	if err != nil {
		return nil, err
	}

	return &Driver{
		root:             root,
		activeContainers: make(map[string]libcontainer.Container),
		machineMemory:    meminfo.MemTotal,
		factory:          f,
	}, nil
}

type execOutput struct {
	exitCode int
	err      error
}

// Run implements the exec driver Driver interface,
// it calls libcontainer APIs to run a container.
func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execdriver.Hooks) (execdriver.ExitStatus, error) {
	destroyed := false
	var err error
	c.TmpDir, err = ioutil.TempDir("", c.ID)
	if err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}
	defer os.RemoveAll(c.TmpDir)

	// take the Command and populate the libcontainer.Config from it
	container, err := d.createContainer(c, hooks)
	if err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}

	p := &libcontainer.Process{
		Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...),
		Env:  c.ProcessConfig.Env,
		Cwd:  c.WorkingDir,
		User: c.ProcessConfig.User,
	}

	if err := setupPipes(container, &c.ProcessConfig, p, pipes); err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}

	cont, err := d.factory.Create(c.ID, container)
	if err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}
	d.Lock()
	d.activeContainers[c.ID] = cont
	d.Unlock()
	defer func() {
		if !destroyed {
			cont.Destroy()
		}
		d.cleanContainer(c.ID)
	}()

	if err := cont.Start(p); err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}

	oom := notifyOnOOM(cont)
	if hooks.Start != nil {
		pid, err := p.Pid()
		if err != nil {
			p.Signal(os.Kill)
			p.Wait()
			return execdriver.ExitStatus{ExitCode: -1}, err
		}
		hooks.Start(&c.ProcessConfig, pid, oom)
	}

	waitF := p.Wait
	if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) {
		// We need this hack to track processes with inherited fds,
		// because cmd.Wait() waits for all streams to be copied.
		waitF = waitInPIDHost(p, cont)
	}
	ps, err := waitF()
	if err != nil {
		execErr, ok := err.(*exec.ExitError)
		if !ok {
			return execdriver.ExitStatus{ExitCode: -1}, err
		}
		ps = execErr.ProcessState
	}
	cont.Destroy()
	destroyed = true
	_, oomKill := <-oom
	return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil
}

// notifyOnOOM returns a channel that signals if the container received an OOM notification
// for any process. If it is unable to subscribe to OOM notifications then a closed
// channel is returned as it will be non-blocking and return the correct result when read.
func notifyOnOOM(container libcontainer.Container) <-chan struct{} {
	oom, err := container.NotifyOOM()
	if err != nil {
		logrus.Warnf("Your kernel does not support OOM notifications: %s", err)
		c := make(chan struct{})
		close(c)
		return c
	}
	return oom
}

func killCgroupProcs(c libcontainer.Container) {
	var procs []*os.Process
	if err := c.Pause(); err != nil {
		logrus.Warn(err)
	}
	pids, err := c.Processes()
	if err != nil {
		// Don't care about the children if we can't get them; this is mostly
		// because the cgroup has already been deleted.
		logrus.Warnf("Failed to get processes from container %s: %v", c.ID(), err)
	}
	for _, pid := range pids {
		if p, err := os.FindProcess(pid); err == nil {
			procs = append(procs, p)
			if err := p.Kill(); err != nil {
				logrus.Warn(err)
			}
		}
	}
	if err := c.Resume(); err != nil {
		logrus.Warn(err)
	}
	for _, p := range procs {
		if _, err := p.Wait(); err != nil {
			logrus.Warn(err)
		}
	}
}

func waitInPIDHost(p *libcontainer.Process, c libcontainer.Container) func() (*os.ProcessState, error) {
	return func() (*os.ProcessState, error) {
		pid, err := p.Pid()
		if err != nil {
			return nil, err
		}

		process, err := os.FindProcess(pid)
		s, err := process.Wait()
		if err != nil {
			execErr, ok := err.(*exec.ExitError)
			if !ok {
				return s, err
			}
			s = execErr.ProcessState
		}
		killCgroupProcs(c)
		p.Wait()
		return s, err
	}
}

// Kill implements the exec driver Driver interface.
func (d *Driver) Kill(c *execdriver.Command, sig int) error {
	d.Lock()
	active := d.activeContainers[c.ID]
	d.Unlock()
	if active == nil {
		return fmt.Errorf("active container for %s does not exist", c.ID)
	}
	state, err := active.State()
	if err != nil {
		return err
	}
	return syscall.Kill(state.InitProcessPid, syscall.Signal(sig))
}

// Pause implements the exec driver Driver interface,
// it calls libcontainer API to pause a container.
func (d *Driver) Pause(c *execdriver.Command) error {
	d.Lock()
	active := d.activeContainers[c.ID]
	d.Unlock()
	if active == nil {
		return fmt.Errorf("active container for %s does not exist", c.ID)
	}
	return active.Pause()
}

// Unpause implements the exec driver Driver interface,
// it calls libcontainer API to unpause a container.
func (d *Driver) Unpause(c *execdriver.Command) error {
	d.Lock()
	active := d.activeContainers[c.ID]
	d.Unlock()
	if active == nil {
		return fmt.Errorf("active container for %s does not exist", c.ID)
	}
	return active.Resume()
}

// Terminate implements the exec driver Driver interface.
func (d *Driver) Terminate(c *execdriver.Command) error {
	defer d.cleanContainer(c.ID)
	container, err := d.factory.Load(c.ID)
	if err != nil {
		return err
	}
	defer container.Destroy()
	state, err := container.State()
	if err != nil {
		return err
	}
	pid := state.InitProcessPid
	currentStartTime, err := system.GetProcessStartTime(pid)
	if err != nil {
		return err
	}
	if state.InitProcessStartTime == currentStartTime {
		err = syscall.Kill(pid, 9)
		syscall.Wait4(pid, nil, 0, nil)
	}
	return err
}

// Info implements the exec driver Driver interface.
func (d *Driver) Info(id string) execdriver.Info {
	return &info{
		ID:     id,
		driver: d,
	}
}

// Name implements the exec driver Driver interface.
func (d *Driver) Name() string {
	return fmt.Sprintf("%s-%s", DriverName, Version)
}

// GetPidsForContainer implements the exec driver Driver interface.
func (d *Driver) GetPidsForContainer(id string) ([]int, error) {
	d.Lock()
	active := d.activeContainers[id]
	d.Unlock()

	if active == nil {
		return nil, fmt.Errorf("active container for %s does not exist", id)
	}
	return active.Processes()
}

func (d *Driver) cleanContainer(id string) error {
	d.Lock()
	delete(d.activeContainers, id)
	d.Unlock()
	return os.RemoveAll(filepath.Join(d.root, id))
}

func (d *Driver) createContainerRoot(id string) error {
	return os.MkdirAll(filepath.Join(d.root, id), 0655)
}

// Clean implements the exec driver Driver interface.
func (d *Driver) Clean(id string) error {
	return os.RemoveAll(filepath.Join(d.root, id))
}

// Stats implements the exec driver Driver interface.
func (d *Driver) Stats(id string) (*execdriver.ResourceStats, error) {
	d.Lock()
	c := d.activeContainers[id]
	d.Unlock()
	if c == nil {
		return nil, execdriver.ErrNotRunning
	}
	now := time.Now()
	stats, err := c.Stats()
	if err != nil {
		return nil, err
	}
	memoryLimit := c.Config().Cgroups.Memory
	// If the container does not have any memory limit specified, set the
	// limit to the machine's memory.
	if memoryLimit == 0 {
		memoryLimit = d.machineMemory
	}
	return &execdriver.ResourceStats{
		Stats:       stats,
		Read:        now,
		MemoryLimit: memoryLimit,
	}, nil
}

// TtyConsole implements the exec driver Terminal interface.
type TtyConsole struct {
	console libcontainer.Console
}

// NewTtyConsole returns a new TtyConsole struct.
func NewTtyConsole(console libcontainer.Console, pipes *execdriver.Pipes) (*TtyConsole, error) {
	tty := &TtyConsole{
		console: console,
	}

	if err := tty.AttachPipes(pipes); err != nil {
		tty.Close()
		return nil, err
	}

	return tty, nil
}

// Resize implements Resize method of Terminal interface
func (t *TtyConsole) Resize(h, w int) error {
	return term.SetWinsize(t.console.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
}

// AttachPipes attaches given pipes to TtyConsole
func (t *TtyConsole) AttachPipes(pipes *execdriver.Pipes) error {
	go func() {
		if wb, ok := pipes.Stdout.(interface {
			CloseWriters() error
		}); ok {
			defer wb.CloseWriters()
		}

		pools.Copy(pipes.Stdout, t.console)
	}()

	if pipes.Stdin != nil {
		go func() {
			pools.Copy(t.console, pipes.Stdin)

			pipes.Stdin.Close()
		}()
	}

	return nil
}

// Close implements Close method of Terminal interface
func (t *TtyConsole) Close() error {
	return t.console.Close()
}

func setupPipes(container *configs.Config, processConfig *execdriver.ProcessConfig, p *libcontainer.Process, pipes *execdriver.Pipes) error {

	rootuid, err := container.HostUID()
	if err != nil {
		return err
	}

	if processConfig.Tty {
		cons, err := p.NewConsole(rootuid)
		if err != nil {
			return err
		}
		term, err := NewTtyConsole(cons, pipes)
		if err != nil {
			return err
		}
		processConfig.Terminal = term
		return nil
	}
	// not a tty: set up stdio pipes
	term := &execdriver.StdConsole{}
	processConfig.Terminal = term

	// If we are not in a user namespace, there is no reason to go through
	// the hassle of setting up os-level pipes with proper (remapped) ownership,
	// so we will use the prior shortcut for non-userns containers.
	if rootuid == 0 {
		p.Stdout = pipes.Stdout
		p.Stderr = pipes.Stderr

		r, w, err := os.Pipe()
		if err != nil {
			return err
		}
		if pipes.Stdin != nil {
			go func() {
				io.Copy(w, pipes.Stdin)
				w.Close()
			}()
			p.Stdin = r
		}
		return nil
	}

	// If we have user namespaces enabled (rootuid != 0), we will set
	// up os pipes for stderr, stdout, stdin so we can chown them to
	// the proper ownership to allow for proper access to the underlying
	// fds.
	var fds []int

	// setup stdout
	r, w, err := os.Pipe()
	if err != nil {
		return err
	}
	fds = append(fds, int(r.Fd()), int(w.Fd()))
	if pipes.Stdout != nil {
		go io.Copy(pipes.Stdout, r)
	}
	term.Closers = append(term.Closers, r)
	p.Stdout = w

	// setup stderr
	r, w, err = os.Pipe()
	if err != nil {
		return err
	}
	fds = append(fds, int(r.Fd()), int(w.Fd()))
	if pipes.Stderr != nil {
		go io.Copy(pipes.Stderr, r)
	}
	term.Closers = append(term.Closers, r)
	p.Stderr = w

	// setup stdin
	r, w, err = os.Pipe()
	if err != nil {
		return err
	}
	fds = append(fds, int(r.Fd()), int(w.Fd()))
	if pipes.Stdin != nil {
		go func() {
			io.Copy(w, pipes.Stdin)
			w.Close()
		}()
		p.Stdin = r
	}
	for _, fd := range fds {
		if err := syscall.Fchown(fd, rootuid, rootuid); err != nil {
			return fmt.Errorf("Failed to chown pipes fd: %v", err)
		}
	}
	return nil
}

// SupportsHooks implements the execdriver Driver interface.
// The libcontainer/runC-based native execdriver does exploit the hook mechanism.
func (d *Driver) SupportsHooks() bool {
	return true
}