1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00

Merge branch 'libcontainer-in-docker' into add-libcontainer

Docker-DCO-1.1-Signed-off-by: Michael Crosby <michael@crosbymichael.com> (github: crosbymichael)
This commit is contained in:
Michael Crosby 2014-02-24 10:49:49 -08:00
commit 89dbdb1f71
20 changed files with 811 additions and 238 deletions

View file

@ -530,6 +530,7 @@ func (container *Container) Start() (err error) {
}
populateCommand(container)
container.command.Env = env
// Setup logging of stdout and stderr to disk
if err := container.runtime.LogToDisk(container.stdout, container.logPath("json"), "stdout"); err != nil {

View file

@ -6,6 +6,7 @@ import (
"github.com/kr/pty"
"io"
"os"
"os/exec"
)
func SetTerminal(command *execdriver.Command, pipes *execdriver.Pipes) error {
@ -26,8 +27,8 @@ func SetTerminal(command *execdriver.Command, pipes *execdriver.Pipes) error {
}
type TtyConsole struct {
master *os.File
slave *os.File
MasterPty *os.File
SlavePty *os.File
}
func NewTtyConsole(command *execdriver.Command, pipes *execdriver.Pipes) (*TtyConsole, error) {
@ -36,28 +37,28 @@ func NewTtyConsole(command *execdriver.Command, pipes *execdriver.Pipes) (*TtyCo
return nil, err
}
tty := &TtyConsole{
master: ptyMaster,
slave: ptySlave,
MasterPty: ptyMaster,
SlavePty: ptySlave,
}
if err := tty.attach(command, pipes); err != nil {
if err := tty.AttachPipes(&command.Cmd, pipes); err != nil {
tty.Close()
return nil, err
}
command.Console = tty.SlavePty.Name()
return tty, nil
}
func (t *TtyConsole) Master() *os.File {
return t.master
return t.MasterPty
}
func (t *TtyConsole) Resize(h, w int) error {
return term.SetWinsize(t.master.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
return term.SetWinsize(t.MasterPty.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
}
func (t *TtyConsole) attach(command *execdriver.Command, pipes *execdriver.Pipes) error {
command.Stdout = t.slave
command.Stderr = t.slave
command.Console = t.slave.Name()
func (t *TtyConsole) AttachPipes(command *exec.Cmd, pipes *execdriver.Pipes) error {
command.Stdout = t.SlavePty
command.Stderr = t.SlavePty
go func() {
if wb, ok := pipes.Stdout.(interface {
@ -65,24 +66,24 @@ func (t *TtyConsole) attach(command *execdriver.Command, pipes *execdriver.Pipes
}); ok {
defer wb.CloseWriters()
}
io.Copy(pipes.Stdout, t.master)
io.Copy(pipes.Stdout, t.MasterPty)
}()
if pipes.Stdin != nil {
command.Stdin = t.slave
command.Stdin = t.SlavePty
command.SysProcAttr.Setctty = true
go func() {
defer pipes.Stdin.Close()
io.Copy(t.master, pipes.Stdin)
io.Copy(t.MasterPty, pipes.Stdin)
}()
}
return nil
}
func (t *TtyConsole) Close() error {
t.slave.Close()
return t.master.Close()
t.SlavePty.Close()
return t.MasterPty.Close()
}
type StdConsole struct {
@ -91,13 +92,13 @@ type StdConsole struct {
func NewStdConsole(command *execdriver.Command, pipes *execdriver.Pipes) (*StdConsole, error) {
std := &StdConsole{}
if err := std.attach(command, pipes); err != nil {
if err := std.AttachPipes(&command.Cmd, pipes); err != nil {
return nil, err
}
return std, nil
}
func (s *StdConsole) attach(command *execdriver.Command, pipes *execdriver.Pipes) error {
func (s *StdConsole) AttachPipes(command *exec.Cmd, pipes *execdriver.Pipes) error {
command.Stdout = pipes.Stdout
command.Stderr = pipes.Stderr

View file

@ -0,0 +1,41 @@
package namespaces
import (
"github.com/dotcloud/docker/pkg/cgroups"
"github.com/dotcloud/docker/pkg/libcontainer"
)
// getDefaultTemplate builds the libcontainer configuration that docker uses
// as the starting point for every container: a fixed capability list, the
// ipc/net/mount/pid/uts namespaces and a "docker" cgroup without device
// access.
//
// NOTE(review): createContainer clears Capabilities for privileged
// containers, which suggests the list names capabilities to drop — confirm
// against libcontainer.
func getDefaultTemplate() *libcontainer.Container {
	tmpl := &libcontainer.Container{}

	tmpl.Capabilities = libcontainer.Capabilities{
		libcontainer.CAP_SETPCAP,
		libcontainer.CAP_SYS_MODULE,
		libcontainer.CAP_SYS_RAWIO,
		libcontainer.CAP_SYS_PACCT,
		libcontainer.CAP_SYS_ADMIN,
		libcontainer.CAP_SYS_NICE,
		libcontainer.CAP_SYS_RESOURCE,
		libcontainer.CAP_SYS_TIME,
		libcontainer.CAP_SYS_TTY_CONFIG,
		libcontainer.CAP_MKNOD,
		libcontainer.CAP_AUDIT_WRITE,
		libcontainer.CAP_AUDIT_CONTROL,
		libcontainer.CAP_MAC_ADMIN,
		libcontainer.CAP_MAC_OVERRIDE,
		libcontainer.CAP_NET_ADMIN,
	}

	tmpl.Namespaces = libcontainer.Namespaces{
		libcontainer.CLONE_NEWIPC,
		libcontainer.CLONE_NEWNET,
		libcontainer.CLONE_NEWNS,
		libcontainer.CLONE_NEWPID,
		libcontainer.CLONE_NEWUTS,
	}

	tmpl.Cgroups = &cgroups.Cgroup{
		Name:         "docker",
		DeviceAccess: false,
	}

	return tmpl
}

View file

@ -0,0 +1,216 @@
package namespaces
import (
"encoding/json"
"errors"
"fmt"
"github.com/dotcloud/docker/execdriver"
"github.com/dotcloud/docker/execdriver/lxc"
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/libcontainer/nsinit"
"io"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"strings"
"syscall"
)
// DriverName and Version identify this exec driver. Name() reports them
// joined as "<DriverName>-<Version>".
const (
DriverName = "namespaces"
Version = "0.1"
)
// ErrNotSupported is returned by the driver operations that this driver does
// not implement (Restore and GetPidsForContainer).
var (
ErrNotSupported = errors.New("not supported")
)
// init registers an init function for this driver with execdriver under
// DriverName. The registered function performs no work and returns nil.
func init() {
execdriver.RegisterInitFunc(DriverName, func(args *execdriver.InitArgs) error {
return nil
})
}
// driver is the namespaces exec driver. It holds no state of its own.
type driver struct {
}
// NewDriver returns a new namespaces driver. The returned error is always nil.
func NewDriver() (*driver, error) {
return &driver{}, nil
}
// Run derives the libcontainer configuration from c, writes it to
// container.json inside c.Rootfs and hands execution over to nsinit.Exec.
//
// The terminal implementation (tty or plain pipes) is selected from c.Tty and
// stored on c.Terminal before the container is started. The result is
// whatever nsinit.Exec returns, or -1 and the error when the configuration
// file cannot be written.
func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
	container := createContainer(c)
	factory := &dockerCommandFactory{c}
	stateWriter := &dockerStateWriter{
		callback: startCallback,
		c:        c,
		dsw:      &nsinit.DefaultStateWriter{c.Rootfs},
	}

	var term nsinit.Terminal
	if !c.Tty {
		term = &dockerStdTerm{pipes: pipes}
	} else {
		term = &dockerTtyTerm{pipes: pipes}
	}
	c.Terminal = term

	if err := writeContainerFile(container, c.Rootfs); err != nil {
		return -1, err
	}

	// The process to run is the entrypoint followed by its arguments.
	args := make([]string, 0, 1+len(c.Arguments))
	args = append(args, c.Entrypoint)
	args = append(args, c.Arguments...)

	return nsinit.Exec(container, factory, stateWriter, term, "/nsinit.log", args)
}
// Kill delivers signal number sig to the process of command p.
//
// The requested signal is honoured rather than unconditionally sending
// SIGKILL, so callers asking for a graceful stop (for example SIGTERM) get
// the signal they asked for. An error is returned when the command has no
// process yet, or when the signal cannot be delivered.
func (d *driver) Kill(p *execdriver.Command, sig int) error {
	if p.Process == nil {
		return errors.New("process not started")
	}
	return p.Process.Signal(syscall.Signal(sig))
}
// Restore is not implemented by this driver; it always returns
// ErrNotSupported.
func (d *driver) Restore(c *execdriver.Command) error {
return ErrNotSupported
}
// Info is not implemented by this driver; it returns nil for every id.
func (d *driver) Info(id string) execdriver.Info {
return nil
}
// Name returns the driver identifier in the form "<DriverName>-<Version>".
func (d *driver) Name() string {
	return DriverName + "-" + Version
}
// GetPidsForContainer is not implemented by this driver; it always returns a
// nil slice and ErrNotSupported.
func (d *driver) GetPidsForContainer(id string) ([]int, error) {
return nil, ErrNotSupported
}
// writeContainerFile serializes container as JSON and stores it in a file
// named container.json directly under rootfs. It returns the marshalling or
// write error, if any.
func writeContainerFile(container *libcontainer.Container, rootfs string) error {
	target := filepath.Join(rootfs, "container.json")

	encoded, err := json.Marshal(container)
	if err != nil {
		return err
	}
	return ioutil.WriteFile(target, encoded, 0755)
}
// getEnv returns the value stored for key in env, a list of "KEY=value"
// entries. The first entry whose key matches wins.
//
// Only the first "=" separates key from value, so values that themselves
// contain "=" are returned intact. An entry without "=" is treated as a key
// with an empty value instead of causing an out-of-range panic. The empty
// string is returned when key is not present.
func getEnv(key string, env []string) string {
	for _, pair := range env {
		parts := strings.SplitN(pair, "=", 2)
		if parts[0] != key {
			continue
		}
		if len(parts) == 2 {
			return parts[1]
		}
		// Bare "KEY" entry: there is no value to return.
		return ""
	}
	return ""
}
// dockerCommandFactory builds the init command by reconfiguring the exec.Cmd
// embedded in the driver's execdriver.Command instead of allocating a new one.
type dockerCommandFactory struct {
	c *execdriver.Command
}

// Create configures the wrapped command to run the "nsinit" binary found on
// PATH with the "init" sub-command followed by args, sets the clone flags
// that correspond to the container's namespaces, uses the container's
// environment and the command's Rootfs as working directory, and returns a
// pointer to the command's embedded exec.Cmd.
func (d *dockerCommandFactory) Create(container *libcontainer.Container,
	console, logFile string, syncFd uintptr, args []string) *exec.Cmd {
	cmd := d.c

	// NOTE(review): the lookup error is discarded. When nsinit is not on PATH
	// the path is empty and the failure only surfaces when the command starts.
	nsinitPath, _ := exec.LookPath("nsinit")

	initArgs := []string{
		nsinitPath,
		"-console", console,
		"-pipe", fmt.Sprint(syncFd),
		"-log", logFile,
		"init",
	}

	cmd.Path = nsinitPath
	cmd.Args = append(initArgs, args...)
	cmd.Dir = cmd.Rootfs
	cmd.Env = container.Env
	cmd.SysProcAttr = &syscall.SysProcAttr{
		Cloneflags: uintptr(nsinit.GetNamespaceFlags(container.Namespaces)),
	}

	return &cmd.Cmd
}
// dockerStateWriter wraps another nsinit.StateWriter and additionally invokes
// the exec driver's start callback when the pid is written.
type dockerStateWriter struct {
dsw nsinit.StateWriter // writer that WritePid and DeletePid delegate to
c *execdriver.Command // command handed to callback
callback execdriver.StartCallback // invoked from WritePid when non-nil
}
// WritePid forwards pid to the wrapped writer and then, when a callback is
// set, calls it with the command. The callback runs even if the wrapped
// writer failed; that error is returned afterwards.
func (d *dockerStateWriter) WritePid(pid int) error {
err := d.dsw.WritePid(pid)
if d.callback != nil {
d.callback(d.c)
}
return err
}
// DeletePid delegates to the wrapped writer.
func (d *dockerStateWriter) DeletePid() error {
return d.dsw.DeletePid()
}
// createContainer translates an execdriver.Command into a libcontainer
// configuration, starting from the default template.
//
// The hostname is taken from the HOSTNAME entry of the command's
// environment, and "container=docker" is appended to the environment.
// A veth network is configured when the command carries network settings,
// the capability list is cleared for privileged commands, and cgroup limits
// are copied when resources are specified.
func createContainer(c *execdriver.Command) *libcontainer.Container {
	cfg := getDefaultTemplate()

	cfg.Tty = c.Tty
	cfg.User = c.User
	cfg.WorkingDir = c.WorkingDir
	cfg.Hostname = getEnv("HOSTNAME", c.Env)
	cfg.Env = append(c.Env, "container=docker")

	if net := c.Network; net != nil {
		address := fmt.Sprintf("%s/%d", net.IPAddress, net.IPPrefixLen)
		cfg.Network = &libcontainer.Network{
			Type:    "veth",
			Mtu:     net.Mtu,
			Address: address,
			Gateway: net.Gateway,
			Context: libcontainer.Context{
				"prefix": "dock",
				"bridge": net.Bridge,
			},
		}
	}

	if c.Privileged {
		cfg.Capabilities = nil
	}

	if res := c.Resources; res != nil {
		cfg.Cgroups.CpuShares = res.CpuShares
		cfg.Cgroups.Memory = res.Memory
		cfg.Cgroups.MemorySwap = res.MemorySwap
	}

	return cfg
}
// dockerStdTerm is the terminal used for commands without a tty. It embeds
// lxc.StdConsole and remembers the pipes to attach the command to.
type dockerStdTerm struct {
lxc.StdConsole
pipes *execdriver.Pipes
}
// Attach connects cmd to the stored pipes through the embedded console's
// AttachPipes.
func (d *dockerStdTerm) Attach(cmd *exec.Cmd) error {
return d.AttachPipes(cmd, d.pipes)
}
// SetMaster ignores master; nothing is stored for a non-tty terminal.
func (d *dockerStdTerm) SetMaster(master *os.File) {
// do nothing
}
// dockerTtyTerm is the terminal used for commands with a tty. It embeds
// lxc.TtyConsole and remembers the pipes that are copied to and from the
// pty master.
type dockerTtyTerm struct {
lxc.TtyConsole
pipes *execdriver.Pipes
}
// Attach starts background copies from the pty master to the stdout pipe
// and, when a stdin pipe is set, from the stdin pipe to the master. cmd is
// not used. It always returns nil; copy errors are not reported.
func (t *dockerTtyTerm) Attach(cmd *exec.Cmd) error {
go io.Copy(t.pipes.Stdout, t.MasterPty)
if t.pipes.Stdin != nil {
go io.Copy(t.MasterPty, t.pipes.Stdin)
}
return nil
}
// SetMaster stores master as the embedded console's MasterPty.
func (t *dockerTtyTerm) SetMaster(master *os.File) {
t.MasterPty = master
}

View file

@ -0,0 +1,26 @@
package namespaces
import (
"github.com/dotcloud/docker/execdriver"
"github.com/dotcloud/docker/pkg/term"
"os"
)
// NsinitTerm wraps the master file of a terminal (presumably the pty master
// handed over by nsinit) so that it can be resized and closed.
type NsinitTerm struct {
	master *os.File
}

// NewTerm returns an NsinitTerm operating on master. pipes is currently
// unused.
func NewTerm(pipes *execdriver.Pipes, master *os.File) *NsinitTerm {
	return &NsinitTerm{master: master}
}

// Close closes the master file and returns the resulting error.
func (t *NsinitTerm) Close() error {
	return t.master.Close()
}

// Resize sets the window size of the master to h rows by w columns. It is a
// no-op returning nil when no master is set.
func (t *NsinitTerm) Resize(h, w int) error {
	if t.master == nil {
		return nil
	}
	size := &term.Winsize{Height: uint16(h), Width: uint16(w)}
	return term.SetWinsize(t.master.Fd(), size)
}

View file

@ -132,7 +132,7 @@ func (c *Cgroup) Apply(pid int) error {
// http://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups/
//
// we can pick any subsystem to find the root
cgroupRoot, err := FindCgroupMountpoint("memory")
cgroupRoot, err := FindCgroupMountpoint("cpu")
if err != nil {
return err
}

View file

@ -45,12 +45,17 @@ Sample `container.json` file:
"AUDIT_WRITE",
"AUDIT_CONTROL",
"MAC_OVERRIDE",
"MAC_ADMIN"
"MAC_ADMIN",
"NET_ADMIN"
],
"network": {
"type": "veth",
"context": {
"bridge": "docker0",
"prefix": "dock"
},
"address": "172.17.0.100/16",
"gateway": "172.17.42.1",
"bridge": "docker0",
"mtu": 1500
},
"cgroups": {

View file

@ -4,6 +4,10 @@ import (
"github.com/dotcloud/docker/pkg/cgroups"
)
// Context is a generic key value pair that allows
// arbitrary data to be sent
type Context map[string]string
// Container defines configuration options for how a
// container is setup inside a directory and how a process should be executed
type Container struct {
@ -24,8 +28,9 @@ type Container struct {
// The network configuration can be omited from a container causing the
// container to be setup with the host's networking stack
type Network struct {
Address string `json:"address,omitempty"`
Gateway string `json:"gateway,omitempty"`
Bridge string `json:"bridge,omitempty"`
Mtu int `json:"mtu,omitempty"`
Type string `json:"type,omitempty"` // type of networking to setup i.e. veth, macvlan, etc
Context Context `json:"context,omitempty"` // generic context for type specific networking options
Address string `json:"address,omitempty"`
Gateway string `json:"gateway,omitempty"`
Mtu int `json:"mtu,omitempty"`
}

View file

@ -28,12 +28,17 @@
"AUDIT_WRITE",
"AUDIT_CONTROL",
"MAC_OVERRIDE",
"MAC_ADMIN"
"MAC_ADMIN",
"NET_ADMIN"
],
"network": {
"type": "veth",
"context": {
"bridge": "docker0",
"prefix": "dock"
},
"address": "172.17.0.100/16",
"gateway": "172.17.42.1",
"bridge": "docker0",
"mtu": 1500
},
"cgroups": {

View file

@ -0,0 +1,32 @@
package network
import (
"errors"
"github.com/dotcloud/docker/pkg/libcontainer"
)
// ErrNotValidStrategyType is returned by GetStrategy when no strategy is
// registered for the requested type.
var (
ErrNotValidStrategyType = errors.New("not a valid network strategy type")
)
// strategies maps a network type name to the strategy implementing it.
var strategies = map[string]NetworkStrategy{
"veth": &Veth{},
}
// NetworkStrategy represents a specific network configuration for
// a container's networking stack.
//
// Create takes the network configuration and an int (the Veth implementation
// treats it as the pid of the namespaced process) and returns a context.
// Initialize takes the network configuration together with a context; the
// Veth implementation reads the "vethChild" entry produced by Create from it.
type NetworkStrategy interface {
Create(*libcontainer.Network, int) (libcontainer.Context, error)
Initialize(*libcontainer.Network, libcontainer.Context) error
}
// GetStrategy looks up the network strategy registered under tpe.
// ErrNotValidStrategyType is returned when nothing is registered for
// that type.
func GetStrategy(tpe string) (NetworkStrategy, error) {
	if strategy, ok := strategies[tpe]; ok {
		return strategy, nil
	}
	return nil, ErrNotValidStrategyType
}

View file

@ -0,0 +1,103 @@
package network
import (
"fmt"
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/libcontainer/utils"
"log"
)
// Veth is the network strategy registered for the "veth" network type. It
// connects a container to a bridge through a veth pair.
type Veth struct {
}

// Create creates a veth pair named with the "prefix" entry of the network
// context, attaches the first interface to the bridge named by the "bridge"
// entry, sets its MTU, brings it up and moves the second interface into the
// namespace of process nspid.
//
// The returned context carries the interface names under "vethHost" and
// "vethChild". If a step after the pair creation fails, that context is
// returned together with the error; before that point the context is nil.
func (v *Veth) Create(n *libcontainer.Network, nspid int) (libcontainer.Context, error) {
	log.Printf("creating veth network")

	bridge, ok := n.Context["bridge"]
	if !ok {
		return nil, fmt.Errorf("bridge does not exist in network context")
	}
	prefix, ok := n.Context["prefix"]
	if !ok {
		return nil, fmt.Errorf("veth prefix does not exist in network context")
	}

	hostIface, childIface, err := createVethPair(prefix)
	if err != nil {
		return nil, err
	}
	context := libcontainer.Context{
		"vethHost":  hostIface,
		"vethChild": childIface,
	}
	log.Printf("veth pair created %s <> %s", hostIface, childIface)

	// Configure the first interface of the pair.
	if err = SetInterfaceMaster(hostIface, bridge); err != nil {
		return context, err
	}
	if err = SetMtu(hostIface, n.Mtu); err != nil {
		return context, err
	}
	if err = InterfaceUp(hostIface); err != nil {
		return context, err
	}

	// Hand the second interface over to the namespaced process.
	log.Printf("setting %s inside %d namespace", childIface, nspid)
	if err = SetInterfaceInNamespacePid(childIface, nspid); err != nil {
		return context, err
	}
	return context, nil
}
// Initialize configures the interface named by the "vethChild" entry of
// context: it is brought down, renamed to eth0, given the configured address
// and MTU and brought up again. The lo interface gets the same MTU and is
// brought up, and the default gateway is set when one is configured.
//
// The steps run strictly in this order and the first failure aborts the
// sequence with an error naming the failed step.
func (v *Veth) Initialize(config *libcontainer.Network, context libcontainer.Context) error {
var (
vethChild string
exists bool
)
if vethChild, exists = context["vethChild"]; !exists {
return fmt.Errorf("vethChild does not exist in network context")
}
// The interface is taken down before it is renamed.
if err := InterfaceDown(vethChild); err != nil {
return fmt.Errorf("interface down %s %s", vethChild, err)
}
if err := ChangeInterfaceName(vethChild, "eth0"); err != nil {
return fmt.Errorf("change %s to eth0 %s", vethChild, err)
}
if err := SetInterfaceIp("eth0", config.Address); err != nil {
return fmt.Errorf("set eth0 ip %s", err)
}
if err := SetMtu("eth0", config.Mtu); err != nil {
return fmt.Errorf("set eth0 mtu to %d %s", config.Mtu, err)
}
if err := InterfaceUp("eth0"); err != nil {
return fmt.Errorf("eth0 up %s", err)
}
// lo receives the same MTU as eth0 before it is brought up.
if err := SetMtu("lo", config.Mtu); err != nil {
return fmt.Errorf("set lo mtu to %d %s", config.Mtu, err)
}
if err := InterfaceUp("lo"); err != nil {
return fmt.Errorf("lo up %s", err)
}
// The gateway is optional; an empty value leaves routing untouched.
if config.Gateway != "" {
if err := SetDefaultGateway(config.Gateway); err != nil {
return fmt.Errorf("set gateway to %s %s", config.Gateway, err)
}
}
return nil
}
// createVethPair generates two random interface names starting with prefix
// and creates a veth pair using them. The names generated so far are
// returned alongside any error.
func createVethPair(prefix string) (name1 string, name2 string, err error) {
	if name1, err = utils.GenerateRandomName(prefix, 4); err != nil {
		return name1, name2, err
	}
	if name2, err = utils.GenerateRandomName(prefix, 4); err != nil {
		return name1, name2, err
	}
	err = CreateVethPair(name1, name2)
	return name1, name2, err
}

View file

@ -0,0 +1,34 @@
package nsinit
import (
"fmt"
"github.com/dotcloud/docker/pkg/libcontainer"
"os"
"os/exec"
"syscall"
)
// CommandFactory creates the exec.Cmd that is started as the container's
// init process. Create receives the container configuration, the console
// path, the log file path, the file descriptor of the sync pipe and the
// arguments of the process to run.
type CommandFactory interface {
Create(container *libcontainer.Container, console, logFile string, syncFd uintptr, args []string) *exec.Cmd
}
// DefaultCommandFactory is the CommandFactory that re-executes the running
// binary as the container's init process.
type DefaultCommandFactory struct{}

// Create returns an exec.Cmd that runs the current binary (os.Args[0]) with
// the "init" sub-command followed by args. The command's clone flags are
// derived from the container's namespaces and its environment is the
// container's environment.
func (c *DefaultCommandFactory) Create(container *libcontainer.Container, console, logFile string, pipe uintptr, args []string) *exec.Cmd {
	initArgs := []string{
		"-console", console,
		"-pipe", fmt.Sprint(pipe),
		"-log", logFile,
		"init",
	}
	initArgs = append(initArgs, args...)

	// os.Args[0] is our own binary name, so we always reexec ourselves.
	cmd := exec.Command(os.Args[0], initArgs...)
	cmd.Env = container.Env
	cmd.SysProcAttr = &syscall.SysProcAttr{
		Cloneflags: uintptr(GetNamespaceFlags(container.Namespaces)),
	}
	return cmd
}

View file

@ -3,14 +3,9 @@
package nsinit
import (
"fmt"
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/libcontainer/network"
"github.com/dotcloud/docker/pkg/libcontainer/utils"
"github.com/dotcloud/docker/pkg/system"
"github.com/dotcloud/docker/pkg/term"
"io"
"io/ioutil"
"log"
"os"
"os/exec"
@ -19,102 +14,65 @@ import (
// Exec performes setup outside of a namespace so that a container can be
// executed. Exec is a high level function for working with container namespaces.
func Exec(container *libcontainer.Container, logFile string, args []string) (int, error) {
func Exec(container *libcontainer.Container,
factory CommandFactory, state StateWriter, term Terminal,
logFile string, args []string) (int, error) {
var (
master *os.File
console string
err error
inPipe io.WriteCloser
outPipe, errPipe io.ReadCloser
)
if container.Tty {
log.Printf("setting up master and console")
master, console, err = createMasterAndConsole()
if err != nil {
return -1, err
}
}
// create a pipe so that we can syncronize with the namespaced process and
// pass the veth name to the child
r, w, err := os.Pipe()
syncPipe, err := NewSyncPipe()
if err != nil {
return -1, err
}
system.UsetCloseOnExec(r.Fd())
command := createCommand(container, console, logFile, r.Fd(), args)
if !container.Tty {
log.Printf("opening pipes on command")
if inPipe, err = command.StdinPipe(); err != nil {
return -1, err
}
if outPipe, err = command.StdoutPipe(); err != nil {
return -1, err
}
if errPipe, err = command.StderrPipe(); err != nil {
if container.Tty {
log.Printf("setting up master and console")
master, console, err = CreateMasterAndConsole()
if err != nil {
return -1, err
}
term.SetMaster(master)
}
command := factory.Create(container, console, logFile, syncPipe.child.Fd(), args)
if err := term.Attach(command); err != nil {
return -1, err
}
defer term.Close()
log.Printf("staring init")
if err := command.Start(); err != nil {
return -1, err
}
log.Printf("writting state file")
if err := writePidFile(command); err != nil {
log.Printf("writing state file")
if err := state.WritePid(command.Process.Pid); err != nil {
command.Process.Kill()
return -1, err
}
defer deletePidFile()
defer func() {
log.Printf("removing state file")
state.DeletePid()
}()
// Do this before syncing with child so that no children
// can escape the cgroup
if container.Cgroups != nil {
log.Printf("setting up cgroups")
if err := container.Cgroups.Apply(command.Process.Pid); err != nil {
command.Process.Kill()
return -1, err
}
if err := SetupCgroups(container, command.Process.Pid); err != nil {
command.Process.Kill()
return -1, err
}
if container.Network != nil {
log.Printf("creating veth pair")
vethPair, err := initializeContainerVeth(container.Network.Bridge, container.Network.Mtu, command.Process.Pid)
if err != nil {
return -1, err
}
log.Printf("sending %s as veth pair name", vethPair)
sendVethName(w, vethPair)
if err := InitializeNetworking(container, command.Process.Pid, syncPipe); err != nil {
command.Process.Kill()
return -1, err
}
// Sync with child
log.Printf("closing sync pipes")
w.Close()
r.Close()
if container.Tty {
log.Printf("starting copy for tty")
go io.Copy(os.Stdout, master)
go io.Copy(master, os.Stdin)
state, err := setupWindow(master)
if err != nil {
command.Process.Kill()
return -1, err
}
defer term.RestoreTerminal(os.Stdin.Fd(), state)
} else {
log.Printf("starting copy for std pipes")
go func() {
defer inPipe.Close()
io.Copy(inPipe, os.Stdin)
}()
go io.Copy(os.Stdout, outPipe)
go io.Copy(os.Stderr, errPipe)
}
syncPipe.Close()
log.Printf("waiting on process")
if err := command.Wait(); err != nil {
@ -126,55 +84,38 @@ func Exec(container *libcontainer.Container, logFile string, args []string) (int
return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil
}
// sendVethName writes the veth pair name to the child's stdin then closes the
// pipe so that the child stops waiting for more data
func sendVethName(pipe io.Writer, name string) {
fmt.Fprint(pipe, name)
func SetupCgroups(container *libcontainer.Container, nspid int) error {
if container.Cgroups != nil {
log.Printf("setting up cgroups")
if err := container.Cgroups.Apply(nspid); err != nil {
return err
}
}
return nil
}
// initializeContainerVeth will create a veth pair and setup the host's
// side of the pair by setting the specified bridge as the master and bringing
// up the interface.
//
// Then will with set the other side of the veth pair into the container's namespaced
// using the pid and returns the veth's interface name to provide to the container to
// finish setting up the interface inside the namespace
func initializeContainerVeth(bridge string, mtu, nspid int) (string, error) {
name1, name2, err := createVethPair()
if err != nil {
return "", err
func InitializeNetworking(container *libcontainer.Container, nspid int, pipe *SyncPipe) error {
if container.Network != nil {
log.Printf("creating host network configuration type %s", container.Network.Type)
strategy, err := network.GetStrategy(container.Network.Type)
if err != nil {
return err
}
networkContext, err := strategy.Create(container.Network, nspid)
if err != nil {
return err
}
log.Printf("sending %v as network context", networkContext)
if err := pipe.SendToChild(networkContext); err != nil {
return err
}
}
log.Printf("veth pair created %s <> %s", name1, name2)
if err := network.SetInterfaceMaster(name1, bridge); err != nil {
return "", err
}
if err := network.SetMtu(name1, mtu); err != nil {
return "", err
}
if err := network.InterfaceUp(name1); err != nil {
return "", err
}
log.Printf("setting %s inside %d namespace", name2, nspid)
if err := network.SetInterfaceInNamespacePid(name2, nspid); err != nil {
return "", err
}
return name2, nil
return nil
}
func setupWindow(master *os.File) (*term.State, error) {
ws, err := term.GetWinsize(os.Stdin.Fd())
if err != nil {
return nil, err
}
if err := term.SetWinsize(master.Fd(), ws); err != nil {
return nil, err
}
return term.SetRawTerminal(os.Stdin.Fd())
}
// createMasterAndConsole will open /dev/ptmx on the host and retreive the
// CreateMasterAndConsole will open /dev/ptmx on the host and retreive the
// pts name for use as the pty slave inside the container
func createMasterAndConsole() (*os.File, string, error) {
func CreateMasterAndConsole() (*os.File, string, error) {
master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0)
if err != nil {
return nil, "", err
@ -188,45 +129,3 @@ func createMasterAndConsole() (*os.File, string, error) {
}
return master, console, nil
}
// createVethPair will automatically generage two random names for
// the veth pair and ensure that they have been created
func createVethPair() (name1 string, name2 string, err error) {
name1, err = utils.GenerateRandomName("dock", 4)
if err != nil {
return
}
name2, err = utils.GenerateRandomName("dock", 4)
if err != nil {
return
}
if err = network.CreateVethPair(name1, name2); err != nil {
return
}
return
}
// writePidFile writes the namespaced processes pid to .nspid in the rootfs for the container
func writePidFile(command *exec.Cmd) error {
return ioutil.WriteFile(".nspid", []byte(fmt.Sprint(command.Process.Pid)), 0655)
}
func deletePidFile() error {
return os.Remove(".nspid")
}
// createCommand will return an exec.Cmd with the Cloneflags set to the proper namespaces
// defined on the container's configuration and use the current binary as the init with the
// args provided
func createCommand(container *libcontainer.Container, console, logFile string, pipe uintptr, args []string) *exec.Cmd {
command := exec.Command("nsinit", append([]string{
"-console", console,
"-pipe", fmt.Sprint(pipe),
"-log", logFile,
"init"}, args...)...)
command.SysProcAttr = &syscall.SysProcAttr{
Cloneflags: uintptr(getNamespaceFlags(container.Namespaces)),
}
return command
}

View file

@ -8,17 +8,16 @@ import (
"github.com/dotcloud/docker/pkg/libcontainer/capabilities"
"github.com/dotcloud/docker/pkg/libcontainer/network"
"github.com/dotcloud/docker/pkg/system"
"io"
"io/ioutil"
"log"
"os"
"os/exec"
"path/filepath"
"syscall"
)
// Init is the init process that first runs inside a new namespace to setup mounts, users, networking,
// and other options required for the new container.
func Init(container *libcontainer.Container, uncleanRootfs, console string, pipe io.ReadCloser, args []string) error {
func Init(container *libcontainer.Container, uncleanRootfs, console string, syncPipe *SyncPipe, args []string) error {
rootfs, err := resolveRootfs(uncleanRootfs)
if err != nil {
return err
@ -26,19 +25,18 @@ func Init(container *libcontainer.Container, uncleanRootfs, console string, pipe
log.Printf("initializing namespace at %s", rootfs)
// We always read this as it is a way to sync with the parent as well
tempVethName, err := getVethName(pipe)
context, err := syncPipe.ReadFromParent()
if err != nil {
syncPipe.Close()
return err
}
if tempVethName != "" {
log.Printf("received veth name %s", tempVethName)
}
syncPipe.Close()
log.Printf("received context from parent %v", context)
if console != "" {
log.Printf("setting up console for %s", console)
// close pipes so that we can replace it with the pty
os.Stdin.Close()
os.Stdout.Close()
os.Stderr.Close()
closeStdPipes()
slave, err := openTerminal(console, syscall.O_RDWR)
if err != nil {
return fmt.Errorf("open terminal %s", err)
@ -61,7 +59,7 @@ func Init(container *libcontainer.Container, uncleanRootfs, console string, pipe
if err := setupNewMountNamespace(rootfs, console, container.ReadonlyFs); err != nil {
return fmt.Errorf("setup mount namespace %s", err)
}
if err := setupVethNetwork(container.Network, tempVethName); err != nil {
if err := setupNetwork(container.Network, context); err != nil {
return fmt.Errorf("setup networking %s", err)
}
if err := system.Sethostname(container.Hostname); err != nil {
@ -80,13 +78,27 @@ func Init(container *libcontainer.Container, uncleanRootfs, console string, pipe
return fmt.Errorf("chdir to %s %s", container.WorkingDir, err)
}
}
log.Printf("execing %s goodbye", args[0])
if err := system.Exec(args[0], args[0:], container.Env); err != nil {
return execArgs(args, container.Env)
}
func execArgs(args []string, env []string) error {
name, err := exec.LookPath(args[0])
if err != nil {
return err
}
log.Printf("execing %s goodbye", name)
if err := system.Exec(name, args[0:], env); err != nil {
return fmt.Errorf("exec %s", err)
}
panic("unreachable")
}
func closeStdPipes() {
os.Stdin.Close()
os.Stdout.Close()
os.Stderr.Close()
}
// resolveRootfs ensures that the current working directory is
// not a symlink and returns the absolute path to the rootfs
func resolveRootfs(uncleanRootfs string) (string, error) {
@ -139,46 +151,13 @@ func openTerminal(name string, flag int) (*os.File, error) {
// setupVethNetwork uses the Network config if it is not nil to initialize
// the new veth interface inside the container for use by changing the name to eth0
// setting the MTU and IP address along with the default gateway
func setupVethNetwork(config *libcontainer.Network, tempVethName string) error {
func setupNetwork(config *libcontainer.Network, context libcontainer.Context) error {
if config != nil {
if err := network.InterfaceDown(tempVethName); err != nil {
return fmt.Errorf("interface down %s %s", tempVethName, err)
}
if err := network.ChangeInterfaceName(tempVethName, "eth0"); err != nil {
return fmt.Errorf("change %s to eth0 %s", tempVethName, err)
}
if err := network.SetInterfaceIp("eth0", config.Address); err != nil {
return fmt.Errorf("set eth0 ip %s", err)
}
if err := network.SetMtu("eth0", config.Mtu); err != nil {
return fmt.Errorf("set eth0 mtu to %d %s", config.Mtu, err)
}
if err := network.InterfaceUp("eth0"); err != nil {
return fmt.Errorf("eth0 up %s", err)
}
if err := network.SetMtu("lo", config.Mtu); err != nil {
return fmt.Errorf("set lo mtu to %d %s", config.Mtu, err)
}
if err := network.InterfaceUp("lo"); err != nil {
return fmt.Errorf("lo up %s", err)
}
if config.Gateway != "" {
if err := network.SetDefaultGateway(config.Gateway); err != nil {
return fmt.Errorf("set gateway to %s %s", config.Gateway, err)
}
strategy, err := network.GetStrategy(config.Type)
if err != nil {
return err
}
return strategy.Initialize(config, context)
}
return nil
}
// getVethName reads from Stdin the temp veth name
// sent by the parent processes after the veth pair
// has been created and setup
func getVethName(pipe io.ReadCloser) (string, error) {
defer pipe.Close()
data, err := ioutil.ReadAll(pipe)
if err != nil {
return "", fmt.Errorf("error reading from stdin %s", err)
}
return string(data), nil
}

View file

@ -28,7 +28,7 @@ var namespaceFileMap = map[libcontainer.Namespace]string{
// getNamespaceFlags parses the container's Namespaces options to set the correct
// flags on clone, unshare, and setns
func getNamespaceFlags(namespaces libcontainer.Namespaces) (flag int) {
func GetNamespaceFlags(namespaces libcontainer.Namespaces) (flag int) {
for _, ns := range namespaces {
flag |= namespaceMap[ns]
}

View file

@ -57,7 +57,11 @@ func main() {
if nspid > 0 {
exitCode, err = nsinit.ExecIn(container, nspid, flag.Args()[1:])
} else {
exitCode, err = nsinit.Exec(container, logFile, flag.Args()[1:])
term := nsinit.NewTerminal(os.Stdin, os.Stdout, os.Stderr, container.Tty)
exitCode, err = nsinit.Exec(container,
&nsinit.DefaultCommandFactory{}, &nsinit.DefaultStateWriter{},
term,
logFile, flag.Args()[1:])
}
if err != nil {
log.Fatal(err)
@ -72,7 +76,11 @@ func main() {
if flag.NArg() < 2 {
log.Fatal(ErrWrongArguments)
}
if err := nsinit.Init(container, cwd, console, os.NewFile(uintptr(pipeFd), "pipe"), flag.Args()[1:]); err != nil {
syncPipe, err := nsinit.NewSyncPipeFromFd(0, uintptr(pipeFd))
if err != nil {
log.Fatal(err)
}
if err := nsinit.Init(container, cwd, console, syncPipe, flag.Args()[1:]); err != nil {
log.Fatal(err)
}
default:

View file

@ -0,0 +1,26 @@
package nsinit
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
)
// StateWriter records and removes the pid of the namespaced process.
// WritePid is handed the pid of the started process; DeletePid removes the
// recorded state again.
type StateWriter interface {
WritePid(pid int) error
DeletePid() error
}
// DefaultStateWriter keeps the pid of the namespaced process in a file named
// ".nspid" inside the Root directory. An empty Root resolves to the current
// working directory.
type DefaultStateWriter struct {
	Root string
}

// WritePid writes pid as decimal text to the .nspid file under Root,
// creating or truncating it. The file is a plain data file, so it is created
// with mode 0644 (before umask) rather than with execute bits set.
func (d *DefaultStateWriter) WritePid(pid int) error {
	return ioutil.WriteFile(filepath.Join(d.Root, ".nspid"), []byte(fmt.Sprint(pid)), 0644)
}

// DeletePid removes the .nspid file under Root and returns the removal error,
// if any (including when the file does not exist).
func (d *DefaultStateWriter) DeletePid() error {
	return os.Remove(filepath.Join(d.Root, ".nspid"))
}

View file

@ -0,0 +1,73 @@
package nsinit
import (
"encoding/json"
"fmt"
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/system"
"io/ioutil"
"os"
)
// SyncPipe allows communication to and from the child process
// to its parent and allows the two independent processes to
// synchronize their state.
//
// parent is the end written by SendToChild and child is the end read by
// ReadFromParent; either may be nil when only one end is held.
type SyncPipe struct {
parent, child *os.File
}
// NewSyncPipe creates an os.Pipe and returns a SyncPipe holding both ends:
// the read end as child and the write end as parent. The close-on-exec flag
// handling of the child end is adjusted through system.UsetCloseOnExec.
func NewSyncPipe() (s *SyncPipe, err error) {
	childEnd, parentEnd, err := os.Pipe()
	if err != nil {
		return nil, err
	}
	system.UsetCloseOnExec(childEnd.Fd())
	return &SyncPipe{parent: parentEnd, child: childEnd}, nil
}
// NewSyncPipeFromFd wraps an existing file descriptor in a SyncPipe.
// A positive parendFd takes precedence and becomes the parent end; otherwise
// a positive childFd becomes the child end. Only one end is ever set. An
// error is returned when neither descriptor is positive.
func NewSyncPipeFromFd(parendFd, childFd uintptr) (*SyncPipe, error) {
	pipe := &SyncPipe{}
	switch {
	case parendFd > 0:
		pipe.parent = os.NewFile(parendFd, "parendPipe")
	case childFd > 0:
		pipe.child = os.NewFile(childFd, "childPipe")
	default:
		return nil, fmt.Errorf("no valid sync pipe fd specified")
	}
	return pipe, nil
}
// SendToChild JSON-encodes context and writes it to the parent end of the
// pipe. It returns the encoding error or the write error, so that a failed
// or short write is no longer reported as success.
func (s *SyncPipe) SendToChild(context libcontainer.Context) error {
	data, err := json.Marshal(context)
	if err != nil {
		return err
	}
	if _, err := s.parent.Write(data); err != nil {
		return err
	}
	return nil
}
// ReadFromParent reads the child end of the pipe until EOF and decodes the
// data as a JSON-encoded context. When no data was sent, a nil context and
// a nil error are returned.
func (s *SyncPipe) ReadFromParent() (libcontainer.Context, error) {
	raw, err := ioutil.ReadAll(s.child)
	if err != nil {
		return nil, fmt.Errorf("error reading from sync pipe %s", err)
	}
	if len(raw) == 0 {
		return nil, nil
	}

	var context libcontainer.Context
	if err := json.Unmarshal(raw, &context); err != nil {
		return nil, err
	}
	return context, nil
}
// Close closes whichever ends of the pipe are set. Both ends are always
// attempted; the first close error encountered is returned instead of being
// silently discarded.
func (s *SyncPipe) Close() error {
	var firstErr error
	if s.parent != nil {
		if err := s.parent.Close(); err != nil {
			firstErr = err
		}
	}
	if s.child != nil {
		if err := s.child.Close(); err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}

View file

@ -0,0 +1,118 @@
package nsinit
import (
"github.com/dotcloud/docker/pkg/term"
"io"
"os"
"os/exec"
)
// Terminal abstracts how the container process's standard streams are
// connected. SetMaster hands over the pty master file, Attach wires the
// terminal to the command, Resize changes the window size and Close releases
// the terminal.
type Terminal interface {
io.Closer
SetMaster(*os.File)
Attach(*exec.Cmd) error
Resize(h, w int) error
}
// NewTerminal returns a Terminal bound to the given streams: a TtyTerminal
// when tty is true, a StdTerminal otherwise.
func NewTerminal(stdin io.Reader, stdout, stderr io.Writer, tty bool) Terminal {
	if !tty {
		return &StdTerminal{
			stdin:  stdin,
			stdout: stdout,
			stderr: stderr,
		}
	}
	return &TtyTerminal{
		stdin:  stdin,
		stdout: stdout,
		stderr: stderr,
	}
}
// TtyTerminal is the Terminal used when the container has a tty. Data is
// copied between the configured streams and the pty master.
type TtyTerminal struct {
stdin io.Reader
stdout, stderr io.Writer
master *os.File // set through SetMaster
state *term.State // terminal state saved by Attach, used by Close
}
// Resize sets the window size of the master to h rows by w columns.
func (t *TtyTerminal) Resize(h, w int) error {
return term.SetWinsize(t.master.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
}
// SetMaster stores master as the file used by Attach, Resize and Close.
func (t *TtyTerminal) SetMaster(master *os.File) {
t.master = master
}
// Attach starts copying data between the configured streams and the pty
// master, sizes the master like os.Stdin's terminal and switches os.Stdin to
// raw mode. The previous terminal state is kept so that Close can restore it.
//
// When the window setup fails the command's process is killed if it is
// already running, and the error is returned.
func (t *TtyTerminal) Attach(command *exec.Cmd) error {
	go io.Copy(t.stdout, t.master)
	go io.Copy(t.master, t.stdin)

	state, err := t.setupWindow(t.master, os.Stdin)
	if err != nil {
		// Attach may be invoked before the command has been started, in
		// which case Process is still nil and there is nothing to kill.
		if command.Process != nil {
			command.Process.Kill()
		}
		return err
	}
	t.state = state
	return nil
}
// setupWindow copies the window size of parent to master and then puts
// parent into raw mode, returning the state reported by term.SetRawTerminal.
func (t *TtyTerminal) setupWindow(master, parent *os.File) (*term.State, error) {
	parentFd := parent.Fd()

	size, err := term.GetWinsize(parentFd)
	if err != nil {
		return nil, err
	}
	if err = term.SetWinsize(master.Fd(), size); err != nil {
		return nil, err
	}
	return term.SetRawTerminal(parentFd)
}
// Close restores os.Stdin's terminal to the state saved by Attach and closes
// the master, returning the close error. The result of the restore call is
// not checked.
// NOTE(review): state is nil when Attach never completed — confirm that
// term.RestoreTerminal tolerates a nil state.
func (t *TtyTerminal) Close() error {
term.RestoreTerminal(os.Stdin.Fd(), t.state)
return t.master.Close()
}
// StdTerminal is the Terminal used when the container has no tty; the
// command's standard streams are connected to the configured streams.
type StdTerminal struct {
stdin io.Reader
stdout, stderr io.Writer
}
// SetMaster is a no-op: there is no pty master without a tty.
func (s *StdTerminal) SetMaster(*os.File) {
// no need to set master on non tty
}
// Close is a no-op and always returns nil.
func (s *StdTerminal) Close() error {
return nil
}
// Resize is a no-op and always returns nil.
func (s *StdTerminal) Resize(h, w int) error {
return nil
}
// Attach requests stdin, stdout and stderr pipes from command and starts
// background copies between them and the configured streams. The command's
// stdin pipe is closed once copying from the configured stdin ends. An error
// is returned only when one of the pipes cannot be created.
func (s *StdTerminal) Attach(command *exec.Cmd) error {
	stdinPipe, err := command.StdinPipe()
	if err != nil {
		return err
	}
	stdoutPipe, err := command.StdoutPipe()
	if err != nil {
		return err
	}
	stderrPipe, err := command.StderrPipe()
	if err != nil {
		return err
	}

	go io.Copy(s.stderr, stderrPipe)
	go io.Copy(s.stdout, stdoutPipe)
	go func() {
		defer stdinPipe.Close()
		io.Copy(stdinPipe, s.stdin)
	}()
	return nil
}

View file

@ -7,7 +7,8 @@ import (
"github.com/dotcloud/docker/dockerversion"
"github.com/dotcloud/docker/engine"
"github.com/dotcloud/docker/execdriver"
"github.com/dotcloud/docker/execdriver/lxc"
_ "github.com/dotcloud/docker/execdriver/lxc"
"github.com/dotcloud/docker/execdriver/namespaces"
"github.com/dotcloud/docker/graphdriver"
"github.com/dotcloud/docker/graphdriver/aufs"
_ "github.com/dotcloud/docker/graphdriver/btrfs"
@ -703,7 +704,7 @@ func NewRuntimeFromDirectory(config *DaemonConfig, eng *engine.Engine) (*Runtime
sysInfo := sysinfo.New(false)
ed, err := lxc.NewDriver(config.Root, sysInfo.AppArmor)
ed, err := namespaces.NewDriver()
if err != nil {
return nil, err
}