From ba025cb75cceaa8536d0d512889ea86f13349950 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 21 Feb 2014 15:32:50 -0800 Subject: [PATCH 01/13] Use os.Args[0] as name to reexec Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/nsinit/exec.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index 24e722a22f..ba548a2bd7 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -219,7 +219,9 @@ func deletePidFile() error { // defined on the container's configuration and use the current binary as the init with the // args provided func createCommand(container *libcontainer.Container, console, logFile string, pipe uintptr, args []string) *exec.Cmd { - command := exec.Command("nsinit", append([]string{ + // get our binary name so we can always reexec ourself + name := os.Args[0] + command := exec.Command(name, append([]string{ "-console", console, "-pipe", fmt.Sprint(pipe), "-log", logFile, From a352ecb01a788eff3446fe12191ca0434fce1eed Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 21 Feb 2014 16:17:18 -0800 Subject: [PATCH 02/13] Use lookup path for init Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/nsinit/exec.go | 1 + pkg/libcontainer/nsinit/init.go | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index ba548a2bd7..80fe8495ff 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -230,5 +230,6 @@ func createCommand(container *libcontainer.Container, console, logFile string, p command.SysProcAttr = &syscall.SysProcAttr{ Cloneflags: uintptr(getNamespaceFlags(container.Namespaces)), } + command.Env = container.Env return command } diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index 
8fc5f3d05c..04716ba645 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -12,6 +12,7 @@ import ( "io/ioutil" "log" "os" + "os/exec" "path/filepath" "syscall" ) @@ -80,8 +81,13 @@ func Init(container *libcontainer.Container, uncleanRootfs, console string, pipe return fmt.Errorf("chdir to %s %s", container.WorkingDir, err) } } - log.Printf("execing %s goodbye", args[0]) - if err := system.Exec(args[0], args[0:], container.Env); err != nil { + name, err := exec.LookPath(args[0]) + if err != nil { + return err + } + + log.Printf("execing %s goodbye", name) + if err := system.Exec(name, args[0:], container.Env); err != nil { return fmt.Errorf("exec %s", err) } panic("unreachable") From c8fd81c27821576f339ccf4fd85c47375ba34042 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 21 Feb 2014 16:28:43 -0800 Subject: [PATCH 03/13] Pass pipes into Exec function Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/nsinit/exec.go | 14 +++++++------- pkg/libcontainer/nsinit/nsinit/main.go | 4 +++- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index 80fe8495ff..98f5209f03 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -19,7 +19,7 @@ import ( // Exec performes setup outside of a namespace so that a container can be // executed. Exec is a high level function for working with container namespaces. 
-func Exec(container *libcontainer.Container, logFile string, args []string) (int, error) { +func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io.Writer, logFile string, args []string) (int, error) { var ( master *os.File console string @@ -97,23 +97,23 @@ func Exec(container *libcontainer.Container, logFile string, args []string) (int if container.Tty { log.Printf("starting copy for tty") - go io.Copy(os.Stdout, master) - go io.Copy(master, os.Stdin) + go io.Copy(stdout, master) + go io.Copy(master, stdin) state, err := setupWindow(master) if err != nil { command.Process.Kill() return -1, err } - defer term.RestoreTerminal(os.Stdin.Fd(), state) + defer term.RestoreTerminal(uintptr(syscall.Stdin), state) } else { log.Printf("starting copy for std pipes") go func() { defer inPipe.Close() - io.Copy(inPipe, os.Stdin) + io.Copy(inPipe, stdin) }() - go io.Copy(os.Stdout, outPipe) - go io.Copy(os.Stderr, errPipe) + go io.Copy(stdout, outPipe) + go io.Copy(stderr, errPipe) } log.Printf("waiting on process") diff --git a/pkg/libcontainer/nsinit/nsinit/main.go b/pkg/libcontainer/nsinit/nsinit/main.go index 0873c09fe0..e6e3827713 100644 --- a/pkg/libcontainer/nsinit/nsinit/main.go +++ b/pkg/libcontainer/nsinit/nsinit/main.go @@ -57,7 +57,9 @@ func main() { if nspid > 0 { exitCode, err = nsinit.ExecIn(container, nspid, flag.Args()[1:]) } else { - exitCode, err = nsinit.Exec(container, logFile, flag.Args()[1:]) + exitCode, err = nsinit.Exec(container, + os.Stdin, os.Stdout, os.Stderr, + logFile, flag.Args()[1:]) } if err != nil { log.Fatal(err) From 332755b99d345a8ffbf4fb636ca8fed604a233c0 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 21 Feb 2014 16:40:32 -0800 Subject: [PATCH 04/13] Pass tty master to Exec Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/nsinit/exec.go | 3 +-- pkg/libcontainer/nsinit/nsinit/main.go | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git 
a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index 98f5209f03..3622196b78 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -19,9 +19,8 @@ import ( // Exec performes setup outside of a namespace so that a container can be // executed. Exec is a high level function for working with container namespaces. -func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io.Writer, logFile string, args []string) (int, error) { +func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io.Writer, master *os.File, logFile string, args []string) (int, error) { var ( - master *os.File console string err error diff --git a/pkg/libcontainer/nsinit/nsinit/main.go b/pkg/libcontainer/nsinit/nsinit/main.go index e6e3827713..28d42d4643 100644 --- a/pkg/libcontainer/nsinit/nsinit/main.go +++ b/pkg/libcontainer/nsinit/nsinit/main.go @@ -58,7 +58,7 @@ func main() { exitCode, err = nsinit.ExecIn(container, nspid, flag.Args()[1:]) } else { exitCode, err = nsinit.Exec(container, - os.Stdin, os.Stdout, os.Stderr, + os.Stdin, os.Stdout, os.Stderr, nil, logFile, flag.Args()[1:]) } if err != nil { From 2419e63d243255ef38f16799ffdc64084aa18fe4 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 21 Feb 2014 17:11:57 -0800 Subject: [PATCH 05/13] Initial commit of libcontainer running docker Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- container.go | 1 + execdriver/namespaces/default_template.go | 41 +++ execdriver/namespaces/driver.go | 349 ++++++++++++++++++++++ execdriver/namespaces/term.go | 26 ++ pkg/libcontainer/nsinit/exec.go | 2 +- pkg/libcontainer/nsinit/ns_linux.go | 2 +- runtime.go | 5 +- 7 files changed, 422 insertions(+), 4 deletions(-) create mode 100644 execdriver/namespaces/default_template.go create mode 100644 execdriver/namespaces/driver.go create mode 100644 execdriver/namespaces/term.go diff --git a/container.go b/container.go index 
ca53bb57c7..76e51cdad3 100644 --- a/container.go +++ b/container.go @@ -530,6 +530,7 @@ func (container *Container) Start() (err error) { } populateCommand(container) + container.command.Env = env // Setup logging of stdout and stderr to disk if err := container.runtime.LogToDisk(container.stdout, container.logPath("json"), "stdout"); err != nil { diff --git a/execdriver/namespaces/default_template.go b/execdriver/namespaces/default_template.go new file mode 100644 index 0000000000..79b6ac1c11 --- /dev/null +++ b/execdriver/namespaces/default_template.go @@ -0,0 +1,41 @@ +package namespaces + +import ( + "github.com/dotcloud/docker/pkg/cgroups" + "github.com/dotcloud/docker/pkg/libcontainer" +) + +// getDefaultTemplate returns the docker default for +// the libcontainer configuration file +func getDefaultTemplate() *libcontainer.Container { + return &libcontainer.Container{ + Capabilities: libcontainer.Capabilities{ + libcontainer.CAP_SETPCAP, + libcontainer.CAP_SYS_MODULE, + libcontainer.CAP_SYS_RAWIO, + libcontainer.CAP_SYS_PACCT, + libcontainer.CAP_SYS_ADMIN, + libcontainer.CAP_SYS_NICE, + libcontainer.CAP_SYS_RESOURCE, + libcontainer.CAP_SYS_TIME, + libcontainer.CAP_SYS_TTY_CONFIG, + libcontainer.CAP_MKNOD, + libcontainer.CAP_AUDIT_WRITE, + libcontainer.CAP_AUDIT_CONTROL, + libcontainer.CAP_MAC_ADMIN, + libcontainer.CAP_MAC_OVERRIDE, + libcontainer.CAP_NET_ADMIN, + }, + Namespaces: libcontainer.Namespaces{ + libcontainer.CLONE_NEWIPC, + libcontainer.CLONE_NEWNET, + libcontainer.CLONE_NEWNS, + libcontainer.CLONE_NEWPID, + libcontainer.CLONE_NEWUTS, + }, + Cgroups: &cgroups.Cgroup{ + Name: "docker", + DeviceAccess: false, + }, + } +} diff --git a/execdriver/namespaces/driver.go b/execdriver/namespaces/driver.go new file mode 100644 index 0000000000..e243c64703 --- /dev/null +++ b/execdriver/namespaces/driver.go @@ -0,0 +1,349 @@ +package namespaces + +import ( + "encoding/json" + "errors" + "fmt" + "github.com/dotcloud/docker/execdriver" + 
"github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/pkg/libcontainer/network" + "github.com/dotcloud/docker/pkg/libcontainer/nsinit" + "github.com/dotcloud/docker/pkg/libcontainer/utils" + "github.com/dotcloud/docker/pkg/system" + "github.com/dotcloud/docker/pkg/term" + "io" + "io/ioutil" + "log" + "os" + "os/exec" + "path/filepath" + "strings" + "syscall" +) + +const ( + DriverName = "namespaces" + Version = "0.1" +) + +var ( + ErrNotSupported = errors.New("not supported") +) + +func init() { + execdriver.RegisterInitFunc(DriverName, func(args *execdriver.InitArgs) error { + return nil + }) +} + +type driver struct { +} + +func NewDriver() (*driver, error) { + return &driver{}, nil +} + +func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) { + container := createContainer(c) + if err := writeContainerFile(container, c.Rootfs); err != nil { + return -1, err + } + + var ( + console string + master *os.File + err error + + inPipe io.WriteCloser + outPipe, errPipe io.ReadCloser + ) + + if container.Tty { + log.Printf("setting up master and console") + master, console, err = createMasterAndConsole() + if err != nil { + return -1, err + } + } + c.Terminal = NewTerm(pipes, master) + + // create a pipe so that we can syncronize with the namespaced process and + // pass the veth name to the child + r, w, err := os.Pipe() + if err != nil { + return -1, err + } + system.UsetCloseOnExec(r.Fd()) + + args := append([]string{c.Entrypoint}, c.Arguments...) 
+ createCommand(c, container, console, "/nsinit.logs", r.Fd(), args) + command := c + + if !container.Tty { + log.Printf("opening pipes on command") + if inPipe, err = command.StdinPipe(); err != nil { + return -1, err + } + if outPipe, err = command.StdoutPipe(); err != nil { + return -1, err + } + if errPipe, err = command.StderrPipe(); err != nil { + return -1, err + } + } + + log.Printf("staring init") + if err := command.Start(); err != nil { + return -1, err + } + log.Printf("writting state file") + if err := writePidFile(c.Rootfs, command.Process.Pid); err != nil { + command.Process.Kill() + return -1, err + } + defer deletePidFile(c.Rootfs) + + // Do this before syncing with child so that no children + // can escape the cgroup + if container.Cgroups != nil { + log.Printf("setting up cgroups") + if err := container.Cgroups.Apply(command.Process.Pid); err != nil { + command.Process.Kill() + return -1, err + } + } + + if container.Network != nil { + log.Printf("creating veth pair") + vethPair, err := initializeContainerVeth(container.Network.Bridge, container.Network.Mtu, command.Process.Pid) + if err != nil { + return -1, err + } + log.Printf("sending %s as veth pair name", vethPair) + sendVethName(w, vethPair) + } + + // Sync with child + log.Printf("closing sync pipes") + w.Close() + r.Close() + + if container.Tty { + log.Printf("starting copy for tty") + go io.Copy(pipes.Stdout, master) + if pipes.Stdin != nil { + go io.Copy(master, pipes.Stdin) + } + + /* + state, err := setupWindow(master) + if err != nil { + command.Process.Kill() + return -1, err + } + defer term.RestoreTerminal(uintptr(syscall.Stdin), state) + */ + } else { + log.Printf("starting copy for std pipes") + if pipes.Stdin != nil { + go func() { + defer inPipe.Close() + io.Copy(inPipe, pipes.Stdin) + }() + } + go io.Copy(pipes.Stdout, outPipe) + go io.Copy(pipes.Stderr, errPipe) + } + + if startCallback != nil { + startCallback(c) + } + + log.Printf("waiting on process") + if err := 
command.Wait(); err != nil { + if _, ok := err.(*exec.ExitError); !ok { + return -1, err + } + } + log.Printf("process ended") + return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil +} + +func (d *driver) Kill(p *execdriver.Command, sig int) error { + return p.Process.Kill() +} + +func (d *driver) Restore(c *execdriver.Command) error { + return ErrNotSupported +} + +func (d *driver) Info(id string) execdriver.Info { + return nil +} + +func (d *driver) Name() string { + return fmt.Sprintf("%s-%s", DriverName, Version) +} + +func (d *driver) GetPidsForContainer(id string) ([]int, error) { + return nil, ErrNotSupported +} + +func writeContainerFile(container *libcontainer.Container, rootfs string) error { + data, err := json.Marshal(container) + if err != nil { + return err + } + return ioutil.WriteFile(filepath.Join(rootfs, "container.json"), data, 0755) +} + +func getEnv(key string, env []string) string { + for _, pair := range env { + parts := strings.Split(pair, "=") + if parts[0] == key { + return parts[1] + } + } + return "" +} + +// sendVethName writes the veth pair name to the child's stdin then closes the +// pipe so that the child stops waiting for more data +func sendVethName(pipe io.Writer, name string) { + fmt.Fprint(pipe, name) +} + +// initializeContainerVeth will create a veth pair and setup the host's +// side of the pair by setting the specified bridge as the master and bringing +// up the interface. 
+// +// Then will with set the other side of the veth pair into the container's namespaced +// using the pid and returns the veth's interface name to provide to the container to +// finish setting up the interface inside the namespace +func initializeContainerVeth(bridge string, mtu, nspid int) (string, error) { + name1, name2, err := createVethPair() + if err != nil { + return "", err + } + log.Printf("veth pair created %s <> %s", name1, name2) + if err := network.SetInterfaceMaster(name1, bridge); err != nil { + return "", err + } + if err := network.SetMtu(name1, mtu); err != nil { + return "", err + } + if err := network.InterfaceUp(name1); err != nil { + return "", err + } + log.Printf("setting %s inside %d namespace", name2, nspid) + if err := network.SetInterfaceInNamespacePid(name2, nspid); err != nil { + return "", err + } + return name2, nil +} + +func setupWindow(master *os.File) (*term.State, error) { + ws, err := term.GetWinsize(os.Stdin.Fd()) + if err != nil { + return nil, err + } + if err := term.SetWinsize(master.Fd(), ws); err != nil { + return nil, err + } + return term.SetRawTerminal(os.Stdin.Fd()) +} + +// createMasterAndConsole will open /dev/ptmx on the host and retreive the +// pts name for use as the pty slave inside the container +func createMasterAndConsole() (*os.File, string, error) { + master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) + if err != nil { + return nil, "", err + } + console, err := system.Ptsname(master) + if err != nil { + return nil, "", err + } + if err := system.Unlockpt(master); err != nil { + return nil, "", err + } + return master, console, nil +} + +// createVethPair will automatically generage two random names for +// the veth pair and ensure that they have been created +func createVethPair() (name1 string, name2 string, err error) { + name1, err = utils.GenerateRandomName("dock", 4) + if err != nil { + return + } + name2, err = utils.GenerateRandomName("dock", 4) + if 
err != nil { + return + } + if err = network.CreateVethPair(name1, name2); err != nil { + return + } + return +} + +// writePidFile writes the namespaced processes pid to .nspid in the rootfs for the container +func writePidFile(rootfs string, pid int) error { + return ioutil.WriteFile(filepath.Join(rootfs, ".nspid"), []byte(fmt.Sprint(pid)), 0655) +} + +func deletePidFile(rootfs string) error { + return os.Remove(filepath.Join(rootfs, ".nspid")) +} + +// createCommand will return an exec.Cmd with the Cloneflags set to the proper namespaces +// defined on the container's configuration and use the current binary as the init with the +// args provided +func createCommand(c *execdriver.Command, container *libcontainer.Container, + console, logFile string, pipe uintptr, args []string) { + + aname, _ := exec.LookPath("nsinit") + c.Path = aname + c.Args = append([]string{ + aname, + "-console", console, + "-pipe", fmt.Sprint(pipe), + "-log", logFile, + "init", + }, args...) + c.SysProcAttr = &syscall.SysProcAttr{ + Cloneflags: uintptr(nsinit.GetNamespaceFlags(container.Namespaces)), + } + c.Env = container.Env + c.Dir = c.Rootfs +} + +func createContainer(c *execdriver.Command) *libcontainer.Container { + container := getDefaultTemplate() + + container.Hostname = getEnv("HOSTNAME", c.Env) + container.Tty = c.Tty + container.User = c.User + container.WorkingDir = c.WorkingDir + container.Env = c.Env + + container.Env = append(container.Env, "container=docker") + + if c.Network != nil { + container.Network = &libcontainer.Network{ + Mtu: c.Network.Mtu, + Address: fmt.Sprintf("%s/%d", c.Network.IPAddress, c.Network.IPPrefixLen), + Gateway: c.Network.Gateway, + Bridge: c.Network.Bridge, + } + } + if c.Privileged { + container.Capabilities = nil + } + if c.Resources != nil { + container.Cgroups.CpuShares = c.Resources.CpuShares + container.Cgroups.Memory = c.Resources.Memory + container.Cgroups.MemorySwap = c.Resources.MemorySwap + } + return container +} diff --git 
a/execdriver/namespaces/term.go b/execdriver/namespaces/term.go new file mode 100644 index 0000000000..682c6a27b1 --- /dev/null +++ b/execdriver/namespaces/term.go @@ -0,0 +1,26 @@ +package namespaces + +import ( + "github.com/dotcloud/docker/execdriver" + "github.com/dotcloud/docker/pkg/term" + "os" +) + +type NsinitTerm struct { + master *os.File +} + +func NewTerm(pipes *execdriver.Pipes, master *os.File) *NsinitTerm { + return &NsinitTerm{master} +} + +func (t *NsinitTerm) Close() error { + return t.master.Close() +} + +func (t *NsinitTerm) Resize(h, w int) error { + if t.master != nil { + return term.SetWinsize(t.master.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)}) + } + return nil +} diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index 3622196b78..6671ebe129 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -227,7 +227,7 @@ func createCommand(container *libcontainer.Container, console, logFile string, p "init"}, args...)...) 
command.SysProcAttr = &syscall.SysProcAttr{ - Cloneflags: uintptr(getNamespaceFlags(container.Namespaces)), + Cloneflags: uintptr(GetNamespaceFlags(container.Namespaces)), } command.Env = container.Env return command diff --git a/pkg/libcontainer/nsinit/ns_linux.go b/pkg/libcontainer/nsinit/ns_linux.go index e42d4b88d7..58af24798f 100644 --- a/pkg/libcontainer/nsinit/ns_linux.go +++ b/pkg/libcontainer/nsinit/ns_linux.go @@ -28,7 +28,7 @@ var namespaceFileMap = map[libcontainer.Namespace]string{ // getNamespaceFlags parses the container's Namespaces options to set the correct // flags on clone, unshare, and setns -func getNamespaceFlags(namespaces libcontainer.Namespaces) (flag int) { +func GetNamespaceFlags(namespaces libcontainer.Namespaces) (flag int) { for _, ns := range namespaces { flag |= namespaceMap[ns] } diff --git a/runtime.go b/runtime.go index a38109cca0..9f16d6213b 100644 --- a/runtime.go +++ b/runtime.go @@ -7,7 +7,8 @@ import ( "github.com/dotcloud/docker/dockerversion" "github.com/dotcloud/docker/engine" "github.com/dotcloud/docker/execdriver" - "github.com/dotcloud/docker/execdriver/lxc" + _ "github.com/dotcloud/docker/execdriver/lxc" + "github.com/dotcloud/docker/execdriver/namespaces" "github.com/dotcloud/docker/graphdriver" "github.com/dotcloud/docker/graphdriver/aufs" _ "github.com/dotcloud/docker/graphdriver/btrfs" @@ -703,7 +704,7 @@ func NewRuntimeFromDirectory(config *DaemonConfig, eng *engine.Engine) (*Runtime sysInfo := sysinfo.New(false) - ed, err := lxc.NewDriver(config.Root, sysInfo.AppArmor) + ed, err := namespaces.NewDriver() if err != nil { return nil, err } From 9876e5b8901199bad2ab424593131d574b582bf9 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 21 Feb 2014 21:14:21 -0800 Subject: [PATCH 06/13] Export functions of nsinit Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/nsinit/exec.go | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git 
a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index 6671ebe129..b2eaa0bc65 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -30,7 +30,7 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. if container.Tty { log.Printf("setting up master and console") - master, console, err = createMasterAndConsole() + master, console, err = CreateMasterAndConsole() if err != nil { return -1, err } @@ -44,7 +44,7 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. } system.UsetCloseOnExec(r.Fd()) - command := createCommand(container, console, logFile, r.Fd(), args) + command := CreateCommand(container, console, logFile, r.Fd(), args) if !container.Tty { log.Printf("opening pipes on command") if inPipe, err = command.StdinPipe(); err != nil { @@ -81,12 +81,12 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. if container.Network != nil { log.Printf("creating veth pair") - vethPair, err := initializeContainerVeth(container.Network.Bridge, container.Network.Mtu, command.Process.Pid) + vethPair, err := InitializeContainerVeth(container.Network.Bridge, container.Network.Mtu, command.Process.Pid) if err != nil { return -1, err } log.Printf("sending %s as veth pair name", vethPair) - sendVethName(w, vethPair) + SendVethName(w, vethPair) } // Sync with child @@ -99,7 +99,7 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. go io.Copy(stdout, master) go io.Copy(master, stdin) - state, err := setupWindow(master) + state, err := SetupWindow(master, os.Stdin) if err != nil { command.Process.Kill() return -1, err @@ -125,9 +125,9 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. 
return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil } -// sendVethName writes the veth pair name to the child's stdin then closes the +// SendVethName writes the veth pair name to the child's stdin then closes the // pipe so that the child stops waiting for more data -func sendVethName(pipe io.Writer, name string) { +func SendVethName(pipe io.Writer, name string) { fmt.Fprint(pipe, name) } @@ -138,7 +138,7 @@ func sendVethName(pipe io.Writer, name string) { // Then will with set the other side of the veth pair into the container's namespaced // using the pid and returns the veth's interface name to provide to the container to // finish setting up the interface inside the namespace -func initializeContainerVeth(bridge string, mtu, nspid int) (string, error) { +func InitializeContainerVeth(bridge string, mtu, nspid int) (string, error) { name1, name2, err := createVethPair() if err != nil { return "", err @@ -160,20 +160,22 @@ func initializeContainerVeth(bridge string, mtu, nspid int) (string, error) { return name2, nil } -func setupWindow(master *os.File) (*term.State, error) { - ws, err := term.GetWinsize(os.Stdin.Fd()) +// SetupWindow gets the parent window size and sets the master +// pty to the current size and set the parents mode to RAW +func SetupWindow(master, parent *os.File) (*term.State, error) { + ws, err := term.GetWinsize(parent.Fd()) if err != nil { return nil, err } if err := term.SetWinsize(master.Fd(), ws); err != nil { return nil, err } - return term.SetRawTerminal(os.Stdin.Fd()) + return term.SetRawTerminal(parent.Fd()) } -// createMasterAndConsole will open /dev/ptmx on the host and retreive the +// CreateMasterAndConsole will open /dev/ptmx on the host and retreive the // pts name for use as the pty slave inside the container -func createMasterAndConsole() (*os.File, string, error) { +func CreateMasterAndConsole() (*os.File, string, error) { master, err := os.OpenFile("/dev/ptmx", 
syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) if err != nil { return nil, "", err @@ -217,7 +219,7 @@ func deletePidFile() error { // createCommand will return an exec.Cmd with the Cloneflags set to the proper namespaces // defined on the container's configuration and use the current binary as the init with the // args provided -func createCommand(container *libcontainer.Container, console, logFile string, pipe uintptr, args []string) *exec.Cmd { +func CreateCommand(container *libcontainer.Container, console, logFile string, pipe uintptr, args []string) *exec.Cmd { // get our binary name so we can always reexec ourself name := os.Args[0] command := exec.Command(name, append([]string{ From 5a4069f3aacd0dc30ee7c5dd97f0dc9a6e416f35 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 21 Feb 2014 22:20:15 -0800 Subject: [PATCH 07/13] Refactor network creation and initialization into strategies Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/README.md | 9 ++- pkg/libcontainer/container.go | 13 ++-- pkg/libcontainer/container.json | 9 ++- pkg/libcontainer/network/strategy.go | 32 +++++++++ pkg/libcontainer/network/veth.go | 103 +++++++++++++++++++++++++++ pkg/libcontainer/nsinit/exec.go | 97 +++++++++---------------- pkg/libcontainer/nsinit/init.go | 55 +++++--------- 7 files changed, 211 insertions(+), 107 deletions(-) create mode 100644 pkg/libcontainer/network/strategy.go create mode 100644 pkg/libcontainer/network/veth.go diff --git a/pkg/libcontainer/README.md b/pkg/libcontainer/README.md index 89a4ec0c48..36553af5bc 100644 --- a/pkg/libcontainer/README.md +++ b/pkg/libcontainer/README.md @@ -45,12 +45,17 @@ Sample `container.json` file: "AUDIT_WRITE", "AUDIT_CONTROL", "MAC_OVERRIDE", - "MAC_ADMIN" + "MAC_ADMIN", + "NET_ADMIN" ], "network": { + "type": "veth", + "context": { + "bridge": "docker0", + "prefix": "dock" + }, "address": "172.17.0.100/16", "gateway": "172.17.42.1", - "bridge": "docker0", "mtu": 1500 }, 
"cgroups": { diff --git a/pkg/libcontainer/container.go b/pkg/libcontainer/container.go index 3c1b62b65a..4a47977334 100644 --- a/pkg/libcontainer/container.go +++ b/pkg/libcontainer/container.go @@ -4,6 +4,10 @@ import ( "github.com/dotcloud/docker/pkg/cgroups" ) +// Context is a generic key value pair that allows +// arbatrary data to be sent +type Context map[string]string + // Container defines configuration options for how a // container is setup inside a directory and how a process should be executed type Container struct { @@ -24,8 +28,9 @@ type Container struct { // The network configuration can be omited from a container causing the // container to be setup with the host's networking stack type Network struct { - Address string `json:"address,omitempty"` - Gateway string `json:"gateway,omitempty"` - Bridge string `json:"bridge,omitempty"` - Mtu int `json:"mtu,omitempty"` + Type string `json:"type,omitempty"` // type of networking to setup i.e. veth, macvlan, etc + Context Context `json:"context,omitempty"` // generic context for type specific networking options + Address string `json:"address,omitempty"` + Gateway string `json:"gateway,omitempty"` + Mtu int `json:"mtu,omitempty"` } diff --git a/pkg/libcontainer/container.json b/pkg/libcontainer/container.json index 07e52df428..c2b21f8609 100644 --- a/pkg/libcontainer/container.json +++ b/pkg/libcontainer/container.json @@ -28,12 +28,17 @@ "AUDIT_WRITE", "AUDIT_CONTROL", "MAC_OVERRIDE", - "MAC_ADMIN" + "MAC_ADMIN", + "NET_ADMIN" ], "network": { + "type": "veth", + "context": { + "bridge": "docker0", + "prefix": "dock" + }, "address": "172.17.0.100/16", "gateway": "172.17.42.1", - "bridge": "docker0", "mtu": 1500 }, "cgroups": { diff --git a/pkg/libcontainer/network/strategy.go b/pkg/libcontainer/network/strategy.go new file mode 100644 index 0000000000..8ecc11a24d --- /dev/null +++ b/pkg/libcontainer/network/strategy.go @@ -0,0 +1,32 @@ +package network + +import ( + "errors" + 
"github.com/dotcloud/docker/pkg/libcontainer" +) + +var ( + ErrNotValidStrategyType = errors.New("not a valid network strategy type") +) + +var strategies = map[string]NetworkStrategy{ + "veth": &Veth{}, +} + +// NetworkStrategy represends a specific network configuration for +// a containers networking stack +type NetworkStrategy interface { + Create(*libcontainer.Network, int) (libcontainer.Context, error) + Initialize(*libcontainer.Network, libcontainer.Context) error +} + +// GetStrategy returns the specific network strategy for the +// provided type. If no strategy is registered for the type an +// ErrNotValidStrategyType is returned. +func GetStrategy(tpe string) (NetworkStrategy, error) { + s, exists := strategies[tpe] + if !exists { + return nil, ErrNotValidStrategyType + } + return s, nil +} diff --git a/pkg/libcontainer/network/veth.go b/pkg/libcontainer/network/veth.go new file mode 100644 index 0000000000..61fec5500c --- /dev/null +++ b/pkg/libcontainer/network/veth.go @@ -0,0 +1,103 @@ +package network + +import ( + "fmt" + "github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/pkg/libcontainer/utils" + "log" +) + +type Veth struct { +} + +func (v *Veth) Create(n *libcontainer.Network, nspid int) (libcontainer.Context, error) { + log.Printf("creating veth network") + var ( + bridge string + prefix string + exists bool + ) + if bridge, exists = n.Context["bridge"]; !exists { + return nil, fmt.Errorf("bridge does not exist in network context") + } + if prefix, exists = n.Context["prefix"]; !exists { + return nil, fmt.Errorf("veth prefix does not exist in network context") + } + name1, name2, err := createVethPair(prefix) + if err != nil { + return nil, err + } + context := libcontainer.Context{ + "vethHost": name1, + "vethChild": name2, + } + log.Printf("veth pair created %s <> %s", name1, name2) + if err := SetInterfaceMaster(name1, bridge); err != nil { + return context, err + } + if err := SetMtu(name1, n.Mtu); err != nil { + 
return context, err + } + if err := InterfaceUp(name1); err != nil { + return context, err + } + log.Printf("setting %s inside %d namespace", name2, nspid) + if err := SetInterfaceInNamespacePid(name2, nspid); err != nil { + return context, err + } + return context, nil +} + +func (v *Veth) Initialize(config *libcontainer.Network, context libcontainer.Context) error { + var ( + vethChild string + exists bool + ) + if vethChild, exists = context["vethChild"]; !exists { + return fmt.Errorf("vethChild does not exist in network context") + } + if err := InterfaceDown(vethChild); err != nil { + return fmt.Errorf("interface down %s %s", vethChild, err) + } + if err := ChangeInterfaceName(vethChild, "eth0"); err != nil { + return fmt.Errorf("change %s to eth0 %s", vethChild, err) + } + if err := SetInterfaceIp("eth0", config.Address); err != nil { + return fmt.Errorf("set eth0 ip %s", err) + } + if err := SetMtu("eth0", config.Mtu); err != nil { + return fmt.Errorf("set eth0 mtu to %d %s", config.Mtu, err) + } + if err := InterfaceUp("eth0"); err != nil { + return fmt.Errorf("eth0 up %s", err) + } + if err := SetMtu("lo", config.Mtu); err != nil { + return fmt.Errorf("set lo mtu to %d %s", config.Mtu, err) + } + if err := InterfaceUp("lo"); err != nil { + return fmt.Errorf("lo up %s", err) + } + if config.Gateway != "" { + if err := SetDefaultGateway(config.Gateway); err != nil { + return fmt.Errorf("set gateway to %s %s", config.Gateway, err) + } + } + return nil +} + +// createVethPair will automatically generage two random names for +// the veth pair and ensure that they have been created +func createVethPair(prefix string) (name1 string, name2 string, err error) { + name1, err = utils.GenerateRandomName(prefix, 4) + if err != nil { + return + } + name2, err = utils.GenerateRandomName(prefix, 4) + if err != nil { + return + } + if err = CreateVethPair(name1, name2); err != nil { + return + } + return +} diff --git a/pkg/libcontainer/nsinit/exec.go 
b/pkg/libcontainer/nsinit/exec.go index b2eaa0bc65..6c4d7666a2 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -3,10 +3,10 @@ package nsinit import ( + "encoding/json" "fmt" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/network" - "github.com/dotcloud/docker/pkg/libcontainer/utils" "github.com/dotcloud/docker/pkg/system" "github.com/dotcloud/docker/pkg/term" "io" @@ -19,11 +19,11 @@ import ( // Exec performes setup outside of a namespace so that a container can be // executed. Exec is a high level function for working with container namespaces. -func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io.Writer, master *os.File, logFile string, args []string) (int, error) { +func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io.Writer, + master *os.File, logFile string, args []string) (int, error) { var ( - console string - err error - + console string + err error inPipe io.WriteCloser outPipe, errPipe io.ReadCloser ) @@ -46,7 +46,7 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. command := CreateCommand(container, console, logFile, r.Fd(), args) if !container.Tty { - log.Printf("opening pipes on command") + log.Printf("opening std pipes") if inPipe, err = command.StdinPipe(); err != nil { return -1, err } @@ -78,15 +78,9 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. 
return -1, err } } - - if container.Network != nil { - log.Printf("creating veth pair") - vethPair, err := InitializeContainerVeth(container.Network.Bridge, container.Network.Mtu, command.Process.Pid) - if err != nil { - return -1, err - } - log.Printf("sending %s as veth pair name", vethPair) - SendVethName(w, vethPair) + if err := InitializeNetworking(container, command.Process.Pid, w); err != nil { + command.Process.Kill() + return -1, err } // Sync with child @@ -104,7 +98,7 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. command.Process.Kill() return -1, err } - defer term.RestoreTerminal(uintptr(syscall.Stdin), state) + defer term.RestoreTerminal(os.Stdin.Fd(), state) } else { log.Printf("starting copy for std pipes") go func() { @@ -125,39 +119,34 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil } -// SendVethName writes the veth pair name to the child's stdin then closes the -// pipe so that the child stops waiting for more data -func SendVethName(pipe io.Writer, name string) { - fmt.Fprint(pipe, name) +func InitializeNetworking(container *libcontainer.Container, nspid int, pipe io.Writer) error { + if container.Network != nil { + log.Printf("creating host network configuration type %s", container.Network.Type) + strategy, err := network.GetStrategy(container.Network.Type) + if err != nil { + return err + } + networkContext, err := strategy.Create(container.Network, nspid) + if err != nil { + return err + } + log.Printf("sending %v as network context", networkContext) + if err := SendContext(pipe, networkContext); err != nil { + return err + } + } + return nil } -// initializeContainerVeth will create a veth pair and setup the host's -// side of the pair by setting the specified bridge as the master and bringing -// up the interface. 
-// -// Then will with set the other side of the veth pair into the container's namespaced -// using the pid and returns the veth's interface name to provide to the container to -// finish setting up the interface inside the namespace -func InitializeContainerVeth(bridge string, mtu, nspid int) (string, error) { - name1, name2, err := createVethPair() +// SendContext writes the veth pair name to the child's stdin then closes the +// pipe so that the child stops waiting for more data +func SendContext(pipe io.Writer, context libcontainer.Context) error { + data, err := json.Marshal(context) if err != nil { - return "", err + return err } - log.Printf("veth pair created %s <> %s", name1, name2) - if err := network.SetInterfaceMaster(name1, bridge); err != nil { - return "", err - } - if err := network.SetMtu(name1, mtu); err != nil { - return "", err - } - if err := network.InterfaceUp(name1); err != nil { - return "", err - } - log.Printf("setting %s inside %d namespace", name2, nspid) - if err := network.SetInterfaceInNamespacePid(name2, nspid); err != nil { - return "", err - } - return name2, nil + pipe.Write(data) + return nil } // SetupWindow gets the parent window size and sets the master @@ -190,29 +179,13 @@ func CreateMasterAndConsole() (*os.File, string, error) { return master, console, nil } -// createVethPair will automatically generage two random names for -// the veth pair and ensure that they have been created -func createVethPair() (name1 string, name2 string, err error) { - name1, err = utils.GenerateRandomName("dock", 4) - if err != nil { - return - } - name2, err = utils.GenerateRandomName("dock", 4) - if err != nil { - return - } - if err = network.CreateVethPair(name1, name2); err != nil { - return - } - return -} - // writePidFile writes the namespaced processes pid to .nspid in the rootfs for the container func writePidFile(command *exec.Cmd) error { return ioutil.WriteFile(".nspid", []byte(fmt.Sprint(command.Process.Pid)), 0655) } func 
deletePidFile() error { + log.Printf("removing .nspid file") return os.Remove(".nspid") } diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index 04716ba645..f530d4a52a 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -3,6 +3,7 @@ package nsinit import ( + "encoding/json" "fmt" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/capabilities" @@ -27,13 +28,10 @@ func Init(container *libcontainer.Container, uncleanRootfs, console string, pipe log.Printf("initializing namespace at %s", rootfs) // We always read this as it is a way to sync with the parent as well - tempVethName, err := getVethName(pipe) + context, err := GetContextFromParent(pipe) if err != nil { return err } - if tempVethName != "" { - log.Printf("received veth name %s", tempVethName) - } if console != "" { log.Printf("setting up console for %s", console) // close pipes so that we can replace it with the pty @@ -62,7 +60,7 @@ func Init(container *libcontainer.Container, uncleanRootfs, console string, pipe if err := setupNewMountNamespace(rootfs, console, container.ReadonlyFs); err != nil { return fmt.Errorf("setup mount namespace %s", err) } - if err := setupVethNetwork(container.Network, tempVethName); err != nil { + if err := setupNetwork(container.Network, context); err != nil { return fmt.Errorf("setup networking %s", err) } if err := system.Sethostname(container.Hostname); err != nil { @@ -145,46 +143,29 @@ func openTerminal(name string, flag int) (*os.File, error) { // setupVethNetwork uses the Network config if it is not nil to initialize // the new veth interface inside the container for use by changing the name to eth0 // setting the MTU and IP address along with the default gateway -func setupVethNetwork(config *libcontainer.Network, tempVethName string) error { +func setupNetwork(config *libcontainer.Network, context libcontainer.Context) error { if config != nil { - if err := 
network.InterfaceDown(tempVethName); err != nil { - return fmt.Errorf("interface down %s %s", tempVethName, err) - } - if err := network.ChangeInterfaceName(tempVethName, "eth0"); err != nil { - return fmt.Errorf("change %s to eth0 %s", tempVethName, err) - } - if err := network.SetInterfaceIp("eth0", config.Address); err != nil { - return fmt.Errorf("set eth0 ip %s", err) - } - if err := network.SetMtu("eth0", config.Mtu); err != nil { - return fmt.Errorf("set eth0 mtu to %d %s", config.Mtu, err) - } - if err := network.InterfaceUp("eth0"); err != nil { - return fmt.Errorf("eth0 up %s", err) - } - if err := network.SetMtu("lo", config.Mtu); err != nil { - return fmt.Errorf("set lo mtu to %d %s", config.Mtu, err) - } - if err := network.InterfaceUp("lo"); err != nil { - return fmt.Errorf("lo up %s", err) - } - if config.Gateway != "" { - if err := network.SetDefaultGateway(config.Gateway); err != nil { - return fmt.Errorf("set gateway to %s %s", config.Gateway, err) - } + strategy, err := network.GetStrategy(config.Type) + if err != nil { + return err } + return strategy.Initialize(config, context) } return nil } -// getVethName reads from Stdin the temp veth name -// sent by the parent processes after the veth pair -// has been created and setup -func getVethName(pipe io.ReadCloser) (string, error) { +func GetContextFromParent(pipe io.ReadCloser) (libcontainer.Context, error) { defer pipe.Close() data, err := ioutil.ReadAll(pipe) if err != nil { - return "", fmt.Errorf("error reading from stdin %s", err) + return nil, fmt.Errorf("error reading from stdin %s", err) } - return string(data), nil + var context libcontainer.Context + if len(data) > 0 { + if err := json.Unmarshal(data, &context); err != nil { + return nil, err + } + log.Printf("received context %v", context) + } + return context, nil } From dd59f7fb286f2abff6cee2699e62fff564425149 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 21 Feb 2014 22:37:09 -0800 Subject: [PATCH 08/13] Refactor exec 
method Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/nsinit/exec.go | 138 ++++++++++++++++++-------------- 1 file changed, 77 insertions(+), 61 deletions(-) diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index 6c4d7666a2..3cbe43ae7a 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -22,20 +22,10 @@ import ( func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io.Writer, master *os.File, logFile string, args []string) (int, error) { var ( - console string - err error - inPipe io.WriteCloser - outPipe, errPipe io.ReadCloser + console string + err error ) - if container.Tty { - log.Printf("setting up master and console") - master, console, err = CreateMasterAndConsole() - if err != nil { - return -1, err - } - } - // create a pipe so that we can syncronize with the namespaced process and // pass the veth name to the child r, w, err := os.Pipe() @@ -44,49 +34,15 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. 
} system.UsetCloseOnExec(r.Fd()) + if container.Tty { + log.Printf("setting up master and console") + master, console, err = CreateMasterAndConsole() + if err != nil { + return -1, err + } + } + command := CreateCommand(container, console, logFile, r.Fd(), args) - if !container.Tty { - log.Printf("opening std pipes") - if inPipe, err = command.StdinPipe(); err != nil { - return -1, err - } - if outPipe, err = command.StdoutPipe(); err != nil { - return -1, err - } - if errPipe, err = command.StderrPipe(); err != nil { - return -1, err - } - } - - log.Printf("staring init") - if err := command.Start(); err != nil { - return -1, err - } - log.Printf("writting state file") - if err := writePidFile(command); err != nil { - command.Process.Kill() - return -1, err - } - defer deletePidFile() - - // Do this before syncing with child so that no children - // can escape the cgroup - if container.Cgroups != nil { - log.Printf("setting up cgroups") - if err := container.Cgroups.Apply(command.Process.Pid); err != nil { - command.Process.Kill() - return -1, err - } - } - if err := InitializeNetworking(container, command.Process.Pid, w); err != nil { - command.Process.Kill() - return -1, err - } - - // Sync with child - log.Printf("closing sync pipes") - w.Close() - r.Close() if container.Tty { log.Printf("starting copy for tty") @@ -100,15 +56,39 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. 
} defer term.RestoreTerminal(os.Stdin.Fd(), state) } else { - log.Printf("starting copy for std pipes") - go func() { - defer inPipe.Close() - io.Copy(inPipe, stdin) - }() - go io.Copy(stdout, outPipe) - go io.Copy(stderr, errPipe) + if err := startStdCopy(command, stdin, stdout, stderr); err != nil { + command.Process.Kill() + return -1, err + } } + log.Printf("staring init") + if err := command.Start(); err != nil { + return -1, err + } + log.Printf("writing state file") + if err := writePidFile(command); err != nil { + command.Process.Kill() + return -1, err + } + defer deletePidFile() + + // Do this before syncing with child so that no children + // can escape the cgroup + if err := SetupCgroups(container, command.Process.Pid); err != nil { + command.Process.Kill() + return -1, err + } + if err := InitializeNetworking(container, command.Process.Pid, w); err != nil { + command.Process.Kill() + return -1, err + } + + // Sync with child + log.Printf("closing sync pipes") + w.Close() + r.Close() + log.Printf("waiting on process") if err := command.Wait(); err != nil { if _, ok := err.(*exec.ExitError); !ok { @@ -119,6 +99,16 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. 
return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil } +func SetupCgroups(container *libcontainer.Container, nspid int) error { + if container.Cgroups != nil { + log.Printf("setting up cgroups") + if err := container.Cgroups.Apply(nspid); err != nil { + return err + } + } + return nil +} + func InitializeNetworking(container *libcontainer.Container, nspid int, pipe io.Writer) error { if container.Network != nil { log.Printf("creating host network configuration type %s", container.Network.Type) @@ -207,3 +197,29 @@ func CreateCommand(container *libcontainer.Container, console, logFile string, p command.Env = container.Env return command } + +func startStdCopy(command *exec.Cmd, stdin io.Reader, stdout, stderr io.Writer) error { + log.Printf("opening std pipes") + inPipe, err := command.StdinPipe() + if err != nil { + return err + } + outPipe, err := command.StdoutPipe() + if err != nil { + return err + } + errPipe, err := command.StderrPipe() + if err != nil { + return err + } + + log.Printf("starting copy for std pipes") + go func() { + defer inPipe.Close() + io.Copy(inPipe, stdin) + }() + go io.Copy(stdout, outPipe) + go io.Copy(stderr, errPipe) + + return nil +} From 2412656ef54cb4df36df2f8122e1fda24ec8e8a4 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 21 Feb 2014 22:58:30 -0800 Subject: [PATCH 09/13] Add syncpipe for passing context Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/nsinit/exec.go | 27 +++------- pkg/libcontainer/nsinit/init.go | 44 +++++++--------- pkg/libcontainer/nsinit/nsinit/main.go | 6 ++- pkg/libcontainer/nsinit/sync_pipe.go | 73 ++++++++++++++++++++++++++ 4 files changed, 102 insertions(+), 48 deletions(-) create mode 100644 pkg/libcontainer/nsinit/sync_pipe.go diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index 3cbe43ae7a..ec75e9c923 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -3,7 +3,6 @@ 
package nsinit import ( - "encoding/json" "fmt" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/network" @@ -28,11 +27,10 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. // create a pipe so that we can syncronize with the namespaced process and // pass the veth name to the child - r, w, err := os.Pipe() + syncPipe, err := NewSyncPipe() if err != nil { return -1, err } - system.UsetCloseOnExec(r.Fd()) if container.Tty { log.Printf("setting up master and console") @@ -42,8 +40,7 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. } } - command := CreateCommand(container, console, logFile, r.Fd(), args) - + command := CreateCommand(container, console, logFile, syncPipe.child.Fd(), args) if container.Tty { log.Printf("starting copy for tty") go io.Copy(stdout, master) @@ -79,15 +76,14 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. command.Process.Kill() return -1, err } - if err := InitializeNetworking(container, command.Process.Pid, w); err != nil { + if err := InitializeNetworking(container, command.Process.Pid, syncPipe); err != nil { command.Process.Kill() return -1, err } // Sync with child log.Printf("closing sync pipes") - w.Close() - r.Close() + syncPipe.Close() log.Printf("waiting on process") if err := command.Wait(); err != nil { @@ -109,7 +105,7 @@ func SetupCgroups(container *libcontainer.Container, nspid int) error { return nil } -func InitializeNetworking(container *libcontainer.Container, nspid int, pipe io.Writer) error { +func InitializeNetworking(container *libcontainer.Container, nspid int, pipe *SyncPipe) error { if container.Network != nil { log.Printf("creating host network configuration type %s", container.Network.Type) strategy, err := network.GetStrategy(container.Network.Type) @@ -121,24 +117,13 @@ func InitializeNetworking(container *libcontainer.Container, nspid int, pipe io. 
return err } log.Printf("sending %v as network context", networkContext) - if err := SendContext(pipe, networkContext); err != nil { + if err := pipe.SendToChild(networkContext); err != nil { return err } } return nil } -// SendContext writes the veth pair name to the child's stdin then closes the -// pipe so that the child stops waiting for more data -func SendContext(pipe io.Writer, context libcontainer.Context) error { - data, err := json.Marshal(context) - if err != nil { - return err - } - pipe.Write(data) - return nil -} - // SetupWindow gets the parent window size and sets the master // pty to the current size and set the parents mode to RAW func SetupWindow(master, parent *os.File) (*term.State, error) { diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index f530d4a52a..cdedc14769 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -3,14 +3,11 @@ package nsinit import ( - "encoding/json" "fmt" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/capabilities" "github.com/dotcloud/docker/pkg/libcontainer/network" "github.com/dotcloud/docker/pkg/system" - "io" - "io/ioutil" "log" "os" "os/exec" @@ -20,7 +17,7 @@ import ( // Init is the init process that first runs inside a new namespace to setup mounts, users, networking, // and other options required for the new container. 
-func Init(container *libcontainer.Container, uncleanRootfs, console string, pipe io.ReadCloser, args []string) error { +func Init(container *libcontainer.Container, uncleanRootfs, console string, syncPipe *SyncPipe, args []string) error { rootfs, err := resolveRootfs(uncleanRootfs) if err != nil { return err @@ -28,16 +25,18 @@ func Init(container *libcontainer.Container, uncleanRootfs, console string, pipe log.Printf("initializing namespace at %s", rootfs) // We always read this as it is a way to sync with the parent as well - context, err := GetContextFromParent(pipe) + context, err := syncPipe.ReadFromParent() if err != nil { + syncPipe.Close() return err } + syncPipe.Close() + log.Printf("received context from parent %v", context) + if console != "" { log.Printf("setting up console for %s", console) // close pipes so that we can replace it with the pty - os.Stdin.Close() - os.Stdout.Close() - os.Stderr.Close() + closeStdPipes() slave, err := openTerminal(console, syscall.O_RDWR) if err != nil { return fmt.Errorf("open terminal %s", err) @@ -79,18 +78,27 @@ func Init(container *libcontainer.Container, uncleanRootfs, console string, pipe return fmt.Errorf("chdir to %s %s", container.WorkingDir, err) } } + return execArgs(args, container.Env) +} + +func execArgs(args []string, env []string) error { name, err := exec.LookPath(args[0]) if err != nil { return err } - log.Printf("execing %s goodbye", name) - if err := system.Exec(name, args[0:], container.Env); err != nil { + if err := system.Exec(name, args[0:], env); err != nil { return fmt.Errorf("exec %s", err) } panic("unreachable") } +func closeStdPipes() { + os.Stdin.Close() + os.Stdout.Close() + os.Stderr.Close() +} + // resolveRootfs ensures that the current working directory is // not a symlink and returns the absolute path to the rootfs func resolveRootfs(uncleanRootfs string) (string, error) { @@ -153,19 +161,3 @@ func setupNetwork(config *libcontainer.Network, context libcontainer.Context) er } return 
nil } - -func GetContextFromParent(pipe io.ReadCloser) (libcontainer.Context, error) { - defer pipe.Close() - data, err := ioutil.ReadAll(pipe) - if err != nil { - return nil, fmt.Errorf("error reading from stdin %s", err) - } - var context libcontainer.Context - if len(data) > 0 { - if err := json.Unmarshal(data, &context); err != nil { - return nil, err - } - log.Printf("received context %v", context) - } - return context, nil -} diff --git a/pkg/libcontainer/nsinit/nsinit/main.go b/pkg/libcontainer/nsinit/nsinit/main.go index 28d42d4643..2400ab6903 100644 --- a/pkg/libcontainer/nsinit/nsinit/main.go +++ b/pkg/libcontainer/nsinit/nsinit/main.go @@ -74,7 +74,11 @@ func main() { if flag.NArg() < 2 { log.Fatal(ErrWrongArguments) } - if err := nsinit.Init(container, cwd, console, os.NewFile(uintptr(pipeFd), "pipe"), flag.Args()[1:]); err != nil { + syncPipe, err := nsinit.NewSyncPipeFromFd(0, uintptr(pipeFd)) + if err != nil { + log.Fatal(err) + } + if err := nsinit.Init(container, cwd, console, syncPipe, flag.Args()[1:]); err != nil { log.Fatal(err) } default: diff --git a/pkg/libcontainer/nsinit/sync_pipe.go b/pkg/libcontainer/nsinit/sync_pipe.go new file mode 100644 index 0000000000..7b29e98680 --- /dev/null +++ b/pkg/libcontainer/nsinit/sync_pipe.go @@ -0,0 +1,73 @@ +package nsinit + +import ( + "encoding/json" + "fmt" + "github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/pkg/system" + "io/ioutil" + "os" +) + +// SyncPipe allows communication to and from the child processes +// to it's parent and allows the two independent processes to +// syncronize their state. 
+type SyncPipe struct { + parent, child *os.File +} + +func NewSyncPipe() (s *SyncPipe, err error) { + s = &SyncPipe{} + s.child, s.parent, err = os.Pipe() + if err != nil { + return nil, err + } + system.UsetCloseOnExec(s.child.Fd()) + return s, nil +} + +func NewSyncPipeFromFd(parendFd, childFd uintptr) (*SyncPipe, error) { + s := &SyncPipe{} + if parendFd > 0 { + s.parent = os.NewFile(parendFd, "parendPipe") + } else if childFd > 0 { + s.child = os.NewFile(childFd, "childPipe") + } else { + return nil, fmt.Errorf("no valid sync pipe fd specified") + } + return s, nil +} + +func (s *SyncPipe) SendToChild(context libcontainer.Context) error { + data, err := json.Marshal(context) + if err != nil { + return err + } + s.parent.Write(data) + return nil +} + +func (s *SyncPipe) ReadFromParent() (libcontainer.Context, error) { + data, err := ioutil.ReadAll(s.child) + if err != nil { + return nil, fmt.Errorf("error reading from sync pipe %s", err) + } + var context libcontainer.Context + if len(data) > 0 { + if err := json.Unmarshal(data, &context); err != nil { + return nil, err + } + } + return context, nil + +} + +func (s *SyncPipe) Close() error { + if s.parent != nil { + s.parent.Close() + } + if s.child != nil { + s.child.Close() + } + return nil +} From ae423a036e6f884572491b1ff5ef8a626b1592aa Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Sat, 22 Feb 2014 00:29:21 -0800 Subject: [PATCH 10/13] Abstract out diff implementations for importing Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/nsinit/command.go | 34 ++++++++ pkg/libcontainer/nsinit/exec.go | 107 ++++-------------------- pkg/libcontainer/nsinit/nsinit/main.go | 4 +- pkg/libcontainer/nsinit/state.go | 24 ++++++ pkg/libcontainer/nsinit/term.go | 109 +++++++++++++++++++++++++ 5 files changed, 184 insertions(+), 94 deletions(-) create mode 100644 pkg/libcontainer/nsinit/command.go create mode 100644 pkg/libcontainer/nsinit/state.go create mode 100644 
pkg/libcontainer/nsinit/term.go diff --git a/pkg/libcontainer/nsinit/command.go b/pkg/libcontainer/nsinit/command.go new file mode 100644 index 0000000000..b1c5631b4b --- /dev/null +++ b/pkg/libcontainer/nsinit/command.go @@ -0,0 +1,34 @@ +package nsinit + +import ( + "fmt" + "github.com/dotcloud/docker/pkg/libcontainer" + "os" + "os/exec" + "syscall" +) + +type CommandFactory interface { + Create(container *libcontainer.Container, console, logFile string, syncFd uintptr, args []string) *exec.Cmd +} + +type DefaultCommandFactory struct{} + +// Create will return an exec.Cmd with the Cloneflags set to the proper namespaces +// defined on the container's configuration and use the current binary as the init with the +// args provided +func (c *DefaultCommandFactory) Create(container *libcontainer.Container, console, logFile string, pipe uintptr, args []string) *exec.Cmd { + // get our binary name so we can always reexec ourself + name := os.Args[0] + command := exec.Command(name, append([]string{ + "-console", console, + "-pipe", fmt.Sprint(pipe), + "-log", logFile, + "init"}, args...)...) + + command.SysProcAttr = &syscall.SysProcAttr{ + Cloneflags: uintptr(GetNamespaceFlags(container.Namespaces)), + } + command.Env = container.Env + return command +} diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index ec75e9c923..ee83f4f107 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -3,13 +3,9 @@ package nsinit import ( - "fmt" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/network" "github.com/dotcloud/docker/pkg/system" - "github.com/dotcloud/docker/pkg/term" - "io" - "io/ioutil" "log" "os" "os/exec" @@ -18,9 +14,11 @@ import ( // Exec performes setup outside of a namespace so that a container can be // executed. Exec is a high level function for working with container namespaces. 
-func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io.Writer, - master *os.File, logFile string, args []string) (int, error) { +func Exec(container *libcontainer.Container, + factory CommandFactory, state StateWriter, term Terminal, + logFile string, args []string) (int, error) { var ( + master *os.File console string err error ) @@ -38,37 +36,28 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. if err != nil { return -1, err } + term.SetMaster(master) } - command := CreateCommand(container, console, logFile, syncPipe.child.Fd(), args) - if container.Tty { - log.Printf("starting copy for tty") - go io.Copy(stdout, master) - go io.Copy(master, stdin) - - state, err := SetupWindow(master, os.Stdin) - if err != nil { - command.Process.Kill() - return -1, err - } - defer term.RestoreTerminal(os.Stdin.Fd(), state) - } else { - if err := startStdCopy(command, stdin, stdout, stderr); err != nil { - command.Process.Kill() - return -1, err - } + command := factory.Create(container, console, logFile, syncPipe.child.Fd(), args) + if err := term.Attach(command); err != nil { + return -1, err } + defer term.Close() log.Printf("staring init") if err := command.Start(); err != nil { return -1, err } log.Printf("writing state file") - if err := writePidFile(command); err != nil { + if err := state.WritePid(command.Process.Pid); err != nil { command.Process.Kill() return -1, err } - defer deletePidFile() + defer func() { + log.Printf("removing state file") + state.DeletePid() + }() // Do this before syncing with child so that no children // can escape the cgroup @@ -124,19 +113,6 @@ func InitializeNetworking(container *libcontainer.Container, nspid int, pipe *Sy return nil } -// SetupWindow gets the parent window size and sets the master -// pty to the current size and set the parents mode to RAW -func SetupWindow(master, parent *os.File) (*term.State, error) { - ws, err := term.GetWinsize(parent.Fd()) - if err != nil { - 
return nil, err - } - if err := term.SetWinsize(master.Fd(), ws); err != nil { - return nil, err - } - return term.SetRawTerminal(parent.Fd()) -} - // CreateMasterAndConsole will open /dev/ptmx on the host and retreive the // pts name for use as the pty slave inside the container func CreateMasterAndConsole() (*os.File, string, error) { @@ -153,58 +129,3 @@ func CreateMasterAndConsole() (*os.File, string, error) { } return master, console, nil } - -// writePidFile writes the namespaced processes pid to .nspid in the rootfs for the container -func writePidFile(command *exec.Cmd) error { - return ioutil.WriteFile(".nspid", []byte(fmt.Sprint(command.Process.Pid)), 0655) -} - -func deletePidFile() error { - log.Printf("removing .nspid file") - return os.Remove(".nspid") -} - -// createCommand will return an exec.Cmd with the Cloneflags set to the proper namespaces -// defined on the container's configuration and use the current binary as the init with the -// args provided -func CreateCommand(container *libcontainer.Container, console, logFile string, pipe uintptr, args []string) *exec.Cmd { - // get our binary name so we can always reexec ourself - name := os.Args[0] - command := exec.Command(name, append([]string{ - "-console", console, - "-pipe", fmt.Sprint(pipe), - "-log", logFile, - "init"}, args...)...) 
- - command.SysProcAttr = &syscall.SysProcAttr{ - Cloneflags: uintptr(GetNamespaceFlags(container.Namespaces)), - } - command.Env = container.Env - return command -} - -func startStdCopy(command *exec.Cmd, stdin io.Reader, stdout, stderr io.Writer) error { - log.Printf("opening std pipes") - inPipe, err := command.StdinPipe() - if err != nil { - return err - } - outPipe, err := command.StdoutPipe() - if err != nil { - return err - } - errPipe, err := command.StderrPipe() - if err != nil { - return err - } - - log.Printf("starting copy for std pipes") - go func() { - defer inPipe.Close() - io.Copy(inPipe, stdin) - }() - go io.Copy(stdout, outPipe) - go io.Copy(stderr, errPipe) - - return nil -} diff --git a/pkg/libcontainer/nsinit/nsinit/main.go b/pkg/libcontainer/nsinit/nsinit/main.go index 2400ab6903..c299412c7b 100644 --- a/pkg/libcontainer/nsinit/nsinit/main.go +++ b/pkg/libcontainer/nsinit/nsinit/main.go @@ -57,8 +57,10 @@ func main() { if nspid > 0 { exitCode, err = nsinit.ExecIn(container, nspid, flag.Args()[1:]) } else { + term := nsinit.NewTerminal(os.Stdin, os.Stdout, os.Stderr, container.Tty) exitCode, err = nsinit.Exec(container, - os.Stdin, os.Stdout, os.Stderr, nil, + &nsinit.DefaultCommandFactory{}, &nsinit.DefaultStateWriter{}, + term, logFile, flag.Args()[1:]) } if err != nil { diff --git a/pkg/libcontainer/nsinit/state.go b/pkg/libcontainer/nsinit/state.go new file mode 100644 index 0000000000..1f0fedd110 --- /dev/null +++ b/pkg/libcontainer/nsinit/state.go @@ -0,0 +1,24 @@ +package nsinit + +import ( + "fmt" + "io/ioutil" + "os" +) + +type StateWriter interface { + WritePid(pid int) error + DeletePid() error +} + +type DefaultStateWriter struct { +} + +// writePidFile writes the namespaced processes pid to .nspid in the rootfs for the container +func (*DefaultStateWriter) WritePid(pid int) error { + return ioutil.WriteFile(".nspid", []byte(fmt.Sprint(pid)), 0655) +} + +func (*DefaultStateWriter) DeletePid() error { + return os.Remove(".nspid") +} 
diff --git a/pkg/libcontainer/nsinit/term.go b/pkg/libcontainer/nsinit/term.go new file mode 100644 index 0000000000..649246891e --- /dev/null +++ b/pkg/libcontainer/nsinit/term.go @@ -0,0 +1,109 @@ +package nsinit + +import ( + "github.com/dotcloud/docker/pkg/term" + "io" + "os" + "os/exec" +) + +type Terminal interface { + io.Closer + SetMaster(*os.File) + Attach(*exec.Cmd) error +} + +func NewTerminal(stdin io.Reader, stdout, stderr io.Writer, tty bool) Terminal { + if tty { + return &TtyTerminal{ + stdin: stdin, + stdout: stdout, + stderr: stderr, + } + } + return &StdTerminal{ + stdin: stdin, + stdout: stdout, + stderr: stderr, + } +} + +type TtyTerminal struct { + stdin io.Reader + stdout, stderr io.Writer + master *os.File + state *term.State +} + +func (t *TtyTerminal) SetMaster(master *os.File) { + t.master = master +} + +func (t *TtyTerminal) Attach(command *exec.Cmd) error { + go io.Copy(t.stdout, t.master) + go io.Copy(t.master, t.stdin) + + state, err := t.setupWindow(t.master, os.Stdin) + if err != nil { + command.Process.Kill() + return err + } + t.state = state + return err +} + +// SetupWindow gets the parent window size and sets the master +// pty to the current size and set the parents mode to RAW +func (t *TtyTerminal) setupWindow(master, parent *os.File) (*term.State, error) { + ws, err := term.GetWinsize(parent.Fd()) + if err != nil { + return nil, err + } + if err := term.SetWinsize(master.Fd(), ws); err != nil { + return nil, err + } + return term.SetRawTerminal(parent.Fd()) +} + +func (t *TtyTerminal) Close() error { + term.RestoreTerminal(os.Stdin.Fd(), t.state) + return t.master.Close() +} + +type StdTerminal struct { + stdin io.Reader + stdout, stderr io.Writer +} + +func (s *StdTerminal) SetMaster(*os.File) { + // no need to set master on non tty +} + +func (s *StdTerminal) Close() error { + return nil +} + +func (s *StdTerminal) Attach(command *exec.Cmd) error { + inPipe, err := command.StdinPipe() + if err != nil { + return err + } + 
outPipe, err := command.StdoutPipe() + if err != nil { + return err + } + errPipe, err := command.StderrPipe() + if err != nil { + return err + } + + go func() { + defer inPipe.Close() + io.Copy(inPipe, s.stdin) + }() + + go io.Copy(s.stdout, outPipe) + go io.Copy(s.stderr, errPipe) + + return nil +} From fac41af25bd5f42269424a788783a4280dd7fc9c Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Sat, 22 Feb 2014 01:21:26 -0800 Subject: [PATCH 11/13] Refactor driver to use Exec function from nsini Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- execdriver/lxc/term.go | 37 ++-- execdriver/namespaces/driver.go | 296 +++++++++---------------------- pkg/libcontainer/nsinit/state.go | 10 +- pkg/libcontainer/nsinit/term.go | 9 + 4 files changed, 113 insertions(+), 239 deletions(-) diff --git a/execdriver/lxc/term.go b/execdriver/lxc/term.go index d772f60972..db58c3181a 100644 --- a/execdriver/lxc/term.go +++ b/execdriver/lxc/term.go @@ -6,6 +6,7 @@ import ( "github.com/kr/pty" "io" "os" + "os/exec" ) func SetTerminal(command *execdriver.Command, pipes *execdriver.Pipes) error { @@ -26,8 +27,8 @@ func SetTerminal(command *execdriver.Command, pipes *execdriver.Pipes) error { } type TtyConsole struct { - master *os.File - slave *os.File + MasterPty *os.File + SlavePty *os.File } func NewTtyConsole(command *execdriver.Command, pipes *execdriver.Pipes) (*TtyConsole, error) { @@ -36,28 +37,28 @@ func NewTtyConsole(command *execdriver.Command, pipes *execdriver.Pipes) (*TtyCo return nil, err } tty := &TtyConsole{ - master: ptyMaster, - slave: ptySlave, + MasterPty: ptyMaster, + SlavePty: ptySlave, } - if err := tty.attach(command, pipes); err != nil { + if err := tty.AttachPipes(&command.Cmd, pipes); err != nil { tty.Close() return nil, err } + command.Console = tty.SlavePty.Name() return tty, nil } func (t *TtyConsole) Master() *os.File { - return t.master + return t.MasterPty } func (t *TtyConsole) Resize(h, w int) error { - return 
term.SetWinsize(t.master.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)}) + return term.SetWinsize(t.MasterPty.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)}) } -func (t *TtyConsole) attach(command *execdriver.Command, pipes *execdriver.Pipes) error { - command.Stdout = t.slave - command.Stderr = t.slave - command.Console = t.slave.Name() +func (t *TtyConsole) AttachPipes(command *exec.Cmd, pipes *execdriver.Pipes) error { + command.Stdout = t.SlavePty + command.Stderr = t.SlavePty go func() { if wb, ok := pipes.Stdout.(interface { @@ -65,24 +66,24 @@ func (t *TtyConsole) attach(command *execdriver.Command, pipes *execdriver.Pipes }); ok { defer wb.CloseWriters() } - io.Copy(pipes.Stdout, t.master) + io.Copy(pipes.Stdout, t.MasterPty) }() if pipes.Stdin != nil { - command.Stdin = t.slave + command.Stdin = t.SlavePty command.SysProcAttr.Setctty = true go func() { defer pipes.Stdin.Close() - io.Copy(t.master, pipes.Stdin) + io.Copy(t.MasterPty, pipes.Stdin) }() } return nil } func (t *TtyConsole) Close() error { - t.slave.Close() - return t.master.Close() + t.SlavePty.Close() + return t.MasterPty.Close() } type StdConsole struct { @@ -91,13 +92,13 @@ type StdConsole struct { func NewStdConsole(command *execdriver.Command, pipes *execdriver.Pipes) (*StdConsole, error) { std := &StdConsole{} - if err := std.attach(command, pipes); err != nil { + if err := std.AttachPipes(&command.Cmd, pipes); err != nil { return nil, err } return std, nil } -func (s *StdConsole) attach(command *execdriver.Command, pipes *execdriver.Pipes) error { +func (s *StdConsole) AttachPipes(command *exec.Cmd, pipes *execdriver.Pipes) error { command.Stdout = pipes.Stdout command.Stderr = pipes.Stderr diff --git a/execdriver/namespaces/driver.go b/execdriver/namespaces/driver.go index e243c64703..a3f095f464 100644 --- a/execdriver/namespaces/driver.go +++ b/execdriver/namespaces/driver.go @@ -5,15 +5,10 @@ import ( "errors" "fmt" "github.com/dotcloud/docker/execdriver" + 
"github.com/dotcloud/docker/execdriver/lxc" "github.com/dotcloud/docker/pkg/libcontainer" - "github.com/dotcloud/docker/pkg/libcontainer/network" "github.com/dotcloud/docker/pkg/libcontainer/nsinit" - "github.com/dotcloud/docker/pkg/libcontainer/utils" - "github.com/dotcloud/docker/pkg/system" - "github.com/dotcloud/docker/pkg/term" - "io" "io/ioutil" - "log" "os" "os/exec" "path/filepath" @@ -44,129 +39,31 @@ func NewDriver() (*driver, error) { } func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) { - container := createContainer(c) + var ( + term nsinit.Terminal + container = createContainer(c) + factory = &dockerCommandFactory{c} + stateWriter = &dockerStateWriter{ + callback: startCallback, + c: c, + dsw: &nsinit.DefaultStateWriter{c.Rootfs}, + } + ) + if c.Tty { + term = &dockerTtyTerm{ + pipes: pipes, + } + } else { + term = &dockerStdTerm{ + pipes: pipes, + } + } + c.Terminal = term if err := writeContainerFile(container, c.Rootfs); err != nil { return -1, err } - - var ( - console string - master *os.File - err error - - inPipe io.WriteCloser - outPipe, errPipe io.ReadCloser - ) - - if container.Tty { - log.Printf("setting up master and console") - master, console, err = createMasterAndConsole() - if err != nil { - return -1, err - } - } - c.Terminal = NewTerm(pipes, master) - - // create a pipe so that we can syncronize with the namespaced process and - // pass the veth name to the child - r, w, err := os.Pipe() - if err != nil { - return -1, err - } - system.UsetCloseOnExec(r.Fd()) - args := append([]string{c.Entrypoint}, c.Arguments...) 
- createCommand(c, container, console, "/nsinit.logs", r.Fd(), args) - command := c - - if !container.Tty { - log.Printf("opening pipes on command") - if inPipe, err = command.StdinPipe(); err != nil { - return -1, err - } - if outPipe, err = command.StdoutPipe(); err != nil { - return -1, err - } - if errPipe, err = command.StderrPipe(); err != nil { - return -1, err - } - } - - log.Printf("staring init") - if err := command.Start(); err != nil { - return -1, err - } - log.Printf("writting state file") - if err := writePidFile(c.Rootfs, command.Process.Pid); err != nil { - command.Process.Kill() - return -1, err - } - defer deletePidFile(c.Rootfs) - - // Do this before syncing with child so that no children - // can escape the cgroup - if container.Cgroups != nil { - log.Printf("setting up cgroups") - if err := container.Cgroups.Apply(command.Process.Pid); err != nil { - command.Process.Kill() - return -1, err - } - } - - if container.Network != nil { - log.Printf("creating veth pair") - vethPair, err := initializeContainerVeth(container.Network.Bridge, container.Network.Mtu, command.Process.Pid) - if err != nil { - return -1, err - } - log.Printf("sending %s as veth pair name", vethPair) - sendVethName(w, vethPair) - } - - // Sync with child - log.Printf("closing sync pipes") - w.Close() - r.Close() - - if container.Tty { - log.Printf("starting copy for tty") - go io.Copy(pipes.Stdout, master) - if pipes.Stdin != nil { - go io.Copy(master, pipes.Stdin) - } - - /* - state, err := setupWindow(master) - if err != nil { - command.Process.Kill() - return -1, err - } - defer term.RestoreTerminal(uintptr(syscall.Stdin), state) - */ - } else { - log.Printf("starting copy for std pipes") - if pipes.Stdin != nil { - go func() { - defer inPipe.Close() - io.Copy(inPipe, pipes.Stdin) - }() - } - go io.Copy(pipes.Stdout, outPipe) - go io.Copy(pipes.Stderr, errPipe) - } - - if startCallback != nil { - startCallback(c) - } - - log.Printf("waiting on process") - if err := 
command.Wait(); err != nil { - if _, ok := err.(*exec.ExitError); !ok { - return -1, err - } - } - log.Printf("process ended") - return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil + return nsinit.Exec(container, factory, stateWriter, term, "/nsinit.log", args) } func (d *driver) Kill(p *execdriver.Command, sig int) error { @@ -207,107 +104,22 @@ func getEnv(key string, env []string) string { return "" } -// sendVethName writes the veth pair name to the child's stdin then closes the -// pipe so that the child stops waiting for more data -func sendVethName(pipe io.Writer, name string) { - fmt.Fprint(pipe, name) -} - -// initializeContainerVeth will create a veth pair and setup the host's -// side of the pair by setting the specified bridge as the master and bringing -// up the interface. -// -// Then will with set the other side of the veth pair into the container's namespaced -// using the pid and returns the veth's interface name to provide to the container to -// finish setting up the interface inside the namespace -func initializeContainerVeth(bridge string, mtu, nspid int) (string, error) { - name1, name2, err := createVethPair() - if err != nil { - return "", err - } - log.Printf("veth pair created %s <> %s", name1, name2) - if err := network.SetInterfaceMaster(name1, bridge); err != nil { - return "", err - } - if err := network.SetMtu(name1, mtu); err != nil { - return "", err - } - if err := network.InterfaceUp(name1); err != nil { - return "", err - } - log.Printf("setting %s inside %d namespace", name2, nspid) - if err := network.SetInterfaceInNamespacePid(name2, nspid); err != nil { - return "", err - } - return name2, nil -} - -func setupWindow(master *os.File) (*term.State, error) { - ws, err := term.GetWinsize(os.Stdin.Fd()) - if err != nil { - return nil, err - } - if err := term.SetWinsize(master.Fd(), ws); err != nil { - return nil, err - } - return term.SetRawTerminal(os.Stdin.Fd()) -} - -// createMasterAndConsole will open 
/dev/ptmx on the host and retreive the -// pts name for use as the pty slave inside the container -func createMasterAndConsole() (*os.File, string, error) { - master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) - if err != nil { - return nil, "", err - } - console, err := system.Ptsname(master) - if err != nil { - return nil, "", err - } - if err := system.Unlockpt(master); err != nil { - return nil, "", err - } - return master, console, nil -} - -// createVethPair will automatically generage two random names for -// the veth pair and ensure that they have been created -func createVethPair() (name1 string, name2 string, err error) { - name1, err = utils.GenerateRandomName("dock", 4) - if err != nil { - return - } - name2, err = utils.GenerateRandomName("dock", 4) - if err != nil { - return - } - if err = network.CreateVethPair(name1, name2); err != nil { - return - } - return -} - -// writePidFile writes the namespaced processes pid to .nspid in the rootfs for the container -func writePidFile(rootfs string, pid int) error { - return ioutil.WriteFile(filepath.Join(rootfs, ".nspid"), []byte(fmt.Sprint(pid)), 0655) -} - -func deletePidFile(rootfs string) error { - return os.Remove(filepath.Join(rootfs, ".nspid")) +type dockerCommandFactory struct { + c *execdriver.Command } // createCommand will return an exec.Cmd with the Cloneflags set to the proper namespaces // defined on the container's configuration and use the current binary as the init with the // args provided -func createCommand(c *execdriver.Command, container *libcontainer.Container, - console, logFile string, pipe uintptr, args []string) { - +func (d *dockerCommandFactory) Create(container *libcontainer.Container, + console, logFile string, syncFd uintptr, args []string) *exec.Cmd { + c := d.c aname, _ := exec.LookPath("nsinit") c.Path = aname c.Args = append([]string{ aname, "-console", console, - "-pipe", fmt.Sprint(pipe), + "-pipe", fmt.Sprint(syncFd), "-log", 
logFile, "init", }, args...) @@ -316,6 +128,26 @@ func createCommand(c *execdriver.Command, container *libcontainer.Container, } c.Env = container.Env c.Dir = c.Rootfs + + return &c.Cmd +} + +type dockerStateWriter struct { + dsw nsinit.StateWriter + c *execdriver.Command + callback execdriver.StartCallback +} + +func (d *dockerStateWriter) WritePid(pid int) error { + err := d.dsw.WritePid(pid) + if d.callback != nil { + d.callback(d.c) + } + return err +} + +func (d *dockerStateWriter) DeletePid() error { + return d.dsw.DeletePid() } func createContainer(c *execdriver.Command) *libcontainer.Container { @@ -334,7 +166,11 @@ func createContainer(c *execdriver.Command) *libcontainer.Container { Mtu: c.Network.Mtu, Address: fmt.Sprintf("%s/%d", c.Network.IPAddress, c.Network.IPPrefixLen), Gateway: c.Network.Gateway, - Bridge: c.Network.Bridge, + Type: "veth", + Context: libcontainer.Context{ + "prefix": "dock", + "bridge": c.Network.Bridge, + }, } } if c.Privileged { @@ -347,3 +183,29 @@ func createContainer(c *execdriver.Command) *libcontainer.Container { } return container } + +type dockerStdTerm struct { + lxc.StdConsole + pipes *execdriver.Pipes +} + +func (d *dockerStdTerm) Attach(cmd *exec.Cmd) error { + return d.AttachPipes(cmd, d.pipes) +} + +func (d *dockerStdTerm) SetMaster(master *os.File) { + // do nothing +} + +type dockerTtyTerm struct { + lxc.TtyConsole + pipes *execdriver.Pipes +} + +func (t *dockerTtyTerm) Attach(cmd *exec.Cmd) error { + return t.AttachPipes(cmd, t.pipes) +} + +func (t *dockerTtyTerm) SetMaster(master *os.File) { + t.MasterPty = master +} diff --git a/pkg/libcontainer/nsinit/state.go b/pkg/libcontainer/nsinit/state.go index 1f0fedd110..2dbaaa5977 100644 --- a/pkg/libcontainer/nsinit/state.go +++ b/pkg/libcontainer/nsinit/state.go @@ -4,6 +4,7 @@ import ( "fmt" "io/ioutil" "os" + "path/filepath" ) type StateWriter interface { @@ -12,13 +13,14 @@ type StateWriter interface { } type DefaultStateWriter struct { + Root string } // 
writePidFile writes the namespaced processes pid to .nspid in the rootfs for the container -func (*DefaultStateWriter) WritePid(pid int) error { - return ioutil.WriteFile(".nspid", []byte(fmt.Sprint(pid)), 0655) +func (d *DefaultStateWriter) WritePid(pid int) error { + return ioutil.WriteFile(filepath.Join(d.Root, ".nspid"), []byte(fmt.Sprint(pid)), 0655) } -func (*DefaultStateWriter) DeletePid() error { - return os.Remove(".nspid") +func (d *DefaultStateWriter) DeletePid() error { + return os.Remove(filepath.Join(d.Root, ".nspid")) } diff --git a/pkg/libcontainer/nsinit/term.go b/pkg/libcontainer/nsinit/term.go index 649246891e..58dccab2b8 100644 --- a/pkg/libcontainer/nsinit/term.go +++ b/pkg/libcontainer/nsinit/term.go @@ -11,6 +11,7 @@ type Terminal interface { io.Closer SetMaster(*os.File) Attach(*exec.Cmd) error + Resize(h, w int) error } func NewTerminal(stdin io.Reader, stdout, stderr io.Writer, tty bool) Terminal { @@ -35,6 +36,10 @@ type TtyTerminal struct { state *term.State } +func (t *TtyTerminal) Resize(h, w int) error { + return term.SetWinsize(t.master.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)}) +} + func (t *TtyTerminal) SetMaster(master *os.File) { t.master = master } @@ -83,6 +88,10 @@ func (s *StdTerminal) Close() error { return nil } +func (s *StdTerminal) Resize(h, w int) error { + return nil +} + func (s *StdTerminal) Attach(command *exec.Cmd) error { inPipe, err := command.StdinPipe() if err != nil { From 172260a49be6c3516edc6869d58957e844f9c69b Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Sat, 22 Feb 2014 01:28:59 -0800 Subject: [PATCH 12/13] Fix tty copy for driver Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- execdriver/namespaces/driver.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/execdriver/namespaces/driver.go b/execdriver/namespaces/driver.go index a3f095f464..efbb09d131 100644 --- a/execdriver/namespaces/driver.go +++ b/execdriver/namespaces/driver.go @@ 
-8,6 +8,7 @@ import ( "github.com/dotcloud/docker/execdriver/lxc" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/nsinit" + "io" "io/ioutil" "os" "os/exec" @@ -203,7 +204,11 @@ type dockerTtyTerm struct { } func (t *dockerTtyTerm) Attach(cmd *exec.Cmd) error { - return t.AttachPipes(cmd, t.pipes) + go io.Copy(t.pipes.Stdout, t.MasterPty) + if t.pipes.Stdin != nil { + go io.Copy(t.MasterPty, t.pipes.Stdin) + } + return nil } func (t *dockerTtyTerm) SetMaster(master *os.File) { From a08e78a78c4b548919515fa7910db56990ef44dc Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 24 Feb 2014 10:46:20 -0800 Subject: [PATCH 13/13] Look for cpu subsystem instead of memory Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/cgroups/cgroups.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/cgroups/cgroups.go b/pkg/cgroups/cgroups.go index 96002f0af9..e260d67661 100644 --- a/pkg/cgroups/cgroups.go +++ b/pkg/cgroups/cgroups.go @@ -132,7 +132,7 @@ func (c *Cgroup) Apply(pid int) error { // http://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups/ // // we can pick any subsystem to find the root - cgroupRoot, err := FindCgroupMountpoint("memory") + cgroupRoot, err := FindCgroupMountpoint("cpu") if err != nil { return err }