From e8abaf217b887fcd6a157b4f905156bd245f8f0a Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Tue, 18 Feb 2014 16:56:11 -0800 Subject: [PATCH 01/81] Initial commit of libcontainer Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/MAINTAINERS | 2 + pkg/libcontainer/README.md | 63 +++++ pkg/libcontainer/capabilities/capabilities.go | 49 ++++ pkg/libcontainer/cli/main.go | 171 +++++++++++ pkg/libcontainer/container.go | 27 ++ pkg/libcontainer/container.json | 38 +++ pkg/libcontainer/errors.go | 9 + pkg/libcontainer/namespaces/calls_linux.go | 164 +++++++++++ pkg/libcontainer/namespaces/exec.go | 266 ++++++++++++++++++ pkg/libcontainer/namespaces/linux_x86_64.go | 7 + pkg/libcontainer/namespaces/mount.go | 207 ++++++++++++++ pkg/libcontainer/namespaces/namespaces.go | 70 +++++ pkg/libcontainer/namespaces/ns_linux.go | 35 +++ pkg/libcontainer/namespaces/utils.go | 108 +++++++ pkg/libcontainer/network/network.go | 104 +++++++ pkg/libcontainer/network/veth.go | 85 ++++++ pkg/libcontainer/privileged.json | 22 ++ pkg/libcontainer/types.go | 49 ++++ pkg/libcontainer/ubuntu.json | 22 ++ pkg/libcontainer/utils/utils.go | 33 +++ 20 files changed, 1531 insertions(+) create mode 100644 pkg/libcontainer/MAINTAINERS create mode 100644 pkg/libcontainer/README.md create mode 100644 pkg/libcontainer/capabilities/capabilities.go create mode 100644 pkg/libcontainer/cli/main.go create mode 100644 pkg/libcontainer/container.go create mode 100644 pkg/libcontainer/container.json create mode 100644 pkg/libcontainer/errors.go create mode 100644 pkg/libcontainer/namespaces/calls_linux.go create mode 100644 pkg/libcontainer/namespaces/exec.go create mode 100644 pkg/libcontainer/namespaces/linux_x86_64.go create mode 100644 pkg/libcontainer/namespaces/mount.go create mode 100644 pkg/libcontainer/namespaces/namespaces.go create mode 100644 pkg/libcontainer/namespaces/ns_linux.go create mode 100644 pkg/libcontainer/namespaces/utils.go create mode 100644 
pkg/libcontainer/network/network.go create mode 100644 pkg/libcontainer/network/veth.go create mode 100644 pkg/libcontainer/privileged.json create mode 100644 pkg/libcontainer/types.go create mode 100644 pkg/libcontainer/ubuntu.json create mode 100644 pkg/libcontainer/utils/utils.go diff --git a/pkg/libcontainer/MAINTAINERS b/pkg/libcontainer/MAINTAINERS new file mode 100644 index 0000000000..e53d933d47 --- /dev/null +++ b/pkg/libcontainer/MAINTAINERS @@ -0,0 +1,2 @@ +Michael Crosby (@crosbymichael) +Guillaume Charmes (@creack) diff --git a/pkg/libcontainer/README.md b/pkg/libcontainer/README.md new file mode 100644 index 0000000000..91d747863c --- /dev/null +++ b/pkg/libcontainer/README.md @@ -0,0 +1,63 @@ +## libcontainer - reference implementation for containers + +#### playground + + +Use the cli package to test out functionality. + +First set up a container configuration. You will need a root fs; the simplest option is to take the path to a +stopped docker container and use that. + + +```json +{ + "id": "koye", + "namespace_pid": 12265, + "command": { + "args": [ + "/bin/bash" + ], + "environment": [ + "HOME=/", + "PATH=PATH=$PATH:/bin:/usr/bin:/sbin:/usr/sbin", + "container=docker", + "TERM=xterm" + ] + }, + "rootfs": "/root/development/gocode/src/github.com/docker/libcontainer/namespaces/ubuntu", + "network": null, + "user": "", + "working_dir": "", + "namespaces": [ + "NEWNET", + "NEWIPC", + "NEWNS", + "NEWPID", + "NEWUTS" + ], + "capabilities": [ + "SETPCAP", + "SYS_MODULE", + "SYS_RAWIO", + "SYS_PACCT", + "SYS_ADMIN", + "SYS_NICE", + "SYS_RESOURCE", + "SYS_TIME", + "SYS_TTY_CONFIG", + "MKNOD", + "AUDIT_WRITE", + "AUDIT_CONTROL", + "MAC_OVERRIDE", + "MAC_ADMIN" + ] +} +``` + +After you have a json file and a rootfs path to use, just run: +`./cli exec container.json` + + +If you want to attach to an existing namespace, just use the same json +file with the container still running and run: +`./cli execin container.json` diff --git a/pkg/libcontainer/capabilities/capabilities.go 
b/pkg/libcontainer/capabilities/capabilities.go new file mode 100644 index 0000000000..3301e10f7f --- /dev/null +++ b/pkg/libcontainer/capabilities/capabilities.go @@ -0,0 +1,49 @@ +package capabilities + +import ( + "github.com/dotcloud/docker/pkg/libcontainer" + "github.com/syndtr/gocapability/capability" + "os" +) + +var capMap = map[libcontainer.Capability]capability.Cap{ + libcontainer.CAP_SETPCAP: capability.CAP_SETPCAP, + libcontainer.CAP_SYS_MODULE: capability.CAP_SYS_MODULE, + libcontainer.CAP_SYS_RAWIO: capability.CAP_SYS_RAWIO, + libcontainer.CAP_SYS_PACCT: capability.CAP_SYS_PACCT, + libcontainer.CAP_SYS_ADMIN: capability.CAP_SYS_ADMIN, + libcontainer.CAP_SYS_NICE: capability.CAP_SYS_NICE, + libcontainer.CAP_SYS_RESOURCE: capability.CAP_SYS_RESOURCE, + libcontainer.CAP_SYS_TIME: capability.CAP_SYS_TIME, + libcontainer.CAP_SYS_TTY_CONFIG: capability.CAP_SYS_TTY_CONFIG, + libcontainer.CAP_MKNOD: capability.CAP_MKNOD, + libcontainer.CAP_AUDIT_WRITE: capability.CAP_AUDIT_WRITE, + libcontainer.CAP_AUDIT_CONTROL: capability.CAP_AUDIT_CONTROL, + libcontainer.CAP_MAC_OVERRIDE: capability.CAP_MAC_OVERRIDE, + libcontainer.CAP_MAC_ADMIN: capability.CAP_MAC_ADMIN, +} + +// DropCapabilities drops capabilities for the current process based +// on the container's configuration. +func DropCapabilities(container *libcontainer.Container) error { + if drop := getCapabilities(container); len(drop) > 0 { + c, err := capability.NewPid(os.Getpid()) + if err != nil { + return err + } + c.Unset(capability.CAPS|capability.BOUNDS, drop...) 
+ + if err := c.Apply(capability.CAPS | capability.BOUNDS); err != nil { + return err + } + } + return nil +} + +func getCapabilities(container *libcontainer.Container) []capability.Cap { + drop := []capability.Cap{} + for _, c := range container.Capabilities { + drop = append(drop, capMap[c]) + } + return drop +} diff --git a/pkg/libcontainer/cli/main.go b/pkg/libcontainer/cli/main.go new file mode 100644 index 0000000000..490135ef5a --- /dev/null +++ b/pkg/libcontainer/cli/main.go @@ -0,0 +1,171 @@ +package main + +import ( + "encoding/json" + "flag" + "fmt" + "github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/pkg/libcontainer/namespaces" + "github.com/dotcloud/docker/pkg/libcontainer/network" + "github.com/dotcloud/docker/pkg/libcontainer/utils" + "os" +) + +var ( + displayPid bool + newCommand string + usrNet bool +) + +func init() { + flag.BoolVar(&displayPid, "pid", false, "display the pid before waiting") + flag.StringVar(&newCommand, "cmd", "/bin/bash", "command to run in the existing namespace") + flag.BoolVar(&usrNet, "net", false, "user a net namespace") + flag.Parse() +} + +func exec(container *libcontainer.Container) error { + var ( + netFile *os.File + err error + ) + container.NetNsFd = 0 + + if usrNet { + netFile, err = os.Open("/root/nsroot/test") + if err != nil { + return err + } + container.NetNsFd = netFile.Fd() + } + + pid, err := namespaces.Exec(container) + if err != nil { + return fmt.Errorf("error exec container %s", err) + } + + if displayPid { + fmt.Println(pid) + } + + exitcode, err := utils.WaitOnPid(pid) + if err != nil { + return fmt.Errorf("error waiting on child %s", err) + } + fmt.Println(exitcode) + if usrNet { + netFile.Close() + if err := network.DeleteNetworkNamespace("/root/nsroot/test"); err != nil { + return err + } + } + os.Exit(exitcode) + return nil +} + +func execIn(container *libcontainer.Container) error { + f, err := os.Open("/root/nsroot/test") + if err != nil { + return err + } + 
container.NetNsFd = f.Fd() + pid, err := namespaces.ExecIn(container, &libcontainer.Command{ + Env: container.Command.Env, + Args: []string{ + newCommand, + }, + }) + if err != nil { + return fmt.Errorf("error exexin container %s", err) + } + exitcode, err := utils.WaitOnPid(pid) + if err != nil { + return fmt.Errorf("error waiting on child %s", err) + } + os.Exit(exitcode) + return nil +} + +func createNet(config *libcontainer.Network) error { + root := "/root/nsroot" + if err := network.SetupNamespaceMountDir(root); err != nil { + return err + } + + nspath := root + "/test" + if err := network.CreateNetworkNamespace(nspath); err != nil { + return err // every later step needs the namespace, so report the failure + } + if err := network.CreateVethPair("veth0", config.TempVethName); err != nil { + return err + } + if err := network.SetInterfaceMaster("veth0", config.Bridge); err != nil { + return err + } + if err := network.InterfaceUp("veth0"); err != nil { + return err + } + // open the namespace file so the configured peer interface can be moved into it + f, err := os.Open(nspath) + if err != nil { + return err + } + defer f.Close() + + if err := network.SetInterfaceInNamespaceFd(config.TempVethName, int(f.Fd())); err != nil { + return err + } + + /* + if err := network.SetupVethInsideNamespace(f.Fd(), config); err != nil { + return err + } + */ + return nil +} + +func printErr(err error) { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) +} + +func main() { + var ( + err error + cliCmd = flag.Arg(0) + config = flag.Arg(1) + ) + f, err := os.Open(config) + if err != nil { + printErr(err) + } + + dec := json.NewDecoder(f) + var container *libcontainer.Container + + if err := dec.Decode(&container); err != nil { + printErr(err) + } + f.Close() + + switch cliCmd { + case "exec": + err = exec(container) + case "execin": + err = execIn(container) + case "net": + err = createNet(&libcontainer.Network{ + TempVethName: "veth1", + IP: "172.17.0.100/16", + Gateway: "172.17.42.1", + Mtu: 1500, + Bridge: "docker0", + }) + default: + err = fmt.Errorf("command not supported: %s", cliCmd) + } + + if err != nil { + printErr(err) + } +} 
diff --git a/pkg/libcontainer/container.go b/pkg/libcontainer/container.go new file mode 100644 index 0000000000..b77890fb5c --- /dev/null +++ b/pkg/libcontainer/container.go @@ -0,0 +1,27 @@ +package libcontainer + +type Container struct { + ID string `json:"id,omitempty"` + NsPid int `json:"namespace_pid,omitempty"` + Command *Command `json:"command,omitempty"` + RootFs string `json:"rootfs,omitempty"` + ReadonlyFs bool `json:"readonly_fs,omitempty"` + NetNsFd uintptr `json:"network_namespace_fd,omitempty"` + User string `json:"user,omitempty"` + WorkingDir string `json:"working_dir,omitempty"` + Namespaces Namespaces `json:"namespaces,omitempty"` + Capabilities Capabilities `json:"capabilities,omitempty"` +} + +type Command struct { + Args []string `json:"args,omitempty"` + Env []string `json:"environment,omitempty"` +} + +type Network struct { + TempVethName string `json:"temp_veth,omitempty"` + IP string `json:"ip,omitempty"` + Gateway string `json:"gateway,omitempty"` + Bridge string `json:"bridge,omitempty"` + Mtu int `json:"mtu,omitempty"` +} diff --git a/pkg/libcontainer/container.json b/pkg/libcontainer/container.json new file mode 100644 index 0000000000..ed8eb1bd78 --- /dev/null +++ b/pkg/libcontainer/container.json @@ -0,0 +1,38 @@ +{ + "id": "koye", + "namespace_pid": 3117, + "command": { + "args": [ + "/bin/bash" + ], + "environment": [ + "HOME=/", + "PATH=PATH=$PATH:/bin:/usr/bin:/sbin:/usr/sbin", + "container=docker", + "TERM=xterm" + ] + }, + "rootfs": "/root/main/mycontainer", + "namespaces": [ + "NEWIPC", + "NEWNS", + "NEWPID", + "NEWUTS" + ], + "capabilities": [ + "SETPCAP", + "SYS_MODULE", + "SYS_RAWIO", + "SYS_PACCT", + "SYS_ADMIN", + "SYS_NICE", + "SYS_RESOURCE", + "SYS_TIME", + "SYS_TTY_CONFIG", + "MKNOD", + "AUDIT_WRITE", + "AUDIT_CONTROL", + "MAC_OVERRIDE", + "MAC_ADMIN" + ] +} diff --git a/pkg/libcontainer/errors.go b/pkg/libcontainer/errors.go new file mode 100644 index 0000000000..c6964ee8e6 --- /dev/null +++ 
b/pkg/libcontainer/errors.go @@ -0,0 +1,9 @@ +package libcontainer + +import ( + "errors" +) + +var ( + ErrInvalidPid = errors.New("no ns pid found") +) diff --git a/pkg/libcontainer/namespaces/calls_linux.go b/pkg/libcontainer/namespaces/calls_linux.go new file mode 100644 index 0000000000..793e940b6e --- /dev/null +++ b/pkg/libcontainer/namespaces/calls_linux.go @@ -0,0 +1,164 @@ +package namespaces + +import ( + "fmt" + "os" + "syscall" + "unsafe" +) + +const ( + TIOCGPTN = 0x80045430 + TIOCSPTLCK = 0x40045431 +) + +func chroot(dir string) error { + return syscall.Chroot(dir) +} + +func chdir(dir string) error { + return syscall.Chdir(dir) +} + +func exec(cmd string, args []string, env []string) error { + return syscall.Exec(cmd, args, env) +} + +func fork() (int, error) { + syscall.ForkLock.Lock() + pid, _, err := syscall.Syscall(syscall.SYS_FORK, 0, 0, 0) + syscall.ForkLock.Unlock() + if err != 0 { + return -1, err + } + return int(pid), nil +} + +func vfork() (int, error) { + syscall.ForkLock.Lock() + pid, _, err := syscall.Syscall(syscall.SYS_VFORK, 0, 0, 0) + syscall.ForkLock.Unlock() + if err != 0 { + return -1, err + } + return int(pid), nil +} + +func mount(source, target, fstype string, flags uintptr, data string) error { + return syscall.Mount(source, target, fstype, flags, data) +} + +func unmount(target string, flags int) error { + return syscall.Unmount(target, flags) +} + +func pivotroot(newroot, putold string) error { + return syscall.PivotRoot(newroot, putold) +} + +func unshare(flags int) error { + return syscall.Unshare(flags) +} + +func clone(flags uintptr) (int, error) { + syscall.ForkLock.Lock() + pid, _, err := syscall.RawSyscall(syscall.SYS_CLONE, flags, 0, 0) + syscall.ForkLock.Unlock() + if err != 0 { + return -1, err + } + return int(pid), nil +} + +func setns(fd uintptr, flags uintptr) error { + _, _, err := syscall.RawSyscall(SYS_SETNS, fd, flags, 0) + if err != 0 { + return err + } + return nil +} + +func usetCloseOnExec(fd uintptr) 
error { + if _, _, err := syscall.Syscall(syscall.SYS_FCNTL, fd, syscall.F_SETFD, 0); err != 0 { + return err + } + return nil +} + +func setgroups(gids []int) error { + return syscall.Setgroups(gids) +} + +func setresgid(rgid, egid, sgid int) error { + return syscall.Setresgid(rgid, egid, sgid) +} + +func setresuid(ruid, euid, suid int) error { + return syscall.Setresuid(ruid, euid, suid) +} + +func sethostname(name string) error { + return syscall.Sethostname([]byte(name)) +} + +func setsid() (int, error) { + return syscall.Setsid() +} + +func ioctl(fd uintptr, flag, data uintptr) error { + if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, flag, data); err != 0 { + return err + } + return nil +} + +func openpmtx() (*os.File, error) { + return os.OpenFile("/dev/ptmx", syscall.O_RDONLY|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) +} + +func unlockpt(f *os.File) error { + var u int + return ioctl(f.Fd(), TIOCSPTLCK, uintptr(unsafe.Pointer(&u))) +} + +func ptsname(f *os.File) (string, error) { + var n int + if err := ioctl(f.Fd(), TIOCGPTN, uintptr(unsafe.Pointer(&n))); err != nil { + return "", err + } + return fmt.Sprintf("/dev/pts/%d", n), nil +} + +func closefd(fd uintptr) error { + return syscall.Close(int(fd)) +} + +func dup2(fd1, fd2 uintptr) error { + return syscall.Dup2(int(fd1), int(fd2)) +} + +func mknod(path string, mode uint32, dev int) error { + return syscall.Mknod(path, mode, dev) +} + +func parentDeathSignal() error { + if _, _, err := syscall.RawSyscall6(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, uintptr(syscall.SIGKILL), 0, 0, 0, 0); err != 0 { + return err + } + return nil +} + +func setctty() error { + if _, _, err := syscall.RawSyscall(syscall.SYS_IOCTL, 0, uintptr(syscall.TIOCSCTTY), 0); err != 0 { + return err + } + return nil +} + +func mkfifo(name string, mode uint32) error { + return syscall.Mkfifo(name, mode) +} + +func umask(mask int) int { + return syscall.Umask(mask) +} diff --git a/pkg/libcontainer/namespaces/exec.go 
b/pkg/libcontainer/namespaces/exec.go new file mode 100644 index 0000000000..893b302887 --- /dev/null +++ b/pkg/libcontainer/namespaces/exec.go @@ -0,0 +1,266 @@ +/* + Higher level convenience functions for setting up a container +*/ + +package namespaces + +import ( + "errors" + "fmt" + "github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/pkg/libcontainer/capabilities" + "github.com/dotcloud/docker/pkg/libcontainer/utils" + "io" + "log" + "os" + "path/filepath" + "syscall" +) + +var ( + ErrExistingNetworkNamespace = errors.New("specified both CLONE_NEWNET and an existing network namespace") +) + +// Exec will spawn new namespaces with the specified Container configuration +// in the RootFs path and return the pid of the new containerized process. +// +// If an existing network namespace is specified the container +// will join that namespace. If an existing network namespace is not specified but CLONE_NEWNET is, +// the container will be spawned with a new network namespace with no configuration. Omitting an +// existing network namespace and the CLONE_NEWNET option in the container configuration will allow +// the container to use the host's networking options and configuration. 
+func Exec(container *libcontainer.Container) (pid int, err error) { + // a user cannot pass CLONE_NEWNET and an existing net namespace fd to join + if container.NetNsFd > 0 && container.Namespaces.Contains(libcontainer.CLONE_NEWNET) { + return -1, ErrExistingNetworkNamespace + } + + rootfs, err := resolveRootfs(container) + if err != nil { + return -1, err + } + + master, console, err := createMasterAndConsole() + if err != nil { + return -1, err + } + + logger, err := os.OpenFile("/root/logs", os.O_RDWR|os.O_CREATE|os.O_APPEND, 0755) + if err != nil { + return -1, err + } + log.SetOutput(logger) + + // we need CLONE_VFORK so we can wait on the child + flag := getNamespaceFlags(container.Namespaces) | CLONE_VFORK + + if pid, err = clone(uintptr(flag | SIGCHLD)); err != nil { + return -1, fmt.Errorf("error cloning process: %s", err) + } + + if pid == 0 { + // welcome to your new namespace ;) + // + // any errors encoutered inside the namespace we should write + // out to a log or a pipe to our parent and exit(1) + // because writing to stderr will not work after we close + if err := closeMasterAndStd(master); err != nil { + writeError("close master and std %s", err) + } + slave, err := openTerminal(console, syscall.O_RDWR) + if err != nil { + writeError("open terminal %s", err) + } + if err := dupSlave(slave); err != nil { + writeError("dup2 slave %s", err) + } + + if container.NetNsFd > 0 { + if err := JoinExistingNamespace(container.NetNsFd, libcontainer.CLONE_NEWNET); err != nil { + writeError("join existing net namespace %s", err) + } + } + + if _, err := setsid(); err != nil { + writeError("setsid %s", err) + } + if err := setctty(); err != nil { + writeError("setctty %s", err) + } + if err := parentDeathSignal(); err != nil { + writeError("parent deth signal %s", err) + } + if err := SetupNewMountNamespace(rootfs, console, container.ReadonlyFs); err != nil { + writeError("setup mount namespace %s", err) + } + if err := sethostname(container.ID); err != nil { 
+ writeError("sethostname %s", err) + } + if err := capabilities.DropCapabilities(container); err != nil { + writeError("drop capabilities %s", err) + } + if err := setupUser(container); err != nil { + writeError("setup user %s", err) + } + if container.WorkingDir != "" { + if err := chdir(container.WorkingDir); err != nil { + writeError("chdir to %s %s", container.WorkingDir, err) + } + } + if err := exec(container.Command.Args[0], container.Command.Args[0:], container.Command.Env); err != nil { + writeError("exec %s", err) + } + panic("unreachable") + } + + go func() { + if _, err := io.Copy(os.Stdout, master); err != nil { + log.Println(err) + } + }() + go func() { + if _, err := io.Copy(master, os.Stdin); err != nil { + log.Println(err) + } + }() + return pid, nil +} + +// ExecIn will spawn a new command inside an existing container's namespaces. The existing container's +// pid and namespace configuration is needed along with the specific capabilities that should +// be dropped once inside the namespace. 
+func ExecIn(container *libcontainer.Container, cmd *libcontainer.Command) (int, error) { + if container.NsPid <= 0 { + return -1, libcontainer.ErrInvalidPid + } + + fds, err := getNsFds(container) + if err != nil { + return -1, err + } + + if container.NetNsFd > 0 { + fds = append(fds, container.NetNsFd) + } + + pid, err := fork() + if err != nil { + for _, fd := range fds { + syscall.Close(int(fd)) + } + return -1, err + } + + if pid == 0 { + for _, fd := range fds { + if fd > 0 { + if err := JoinExistingNamespace(fd, ""); err != nil { + for _, fd := range fds { + syscall.Close(int(fd)) + } + writeError("join existing namespace for %d %s", fd, err) + } + } + syscall.Close(int(fd)) + } + + if container.Namespaces.Contains(libcontainer.CLONE_NEWNS) && + container.Namespaces.Contains(libcontainer.CLONE_NEWPID) { + // important: + // + // we need to fork and unshare so that re can remount proc and sys within + // the namespace so the CLONE_NEWPID namespace will take effect + // if we don't fork we would end up unmounting proc and sys for the entire + // namespace + child, err := fork() + if err != nil { + writeError("fork child %s", err) + } + + if child == 0 { + if err := unshare(CLONE_NEWNS); err != nil { + writeError("unshare newns %s", err) + } + if err := remountProc(); err != nil { + writeError("remount proc %s", err) + } + if err := remountSys(); err != nil { + writeError("remount sys %s", err) + } + if err := capabilities.DropCapabilities(container); err != nil { + writeError("drop caps %s", err) + } + if err := exec(cmd.Args[0], cmd.Args[0:], cmd.Env); err != nil { + writeError("exec %s", err) + } + panic("unreachable") + } + exit, err := utils.WaitOnPid(child) + if err != nil { + writeError("wait on child %s", err) + } + os.Exit(exit) + } + if err := exec(cmd.Args[0], cmd.Args[0:], cmd.Env); err != nil { + writeError("exec %s", err) + } + panic("unreachable") + } + return pid, err +} + +func resolveRootfs(container *libcontainer.Container) (string, error) { 
+ rootfs, err := filepath.Abs(container.RootFs) + if err != nil { + return "", err + } + return filepath.EvalSymlinks(rootfs) +} + +func createMasterAndConsole() (*os.File, string, error) { + master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) + if err != nil { + return nil, "", err + } + + console, err := ptsname(master) + if err != nil { + return nil, "", err + } + + if err := unlockpt(master); err != nil { + return nil, "", err + } + return master, console, nil +} + +func closeMasterAndStd(master *os.File) error { + closefd(master.Fd()) + closefd(0) + closefd(1) + closefd(2) + + return nil +} + +func dupSlave(slave *os.File) error { + // we close Stdin,etc so our pty slave should have fd 0 + if slave.Fd() != 0 { + return fmt.Errorf("slave fd not 0 %d", slave.Fd()) + } + if err := dup2(slave.Fd(), 1); err != nil { + return err + } + if err := dup2(slave.Fd(), 2); err != nil { + return err + } + return nil +} + +func openTerminal(name string, flag int) (*os.File, error) { + r, e := syscall.Open(name, flag, 0) + if e != nil { + return nil, &os.PathError{"open", name, e} + } + return os.NewFile(uintptr(r), name), nil +} diff --git a/pkg/libcontainer/namespaces/linux_x86_64.go b/pkg/libcontainer/namespaces/linux_x86_64.go new file mode 100644 index 0000000000..ac9a014763 --- /dev/null +++ b/pkg/libcontainer/namespaces/linux_x86_64.go @@ -0,0 +1,7 @@ +// +build linux,x86_64 +package namespaces + +// Via http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7b21fddd087678a70ad64afc0f632e0f1071b092 +const ( + SYS_SETNS = 308 +) diff --git a/pkg/libcontainer/namespaces/mount.go b/pkg/libcontainer/namespaces/mount.go new file mode 100644 index 0000000000..6d867c91ec --- /dev/null +++ b/pkg/libcontainer/namespaces/mount.go @@ -0,0 +1,207 @@ +package namespaces + +import ( + "fmt" + "log" + "os" + "path/filepath" + "syscall" +) + +var ( + // default mount point options + defaults = syscall.MS_NOEXEC | 
syscall.MS_NOSUID | syscall.MS_NODEV +) + +func SetupNewMountNamespace(rootfs, console string, readonly bool) error { + if err := mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil { + return fmt.Errorf("mounting / as slave %s", err) + } + + if err := mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil { + return fmt.Errorf("mouting %s as bind %s", rootfs, err) + } + + if readonly { + if err := mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, ""); err != nil { + return fmt.Errorf("mounting %s as readonly %s", rootfs, err) + } + } + + if err := mountSystem(rootfs); err != nil { + return fmt.Errorf("mount system %s", err) + } + + if err := copyDevNodes(rootfs); err != nil { + return fmt.Errorf("copy dev nodes %s", err) + } + // dev/ptmx links to the devpts instance on dev/pts; the target is relative so it still resolves after the chroot + ptmx := filepath.Join(rootfs, "dev/ptmx") + if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) { + return err + } + if err := os.Symlink("pts/ptmx", ptmx); err != nil { + return fmt.Errorf("symlink dev ptmx %s", err) + } + + if err := setupDev(rootfs); err != nil { + return err + } + + if err := setupConsole(rootfs, console); err != nil { + return err + } + + if err := chdir(rootfs); err != nil { + return fmt.Errorf("chdir into %s %s", rootfs, err) + } + + if err := mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil { + return fmt.Errorf("mount move %s into / %s", rootfs, err) + } + + if err := chroot("."); err != nil { + return fmt.Errorf("chroot . 
%s", err) + } + + if err := chdir("/"); err != nil { + return fmt.Errorf("chdir / %s", err) + } + + umask(0022) + + return nil +} + +func copyDevNodes(rootfs string) error { + umask(0000) + + for _, node := range []string{ + "null", + "zero", + "full", + "random", + "urandom", + "tty", + } { + stat, err := os.Stat(filepath.Join("/dev", node)) + if err != nil { + return err + } + + var ( + dest = filepath.Join(rootfs, "dev", node) + st = stat.Sys().(*syscall.Stat_t) + ) + + log.Printf("copy %s to %s %d\n", node, dest, st.Rdev) + if err := mknod(dest, st.Mode, int(st.Rdev)); err != nil && !os.IsExist(err) { + return fmt.Errorf("copy %s %s", node, err) + } + } + return nil +} + +func setupDev(rootfs string) error { + for _, link := range []struct { + from string + to string + }{ + {"/proc/kcore", "/dev/core"}, + {"/proc/self/fd", "/dev/fd"}, + {"/proc/self/fd/0", "/dev/stdin"}, + {"/proc/self/fd/1", "/dev/stdout"}, + {"/proc/self/fd/2", "/dev/stderr"}, + } { + dest := filepath.Join(rootfs, link.to) + if err := os.Remove(dest); err != nil && !os.IsNotExist(err) { + return fmt.Errorf("remove %s %s", dest, err) + } + if err := os.Symlink(link.from, dest); err != nil { + return fmt.Errorf("symlink %s %s", dest, err) + } + } + return nil +} + +func setupConsole(rootfs, console string) error { + umask(0000) + + stat, err := os.Stat(console) + if err != nil { + return fmt.Errorf("stat console %s %s", console, err) + } + st := stat.Sys().(*syscall.Stat_t) + + dest := filepath.Join(rootfs, "dev/console") + if err := os.Remove(dest); err != nil && !os.IsNotExist(err) { + return fmt.Errorf("remove %s %s", dest, err) + } + + if err := os.Chmod(console, 0600); err != nil { + return err + } + if err := os.Chown(console, 0, 0); err != nil { + return err + } + + if err := mknod(dest, (st.Mode&^07777)|0600, int(st.Rdev)); err != nil { + return fmt.Errorf("mknod %s %s", dest, err) + } + + if err := mount(console, dest, "bind", syscall.MS_BIND, ""); err != nil { + return 
fmt.Errorf("bind %s to %s %s", console, dest, err) + } + return nil +} + +// mountSystem sets up linux specific system mounts like sys, proc, shm, and devpts +// inside the mount namespace +func mountSystem(rootfs string) error { + mounts := []struct { + source string + path string + device string + flags int + data string + }{ + {source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaults}, + {source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaults}, + {source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: "mode=755"}, + {source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaults, data: "mode=1777"}, + {source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: "newinstance,ptmxmode=0666,mode=620,gid=5"}, + {source: "tmpfs", path: filepath.Join(rootfs, "run"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_STRICTATIME, data: "mode=755"}, + } + for _, m := range mounts { + if err := os.MkdirAll(m.path, 0755); err != nil && !os.IsExist(err) { + return fmt.Errorf("mkdirall %s %s", m.path, err) + } + if err := mount(m.source, m.path, m.device, uintptr(m.flags), m.data); err != nil { + return fmt.Errorf("mounting %s into %s %s", m.source, m.path, err) + } + } + return nil +} + +func remountProc() error { + if err := unmount("/proc", syscall.MNT_DETACH); err != nil { + return err + } + if err := mount("proc", "/proc", "proc", uintptr(defaults), ""); err != nil { + return err + } + return nil +} + +func remountSys() error { + if err := unmount("/sys", syscall.MNT_DETACH); err != nil { + if err != syscall.EINVAL { + return err + } + } else { + if err := mount("sysfs", "/sys", "sysfs", uintptr(defaults), ""); err != nil { + return err + } + } + return nil +} diff --git a/pkg/libcontainer/namespaces/namespaces.go 
b/pkg/libcontainer/namespaces/namespaces.go new file mode 100644 index 0000000000..2a50847015 --- /dev/null +++ b/pkg/libcontainer/namespaces/namespaces.go @@ -0,0 +1,70 @@ +/* + TODO + pivot root + cgroups + more mount stuff that I probably am forgetting + apparmor +*/ + +package namespaces + +import ( + "fmt" + "github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/pkg/libcontainer/utils" + "os" + "path/filepath" + "syscall" +) + +// CreateNewNamespace creates a new namespace in a forked child and bind mounts its /proc/self/ns entry to the specified path +func CreateNewNamespace(namespace libcontainer.Namespace, bindTo string) error { + var ( + flag = namespaceMap[namespace] + name = namespaceFileMap[namespace] + nspath = filepath.Join("/proc/self/ns", name) + ) + // TODO: perform validation on name and flag (an unknown namespace yields a zero flag and an empty name) + + pid, err := fork() + if err != nil { + return err + } + + if pid == 0 { + if err := unshare(flag); err != nil { + writeError("unshare %s", err) + } + if err := mount(nspath, bindTo, "none", syscall.MS_BIND, ""); err != nil { + writeError("bind mount %s", err) + } + os.Exit(0) + } + exit, err := utils.WaitOnPid(pid) + if err != nil { + return err + } + if exit != 0 { + return fmt.Errorf("exit status %d", exit) + } + return err +} + +// JoinExistingNamespace uses the fd of an existing linux namespace and +// has the current process join that namespace, passing the flag mapped from ns to setns +func JoinExistingNamespace(fd uintptr, ns libcontainer.Namespace) error { + flag := namespaceMap[ns] + if err := setns(fd, uintptr(flag)); err != nil { + return err + } + return nil +} + +// getNamespaceFlags ORs together the flags mapped from the container's Namespaces options +// for use with clone, unshare, and setns +func getNamespaceFlags(namespaces libcontainer.Namespaces) (flag int) { + for _, ns := range namespaces { + flag |= namespaceMap[ns] + } + return +} diff --git a/pkg/libcontainer/namespaces/ns_linux.go b/pkg/libcontainer/namespaces/ns_linux.go new file mode 100644 index 
0000000000..b0e5119130 --- /dev/null +++ b/pkg/libcontainer/namespaces/ns_linux.go @@ -0,0 +1,35 @@ +package namespaces + +import ( + "github.com/dotcloud/docker/pkg/libcontainer" +) + +const ( + SIGCHLD = 0x14 + CLONE_VFORK = 0x00004000 + CLONE_NEWNS = 0x00020000 + CLONE_NEWUTS = 0x04000000 + CLONE_NEWIPC = 0x08000000 + CLONE_NEWUSER = 0x10000000 + CLONE_NEWPID = 0x20000000 + CLONE_NEWNET = 0x40000000 +) + +var namespaceMap = map[libcontainer.Namespace]int{ + "": 0, + libcontainer.CLONE_NEWNS: CLONE_NEWNS, + libcontainer.CLONE_NEWUTS: CLONE_NEWUTS, + libcontainer.CLONE_NEWIPC: CLONE_NEWIPC, + libcontainer.CLONE_NEWUSER: CLONE_NEWUSER, + libcontainer.CLONE_NEWPID: CLONE_NEWPID, + libcontainer.CLONE_NEWNET: CLONE_NEWNET, +} + +var namespaceFileMap = map[libcontainer.Namespace]string{ + libcontainer.CLONE_NEWNS: "mnt", + libcontainer.CLONE_NEWUTS: "uts", + libcontainer.CLONE_NEWIPC: "ipc", + libcontainer.CLONE_NEWUSER: "user", + libcontainer.CLONE_NEWPID: "pid", + libcontainer.CLONE_NEWNET: "net", +} diff --git a/pkg/libcontainer/namespaces/utils.go b/pkg/libcontainer/namespaces/utils.go new file mode 100644 index 0000000000..438d896484 --- /dev/null +++ b/pkg/libcontainer/namespaces/utils.go @@ -0,0 +1,108 @@ +package namespaces + +import ( + "fmt" + "github.com/dotcloud/docker/pkg/libcontainer" + "os" + "path/filepath" + "strconv" + "strings" + "syscall" +) + +func addEnvIfNotSet(container *libcontainer.Container, key, value string) { + jv := fmt.Sprintf("%s=%s", key, value) + if len(container.Command.Env) == 0 { + container.Command.Env = []string{jv} + return + } + + for _, v := range container.Command.Env { + parts := strings.Split(v, "=") + if parts[0] == key { + return + } + } + container.Command.Env = append(container.Command.Env, jv) +} + +// print and error to stderr and exit(1) +func writeError(format string, v ...interface{}) { + fmt.Fprintf(os.Stderr, format, v...) 
+ os.Exit(1) +} + +// getNsFds inspects the container's namespace configuration and opens the fds to +// each of the namespaces. +func getNsFds(container *libcontainer.Container) ([]uintptr, error) { + var ( + namespaces = []string{} + fds = []uintptr{} + ) + + for _, ns := range container.Namespaces { + namespaces = append(namespaces, namespaceFileMap[ns]) + } + + for _, ns := range namespaces { + fd, err := getNsFd(container.NsPid, ns) + if err != nil { + for _, fd = range fds { + syscall.Close(int(fd)) + } + return nil, err + } + fds = append(fds, fd) + } + return fds, nil +} + +// getNsFd returns the fd for a specific pid and namespace option +func getNsFd(pid int, ns string) (uintptr, error) { + nspath := filepath.Join("/proc", strconv.Itoa(pid), "ns", ns) + // OpenFile adds closOnExec + f, err := os.OpenFile(nspath, os.O_RDONLY, 0666) + if err != nil { + return 0, err + } + return f.Fd(), nil +} + +// setupEnvironment adds additional environment variables to the container's +// Command such as USER, LOGNAME, container, and TERM +func setupEnvironment(container *libcontainer.Container) { + addEnvIfNotSet(container, "container", "docker") + // TODO: check if pty + addEnvIfNotSet(container, "TERM", "xterm") + // TODO: get username from container + addEnvIfNotSet(container, "USER", "root") + addEnvIfNotSet(container, "LOGNAME", "root") +} + +func setupUser(container *libcontainer.Container) error { + // TODO: honor user passed on container + if err := setgroups(nil); err != nil { + return err + } + if err := setresgid(0, 0, 0); err != nil { + return err + } + if err := setresuid(0, 0, 0); err != nil { + return err + } + return nil +} + +func getMasterAndConsole(container *libcontainer.Container) (string, *os.File, error) { + master, err := openpmtx() + if err != nil { + return "", nil, err + } + + console, err := ptsname(master) + if err != nil { + master.Close() + return "", nil, err + } + return console, master, nil +} diff --git 
a/pkg/libcontainer/network/network.go b/pkg/libcontainer/network/network.go new file mode 100644 index 0000000000..31c5d32492 --- /dev/null +++ b/pkg/libcontainer/network/network.go @@ -0,0 +1,104 @@ +package network + +import ( + "errors" + "github.com/dotcloud/docker/pkg/netlink" + "net" +) + +var ( + ErrNoDefaultRoute = errors.New("no default network route found") +) + +func InterfaceUp(name string) error { + iface, err := net.InterfaceByName(name) + if err != nil { + return err + } + return netlink.NetworkLinkUp(iface) +} + +func InterfaceDown(name string) error { + iface, err := net.InterfaceByName(name) + if err != nil { + return err + } + return netlink.NetworkLinkDown(iface) +} + +func ChangeInterfaceName(old, newName string) error { + iface, err := net.InterfaceByName(old) + if err != nil { + return err + } + return netlink.NetworkChangeName(iface, newName) +} + +func CreateVethPair(name1, name2 string) error { + return netlink.NetworkCreateVethPair(name1, name2) +} + +func SetInterfaceInNamespacePid(name string, nsPid int) error { + iface, err := net.InterfaceByName(name) + if err != nil { + return err + } + return netlink.NetworkSetNsPid(iface, nsPid) +} + +func SetInterfaceInNamespaceFd(name string, fd int) error { + iface, err := net.InterfaceByName(name) + if err != nil { + return err + } + return netlink.NetworkSetNsFd(iface, fd) +} + +func SetInterfaceMaster(name, master string) error { + iface, err := net.InterfaceByName(name) + if err != nil { + return err + } + masterIface, err := net.InterfaceByName(master) + if err != nil { + return err + } + return netlink.NetworkSetMaster(iface, masterIface) +} + +func SetDefaultGateway(ip string) error { + return netlink.AddDefaultGw(net.ParseIP(ip)) +} + +func SetInterfaceIp(name string, rawIp string) error { + iface, err := net.InterfaceByName(name) + if err != nil { + return err + } + ip, ipNet, err := net.ParseCIDR(rawIp) + if err != nil { + return err + } + return netlink.NetworkLinkAddIp(iface, ip, 
ipNet) +} + +func SetMtu(name string, mtu int) error { + iface, err := net.InterfaceByName(name) + if err != nil { + return err + } + return netlink.NetworkSetMTU(iface, mtu) +} + +func GetDefaultMtu() (int, error) { + routes, err := netlink.NetworkGetRoutes() + if err != nil { + return -1, err + } + for _, r := range routes { + if r.Default { + return r.Iface.MTU, nil + } + } + return -1, ErrNoDefaultRoute +} diff --git a/pkg/libcontainer/network/veth.go b/pkg/libcontainer/network/veth.go new file mode 100644 index 0000000000..dc207b3394 --- /dev/null +++ b/pkg/libcontainer/network/veth.go @@ -0,0 +1,85 @@ +package network + +import ( + "fmt" + "github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/pkg/libcontainer/namespaces" + "os" + "syscall" +) + +// SetupVeth sets up an existing network namespace with the specified +// network configuration. +func SetupVeth(config *libcontainer.Network) error { + if err := InterfaceDown(config.TempVethName); err != nil { + return fmt.Errorf("interface down %s %s", config.TempVethName, err) + } + if err := ChangeInterfaceName(config.TempVethName, "eth0"); err != nil { + return fmt.Errorf("change %s to eth0 %s", config.TempVethName, err) + } + if err := SetInterfaceIp("eth0", config.IP); err != nil { + return fmt.Errorf("set eth0 ip %s", err) + } + + if err := SetMtu("eth0", config.Mtu); err != nil { + return fmt.Errorf("set eth0 mtu to %d %s", config.Mtu, err) + } + if err := InterfaceUp("eth0"); err != nil { + return fmt.Errorf("eth0 up %s", err) + } + + if err := SetMtu("lo", config.Mtu); err != nil { + return fmt.Errorf("set lo mtu to %d %s", config.Mtu, err) + } + if err := InterfaceUp("lo"); err != nil { + return fmt.Errorf("lo up %s", err) + } + + if config.Gateway != "" { + if err := SetDefaultGateway(config.Gateway); err != nil { + return fmt.Errorf("set gateway to %s %s", config.Gateway, err) + } + } + return nil +} + +// SetupNamespaceMountDir prepares a new root for use as a mount +// source for 
bind mounting namespace fd to an outside path +func SetupNamespaceMountDir(root string) error { + if err := os.MkdirAll(root, 0666); err != nil { + return err + } + // make sure mounts are not unmounted by other mnt namespaces + if err := syscall.Mount("", root, "none", syscall.MS_SHARED|syscall.MS_REC, ""); err != nil && err != syscall.EINVAL { + return err + } + if err := syscall.Mount(root, root, "none", syscall.MS_BIND, ""); err != nil { + return err + } + return nil +} + +// CreateNetworkNamespace creates a new network namespace and binds it's fd +// at the binding path +func CreateNetworkNamespace(bindingPath string) error { + f, err := os.OpenFile(bindingPath, os.O_RDONLY|os.O_CREATE|os.O_EXCL, 0) + if err != nil { + return err + } + f.Close() + + if err := namespaces.CreateNewNamespace(libcontainer.CLONE_NEWNET, bindingPath); err != nil { + return err + } + return nil +} + +// DeleteNetworkNamespace unmounts the binding path and removes the +// file so that no references to the fd are present and the network +// namespace is automatically cleaned up +func DeleteNetworkNamespace(bindingPath string) error { + if err := syscall.Unmount(bindingPath, 0); err != nil { + return err + } + return os.Remove(bindingPath) +} diff --git a/pkg/libcontainer/privileged.json b/pkg/libcontainer/privileged.json new file mode 100644 index 0000000000..be877ad335 --- /dev/null +++ b/pkg/libcontainer/privileged.json @@ -0,0 +1,22 @@ +{ + "id": "koye", + "namespace_pid": 3745, + "command": { + "args": [ + "/usr/lib/systemd/systemd" + ], + "environment": [ + "HOME=/", + "PATH=PATH=$PATH:/bin:/usr/bin:/sbin:/usr/sbin", + "container=docker", + "TERM=" + ] + }, + "rootfs": "/root/main/mycontainer", + "namespaces": [ + "NEWIPC", + "NEWNS", + "NEWPID", + "NEWUTS" + ] +} diff --git a/pkg/libcontainer/types.go b/pkg/libcontainer/types.go new file mode 100644 index 0000000000..db1c3b9738 --- /dev/null +++ b/pkg/libcontainer/types.go @@ -0,0 +1,49 @@ +package libcontainer + +type Namespace 
string +type Namespaces []Namespace + +func (n Namespaces) Contains(ns Namespace) bool { + for _, nns := range n { + if nns == ns { + return true + } + } + return false +} + +type Capability string +type Capabilities []Capability + +func (c Capabilities) Contains(capp Capability) bool { + for _, cc := range c { + if cc == capp { + return true + } + } + return false +} + +const ( + CAP_SETPCAP Capability = "SETPCAP" + CAP_SYS_MODULE Capability = "SYS_MODULE" + CAP_SYS_RAWIO Capability = "SYS_RAWIO" + CAP_SYS_PACCT Capability = "SYS_PACCT" + CAP_SYS_ADMIN Capability = "SYS_ADMIN" + CAP_SYS_NICE Capability = "SYS_NICE" + CAP_SYS_RESOURCE Capability = "SYS_RESOURCE" + CAP_SYS_TIME Capability = "SYS_TIME" + CAP_SYS_TTY_CONFIG Capability = "SYS_TTY_CONFIG" + CAP_MKNOD Capability = "MKNOD" + CAP_AUDIT_WRITE Capability = "AUDIT_WRITE" + CAP_AUDIT_CONTROL Capability = "AUDIT_CONTROL" + CAP_MAC_OVERRIDE Capability = "MAC_OVERRIDE" + CAP_MAC_ADMIN Capability = "MAC_ADMIN" + + CLONE_NEWNS Namespace = "NEWNS" // mount + CLONE_NEWUTS Namespace = "NEWUTS" // utsname + CLONE_NEWIPC Namespace = "NEWIPC" // ipc + CLONE_NEWUSER Namespace = "NEWUSER" // user + CLONE_NEWPID Namespace = "NEWPID" // pid + CLONE_NEWNET Namespace = "NEWNET" // network +) diff --git a/pkg/libcontainer/ubuntu.json b/pkg/libcontainer/ubuntu.json new file mode 100644 index 0000000000..0a450ae066 --- /dev/null +++ b/pkg/libcontainer/ubuntu.json @@ -0,0 +1,22 @@ +{ + "id": "koye", + "namespace_pid": 3745, + "command": { + "args": [ + "/sbin/init" + ], + "environment": [ + "HOME=/", + "PATH=PATH=$PATH:/bin:/usr/bin:/sbin:/usr/sbin", + "container=docker", + "TERM=xterm" + ] + }, + "rootfs": "/var/lib/docker/btrfs/subvolumes/7c0f15df1ad2e2fe04d7a6e079aec17406e9465a6a37dd16cb0dd754fc0167b3", + "namespaces": [ + "NEWIPC", + "NEWNS", + "NEWPID", + "NEWUTS" + ] +} diff --git a/pkg/libcontainer/utils/utils.go b/pkg/libcontainer/utils/utils.go new file mode 100644 index 0000000000..7289fecf2e --- /dev/null +++ 
b/pkg/libcontainer/utils/utils.go @@ -0,0 +1,33 @@ +package utils + +import ( + "crypto/rand" + "encoding/hex" + "io" + "os" + "syscall" +) + +func WaitOnPid(pid int) (exitcode int, err error) { + child, err := os.FindProcess(pid) + if err != nil { + return -1, err + } + state, err := child.Wait() + if err != nil { + return -1, err + } + return getExitCode(state), nil +} + +func getExitCode(state *os.ProcessState) int { + return state.Sys().(syscall.WaitStatus).ExitStatus() +} + +func GenerateRandomName(size int) (string, error) { + id := make([]byte, size) + if _, err := io.ReadFull(rand.Reader, id); err != nil { + return "", err + } + return hex.EncodeToString(id), nil +} From 68b049aed4663eb5f6f53241390f7602c1b40c12 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Tue, 18 Feb 2014 17:52:06 -0800 Subject: [PATCH 02/81] Make separate nsinit pkg for a dockerinit like init Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/container.go | 2 + pkg/libcontainer/namespaces/calls_linux.go | 80 +++--- pkg/libcontainer/namespaces/exec.go | 270 +++++++-------------- pkg/libcontainer/namespaces/mount.go | 36 +-- pkg/libcontainer/namespaces/namespaces.go | 40 +-- pkg/libcontainer/namespaces/nsinit/init.go | 140 +++++++++++ pkg/libcontainer/namespaces/utils.go | 24 +- 7 files changed, 285 insertions(+), 307 deletions(-) create mode 100644 pkg/libcontainer/namespaces/nsinit/init.go diff --git a/pkg/libcontainer/container.go b/pkg/libcontainer/container.go index b77890fb5c..dd5e728e68 100644 --- a/pkg/libcontainer/container.go +++ b/pkg/libcontainer/container.go @@ -11,6 +11,8 @@ type Container struct { WorkingDir string `json:"working_dir,omitempty"` Namespaces Namespaces `json:"namespaces,omitempty"` Capabilities Capabilities `json:"capabilities,omitempty"` + Master uintptr `json:"master"` + Console string `json:"console"` } type Command struct { diff --git a/pkg/libcontainer/namespaces/calls_linux.go 
b/pkg/libcontainer/namespaces/calls_linux.go index 793e940b6e..f006d56da6 100644 --- a/pkg/libcontainer/namespaces/calls_linux.go +++ b/pkg/libcontainer/namespaces/calls_linux.go @@ -12,19 +12,19 @@ const ( TIOCSPTLCK = 0x40045431 ) -func chroot(dir string) error { +func Chroot(dir string) error { return syscall.Chroot(dir) } -func chdir(dir string) error { +func Chdir(dir string) error { return syscall.Chdir(dir) } -func exec(cmd string, args []string, env []string) error { +func Exec(cmd string, args []string, env []string) error { return syscall.Exec(cmd, args, env) } -func fork() (int, error) { +func Fork() (int, error) { syscall.ForkLock.Lock() pid, _, err := syscall.Syscall(syscall.SYS_FORK, 0, 0, 0) syscall.ForkLock.Unlock() @@ -34,33 +34,23 @@ func fork() (int, error) { return int(pid), nil } -func vfork() (int, error) { - syscall.ForkLock.Lock() - pid, _, err := syscall.Syscall(syscall.SYS_VFORK, 0, 0, 0) - syscall.ForkLock.Unlock() - if err != 0 { - return -1, err - } - return int(pid), nil -} - -func mount(source, target, fstype string, flags uintptr, data string) error { +func Mount(source, target, fstype string, flags uintptr, data string) error { return syscall.Mount(source, target, fstype, flags, data) } -func unmount(target string, flags int) error { +func Unmount(target string, flags int) error { return syscall.Unmount(target, flags) } -func pivotroot(newroot, putold string) error { +func Pivotroot(newroot, putold string) error { return syscall.PivotRoot(newroot, putold) } -func unshare(flags int) error { +func Unshare(flags int) error { return syscall.Unshare(flags) } -func clone(flags uintptr) (int, error) { +func Clone(flags uintptr) (int, error) { syscall.ForkLock.Lock() pid, _, err := syscall.RawSyscall(syscall.SYS_CLONE, flags, 0, 0) syscall.ForkLock.Unlock() @@ -70,7 +60,7 @@ func clone(flags uintptr) (int, error) { return int(pid), nil } -func setns(fd uintptr, flags uintptr) error { +func Setns(fd uintptr, flags uintptr) error { _, _, err 
:= syscall.RawSyscall(SYS_SETNS, fd, flags, 0) if err != 0 { return err @@ -78,87 +68,87 @@ func setns(fd uintptr, flags uintptr) error { return nil } -func usetCloseOnExec(fd uintptr) error { +func UsetCloseOnExec(fd uintptr) error { if _, _, err := syscall.Syscall(syscall.SYS_FCNTL, fd, syscall.F_SETFD, 0); err != 0 { return err } return nil } -func setgroups(gids []int) error { +func Setgroups(gids []int) error { return syscall.Setgroups(gids) } -func setresgid(rgid, egid, sgid int) error { +func Setresgid(rgid, egid, sgid int) error { return syscall.Setresgid(rgid, egid, sgid) } -func setresuid(ruid, euid, suid int) error { +func Setresuid(ruid, euid, suid int) error { return syscall.Setresuid(ruid, euid, suid) } -func sethostname(name string) error { +func Sethostname(name string) error { return syscall.Sethostname([]byte(name)) } -func setsid() (int, error) { +func Setsid() (int, error) { return syscall.Setsid() } -func ioctl(fd uintptr, flag, data uintptr) error { +func Unlockpt(f *os.File) error { + var u int + return Ioctl(f.Fd(), TIOCSPTLCK, uintptr(unsafe.Pointer(&u))) +} + +func Ioctl(fd uintptr, flag, data uintptr) error { if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, flag, data); err != 0 { return err } return nil } -func openpmtx() (*os.File, error) { - return os.OpenFile("/dev/ptmx", syscall.O_RDONLY|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) -} - -func unlockpt(f *os.File) error { - var u int - return ioctl(f.Fd(), TIOCSPTLCK, uintptr(unsafe.Pointer(&u))) -} - -func ptsname(f *os.File) (string, error) { +func Ptsname(f *os.File) (string, error) { var n int - if err := ioctl(f.Fd(), TIOCGPTN, uintptr(unsafe.Pointer(&n))); err != nil { + if err := Ioctl(f.Fd(), TIOCGPTN, uintptr(unsafe.Pointer(&n))); err != nil { return "", err } return fmt.Sprintf("/dev/pts/%d", n), nil } -func closefd(fd uintptr) error { +func Openpmtx() (*os.File, error) { + return os.OpenFile("/dev/ptmx", syscall.O_RDONLY|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) +} + +func 
Closefd(fd uintptr) error { return syscall.Close(int(fd)) } -func dup2(fd1, fd2 uintptr) error { +func Dup2(fd1, fd2 uintptr) error { return syscall.Dup2(int(fd1), int(fd2)) } -func mknod(path string, mode uint32, dev int) error { +func Mknod(path string, mode uint32, dev int) error { return syscall.Mknod(path, mode, dev) } -func parentDeathSignal() error { +func ParentDeathSignal() error { if _, _, err := syscall.RawSyscall6(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, uintptr(syscall.SIGKILL), 0, 0, 0, 0); err != 0 { return err } return nil } -func setctty() error { +func Setctty() error { if _, _, err := syscall.RawSyscall(syscall.SYS_IOCTL, 0, uintptr(syscall.TIOCSCTTY), 0); err != 0 { return err } return nil } -func mkfifo(name string, mode uint32) error { +func Mkfifo(name string, mode uint32) error { return syscall.Mkfifo(name, mode) } -func umask(mask int) int { +func Umask(mask int) int { return syscall.Umask(mask) } diff --git a/pkg/libcontainer/namespaces/exec.go b/pkg/libcontainer/namespaces/exec.go index 893b302887..0077a0b16c 100644 --- a/pkg/libcontainer/namespaces/exec.go +++ b/pkg/libcontainer/namespaces/exec.go @@ -8,12 +8,10 @@ import ( "errors" "fmt" "github.com/dotcloud/docker/pkg/libcontainer" - "github.com/dotcloud/docker/pkg/libcontainer/capabilities" - "github.com/dotcloud/docker/pkg/libcontainer/utils" "io" "log" "os" - "path/filepath" + "os/exec" "syscall" ) @@ -29,89 +27,31 @@ var ( // the container will be spawned with a new network namespace with no configuration. Omiting an // existing network namespace and the CLONE_NEWNET option in the container configuration will allow // the container to the the host's networking options and configuration. 
-func Exec(container *libcontainer.Container) (pid int, err error) { +func ExecContainer(container *libcontainer.Container) (pid int, err error) { // a user cannot pass CLONE_NEWNET and an existing net namespace fd to join if container.NetNsFd > 0 && container.Namespaces.Contains(libcontainer.CLONE_NEWNET) { return -1, ErrExistingNetworkNamespace } - rootfs, err := resolveRootfs(container) - if err != nil { - return -1, err - } - master, console, err := createMasterAndConsole() if err != nil { return -1, err } - - logger, err := os.OpenFile("/root/logs", os.O_RDWR|os.O_CREATE|os.O_APPEND, 0755) - if err != nil { - return -1, err - } - log.SetOutput(logger) + container.Console = console + container.Master = master.Fd() // we need CLONE_VFORK so we can wait on the child - flag := getNamespaceFlags(container.Namespaces) | CLONE_VFORK + flag := uintptr(getNamespaceFlags(container.Namespaces) | CLONE_VFORK) - if pid, err = clone(uintptr(flag | SIGCHLD)); err != nil { - return -1, fmt.Errorf("error cloning process: %s", err) - } - - if pid == 0 { - // welcome to your new namespace ;) - // - // any errors encoutered inside the namespace we should write - // out to a log or a pipe to our parent and exit(1) - // because writing to stderr will not work after we close - if err := closeMasterAndStd(master); err != nil { - writeError("close master and std %s", err) - } - slave, err := openTerminal(console, syscall.O_RDWR) - if err != nil { - writeError("open terminal %s", err) - } - if err := dupSlave(slave); err != nil { - writeError("dup2 slave %s", err) - } - - if container.NetNsFd > 0 { - if err := JoinExistingNamespace(container.NetNsFd, libcontainer.CLONE_NEWNET); err != nil { - writeError("join existing net namespace %s", err) - } - } - - if _, err := setsid(); err != nil { - writeError("setsid %s", err) - } - if err := setctty(); err != nil { - writeError("setctty %s", err) - } - if err := parentDeathSignal(); err != nil { - writeError("parent deth signal %s", err) - } 
- if err := SetupNewMountNamespace(rootfs, console, container.ReadonlyFs); err != nil { - writeError("setup mount namespace %s", err) - } - if err := sethostname(container.ID); err != nil { - writeError("sethostname %s", err) - } - if err := capabilities.DropCapabilities(container); err != nil { - writeError("drop capabilities %s", err) - } - if err := setupUser(container); err != nil { - writeError("setup user %s", err) - } - if container.WorkingDir != "" { - if err := chdir(container.WorkingDir); err != nil { - writeError("chdir to %s %s", container.WorkingDir, err) - } - } - if err := exec(container.Command.Args[0], container.Command.Args[0:], container.Command.Env); err != nil { - writeError("exec %s", err) - } - panic("unreachable") + command := exec.Command("/.nsinit") + command.SysProcAttr = &syscall.SysProcAttr{} + command.SysProcAttr.Cloneflags = flag + command.SysProcAttr.Setctty = true + + if err := command.Start(); err != nil { + return -1, err } + pid = command.Process.Pid go func() { if _, err := io.Copy(os.Stdout, master); err != nil { @@ -130,91 +70,86 @@ func Exec(container *libcontainer.Container) (pid int, err error) { // pid and namespace configuration is needed along with the specific capabilities that should // be dropped once inside the namespace. 
func ExecIn(container *libcontainer.Container, cmd *libcontainer.Command) (int, error) { - if container.NsPid <= 0 { - return -1, libcontainer.ErrInvalidPid - } - - fds, err := getNsFds(container) - if err != nil { - return -1, err - } - - if container.NetNsFd > 0 { - fds = append(fds, container.NetNsFd) - } - - pid, err := fork() - if err != nil { - for _, fd := range fds { - syscall.Close(int(fd)) + return -1, fmt.Errorf("not implemented") + /* + if container.NsPid <= 0 { + return -1, libcontainer.ErrInvalidPid } - return -1, err - } - if pid == 0 { - for _, fd := range fds { - if fd > 0 { - if err := JoinExistingNamespace(fd, ""); err != nil { - for _, fd := range fds { - syscall.Close(int(fd)) + fds, err := getNsFds(container) + if err != nil { + return -1, err + } + + if container.NetNsFd > 0 { + fds = append(fds, container.NetNsFd) + } + + pid, err := fork() + if err != nil { + for _, fd := range fds { + syscall.Close(int(fd)) + } + return -1, err + } + + if pid == 0 { + for _, fd := range fds { + if fd > 0 { + if err := JoinExistingNamespace(fd, ""); err != nil { + for _, fd := range fds { + syscall.Close(int(fd)) + } + writeError("join existing namespace for %d %s", fd, err) } - writeError("join existing namespace for %d %s", fd, err) } - } - syscall.Close(int(fd)) - } - - if container.Namespaces.Contains(libcontainer.CLONE_NEWNS) && - container.Namespaces.Contains(libcontainer.CLONE_NEWPID) { - // important: - // - // we need to fork and unshare so that re can remount proc and sys within - // the namespace so the CLONE_NEWPID namespace will take effect - // if we don't fork we would end up unmounting proc and sys for the entire - // namespace - child, err := fork() - if err != nil { - writeError("fork child %s", err) + syscall.Close(int(fd)) } - if child == 0 { - if err := unshare(CLONE_NEWNS); err != nil { - writeError("unshare newns %s", err) + if container.Namespaces.Contains(libcontainer.CLONE_NEWNS) && + 
container.Namespaces.Contains(libcontainer.CLONE_NEWPID) { + // important: + // + // we need to fork and unshare so that re can remount proc and sys within + // the namespace so the CLONE_NEWPID namespace will take effect + // if we don't fork we would end up unmounting proc and sys for the entire + // namespace + child, err := fork() + if err != nil { + writeError("fork child %s", err) } - if err := remountProc(); err != nil { - writeError("remount proc %s", err) - } - if err := remountSys(); err != nil { - writeError("remount sys %s", err) - } - if err := capabilities.DropCapabilities(container); err != nil { - writeError("drop caps %s", err) - } - if err := exec(cmd.Args[0], cmd.Args[0:], cmd.Env); err != nil { - writeError("exec %s", err) - } - panic("unreachable") - } - exit, err := utils.WaitOnPid(child) - if err != nil { - writeError("wait on child %s", err) - } - os.Exit(exit) - } - if err := exec(cmd.Args[0], cmd.Args[0:], cmd.Env); err != nil { - writeError("exec %s", err) - } - panic("unreachable") - } - return pid, err -} -func resolveRootfs(container *libcontainer.Container) (string, error) { - rootfs, err := filepath.Abs(container.RootFs) - if err != nil { - return "", err - } - return filepath.EvalSymlinks(rootfs) + if child == 0 { + if err := unshare(CLONE_NEWNS); err != nil { + writeError("unshare newns %s", err) + } + if err := remountProc(); err != nil { + writeError("remount proc %s", err) + } + if err := remountSys(); err != nil { + writeError("remount sys %s", err) + } + if err := capabilities.DropCapabilities(container); err != nil { + writeError("drop caps %s", err) + } + if err := exec(cmd.Args[0], cmd.Args[0:], cmd.Env); err != nil { + writeError("exec %s", err) + } + panic("unreachable") + } + exit, err := utils.WaitOnPid(child) + if err != nil { + writeError("wait on child %s", err) + } + os.Exit(exit) + } + if err := exec(cmd.Args[0], cmd.Args[0:], cmd.Env); err != nil { + writeError("exec %s", err) + } + panic("unreachable") + } + 
return pid, err + */ } func createMasterAndConsole() (*os.File, string, error) { @@ -223,44 +158,13 @@ func createMasterAndConsole() (*os.File, string, error) { return nil, "", err } - console, err := ptsname(master) + console, err := Ptsname(master) if err != nil { return nil, "", err } - if err := unlockpt(master); err != nil { + if err := Unlockpt(master); err != nil { return nil, "", err } return master, console, nil } - -func closeMasterAndStd(master *os.File) error { - closefd(master.Fd()) - closefd(0) - closefd(1) - closefd(2) - - return nil -} - -func dupSlave(slave *os.File) error { - // we close Stdin,etc so our pty slave should have fd 0 - if slave.Fd() != 0 { - return fmt.Errorf("slave fd not 0 %d", slave.Fd()) - } - if err := dup2(slave.Fd(), 1); err != nil { - return err - } - if err := dup2(slave.Fd(), 2); err != nil { - return err - } - return nil -} - -func openTerminal(name string, flag int) (*os.File, error) { - r, e := syscall.Open(name, flag, 0) - if e != nil { - return nil, &os.PathError{"open", name, e} - } - return os.NewFile(uintptr(r), name), nil -} diff --git a/pkg/libcontainer/namespaces/mount.go b/pkg/libcontainer/namespaces/mount.go index 6d867c91ec..8e7c54b046 100644 --- a/pkg/libcontainer/namespaces/mount.go +++ b/pkg/libcontainer/namespaces/mount.go @@ -14,16 +14,16 @@ var ( ) func SetupNewMountNamespace(rootfs, console string, readonly bool) error { - if err := mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil { + if err := Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil { return fmt.Errorf("mounting / as slave %s", err) } - if err := mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil { + if err := Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil { return fmt.Errorf("mouting %s as bind %s", rootfs, err) } if readonly { - if err := mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, ""); err != 
nil { + if err := Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, ""); err != nil { return fmt.Errorf("mounting %s as readonly %s", rootfs, err) } } @@ -52,29 +52,29 @@ func SetupNewMountNamespace(rootfs, console string, readonly bool) error { return err } - if err := chdir(rootfs); err != nil { + if err := Chdir(rootfs); err != nil { return fmt.Errorf("chdir into %s %s", rootfs, err) } - if err := mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil { + if err := Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil { return fmt.Errorf("mount move %s into / %s", rootfs, err) } - if err := chroot("."); err != nil { + if err := Chroot("."); err != nil { return fmt.Errorf("chroot . %s", err) } - if err := chdir("/"); err != nil { + if err := Chdir("/"); err != nil { return fmt.Errorf("chdir / %s", err) } - umask(0022) + Umask(0022) return nil } func copyDevNodes(rootfs string) error { - umask(0000) + Umask(0000) for _, node := range []string{ "null", @@ -95,7 +95,7 @@ func copyDevNodes(rootfs string) error { ) log.Printf("copy %s to %s %d\n", node, dest, st.Rdev) - if err := mknod(dest, st.Mode, int(st.Rdev)); err != nil && !os.IsExist(err) { + if err := Mknod(dest, st.Mode, int(st.Rdev)); err != nil && !os.IsExist(err) { return fmt.Errorf("copy %s %s", node, err) } } @@ -125,7 +125,7 @@ func setupDev(rootfs string) error { } func setupConsole(rootfs, console string) error { - umask(0000) + Umask(0000) stat, err := os.Stat(console) if err != nil { @@ -145,11 +145,11 @@ func setupConsole(rootfs, console string) error { return err } - if err := mknod(dest, (st.Mode&^07777)|0600, int(st.Rdev)); err != nil { + if err := Mknod(dest, (st.Mode&^07777)|0600, int(st.Rdev)); err != nil { return fmt.Errorf("mknod %s %s", dest, err) } - if err := mount(console, dest, "bind", syscall.MS_BIND, ""); err != nil { + if err := Mount(console, dest, "bind", syscall.MS_BIND, ""); err != nil { return fmt.Errorf("bind %s to %s %s", 
console, dest, err) } return nil @@ -176,7 +176,7 @@ func mountSystem(rootfs string) error { if err := os.MkdirAll(m.path, 0755); err != nil && !os.IsExist(err) { return fmt.Errorf("mkdirall %s %s", m.path, err) } - if err := mount(m.source, m.path, m.device, uintptr(m.flags), m.data); err != nil { + if err := Mount(m.source, m.path, m.device, uintptr(m.flags), m.data); err != nil { return fmt.Errorf("mounting %s into %s %s", m.source, m.path, err) } } @@ -184,22 +184,22 @@ func mountSystem(rootfs string) error { } func remountProc() error { - if err := unmount("/proc", syscall.MNT_DETACH); err != nil { + if err := Unmount("/proc", syscall.MNT_DETACH); err != nil { return err } - if err := mount("proc", "/proc", "proc", uintptr(defaults), ""); err != nil { + if err := Mount("proc", "/proc", "proc", uintptr(defaults), ""); err != nil { return err } return nil } func remountSys() error { - if err := unmount("/sys", syscall.MNT_DETACH); err != nil { + if err := Unmount("/sys", syscall.MNT_DETACH); err != nil { if err != syscall.EINVAL { return err } } else { - if err := mount("sysfs", "/sys", "sysfs", uintptr(defaults), ""); err != nil { + if err := Mount("sysfs", "/sys", "sysfs", uintptr(defaults), ""); err != nil { return err } } diff --git a/pkg/libcontainer/namespaces/namespaces.go b/pkg/libcontainer/namespaces/namespaces.go index 2a50847015..05ef0ac7a9 100644 --- a/pkg/libcontainer/namespaces/namespaces.go +++ b/pkg/libcontainer/namespaces/namespaces.go @@ -9,52 +9,14 @@ package namespaces import ( - "fmt" "github.com/dotcloud/docker/pkg/libcontainer" - "github.com/dotcloud/docker/pkg/libcontainer/utils" - "os" - "path/filepath" - "syscall" ) -// CreateNewNamespace creates a new namespace and binds it's fd to the specified path -func CreateNewNamespace(namespace libcontainer.Namespace, bindTo string) error { - var ( - flag = namespaceMap[namespace] - name = namespaceFileMap[namespace] - nspath = filepath.Join("/proc/self/ns", name) - ) - // TODO: perform 
validation on name and flag - - pid, err := fork() - if err != nil { - return err - } - - if pid == 0 { - if err := unshare(flag); err != nil { - writeError("unshare %s", err) - } - if err := mount(nspath, bindTo, "none", syscall.MS_BIND, ""); err != nil { - writeError("bind mount %s", err) - } - os.Exit(0) - } - exit, err := utils.WaitOnPid(pid) - if err != nil { - return err - } - if exit != 0 { - return fmt.Errorf("exit status %d", exit) - } - return err -} - // JoinExistingNamespace uses the fd of an existing linux namespace and // has the current process join that namespace or the spacespace specified by ns func JoinExistingNamespace(fd uintptr, ns libcontainer.Namespace) error { flag := namespaceMap[ns] - if err := setns(fd, uintptr(flag)); err != nil { + if err := Setns(fd, uintptr(flag)); err != nil { return err } return nil diff --git a/pkg/libcontainer/namespaces/nsinit/init.go b/pkg/libcontainer/namespaces/nsinit/init.go new file mode 100644 index 0000000000..9a7563642c --- /dev/null +++ b/pkg/libcontainer/namespaces/nsinit/init.go @@ -0,0 +1,140 @@ +package nsinit + +import ( + "fmt" + "github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/pkg/libcontainer/capabilities" + "github.com/dotcloud/docker/pkg/libcontainer/namespaces" + "log" + "os" + "path/filepath" + "syscall" +) + +// InitNamespace should be run inside an existing namespace to setup +// common mounts, drop capabilities, and setup network interfaces +func InitNamespace(container *libcontainer.Container) error { + rootfs, err := resolveRootfs(container) + if err != nil { + return err + } + + // any errors encoutered inside the namespace we should write + // out to a log or a pipe to our parent and exit(1) + // because writing to stderr will not work after we close + if err := closeMasterAndStd(container.Master); err != nil { + log.Fatalf("close master and std %s", err) + return err + } + + slave, err := openTerminal(container.Console, syscall.O_RDWR) + if err != nil { + 
log.Fatalf("open terminal %s", err) + return err + } + if err := dupSlave(slave); err != nil { + log.Fatalf("dup2 slave %s", err) + return err + } + + /* + if container.NetNsFd > 0 { + if err := joinExistingNamespace(container.NetNsFd, libcontainer.CLONE_NEWNET); err != nil { + log.Fatalf("join existing net namespace %s", err) + } + } + */ + + if _, err := namespaces.Setsid(); err != nil { + log.Fatalf("setsid %s", err) + return err + } + if err := namespaces.Setctty(); err != nil { + log.Fatalf("setctty %s", err) + return err + } + if err := namespaces.ParentDeathSignal(); err != nil { + log.Fatalf("parent deth signal %s", err) + return err + } + if err := namespaces.SetupNewMountNamespace(rootfs, container.Console, container.ReadonlyFs); err != nil { + log.Fatalf("setup mount namespace %s", err) + return err + } + if err := namespaces.Sethostname(container.ID); err != nil { + log.Fatalf("sethostname %s", err) + return err + } + if err := capabilities.DropCapabilities(container); err != nil { + log.Fatalf("drop capabilities %s", err) + return err + } + if err := setupUser(container); err != nil { + log.Fatalf("setup user %s", err) + return err + } + if container.WorkingDir != "" { + if err := namespaces.Chdir(container.WorkingDir); err != nil { + log.Fatalf("chdir to %s %s", container.WorkingDir, err) + return err + } + } + if err := namespaces.Exec(container.Command.Args[0], container.Command.Args[0:], container.Command.Env); err != nil { + log.Fatalf("exec %s", err) + return err + } + panic("unreachable") +} + +func resolveRootfs(container *libcontainer.Container) (string, error) { + rootfs, err := filepath.Abs(container.RootFs) + if err != nil { + return "", err + } + return filepath.EvalSymlinks(rootfs) +} + +func closeMasterAndStd(master uintptr) error { + namespaces.Closefd(master) + namespaces.Closefd(0) + namespaces.Closefd(1) + namespaces.Closefd(2) + + return nil +} + +func setupUser(container *libcontainer.Container) error { + // TODO: honor user passed 
on container + if err := namespaces.Setgroups(nil); err != nil { + return err + } + if err := namespaces.Setresgid(0, 0, 0); err != nil { + return err + } + if err := namespaces.Setresuid(0, 0, 0); err != nil { + return err + } + return nil +} + +func dupSlave(slave *os.File) error { + // we close Stdin,etc so our pty slave should have fd 0 + if slave.Fd() != 0 { + return fmt.Errorf("slave fd not 0 %d", slave.Fd()) + } + if err := namespaces.Dup2(slave.Fd(), 1); err != nil { + return err + } + if err := namespaces.Dup2(slave.Fd(), 2); err != nil { + return err + } + return nil +} + +func openTerminal(name string, flag int) (*os.File, error) { + r, e := syscall.Open(name, flag, 0) + if e != nil { + return nil, &os.PathError{"open", name, e} + } + return os.NewFile(uintptr(r), name), nil +} diff --git a/pkg/libcontainer/namespaces/utils.go b/pkg/libcontainer/namespaces/utils.go index 438d896484..fd195c0ad1 100644 --- a/pkg/libcontainer/namespaces/utils.go +++ b/pkg/libcontainer/namespaces/utils.go @@ -26,12 +26,6 @@ func addEnvIfNotSet(container *libcontainer.Container, key, value string) { container.Command.Env = append(container.Command.Env, jv) } -// print and error to stderr and exit(1) -func writeError(format string, v ...interface{}) { - fmt.Fprintf(os.Stderr, format, v...) - os.Exit(1) -} - // getNsFds inspects the container's namespace configuration and opens the fds to // each of the namespaces. 
func getNsFds(container *libcontainer.Container) ([]uintptr, error) { @@ -79,27 +73,13 @@ func setupEnvironment(container *libcontainer.Container) { addEnvIfNotSet(container, "LOGNAME", "root") } -func setupUser(container *libcontainer.Container) error { - // TODO: honor user passed on container - if err := setgroups(nil); err != nil { - return err - } - if err := setresgid(0, 0, 0); err != nil { - return err - } - if err := setresuid(0, 0, 0); err != nil { - return err - } - return nil -} - func getMasterAndConsole(container *libcontainer.Container) (string, *os.File, error) { - master, err := openpmtx() + master, err := Openpmtx() if err != nil { return "", nil, err } - console, err := ptsname(master) + console, err := Ptsname(master) if err != nil { master.Close() return "", nil, err From 72e65b654b75d5087e50cc6366e78bd2f8318bae Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Tue, 18 Feb 2014 18:15:41 -0800 Subject: [PATCH 03/81] WIP moving to nsini Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/cli/main.go | 78 +++++++++++++++++------------ pkg/libcontainer/namespaces/exec.go | 10 ++-- pkg/libcontainer/network/veth.go | 16 ------ 3 files changed, 52 insertions(+), 52 deletions(-) diff --git a/pkg/libcontainer/cli/main.go b/pkg/libcontainer/cli/main.go index 490135ef5a..0430e29430 100644 --- a/pkg/libcontainer/cli/main.go +++ b/pkg/libcontainer/cli/main.go @@ -6,6 +6,7 @@ import ( "fmt" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/namespaces" + "github.com/dotcloud/docker/pkg/libcontainer/namespaces/nsinit" "github.com/dotcloud/docker/pkg/libcontainer/network" "github.com/dotcloud/docker/pkg/libcontainer/utils" "os" @@ -15,15 +16,26 @@ var ( displayPid bool newCommand string usrNet bool + masterFd int + console string ) func init() { flag.BoolVar(&displayPid, "pid", false, "display the pid before waiting") flag.StringVar(&newCommand, "cmd", "/bin/bash", "command to run 
in the existing namespace") flag.BoolVar(&usrNet, "net", false, "user a net namespace") + flag.IntVar(&masterFd, "master", 0, "master fd") + flag.StringVar(&console, "console", "", "console path") flag.Parse() } +func nsinitFunc(container *libcontainer.Container) error { + container.Master = uintptr(masterFd) + container.Console = console + + return nsinit.InitNamespace(container) +} + func exec(container *libcontainer.Container) error { var ( netFile *os.File @@ -39,7 +51,7 @@ func exec(container *libcontainer.Container) error { container.NetNsFd = netFile.Fd() } - pid, err := namespaces.Exec(container) + pid, err := namespaces.ExecContainer(container) if err != nil { return fmt.Errorf("error exec container %s", err) } @@ -87,39 +99,39 @@ func execIn(container *libcontainer.Container) error { } func createNet(config *libcontainer.Network) error { - root := "/root/nsroot" - if err := network.SetupNamespaceMountDir(root); err != nil { - return err - } - - nspath := root + "/test" - if err := network.CreateNetworkNamespace(nspath); err != nil { - return nil - } - if err := network.CreateVethPair("veth0", config.TempVethName); err != nil { - return err - } - if err := network.SetInterfaceMaster("veth0", config.Bridge); err != nil { - return err - } - if err := network.InterfaceUp("veth0"); err != nil { - return err - } - - f, err := os.Open(nspath) - if err != nil { - return err - } - defer f.Close() - - if err := network.SetInterfaceInNamespaceFd("veth1", int(f.Fd())); err != nil { - return err - } - /* - if err := network.SetupVethInsideNamespace(f.Fd(), config); err != nil { + root := "/root/nsroot" + if err := network.SetupNamespaceMountDir(root); err != nil { return err } + + nspath := root + "/test" + if err := network.CreateNetworkNamespace(nspath); err != nil { + return nil + } + if err := network.CreateVethPair("veth0", config.TempVethName); err != nil { + return err + } + if err := network.SetInterfaceMaster("veth0", config.Bridge); err != nil { + return err 
+ } + if err := network.InterfaceUp("veth0"); err != nil { + return err + } + + f, err := os.Open(nspath) + if err != nil { + return err + } + defer f.Close() + + if err := network.SetInterfaceInNamespaceFd("veth1", int(f.Fd())); err != nil { + return err + } + + if err := network.SetupVethInsideNamespace(f.Fd(), config); err != nil { + return err + } */ return nil } @@ -133,7 +145,7 @@ func main() { var ( err error cliCmd = flag.Arg(0) - config = flag.Arg(1) + config = "/root/development/gocode/src/github.com/dotcloud/docker/pkg/libcontainer/container.json" //flag.Arg(1) ) f, err := os.Open(config) if err != nil { @@ -149,6 +161,8 @@ func main() { f.Close() switch cliCmd { + case "init": + err = nsinitFunc(container) case "exec": err = exec(container) case "execin": diff --git a/pkg/libcontainer/namespaces/exec.go b/pkg/libcontainer/namespaces/exec.go index 0077a0b16c..93b155ba24 100644 --- a/pkg/libcontainer/namespaces/exec.go +++ b/pkg/libcontainer/namespaces/exec.go @@ -12,6 +12,8 @@ import ( "log" "os" "os/exec" + "path/filepath" + "strconv" "syscall" ) @@ -37,16 +39,15 @@ func ExecContainer(container *libcontainer.Container) (pid int, err error) { if err != nil { return -1, err } - container.Console = console - container.Master = master.Fd() + nsinit := filepath.Join(container.RootFs, ".nsinit") // we need CLONE_VFORK so we can wait on the child flag := uintptr(getNamespaceFlags(container.Namespaces) | CLONE_VFORK) - command := exec.Command("/.nsinit") + command := exec.Command(nsinit, "init", "-master", strconv.Itoa(int(master.Fd())), "-console", console) command.SysProcAttr = &syscall.SysProcAttr{} command.SysProcAttr.Cloneflags = flag - command.SysProcAttr.Setctty = true + // command.SysProcAttr.Setctty = true if err := command.Start(); err != nil { return -1, err @@ -63,6 +64,7 @@ func ExecContainer(container *libcontainer.Container) (pid int, err error) { log.Println(err) } }() + command.Wait() return pid, nil } diff --git 
a/pkg/libcontainer/network/veth.go b/pkg/libcontainer/network/veth.go index dc207b3394..2ecce22c3e 100644 --- a/pkg/libcontainer/network/veth.go +++ b/pkg/libcontainer/network/veth.go @@ -3,7 +3,6 @@ package network import ( "fmt" "github.com/dotcloud/docker/pkg/libcontainer" - "github.com/dotcloud/docker/pkg/libcontainer/namespaces" "os" "syscall" ) @@ -59,21 +58,6 @@ func SetupNamespaceMountDir(root string) error { return nil } -// CreateNetworkNamespace creates a new network namespace and binds it's fd -// at the binding path -func CreateNetworkNamespace(bindingPath string) error { - f, err := os.OpenFile(bindingPath, os.O_RDONLY|os.O_CREATE|os.O_EXCL, 0) - if err != nil { - return err - } - f.Close() - - if err := namespaces.CreateNewNamespace(libcontainer.CLONE_NEWNET, bindingPath); err != nil { - return err - } - return nil -} - // DeleteNetworkNamespace unmounts the binding path and removes the // file so that no references to the fd are present and the network // namespace is automatically cleaned up From 11429457691be3b009c6d9f4cc9fce9150d4e810 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 19 Feb 2014 10:44:29 -0800 Subject: [PATCH 04/81] Use nsinit for setting up namespace Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/cli/main.go | 1 + pkg/libcontainer/container.go | 1 + pkg/libcontainer/namespaces/exec.go | 4 ++-- pkg/libcontainer/namespaces/nsinit/init.go | 13 +++++++++++++ 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/pkg/libcontainer/cli/main.go b/pkg/libcontainer/cli/main.go index 0430e29430..ac0ea29924 100644 --- a/pkg/libcontainer/cli/main.go +++ b/pkg/libcontainer/cli/main.go @@ -32,6 +32,7 @@ func init() { func nsinitFunc(container *libcontainer.Container) error { container.Master = uintptr(masterFd) container.Console = console + container.LogFile = "/root/logs" return nsinit.InitNamespace(container) } diff --git a/pkg/libcontainer/container.go b/pkg/libcontainer/container.go 
index dd5e728e68..c9a3f2e902 100644 --- a/pkg/libcontainer/container.go +++ b/pkg/libcontainer/container.go @@ -13,6 +13,7 @@ type Container struct { Capabilities Capabilities `json:"capabilities,omitempty"` Master uintptr `json:"master"` Console string `json:"console"` + LogFile string `json:"log_file"` } type Command struct { diff --git a/pkg/libcontainer/namespaces/exec.go b/pkg/libcontainer/namespaces/exec.go index 93b155ba24..7f4b4a609a 100644 --- a/pkg/libcontainer/namespaces/exec.go +++ b/pkg/libcontainer/namespaces/exec.go @@ -44,9 +44,10 @@ func ExecContainer(container *libcontainer.Container) (pid int, err error) { // we need CLONE_VFORK so we can wait on the child flag := uintptr(getNamespaceFlags(container.Namespaces) | CLONE_VFORK) - command := exec.Command(nsinit, "init", "-master", strconv.Itoa(int(master.Fd())), "-console", console) + command := exec.Command(nsinit, "-master", strconv.Itoa(int(master.Fd())), "-console", console, "init") command.SysProcAttr = &syscall.SysProcAttr{} command.SysProcAttr.Cloneflags = flag + command.ExtraFiles = []*os.File{master} // command.SysProcAttr.Setctty = true if err := command.Start(); err != nil { @@ -64,7 +65,6 @@ func ExecContainer(container *libcontainer.Container) (pid int, err error) { log.Println(err) } }() - command.Wait() return pid, nil } diff --git a/pkg/libcontainer/namespaces/nsinit/init.go b/pkg/libcontainer/namespaces/nsinit/init.go index 9a7563642c..ae6159b45a 100644 --- a/pkg/libcontainer/namespaces/nsinit/init.go +++ b/pkg/libcontainer/namespaces/nsinit/init.go @@ -14,6 +14,10 @@ import ( // InitNamespace should be run inside an existing namespace to setup // common mounts, drop capabilities, and setup network interfaces func InitNamespace(container *libcontainer.Container) error { + if err := setLogFile(container); err != nil { + return err + } + rootfs, err := resolveRootfs(container) if err != nil { return err @@ -138,3 +142,12 @@ func openTerminal(name string, flag int) (*os.File, error) { 
} return os.NewFile(uintptr(r), name), nil } + +func setLogFile(container *libcontainer.Container) error { + f, err := os.OpenFile(container.LogFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0655) + if err != nil { + return err + } + log.SetOutput(f) + return nil +} From 93d41e53ae64fd77340c31d3bc5531864ed28779 Mon Sep 17 00:00:00 2001 From: "Guillaume J. Charmes" Date: Tue, 18 Feb 2014 23:13:36 -0800 Subject: [PATCH 05/81] Improve general quality of libcontainer Docker-DCO-1.1-Signed-off-by: Guillaume J. Charmes (github: creack) --- pkg/libcontainer/cli/main.go | 65 ++++++----- pkg/libcontainer/container.json | 2 +- pkg/libcontainer/namespaces/exec.go | 101 ++---------------- pkg/libcontainer/namespaces/mount.go | 44 ++++---- pkg/libcontainer/namespaces/namespaces.go | 32 ------ pkg/libcontainer/namespaces/ns_linux.go | 9 ++ pkg/libcontainer/namespaces/nsinit/init.go | 41 +++---- pkg/libcontainer/namespaces/utils.go | 14 --- .../namespaces => system}/calls_linux.go | 37 +------ pkg/system/pty_linux.go | 31 ++++++ pkg/system/setns_linux.go | 13 +++ pkg/system/setns_linux_amd64.go | 8 ++ 12 files changed, 159 insertions(+), 238 deletions(-) delete mode 100644 pkg/libcontainer/namespaces/namespaces.go rename pkg/{libcontainer/namespaces => system}/calls_linux.go (74%) create mode 100644 pkg/system/pty_linux.go create mode 100644 pkg/system/setns_linux.go create mode 100644 pkg/system/setns_linux_amd64.go diff --git a/pkg/libcontainer/cli/main.go b/pkg/libcontainer/cli/main.go index ac0ea29924..93bb0399f0 100644 --- a/pkg/libcontainer/cli/main.go +++ b/pkg/libcontainer/cli/main.go @@ -10,6 +10,9 @@ import ( "github.com/dotcloud/docker/pkg/libcontainer/network" "github.com/dotcloud/docker/pkg/libcontainer/utils" "os" + exec_ "os/exec" + "path" + "path/filepath" ) var ( @@ -52,6 +55,18 @@ func exec(container *libcontainer.Container) error { container.NetNsFd = netFile.Fd() } + self, err := exec_.LookPath(os.Args[0]) + if err != nil { + return err + } + if output, err := 
exec_.Command("cp", self, path.Join(container.RootFs, ".nsinit")).CombinedOutput(); err != nil { + return fmt.Errorf("Error exec cp: %s, (%s)", err, output) + } else { + println(self, container.RootFs) + fmt.Printf("-----> %s\n", output) + } + println("----") + pid, err := namespaces.ExecContainer(container) if err != nil { return fmt.Errorf("error exec container %s", err) @@ -77,25 +92,25 @@ func exec(container *libcontainer.Container) error { } func execIn(container *libcontainer.Container) error { - f, err := os.Open("/root/nsroot/test") - if err != nil { - return err - } - container.NetNsFd = f.Fd() - pid, err := namespaces.ExecIn(container, &libcontainer.Command{ - Env: container.Command.Env, - Args: []string{ - newCommand, - }, - }) - if err != nil { - return fmt.Errorf("error exexin container %s", err) - } - exitcode, err := utils.WaitOnPid(pid) - if err != nil { - return fmt.Errorf("error waiting on child %s", err) - } - os.Exit(exitcode) + // f, err := os.Open("/root/nsroot/test") + // if err != nil { + // return err + // } + // container.NetNsFd = f.Fd() + // pid, err := namespaces.ExecIn(container, &libcontainer.Command{ + // Env: container.Command.Env, + // Args: []string{ + // newCommand, + // }, + // }) + // if err != nil { + // return fmt.Errorf("error exexin container %s", err) + // } + // exitcode, err := utils.WaitOnPid(pid) + // if err != nil { + // return fmt.Errorf("error waiting on child %s", err) + // } + // os.Exit(exitcode) return nil } @@ -143,11 +158,13 @@ func printErr(err error) { } func main() { - var ( - err error - cliCmd = flag.Arg(0) - config = "/root/development/gocode/src/github.com/dotcloud/docker/pkg/libcontainer/container.json" //flag.Arg(1) - ) + cliCmd := flag.Arg(0) + + config, err := filepath.Abs(flag.Arg(1)) + if err != nil { + printErr(err) + } + println("cli:", cliCmd, "config:", config) f, err := os.Open(config) if err != nil { printErr(err) diff --git a/pkg/libcontainer/container.json b/pkg/libcontainer/container.json 
index ed8eb1bd78..6e4fda54c8 100644 --- a/pkg/libcontainer/container.json +++ b/pkg/libcontainer/container.json @@ -12,7 +12,7 @@ "TERM=xterm" ] }, - "rootfs": "/root/main/mycontainer", + "rootfs": "/var/lib/docker/containers/ee76122136d691d63e09d24168a91ddb2ef9fdcf210b4de5c50aa76354892f4b/root", "namespaces": [ "NEWIPC", "NEWNS", diff --git a/pkg/libcontainer/namespaces/exec.go b/pkg/libcontainer/namespaces/exec.go index 7f4b4a609a..ea3d2caa70 100644 --- a/pkg/libcontainer/namespaces/exec.go +++ b/pkg/libcontainer/namespaces/exec.go @@ -6,8 +6,8 @@ package namespaces import ( "errors" - "fmt" "github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/pkg/system" "io" "log" "os" @@ -44,12 +44,15 @@ func ExecContainer(container *libcontainer.Container) (pid int, err error) { // we need CLONE_VFORK so we can wait on the child flag := uintptr(getNamespaceFlags(container.Namespaces) | CLONE_VFORK) - command := exec.Command(nsinit, "-master", strconv.Itoa(int(master.Fd())), "-console", console, "init") + command := exec.Command(nsinit, "-master", strconv.Itoa(int(master.Fd())), "-console", console, "init", "container.json") + // command.Stdin = os.Stdin + // command.Stdout = os.Stdout + // command.Stderr = os.Stderr command.SysProcAttr = &syscall.SysProcAttr{} command.SysProcAttr.Cloneflags = flag - command.ExtraFiles = []*os.File{master} - // command.SysProcAttr.Setctty = true + //command.ExtraFiles = []*os.File{master} + println("vvvvvvvvv") if err := command.Start(); err != nil { return -1, err } @@ -68,104 +71,18 @@ func ExecContainer(container *libcontainer.Container) (pid int, err error) { return pid, nil } -// ExecIn will spawn a new command inside an existing container's namespaces. The existing container's -// pid and namespace configuration is needed along with the specific capabilities that should -// be dropped once inside the namespace. 
-func ExecIn(container *libcontainer.Container, cmd *libcontainer.Command) (int, error) { - return -1, fmt.Errorf("not implemented") - /* - if container.NsPid <= 0 { - return -1, libcontainer.ErrInvalidPid - } - - fds, err := getNsFds(container) - if err != nil { - return -1, err - } - - if container.NetNsFd > 0 { - fds = append(fds, container.NetNsFd) - } - - pid, err := fork() - if err != nil { - for _, fd := range fds { - syscall.Close(int(fd)) - } - return -1, err - } - - if pid == 0 { - for _, fd := range fds { - if fd > 0 { - if err := JoinExistingNamespace(fd, ""); err != nil { - for _, fd := range fds { - syscall.Close(int(fd)) - } - writeError("join existing namespace for %d %s", fd, err) - } - } - syscall.Close(int(fd)) - } - - if container.Namespaces.Contains(libcontainer.CLONE_NEWNS) && - container.Namespaces.Contains(libcontainer.CLONE_NEWPID) { - // important: - // - // we need to fork and unshare so that re can remount proc and sys within - // the namespace so the CLONE_NEWPID namespace will take effect - // if we don't fork we would end up unmounting proc and sys for the entire - // namespace - child, err := fork() - if err != nil { - writeError("fork child %s", err) - } - - if child == 0 { - if err := unshare(CLONE_NEWNS); err != nil { - writeError("unshare newns %s", err) - } - if err := remountProc(); err != nil { - writeError("remount proc %s", err) - } - if err := remountSys(); err != nil { - writeError("remount sys %s", err) - } - if err := capabilities.DropCapabilities(container); err != nil { - writeError("drop caps %s", err) - } - if err := exec(cmd.Args[0], cmd.Args[0:], cmd.Env); err != nil { - writeError("exec %s", err) - } - panic("unreachable") - } - exit, err := utils.WaitOnPid(child) - if err != nil { - writeError("wait on child %s", err) - } - os.Exit(exit) - } - if err := exec(cmd.Args[0], cmd.Args[0:], cmd.Env); err != nil { - writeError("exec %s", err) - } - panic("unreachable") - } - return pid, err - */ -} - func 
createMasterAndConsole() (*os.File, string, error) { master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) if err != nil { return nil, "", err } - console, err := Ptsname(master) + console, err := system.Ptsname(master) if err != nil { return nil, "", err } - if err := Unlockpt(master); err != nil { + if err := system.Unlockpt(master); err != nil { return nil, "", err } return master, console, nil diff --git a/pkg/libcontainer/namespaces/mount.go b/pkg/libcontainer/namespaces/mount.go index 8e7c54b046..a9b981ecd9 100644 --- a/pkg/libcontainer/namespaces/mount.go +++ b/pkg/libcontainer/namespaces/mount.go @@ -2,6 +2,7 @@ package namespaces import ( "fmt" + "github.com/dotcloud/docker/pkg/system" "log" "os" "path/filepath" @@ -14,16 +15,16 @@ var ( ) func SetupNewMountNamespace(rootfs, console string, readonly bool) error { - if err := Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil { + if err := system.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil { return fmt.Errorf("mounting / as slave %s", err) } - if err := Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil { + if err := system.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil { return fmt.Errorf("mouting %s as bind %s", rootfs, err) } if readonly { - if err := Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, ""); err != nil { + if err := system.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, ""); err != nil { return fmt.Errorf("mounting %s as readonly %s", rootfs, err) } } @@ -52,29 +53,30 @@ func SetupNewMountNamespace(rootfs, console string, readonly bool) error { return err } - if err := Chdir(rootfs); err != nil { + if err := system.Chdir(rootfs); err != nil { return fmt.Errorf("chdir into %s %s", rootfs, err) } - if err := Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err 
!= nil { + if err := system.Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil { return fmt.Errorf("mount move %s into / %s", rootfs, err) } - if err := Chroot("."); err != nil { + if err := system.Chroot("."); err != nil { return fmt.Errorf("chroot . %s", err) } - if err := Chdir("/"); err != nil { + if err := system.Chdir("/"); err != nil { return fmt.Errorf("chdir / %s", err) } - Umask(0022) + system.Umask(0022) return nil } func copyDevNodes(rootfs string) error { - Umask(0000) + oldMask := system.Umask(0000) + defer system.Umask(oldMask) for _, node := range []string{ "null", @@ -95,7 +97,7 @@ func copyDevNodes(rootfs string) error { ) log.Printf("copy %s to %s %d\n", node, dest, st.Rdev) - if err := Mknod(dest, st.Mode, int(st.Rdev)); err != nil && !os.IsExist(err) { + if err := system.Mknod(dest, st.Mode, int(st.Rdev)); err != nil && !os.IsExist(err) { return fmt.Errorf("copy %s %s", node, err) } } @@ -125,7 +127,8 @@ func setupDev(rootfs string) error { } func setupConsole(rootfs, console string) error { - Umask(0000) + oldMask := system.Umask(0000) + defer system.Umask(oldMask) stat, err := os.Stat(console) if err != nil { @@ -145,11 +148,11 @@ func setupConsole(rootfs, console string) error { return err } - if err := Mknod(dest, (st.Mode&^07777)|0600, int(st.Rdev)); err != nil { + if err := system.Mknod(dest, (st.Mode&^07777)|0600, int(st.Rdev)); err != nil { return fmt.Errorf("mknod %s %s", dest, err) } - if err := Mount(console, dest, "bind", syscall.MS_BIND, ""); err != nil { + if err := system.Mount(console, dest, "bind", syscall.MS_BIND, ""); err != nil { return fmt.Errorf("bind %s to %s %s", console, dest, err) } return nil @@ -158,7 +161,7 @@ func setupConsole(rootfs, console string) error { // mountSystem sets up linux specific system mounts like sys, proc, shm, and devpts // inside the mount namespace func mountSystem(rootfs string) error { - mounts := []struct { + for _, m := range []struct { source string path string device string @@ 
-171,12 +174,11 @@ func mountSystem(rootfs string) error { {source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaults, data: "mode=1777"}, {source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: "newinstance,ptmxmode=0666,mode=620,gid=5"}, {source: "tmpfs", path: filepath.Join(rootfs, "run"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_STRICTATIME, data: "mode=755"}, - } - for _, m := range mounts { + } { if err := os.MkdirAll(m.path, 0755); err != nil && !os.IsExist(err) { return fmt.Errorf("mkdirall %s %s", m.path, err) } - if err := Mount(m.source, m.path, m.device, uintptr(m.flags), m.data); err != nil { + if err := system.Mount(m.source, m.path, m.device, uintptr(m.flags), m.data); err != nil { return fmt.Errorf("mounting %s into %s %s", m.source, m.path, err) } } @@ -184,22 +186,22 @@ func mountSystem(rootfs string) error { } func remountProc() error { - if err := Unmount("/proc", syscall.MNT_DETACH); err != nil { + if err := system.Unmount("/proc", syscall.MNT_DETACH); err != nil { return err } - if err := Mount("proc", "/proc", "proc", uintptr(defaults), ""); err != nil { + if err := system.Mount("proc", "/proc", "proc", uintptr(defaults), ""); err != nil { return err } return nil } func remountSys() error { - if err := Unmount("/sys", syscall.MNT_DETACH); err != nil { + if err := system.Unmount("/sys", syscall.MNT_DETACH); err != nil { if err != syscall.EINVAL { return err } } else { - if err := Mount("sysfs", "/sys", "sysfs", uintptr(defaults), ""); err != nil { + if err := system.Mount("sysfs", "/sys", "sysfs", uintptr(defaults), ""); err != nil { return err } } diff --git a/pkg/libcontainer/namespaces/namespaces.go b/pkg/libcontainer/namespaces/namespaces.go deleted file mode 100644 index 05ef0ac7a9..0000000000 --- a/pkg/libcontainer/namespaces/namespaces.go +++ /dev/null @@ -1,32 +0,0 @@ -/* - TODO - pivot root - 
cgroups - more mount stuff that I probably am forgetting - apparmor -*/ - -package namespaces - -import ( - "github.com/dotcloud/docker/pkg/libcontainer" -) - -// JoinExistingNamespace uses the fd of an existing linux namespace and -// has the current process join that namespace or the spacespace specified by ns -func JoinExistingNamespace(fd uintptr, ns libcontainer.Namespace) error { - flag := namespaceMap[ns] - if err := Setns(fd, uintptr(flag)); err != nil { - return err - } - return nil -} - -// getNamespaceFlags parses the container's Namespaces options to set the correct -// flags on clone, unshare, and setns -func getNamespaceFlags(namespaces libcontainer.Namespaces) (flag int) { - for _, ns := range namespaces { - flag |= namespaceMap[ns] - } - return -} diff --git a/pkg/libcontainer/namespaces/ns_linux.go b/pkg/libcontainer/namespaces/ns_linux.go index b0e5119130..f61279334d 100644 --- a/pkg/libcontainer/namespaces/ns_linux.go +++ b/pkg/libcontainer/namespaces/ns_linux.go @@ -33,3 +33,12 @@ var namespaceFileMap = map[libcontainer.Namespace]string{ libcontainer.CLONE_NEWPID: "pid", libcontainer.CLONE_NEWNET: "net", } + +// getNamespaceFlags parses the container's Namespaces options to set the correct +// flags on clone, unshare, and setns +func getNamespaceFlags(namespaces libcontainer.Namespaces) (flag int) { + for _, ns := range namespaces { + flag |= namespaceMap[ns] + } + return +} diff --git a/pkg/libcontainer/namespaces/nsinit/init.go b/pkg/libcontainer/namespaces/nsinit/init.go index ae6159b45a..7f85ebacdb 100644 --- a/pkg/libcontainer/namespaces/nsinit/init.go +++ b/pkg/libcontainer/namespaces/nsinit/init.go @@ -5,6 +5,7 @@ import ( "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/capabilities" "github.com/dotcloud/docker/pkg/libcontainer/namespaces" + "github.com/dotcloud/docker/pkg/system" "log" "os" "path/filepath" @@ -14,10 +15,12 @@ import ( // InitNamespace should be run inside an existing namespace 
to setup // common mounts, drop capabilities, and setup network interfaces func InitNamespace(container *libcontainer.Container) error { + println("|||||||||||||") if err := setLogFile(container); err != nil { return err } - + println(container.LogFile) + log.Printf("--------->") rootfs, err := resolveRootfs(container) if err != nil { return err @@ -26,7 +29,7 @@ func InitNamespace(container *libcontainer.Container) error { // any errors encoutered inside the namespace we should write // out to a log or a pipe to our parent and exit(1) // because writing to stderr will not work after we close - if err := closeMasterAndStd(container.Master); err != nil { + if err := closeMasterAndStd(os.NewFile(container.Master, "/dev/ptmx")); err != nil { log.Fatalf("close master and std %s", err) return err } @@ -49,15 +52,15 @@ func InitNamespace(container *libcontainer.Container) error { } */ - if _, err := namespaces.Setsid(); err != nil { + if _, err := system.Setsid(); err != nil { log.Fatalf("setsid %s", err) return err } - if err := namespaces.Setctty(); err != nil { + if err := system.Setctty(); err != nil { log.Fatalf("setctty %s", err) return err } - if err := namespaces.ParentDeathSignal(); err != nil { + if err := system.ParentDeathSignal(); err != nil { log.Fatalf("parent deth signal %s", err) return err } @@ -65,7 +68,7 @@ func InitNamespace(container *libcontainer.Container) error { log.Fatalf("setup mount namespace %s", err) return err } - if err := namespaces.Sethostname(container.ID); err != nil { + if err := system.Sethostname(container.ID); err != nil { log.Fatalf("sethostname %s", err) return err } @@ -78,12 +81,12 @@ func InitNamespace(container *libcontainer.Container) error { return err } if container.WorkingDir != "" { - if err := namespaces.Chdir(container.WorkingDir); err != nil { + if err := system.Chdir(container.WorkingDir); err != nil { log.Fatalf("chdir to %s %s", container.WorkingDir, err) return err } } - if err := 
namespaces.Exec(container.Command.Args[0], container.Command.Args[0:], container.Command.Env); err != nil { + if err := system.Exec(container.Command.Args[0], container.Command.Args[0:], container.Command.Env); err != nil { log.Fatalf("exec %s", err) return err } @@ -98,24 +101,23 @@ func resolveRootfs(container *libcontainer.Container) (string, error) { return filepath.EvalSymlinks(rootfs) } -func closeMasterAndStd(master uintptr) error { - namespaces.Closefd(master) - namespaces.Closefd(0) - namespaces.Closefd(1) - namespaces.Closefd(2) - +func closeMasterAndStd(master *os.File) error { + master.Close() + os.Stdin.Close() + os.Stdout.Close() + os.Stderr.Close() return nil } func setupUser(container *libcontainer.Container) error { // TODO: honor user passed on container - if err := namespaces.Setgroups(nil); err != nil { + if err := system.Setgroups(nil); err != nil { return err } - if err := namespaces.Setresgid(0, 0, 0); err != nil { + if err := system.Setresgid(0, 0, 0); err != nil { return err } - if err := namespaces.Setresuid(0, 0, 0); err != nil { + if err := system.Setresuid(0, 0, 0); err != nil { return err } return nil @@ -126,15 +128,16 @@ func dupSlave(slave *os.File) error { if slave.Fd() != 0 { return fmt.Errorf("slave fd not 0 %d", slave.Fd()) } - if err := namespaces.Dup2(slave.Fd(), 1); err != nil { + if err := system.Dup2(slave.Fd(), 1); err != nil { return err } - if err := namespaces.Dup2(slave.Fd(), 2); err != nil { + if err := system.Dup2(slave.Fd(), 2); err != nil { return err } return nil } +// openTerminal is a clone of os.OpenFile without the O_CLOEXEC addition. 
func openTerminal(name string, flag int) (*os.File, error) { r, e := syscall.Open(name, flag, 0) if e != nil { diff --git a/pkg/libcontainer/namespaces/utils.go b/pkg/libcontainer/namespaces/utils.go index fd195c0ad1..a5d677c7b3 100644 --- a/pkg/libcontainer/namespaces/utils.go +++ b/pkg/libcontainer/namespaces/utils.go @@ -72,17 +72,3 @@ func setupEnvironment(container *libcontainer.Container) { addEnvIfNotSet(container, "USER", "root") addEnvIfNotSet(container, "LOGNAME", "root") } - -func getMasterAndConsole(container *libcontainer.Container) (string, *os.File, error) { - master, err := Openpmtx() - if err != nil { - return "", nil, err - } - - console, err := Ptsname(master) - if err != nil { - master.Close() - return "", nil, err - } - return console, master, nil -} diff --git a/pkg/libcontainer/namespaces/calls_linux.go b/pkg/system/calls_linux.go similarity index 74% rename from pkg/libcontainer/namespaces/calls_linux.go rename to pkg/system/calls_linux.go index f006d56da6..42afa349c2 100644 --- a/pkg/libcontainer/namespaces/calls_linux.go +++ b/pkg/system/calls_linux.go @@ -1,15 +1,7 @@ -package namespaces +package system import ( - "fmt" - "os" "syscall" - "unsafe" -) - -const ( - TIOCGPTN = 0x80045430 - TIOCSPTLCK = 0x40045431 ) func Chroot(dir string) error { @@ -60,14 +52,6 @@ func Clone(flags uintptr) (int, error) { return int(pid), nil } -func Setns(fd uintptr, flags uintptr) error { - _, _, err := syscall.RawSyscall(SYS_SETNS, fd, flags, 0) - if err != 0 { - return err - } - return nil -} - func UsetCloseOnExec(fd uintptr) error { if _, _, err := syscall.Syscall(syscall.SYS_FCNTL, fd, syscall.F_SETFD, 0); err != 0 { return err @@ -95,11 +79,6 @@ func Setsid() (int, error) { return syscall.Setsid() } -func Unlockpt(f *os.File) error { - var u int - return Ioctl(f.Fd(), TIOCSPTLCK, uintptr(unsafe.Pointer(&u))) -} - func Ioctl(fd uintptr, flag, data uintptr) error { if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, flag, data); err != 0 { return 
err @@ -107,18 +86,6 @@ func Ioctl(fd uintptr, flag, data uintptr) error { return nil } -func Ptsname(f *os.File) (string, error) { - var n int - if err := Ioctl(f.Fd(), TIOCGPTN, uintptr(unsafe.Pointer(&n))); err != nil { - return "", err - } - return fmt.Sprintf("/dev/pts/%d", n), nil -} - -func Openpmtx() (*os.File, error) { - return os.OpenFile("/dev/ptmx", syscall.O_RDONLY|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) -} - func Closefd(fd uintptr) error { return syscall.Close(int(fd)) } @@ -132,7 +99,7 @@ func Mknod(path string, mode uint32, dev int) error { } func ParentDeathSignal() error { - if _, _, err := syscall.RawSyscall6(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, uintptr(syscall.SIGKILL), 0, 0, 0, 0); err != 0 { + if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, uintptr(syscall.SIGKILL), 0); err != 0 { return err } return nil diff --git a/pkg/system/pty_linux.go b/pkg/system/pty_linux.go new file mode 100644 index 0000000000..b281b719fb --- /dev/null +++ b/pkg/system/pty_linux.go @@ -0,0 +1,31 @@ +package system + +import ( + "fmt" + "os" + "syscall" + "unsafe" +) + +// Unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. +// Unlockpt should be called before opening the slave side of a pseudoterminal. +func Unlockpt(f *os.File) error { + var u int + return Ioctl(f.Fd(), syscall.TIOCSPTLCK, uintptr(unsafe.Pointer(&u))) +} + +// Ptsname retrieves the name of the first available pts for the given master. +func Ptsname(f *os.File) (string, error) { + var n int + + if err := Ioctl(f.Fd(), syscall.TIOCGPTN, uintptr(unsafe.Pointer(&n))); err != nil { + return "", err + } + return fmt.Sprintf("/dev/pts/%d", n), nil +} + +// OpenPtmx opens /dev/ptmx, i.e. the PTY master. +func OpenPtmx() (*os.File, error) { + // O_NOCTTY and O_CLOEXEC are not present in os package so we use the syscall's one for all. 
+ return os.OpenFile("/dev/ptmx", syscall.O_RDONLY|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) +} diff --git a/pkg/system/setns_linux.go b/pkg/system/setns_linux.go new file mode 100644 index 0000000000..be6f3edb30 --- /dev/null +++ b/pkg/system/setns_linux.go @@ -0,0 +1,13 @@ +package system + +import ( + "syscall" +) + +func Setns(fd uintptr, flags uintptr) error { + _, _, err := syscall.RawSyscall(SYS_SETNS, fd, flags, 0) + if err != 0 { + return err + } + return nil +} diff --git a/pkg/system/setns_linux_amd64.go b/pkg/system/setns_linux_amd64.go new file mode 100644 index 0000000000..4e306253d9 --- /dev/null +++ b/pkg/system/setns_linux_amd64.go @@ -0,0 +1,8 @@ +// +build linux,amd64 + +package system + +// Via http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7b21fddd087678a70ad64afc0f632e0f1071b092 +const ( + SYS_SETNS = 308 +) From 18f06b8d16c475568fd023e97eecc138ab052c2d Mon Sep 17 00:00:00 2001 From: "Guillaume J. Charmes" Date: Wed, 19 Feb 2014 12:47:01 -0800 Subject: [PATCH 06/81] Fix ptmx issue on libcontainer Docker-DCO-1.1-Signed-off-by: Guillaume J. 
Charmes (github: creack) --- pkg/libcontainer/namespaces/exec.go | 3 ++- pkg/libcontainer/namespaces/mount.go | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/libcontainer/namespaces/exec.go b/pkg/libcontainer/namespaces/exec.go index ea3d2caa70..77550d6089 100644 --- a/pkg/libcontainer/namespaces/exec.go +++ b/pkg/libcontainer/namespaces/exec.go @@ -50,7 +50,8 @@ func ExecContainer(container *libcontainer.Container) (pid int, err error) { // command.Stderr = os.Stderr command.SysProcAttr = &syscall.SysProcAttr{} command.SysProcAttr.Cloneflags = flag - //command.ExtraFiles = []*os.File{master} + + command.ExtraFiles = []*os.File{master} println("vvvvvvvvv") if err := command.Start(); err != nil { diff --git a/pkg/libcontainer/namespaces/mount.go b/pkg/libcontainer/namespaces/mount.go index a9b981ecd9..5c0b8ead16 100644 --- a/pkg/libcontainer/namespaces/mount.go +++ b/pkg/libcontainer/namespaces/mount.go @@ -41,7 +41,7 @@ func SetupNewMountNamespace(rootfs, console string, readonly bool) error { if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) { return err } - if err := os.Symlink(filepath.Join(rootfs, "pts/ptmx"), ptmx); err != nil { + if err := os.Symlink("pts/ptmx", ptmx); err != nil { return fmt.Errorf("symlink dev ptmx %s", err) } From e25065a6b1df09771598d77cc698e4fcf1159bd4 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 19 Feb 2014 14:33:25 -0800 Subject: [PATCH 07/81] Use nsinit as app Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/container.go | 10 +- pkg/libcontainer/namespaces/exec.go | 40 ++----- pkg/libcontainer/namespaces/linux_x86_64.go | 7 -- pkg/libcontainer/namespaces/ns_linux.go | 2 +- pkg/libcontainer/namespaces/nsinit/init.go | 112 ++++++++++-------- .../namespaces/{ => nsinit}/mount.go | 4 +- pkg/libcontainer/namespaces/utils.go | 26 ---- 7 files changed, 82 insertions(+), 119 deletions(-) delete mode 100644 pkg/libcontainer/namespaces/linux_x86_64.go 
rename pkg/libcontainer/namespaces/{ => nsinit}/mount.go (98%) diff --git a/pkg/libcontainer/container.go b/pkg/libcontainer/container.go index c9a3f2e902..c2885447fd 100644 --- a/pkg/libcontainer/container.go +++ b/pkg/libcontainer/container.go @@ -2,18 +2,14 @@ package libcontainer type Container struct { ID string `json:"id,omitempty"` - NsPid int `json:"namespace_pid,omitempty"` Command *Command `json:"command,omitempty"` - RootFs string `json:"rootfs,omitempty"` ReadonlyFs bool `json:"readonly_fs,omitempty"` - NetNsFd uintptr `json:"network_namespace_fd,omitempty"` User string `json:"user,omitempty"` WorkingDir string `json:"working_dir,omitempty"` Namespaces Namespaces `json:"namespaces,omitempty"` Capabilities Capabilities `json:"capabilities,omitempty"` - Master uintptr `json:"master"` - Console string `json:"console"` - LogFile string `json:"log_file"` + LogFile string `json:"log_file,omitempty"` + Network *Network `json:"network,omitempty"` } type Command struct { @@ -22,9 +18,9 @@ type Command struct { } type Network struct { - TempVethName string `json:"temp_veth,omitempty"` IP string `json:"ip,omitempty"` Gateway string `json:"gateway,omitempty"` Bridge string `json:"bridge,omitempty"` Mtu int `json:"mtu,omitempty"` + TempVethName string `json:"temp_veth,omitempty"` } diff --git a/pkg/libcontainer/namespaces/exec.go b/pkg/libcontainer/namespaces/exec.go index 77550d6089..8e5bf68aef 100644 --- a/pkg/libcontainer/namespaces/exec.go +++ b/pkg/libcontainer/namespaces/exec.go @@ -1,27 +1,17 @@ -/* - Higher level convience functions for setting up a container -*/ - package namespaces import ( - "errors" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/system" + "github.com/dotcloud/docker/pkg/term" "io" "log" "os" "os/exec" - "path/filepath" - "strconv" "syscall" ) -var ( - ErrExistingNetworkNamespace = errors.New("specified both CLONE_NEWNET and an existing network namespace") -) - -// Exec will spawn new namespaces with the 
specified Container configuration +// ExecContainer will spawn new namespaces with the specified Container configuration // in the RootFs path and return the pid of the new containerized process. // // If an existing network namespace is specified the container @@ -30,30 +20,19 @@ var ( // existing network namespace and the CLONE_NEWNET option in the container configuration will allow // the container to the the host's networking options and configuration. func ExecContainer(container *libcontainer.Container) (pid int, err error) { - // a user cannot pass CLONE_NEWNET and an existing net namespace fd to join - if container.NetNsFd > 0 && container.Namespaces.Contains(libcontainer.CLONE_NEWNET) { - return -1, ErrExistingNetworkNamespace - } - master, console, err := createMasterAndConsole() if err != nil { return -1, err } - nsinit := filepath.Join(container.RootFs, ".nsinit") // we need CLONE_VFORK so we can wait on the child flag := uintptr(getNamespaceFlags(container.Namespaces) | CLONE_VFORK) - command := exec.Command(nsinit, "-master", strconv.Itoa(int(master.Fd())), "-console", console, "init", "container.json") - // command.Stdin = os.Stdin - // command.Stdout = os.Stdout - // command.Stderr = os.Stderr - command.SysProcAttr = &syscall.SysProcAttr{} - command.SysProcAttr.Cloneflags = flag + command := exec.Command("nsinit", console) + command.SysProcAttr = &syscall.SysProcAttr{ + Cloneflags: flag, + } - command.ExtraFiles = []*os.File{master} - - println("vvvvvvvvv") if err := command.Start(); err != nil { return -1, err } @@ -64,11 +43,18 @@ func ExecContainer(container *libcontainer.Container) (pid int, err error) { log.Println(err) } }() + go func() { if _, err := io.Copy(master, os.Stdin); err != nil { log.Println(err) } }() + + term.SetRawTerminal(os.Stdin.Fd()) + + if err := command.Wait(); err != nil { + return pid, err + } return pid, nil } diff --git a/pkg/libcontainer/namespaces/linux_x86_64.go b/pkg/libcontainer/namespaces/linux_x86_64.go deleted 
file mode 100644 index ac9a014763..0000000000 --- a/pkg/libcontainer/namespaces/linux_x86_64.go +++ /dev/null @@ -1,7 +0,0 @@ -// +build linux,x86_64 -package namespaces - -// Via http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7b21fddd087678a70ad64afc0f632e0f1071b092 -const ( - SYS_SETNS = 308 -) diff --git a/pkg/libcontainer/namespaces/ns_linux.go b/pkg/libcontainer/namespaces/ns_linux.go index f61279334d..2c73e08e58 100644 --- a/pkg/libcontainer/namespaces/ns_linux.go +++ b/pkg/libcontainer/namespaces/ns_linux.go @@ -40,5 +40,5 @@ func getNamespaceFlags(namespaces libcontainer.Namespaces) (flag int) { for _, ns := range namespaces { flag |= namespaceMap[ns] } - return + return flag } diff --git a/pkg/libcontainer/namespaces/nsinit/init.go b/pkg/libcontainer/namespaces/nsinit/init.go index 7f85ebacdb..523854e5d6 100644 --- a/pkg/libcontainer/namespaces/nsinit/init.go +++ b/pkg/libcontainer/namespaces/nsinit/init.go @@ -1,6 +1,7 @@ -package nsinit +package main import ( + "encoding/json" "fmt" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/capabilities" @@ -12,103 +13,112 @@ import ( "syscall" ) -// InitNamespace should be run inside an existing namespace to setup -// common mounts, drop capabilities, and setup network interfaces -func InitNamespace(container *libcontainer.Container) error { - println("|||||||||||||") - if err := setLogFile(container); err != nil { - return err - } - println(container.LogFile) - log.Printf("--------->") - rootfs, err := resolveRootfs(container) +func loadContainer() (*libcontainer.Container, error) { + f, err := os.Open("container.json") if err != nil { - return err + return nil, err + } + defer f.Close() + + var container *libcontainer.Container + if err := json.NewDecoder(f).Decode(&container); err != nil { + return nil, err + } + return container, nil +} + +func main() { + container, err := loadContainer() + if err != nil { + log.Fatal(err) } - // any errors 
encoutered inside the namespace we should write - // out to a log or a pipe to our parent and exit(1) - // because writing to stderr will not work after we close - if err := closeMasterAndStd(os.NewFile(container.Master, "/dev/ptmx")); err != nil { - log.Fatalf("close master and std %s", err) - return err + if os.Args[1] == "exec" { + _, err := namespaces.ExecContainer(container) + if err != nil { + log.Fatal(err) + } + os.Exit(0) + } + console := os.Args[1] + + if err := setLogFile(container); err != nil { + log.Fatal(err) } - slave, err := openTerminal(container.Console, syscall.O_RDWR) + rootfs, err := resolveRootfs() + if err != nil { + log.Fatal(err) + } + + // close pipes so that we can replace it with the pty + os.Stdin.Close() + os.Stdout.Close() + os.Stderr.Close() + + slave, err := openTerminal(console, syscall.O_RDWR) if err != nil { log.Fatalf("open terminal %s", err) - return err + } + if slave.Fd() != 0 { + log.Fatalf("slave fd should be 0") } if err := dupSlave(slave); err != nil { log.Fatalf("dup2 slave %s", err) - return err } - /* - if container.NetNsFd > 0 { - if err := joinExistingNamespace(container.NetNsFd, libcontainer.CLONE_NEWNET); err != nil { - log.Fatalf("join existing net namespace %s", err) - } - } - */ - if _, err := system.Setsid(); err != nil { log.Fatalf("setsid %s", err) - return err } if err := system.Setctty(); err != nil { log.Fatalf("setctty %s", err) - return err } if err := system.ParentDeathSignal(); err != nil { log.Fatalf("parent deth signal %s", err) - return err } - if err := namespaces.SetupNewMountNamespace(rootfs, container.Console, container.ReadonlyFs); err != nil { + + if err := setupNewMountNamespace(rootfs, console, container.ReadonlyFs); err != nil { log.Fatalf("setup mount namespace %s", err) - return err } + + if container.Network != nil { + if err := setupNetworking(container); err != nil { + log.Fatalf("setup networking %s", err) + } + } + if err := system.Sethostname(container.ID); err != nil { 
log.Fatalf("sethostname %s", err) - return err } if err := capabilities.DropCapabilities(container); err != nil { log.Fatalf("drop capabilities %s", err) - return err } if err := setupUser(container); err != nil { log.Fatalf("setup user %s", err) - return err } if container.WorkingDir != "" { if err := system.Chdir(container.WorkingDir); err != nil { log.Fatalf("chdir to %s %s", container.WorkingDir, err) - return err } } if err := system.Exec(container.Command.Args[0], container.Command.Args[0:], container.Command.Env); err != nil { log.Fatalf("exec %s", err) - return err } panic("unreachable") } -func resolveRootfs(container *libcontainer.Container) (string, error) { - rootfs, err := filepath.Abs(container.RootFs) +func resolveRootfs() (string, error) { + cwd, err := os.Getwd() + if err != nil { + return "", err + } + rootfs, err := filepath.Abs(cwd) if err != nil { return "", err } return filepath.EvalSymlinks(rootfs) } -func closeMasterAndStd(master *os.File) error { - master.Close() - os.Stdin.Close() - os.Stdout.Close() - os.Stderr.Close() - return nil -} - func setupUser(container *libcontainer.Container) error { // TODO: honor user passed on container if err := system.Setgroups(nil); err != nil { @@ -154,3 +164,7 @@ func setLogFile(container *libcontainer.Container) error { log.SetOutput(f) return nil } + +func setupNetworking(conatiner *libcontainer.Container) error { + return nil +} diff --git a/pkg/libcontainer/namespaces/mount.go b/pkg/libcontainer/namespaces/nsinit/mount.go similarity index 98% rename from pkg/libcontainer/namespaces/mount.go rename to pkg/libcontainer/namespaces/nsinit/mount.go index 5c0b8ead16..f9ee969636 100644 --- a/pkg/libcontainer/namespaces/mount.go +++ b/pkg/libcontainer/namespaces/nsinit/mount.go @@ -1,4 +1,4 @@ -package namespaces +package main import ( "fmt" @@ -14,7 +14,7 @@ var ( defaults = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV ) -func SetupNewMountNamespace(rootfs, console string, readonly bool) error { 
+func setupNewMountNamespace(rootfs, console string, readonly bool) error { if err := system.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil { return fmt.Errorf("mounting / as slave %s", err) } diff --git a/pkg/libcontainer/namespaces/utils.go b/pkg/libcontainer/namespaces/utils.go index a5d677c7b3..edc3ab52cc 100644 --- a/pkg/libcontainer/namespaces/utils.go +++ b/pkg/libcontainer/namespaces/utils.go @@ -7,7 +7,6 @@ import ( "path/filepath" "strconv" "strings" - "syscall" ) func addEnvIfNotSet(container *libcontainer.Container, key, value string) { @@ -26,31 +25,6 @@ func addEnvIfNotSet(container *libcontainer.Container, key, value string) { container.Command.Env = append(container.Command.Env, jv) } -// getNsFds inspects the container's namespace configuration and opens the fds to -// each of the namespaces. -func getNsFds(container *libcontainer.Container) ([]uintptr, error) { - var ( - namespaces = []string{} - fds = []uintptr{} - ) - - for _, ns := range container.Namespaces { - namespaces = append(namespaces, namespaceFileMap[ns]) - } - - for _, ns := range namespaces { - fd, err := getNsFd(container.NsPid, ns) - if err != nil { - for _, fd = range fds { - syscall.Close(int(fd)) - } - return nil, err - } - fds = append(fds, fd) - } - return fds, nil -} - // getNsFd returns the fd for a specific pid and namespace option func getNsFd(pid int, ns string) (uintptr, error) { nspath := filepath.Join("/proc", strconv.Itoa(pid), "ns", ns) From 7bc3c012507edcfc5e8ab8523b240ac2bb03fe19 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 19 Feb 2014 14:55:34 -0800 Subject: [PATCH 08/81] Simplify namespaces with only nsinit Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/cli/main.go | 203 ------------------ pkg/libcontainer/namespaces/utils.go | 48 ----- .../{namespaces => nsinit}/exec.go | 26 ++- .../{namespaces => }/nsinit/init.go | 13 +- .../{namespaces => }/nsinit/mount.go | 0 .../{namespaces => 
nsinit}/ns_linux.go | 11 +- 6 files changed, 23 insertions(+), 278 deletions(-) delete mode 100644 pkg/libcontainer/cli/main.go delete mode 100644 pkg/libcontainer/namespaces/utils.go rename pkg/libcontainer/{namespaces => nsinit}/exec.go (63%) rename pkg/libcontainer/{namespaces => }/nsinit/init.go (93%) rename pkg/libcontainer/{namespaces => }/nsinit/mount.go (100%) rename pkg/libcontainer/{namespaces => nsinit}/ns_linux.go (74%) diff --git a/pkg/libcontainer/cli/main.go b/pkg/libcontainer/cli/main.go deleted file mode 100644 index 93bb0399f0..0000000000 --- a/pkg/libcontainer/cli/main.go +++ /dev/null @@ -1,203 +0,0 @@ -package main - -import ( - "encoding/json" - "flag" - "fmt" - "github.com/dotcloud/docker/pkg/libcontainer" - "github.com/dotcloud/docker/pkg/libcontainer/namespaces" - "github.com/dotcloud/docker/pkg/libcontainer/namespaces/nsinit" - "github.com/dotcloud/docker/pkg/libcontainer/network" - "github.com/dotcloud/docker/pkg/libcontainer/utils" - "os" - exec_ "os/exec" - "path" - "path/filepath" -) - -var ( - displayPid bool - newCommand string - usrNet bool - masterFd int - console string -) - -func init() { - flag.BoolVar(&displayPid, "pid", false, "display the pid before waiting") - flag.StringVar(&newCommand, "cmd", "/bin/bash", "command to run in the existing namespace") - flag.BoolVar(&usrNet, "net", false, "user a net namespace") - flag.IntVar(&masterFd, "master", 0, "master fd") - flag.StringVar(&console, "console", "", "console path") - flag.Parse() -} - -func nsinitFunc(container *libcontainer.Container) error { - container.Master = uintptr(masterFd) - container.Console = console - container.LogFile = "/root/logs" - - return nsinit.InitNamespace(container) -} - -func exec(container *libcontainer.Container) error { - var ( - netFile *os.File - err error - ) - container.NetNsFd = 0 - - if usrNet { - netFile, err = os.Open("/root/nsroot/test") - if err != nil { - return err - } - container.NetNsFd = netFile.Fd() - } - - self, err := 
exec_.LookPath(os.Args[0]) - if err != nil { - return err - } - if output, err := exec_.Command("cp", self, path.Join(container.RootFs, ".nsinit")).CombinedOutput(); err != nil { - return fmt.Errorf("Error exec cp: %s, (%s)", err, output) - } else { - println(self, container.RootFs) - fmt.Printf("-----> %s\n", output) - } - println("----") - - pid, err := namespaces.ExecContainer(container) - if err != nil { - return fmt.Errorf("error exec container %s", err) - } - - if displayPid { - fmt.Println(pid) - } - - exitcode, err := utils.WaitOnPid(pid) - if err != nil { - return fmt.Errorf("error waiting on child %s", err) - } - fmt.Println(exitcode) - if usrNet { - netFile.Close() - if err := network.DeleteNetworkNamespace("/root/nsroot/test"); err != nil { - return err - } - } - os.Exit(exitcode) - return nil -} - -func execIn(container *libcontainer.Container) error { - // f, err := os.Open("/root/nsroot/test") - // if err != nil { - // return err - // } - // container.NetNsFd = f.Fd() - // pid, err := namespaces.ExecIn(container, &libcontainer.Command{ - // Env: container.Command.Env, - // Args: []string{ - // newCommand, - // }, - // }) - // if err != nil { - // return fmt.Errorf("error exexin container %s", err) - // } - // exitcode, err := utils.WaitOnPid(pid) - // if err != nil { - // return fmt.Errorf("error waiting on child %s", err) - // } - // os.Exit(exitcode) - return nil -} - -func createNet(config *libcontainer.Network) error { - /* - root := "/root/nsroot" - if err := network.SetupNamespaceMountDir(root); err != nil { - return err - } - - nspath := root + "/test" - if err := network.CreateNetworkNamespace(nspath); err != nil { - return nil - } - if err := network.CreateVethPair("veth0", config.TempVethName); err != nil { - return err - } - if err := network.SetInterfaceMaster("veth0", config.Bridge); err != nil { - return err - } - if err := network.InterfaceUp("veth0"); err != nil { - return err - } - - f, err := os.Open(nspath) - if err != nil { - 
return err - } - defer f.Close() - - if err := network.SetInterfaceInNamespaceFd("veth1", int(f.Fd())); err != nil { - return err - } - - if err := network.SetupVethInsideNamespace(f.Fd(), config); err != nil { - return err - } - */ - return nil -} - -func printErr(err error) { - fmt.Fprintln(os.Stderr, err) - os.Exit(1) -} - -func main() { - cliCmd := flag.Arg(0) - - config, err := filepath.Abs(flag.Arg(1)) - if err != nil { - printErr(err) - } - println("cli:", cliCmd, "config:", config) - f, err := os.Open(config) - if err != nil { - printErr(err) - } - - dec := json.NewDecoder(f) - var container *libcontainer.Container - - if err := dec.Decode(&container); err != nil { - printErr(err) - } - f.Close() - - switch cliCmd { - case "init": - err = nsinitFunc(container) - case "exec": - err = exec(container) - case "execin": - err = execIn(container) - case "net": - err = createNet(&libcontainer.Network{ - TempVethName: "veth1", - IP: "172.17.0.100/16", - Gateway: "172.17.42.1", - Mtu: 1500, - Bridge: "docker0", - }) - default: - err = fmt.Errorf("command not supported: %s", cliCmd) - } - - if err != nil { - printErr(err) - } -} diff --git a/pkg/libcontainer/namespaces/utils.go b/pkg/libcontainer/namespaces/utils.go deleted file mode 100644 index edc3ab52cc..0000000000 --- a/pkg/libcontainer/namespaces/utils.go +++ /dev/null @@ -1,48 +0,0 @@ -package namespaces - -import ( - "fmt" - "github.com/dotcloud/docker/pkg/libcontainer" - "os" - "path/filepath" - "strconv" - "strings" -) - -func addEnvIfNotSet(container *libcontainer.Container, key, value string) { - jv := fmt.Sprintf("%s=%s", key, value) - if len(container.Command.Env) == 0 { - container.Command.Env = []string{jv} - return - } - - for _, v := range container.Command.Env { - parts := strings.Split(v, "=") - if parts[0] == key { - return - } - } - container.Command.Env = append(container.Command.Env, jv) -} - -// getNsFd returns the fd for a specific pid and namespace option -func getNsFd(pid int, ns string) 
(uintptr, error) { - nspath := filepath.Join("/proc", strconv.Itoa(pid), "ns", ns) - // OpenFile adds closOnExec - f, err := os.OpenFile(nspath, os.O_RDONLY, 0666) - if err != nil { - return 0, err - } - return f.Fd(), nil -} - -// setupEnvironment adds additional environment variables to the container's -// Command such as USER, LOGNAME, container, and TERM -func setupEnvironment(container *libcontainer.Container) { - addEnvIfNotSet(container, "container", "docker") - // TODO: check if pty - addEnvIfNotSet(container, "TERM", "xterm") - // TODO: get username from container - addEnvIfNotSet(container, "USER", "root") - addEnvIfNotSet(container, "LOGNAME", "root") -} diff --git a/pkg/libcontainer/namespaces/exec.go b/pkg/libcontainer/nsinit/exec.go similarity index 63% rename from pkg/libcontainer/namespaces/exec.go rename to pkg/libcontainer/nsinit/exec.go index 8e5bf68aef..ef81b0ef87 100644 --- a/pkg/libcontainer/namespaces/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -1,4 +1,4 @@ -package namespaces +package main import ( "github.com/dotcloud/docker/pkg/libcontainer" @@ -11,15 +11,7 @@ import ( "syscall" ) -// ExecContainer will spawn new namespaces with the specified Container configuration -// in the RootFs path and return the pid of the new containerized process. -// -// If an existing network namespace is specified the container -// will join that namespace. If an existing network namespace is not specified but CLONE_NEWNET is, -// the container will be spawned with a new network namespace with no configuration. Omiting an -// existing network namespace and the CLONE_NEWNET option in the container configuration will allow -// the container to the the host's networking options and configuration. 
-func ExecContainer(container *libcontainer.Container) (pid int, err error) { +func execCommand(container *libcontainer.Container) (pid int, err error) { master, console, err := createMasterAndConsole() if err != nil { return -1, err @@ -50,7 +42,19 @@ func ExecContainer(container *libcontainer.Container) (pid int, err error) { } }() - term.SetRawTerminal(os.Stdin.Fd()) + ws, err := term.GetWinsize(os.Stdin.Fd()) + if err != nil { + return -1, err + } + if err := term.SetWinsize(master.Fd(), ws); err != nil { + return -1, err + } + state, err := term.SetRawTerminal(os.Stdin.Fd()) + if err != nil { + command.Process.Kill() + return -1, err + } + defer term.RestoreTerminal(os.Stdin.Fd(), state) if err := command.Wait(); err != nil { return pid, err diff --git a/pkg/libcontainer/namespaces/nsinit/init.go b/pkg/libcontainer/nsinit/init.go similarity index 93% rename from pkg/libcontainer/namespaces/nsinit/init.go rename to pkg/libcontainer/nsinit/init.go index 523854e5d6..b4b7de410c 100644 --- a/pkg/libcontainer/namespaces/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -5,7 +5,6 @@ import ( "fmt" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/capabilities" - "github.com/dotcloud/docker/pkg/libcontainer/namespaces" "github.com/dotcloud/docker/pkg/system" "log" "os" @@ -34,7 +33,7 @@ func main() { } if os.Args[1] == "exec" { - _, err := namespaces.ExecContainer(container) + _, err := execCommand(container) if err != nil { log.Fatal(err) } @@ -157,11 +156,13 @@ func openTerminal(name string, flag int) (*os.File, error) { } func setLogFile(container *libcontainer.Container) error { - f, err := os.OpenFile(container.LogFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0655) - if err != nil { - return err + if container.LogFile != "" { + f, err := os.OpenFile(container.LogFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0655) + if err != nil { + return err + } + log.SetOutput(f) } - log.SetOutput(f) return nil } diff --git 
a/pkg/libcontainer/namespaces/nsinit/mount.go b/pkg/libcontainer/nsinit/mount.go similarity index 100% rename from pkg/libcontainer/namespaces/nsinit/mount.go rename to pkg/libcontainer/nsinit/mount.go diff --git a/pkg/libcontainer/namespaces/ns_linux.go b/pkg/libcontainer/nsinit/ns_linux.go similarity index 74% rename from pkg/libcontainer/namespaces/ns_linux.go rename to pkg/libcontainer/nsinit/ns_linux.go index 2c73e08e58..b54bc2b993 100644 --- a/pkg/libcontainer/namespaces/ns_linux.go +++ b/pkg/libcontainer/nsinit/ns_linux.go @@ -1,4 +1,4 @@ -package namespaces +package main import ( "github.com/dotcloud/docker/pkg/libcontainer" @@ -25,15 +25,6 @@ var namespaceMap = map[libcontainer.Namespace]int{ libcontainer.CLONE_NEWNET: CLONE_NEWNET, } -var namespaceFileMap = map[libcontainer.Namespace]string{ - libcontainer.CLONE_NEWNS: "mnt", - libcontainer.CLONE_NEWUTS: "uts", - libcontainer.CLONE_NEWIPC: "ipc", - libcontainer.CLONE_NEWUSER: "user", - libcontainer.CLONE_NEWPID: "pid", - libcontainer.CLONE_NEWNET: "net", -} - // getNamespaceFlags parses the container's Namespaces options to set the correct // flags on clone, unshare, and setns func getNamespaceFlags(namespaces libcontainer.Namespaces) (flag int) { From 34671f20103fb975fed31a03705e04bc65aed239 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 19 Feb 2014 15:33:44 -0800 Subject: [PATCH 09/81] Implement init veth creation Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/container.go | 9 ++++---- pkg/libcontainer/container.json | 14 ++++++++---- pkg/libcontainer/network/veth.go | 38 +++++--------------------------- pkg/libcontainer/nsinit/exec.go | 33 +++++++++++++++++++++++++++ pkg/libcontainer/nsinit/init.go | 14 +++++++++--- pkg/libcontainer/ubuntu.json | 22 ------------------ 6 files changed, 63 insertions(+), 67 deletions(-) delete mode 100644 pkg/libcontainer/ubuntu.json diff --git a/pkg/libcontainer/container.go b/pkg/libcontainer/container.go index 
c2885447fd..3f3961d496 100644 --- a/pkg/libcontainer/container.go +++ b/pkg/libcontainer/container.go @@ -18,9 +18,8 @@ type Command struct { } type Network struct { - IP string `json:"ip,omitempty"` - Gateway string `json:"gateway,omitempty"` - Bridge string `json:"bridge,omitempty"` - Mtu int `json:"mtu,omitempty"` - TempVethName string `json:"temp_veth,omitempty"` + IP string `json:"ip,omitempty"` + Gateway string `json:"gateway,omitempty"` + Bridge string `json:"bridge,omitempty"` + Mtu int `json:"mtu,omitempty"` } diff --git a/pkg/libcontainer/container.json b/pkg/libcontainer/container.json index 6e4fda54c8..8731170c2a 100644 --- a/pkg/libcontainer/container.json +++ b/pkg/libcontainer/container.json @@ -1,6 +1,6 @@ { "id": "koye", - "namespace_pid": 3117, + "log_file": "/root/logs", "command": { "args": [ "/bin/bash" @@ -12,12 +12,12 @@ "TERM=xterm" ] }, - "rootfs": "/var/lib/docker/containers/ee76122136d691d63e09d24168a91ddb2ef9fdcf210b4de5c50aa76354892f4b/root", "namespaces": [ "NEWIPC", "NEWNS", "NEWPID", - "NEWUTS" + "NEWUTS", + "NEWNET" ], "capabilities": [ "SETPCAP", @@ -34,5 +34,11 @@ "AUDIT_CONTROL", "MAC_OVERRIDE", "MAC_ADMIN" - ] + ], + "network": { + "ip": "172.17.0.100/16", + "gateway": "172.17.42.1", + "bridge": "docker0", + "mtu": 1500 + } } diff --git a/pkg/libcontainer/network/veth.go b/pkg/libcontainer/network/veth.go index 2ecce22c3e..05512e63c8 100644 --- a/pkg/libcontainer/network/veth.go +++ b/pkg/libcontainer/network/veth.go @@ -3,18 +3,16 @@ package network import ( "fmt" "github.com/dotcloud/docker/pkg/libcontainer" - "os" - "syscall" ) // SetupVeth sets up an existing network namespace with the specified // network configuration. 
-func SetupVeth(config *libcontainer.Network) error { - if err := InterfaceDown(config.TempVethName); err != nil { - return fmt.Errorf("interface down %s %s", config.TempVethName, err) +func SetupVeth(config *libcontainer.Network, tempVethName string) error { + if err := InterfaceDown(tempVethName); err != nil { + return fmt.Errorf("interface down %s %s", tempVethName, err) } - if err := ChangeInterfaceName(config.TempVethName, "eth0"); err != nil { - return fmt.Errorf("change %s to eth0 %s", config.TempVethName, err) + if err := ChangeInterfaceName(tempVethName, "eth0"); err != nil { + return fmt.Errorf("change %s to eth0 %s", tempVethName, err) } if err := SetInterfaceIp("eth0", config.IP); err != nil { return fmt.Errorf("set eth0 ip %s", err) @@ -41,29 +39,3 @@ func SetupVeth(config *libcontainer.Network) error { } return nil } - -// SetupNamespaceMountDir prepares a new root for use as a mount -// source for bind mounting namespace fd to an outside path -func SetupNamespaceMountDir(root string) error { - if err := os.MkdirAll(root, 0666); err != nil { - return err - } - // make sure mounts are not unmounted by other mnt namespaces - if err := syscall.Mount("", root, "none", syscall.MS_SHARED|syscall.MS_REC, ""); err != nil && err != syscall.EINVAL { - return err - } - if err := syscall.Mount(root, root, "none", syscall.MS_BIND, ""); err != nil { - return err - } - return nil -} - -// DeleteNetworkNamespace unmounts the binding path and removes the -// file so that no references to the fd are present and the network -// namespace is automatically cleaned up -func DeleteNetworkNamespace(bindingPath string) error { - if err := syscall.Unmount(bindingPath, 0); err != nil { - return err - } - return os.Remove(bindingPath) -} diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index ef81b0ef87..9cd1741706 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -1,7 +1,9 @@ package main import ( + "fmt" 
"github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/pkg/libcontainer/network" "github.com/dotcloud/docker/pkg/system" "github.com/dotcloud/docker/pkg/term" "io" @@ -25,11 +27,34 @@ func execCommand(container *libcontainer.Container) (pid int, err error) { Cloneflags: flag, } + inPipe, err := command.StdinPipe() + if err != nil { + return -1, err + } + if err := command.Start(); err != nil { return -1, err } pid = command.Process.Pid + if container.Network != nil { + name1, name2, err := createVethPair() + if err != nil { + log.Fatal(err) + } + if err := network.SetInterfaceMaster(name1, container.Network.Bridge); err != nil { + log.Fatal(err) + } + if err := network.InterfaceUp(name1); err != nil { + log.Fatal(err) + } + if err := network.SetInterfaceInNamespacePid(name2, pid); err != nil { + log.Fatal(err) + } + fmt.Fprint(inPipe, name2) + inPipe.Close() + } + go func() { if _, err := io.Copy(os.Stdout, master); err != nil { log.Println(err) @@ -78,3 +103,11 @@ func createMasterAndConsole() (*os.File, string, error) { } return master, console, nil } + +func createVethPair() (name1 string, name2 string, err error) { + name1, name2 = "veth001", "veth002" + if err = network.CreateVethPair(name1, name2); err != nil { + return + } + return +} diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index b4b7de410c..2804f01e5f 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -5,7 +5,9 @@ import ( "fmt" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/capabilities" + "github.com/dotcloud/docker/pkg/libcontainer/network" "github.com/dotcloud/docker/pkg/system" + "io/ioutil" "log" "os" "path/filepath" @@ -50,6 +52,12 @@ func main() { log.Fatal(err) } + data, err := ioutil.ReadAll(os.Stdin) + if err != nil { + log.Fatalf("error reading from stdin %s", err) + } + tempVethName := string(data) + // close pipes so that we can replace it with the pty 
os.Stdin.Close() os.Stdout.Close() @@ -81,7 +89,7 @@ func main() { } if container.Network != nil { - if err := setupNetworking(container); err != nil { + if err := setupNetworking(container, tempVethName); err != nil { log.Fatalf("setup networking %s", err) } } @@ -166,6 +174,6 @@ func setLogFile(container *libcontainer.Container) error { return nil } -func setupNetworking(conatiner *libcontainer.Container) error { - return nil +func setupNetworking(container *libcontainer.Container, tempVethName string) error { + return network.SetupVeth(container.Network, tempVethName) } diff --git a/pkg/libcontainer/ubuntu.json b/pkg/libcontainer/ubuntu.json deleted file mode 100644 index 0a450ae066..0000000000 --- a/pkg/libcontainer/ubuntu.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "id": "koye", - "namespace_pid": 3745, - "command": { - "args": [ - "/sbin/init" - ], - "environment": [ - "HOME=/", - "PATH=PATH=$PATH:/bin:/usr/bin:/sbin:/usr/sbin", - "container=docker", - "TERM=xterm" - ] - }, - "rootfs": "/var/lib/docker/btrfs/subvolumes/7c0f15df1ad2e2fe04d7a6e079aec17406e9465a6a37dd16cb0dd754fc0167b3", - "namespaces": [ - "NEWIPC", - "NEWNS", - "NEWPID", - "NEWUTS" - ] -} From 5428964400ece4cd79cc5d482307df5e8913469f Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 19 Feb 2014 15:54:53 -0800 Subject: [PATCH 10/81] Add dynamic veth name Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/network/veth.go | 41 ---------------------------- pkg/libcontainer/nsinit/exec.go | 10 ++++++- pkg/libcontainer/nsinit/init.go | 46 +++++++++++++++++++++++++------- pkg/libcontainer/utils/utils.go | 24 +++-------------- 4 files changed, 48 insertions(+), 73 deletions(-) delete mode 100644 pkg/libcontainer/network/veth.go diff --git a/pkg/libcontainer/network/veth.go b/pkg/libcontainer/network/veth.go deleted file mode 100644 index 05512e63c8..0000000000 --- a/pkg/libcontainer/network/veth.go +++ /dev/null @@ -1,41 +0,0 @@ -package network - -import ( - 
"fmt" - "github.com/dotcloud/docker/pkg/libcontainer" -) - -// SetupVeth sets up an existing network namespace with the specified -// network configuration. -func SetupVeth(config *libcontainer.Network, tempVethName string) error { - if err := InterfaceDown(tempVethName); err != nil { - return fmt.Errorf("interface down %s %s", tempVethName, err) - } - if err := ChangeInterfaceName(tempVethName, "eth0"); err != nil { - return fmt.Errorf("change %s to eth0 %s", tempVethName, err) - } - if err := SetInterfaceIp("eth0", config.IP); err != nil { - return fmt.Errorf("set eth0 ip %s", err) - } - - if err := SetMtu("eth0", config.Mtu); err != nil { - return fmt.Errorf("set eth0 mtu to %d %s", config.Mtu, err) - } - if err := InterfaceUp("eth0"); err != nil { - return fmt.Errorf("eth0 up %s", err) - } - - if err := SetMtu("lo", config.Mtu); err != nil { - return fmt.Errorf("set lo mtu to %d %s", config.Mtu, err) - } - if err := InterfaceUp("lo"); err != nil { - return fmt.Errorf("lo up %s", err) - } - - if config.Gateway != "" { - if err := SetDefaultGateway(config.Gateway); err != nil { - return fmt.Errorf("set gateway to %s %s", config.Gateway, err) - } - } - return nil -} diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index 9cd1741706..e0324074c5 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -4,6 +4,7 @@ import ( "fmt" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/network" + "github.com/dotcloud/docker/pkg/libcontainer/utils" "github.com/dotcloud/docker/pkg/system" "github.com/dotcloud/docker/pkg/term" "io" @@ -105,7 +106,14 @@ func createMasterAndConsole() (*os.File, string, error) { } func createVethPair() (name1 string, name2 string, err error) { - name1, name2 = "veth001", "veth002" + name1, err = utils.GenerateRandomName("dock", 4) + if err != nil { + return + } + name2, err = utils.GenerateRandomName("dock", 4) + if err != nil { + return + } if err 
= network.CreateVethPair(name1, name2); err != nil { return } diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index 2804f01e5f..fe8fd4b4db 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -52,11 +52,14 @@ func main() { log.Fatal(err) } - data, err := ioutil.ReadAll(os.Stdin) - if err != nil { - log.Fatalf("error reading from stdin %s", err) + var tempVethName string + if container.Network != nil { + data, err := ioutil.ReadAll(os.Stdin) + if err != nil { + log.Fatalf("error reading from stdin %s", err) + } + tempVethName = string(data) } - tempVethName := string(data) // close pipes so that we can replace it with the pty os.Stdin.Close() @@ -73,7 +76,6 @@ func main() { if err := dupSlave(slave); err != nil { log.Fatalf("dup2 slave %s", err) } - if _, err := system.Setsid(); err != nil { log.Fatalf("setsid %s", err) } @@ -83,13 +85,11 @@ func main() { if err := system.ParentDeathSignal(); err != nil { log.Fatalf("parent deth signal %s", err) } - if err := setupNewMountNamespace(rootfs, console, container.ReadonlyFs); err != nil { log.Fatalf("setup mount namespace %s", err) } - if container.Network != nil { - if err := setupNetworking(container, tempVethName); err != nil { + if err := setupNetworking(container.Network, tempVethName); err != nil { log.Fatalf("setup networking %s", err) } } @@ -174,6 +174,32 @@ func setLogFile(container *libcontainer.Container) error { return nil } -func setupNetworking(container *libcontainer.Container, tempVethName string) error { - return network.SetupVeth(container.Network, tempVethName) +func setupNetworking(config *libcontainer.Network, tempVethName string) error { + if err := network.InterfaceDown(tempVethName); err != nil { + return fmt.Errorf("interface down %s %s", tempVethName, err) + } + if err := network.ChangeInterfaceName(tempVethName, "eth0"); err != nil { + return fmt.Errorf("change %s to eth0 %s", tempVethName, err) + } + if err := 
network.SetInterfaceIp("eth0", config.IP); err != nil { + return fmt.Errorf("set eth0 ip %s", err) + } + if err := network.SetMtu("eth0", config.Mtu); err != nil { + return fmt.Errorf("set eth0 mtu to %d %s", config.Mtu, err) + } + if err := network.InterfaceUp("eth0"); err != nil { + return fmt.Errorf("eth0 up %s", err) + } + if err := network.SetMtu("lo", config.Mtu); err != nil { + return fmt.Errorf("set lo mtu to %d %s", config.Mtu, err) + } + if err := network.InterfaceUp("lo"); err != nil { + return fmt.Errorf("lo up %s", err) + } + if config.Gateway != "" { + if err := network.SetDefaultGateway(config.Gateway); err != nil { + return fmt.Errorf("set gateway to %s %s", config.Gateway, err) + } + } + return nil } diff --git a/pkg/libcontainer/utils/utils.go b/pkg/libcontainer/utils/utils.go index 7289fecf2e..d3223c3e4d 100644 --- a/pkg/libcontainer/utils/utils.go +++ b/pkg/libcontainer/utils/utils.go @@ -4,30 +4,12 @@ import ( "crypto/rand" "encoding/hex" "io" - "os" - "syscall" ) -func WaitOnPid(pid int) (exitcode int, err error) { - child, err := os.FindProcess(pid) - if err != nil { - return -1, err - } - state, err := child.Wait() - if err != nil { - return -1, err - } - return getExitCode(state), nil -} - -func getExitCode(state *os.ProcessState) int { - return state.Sys().(syscall.WaitStatus).ExitStatus() -} - -func GenerateRandomName(size int) (string, error) { - id := make([]byte, size) +func GenerateRandomName(prefix string, size int) (string, error) { + id := make([]byte, 32) if _, err := io.ReadFull(rand.Reader, id); err != nil { return "", err } - return hex.EncodeToString(id), nil + return prefix + hex.EncodeToString(id)[:size], nil } From 61a119220d88d20bb1cca111e9c8ba7cdb45d4f6 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 19 Feb 2014 16:40:36 -0800 Subject: [PATCH 11/81] General cleanup of libcontainer Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/errors.go | 9 ----- 
pkg/libcontainer/network/network.go | 26 ------------ pkg/libcontainer/nsinit/exec.go | 51 ++++++++++------------- pkg/libcontainer/nsinit/init.go | 63 ++++++++--------------------- pkg/libcontainer/nsinit/main.go | 42 +++++++++++++++++++ pkg/libcontainer/nsinit/mount.go | 61 +++++++++++----------------- pkg/libcontainer/nsinit/ns_linux.go | 25 ++++-------- 7 files changed, 111 insertions(+), 166 deletions(-) delete mode 100644 pkg/libcontainer/errors.go create mode 100644 pkg/libcontainer/nsinit/main.go diff --git a/pkg/libcontainer/errors.go b/pkg/libcontainer/errors.go deleted file mode 100644 index c6964ee8e6..0000000000 --- a/pkg/libcontainer/errors.go +++ /dev/null @@ -1,9 +0,0 @@ -package libcontainer - -import ( - "errors" -) - -var ( - ErrInvalidPid = errors.New("no ns pid found") -) diff --git a/pkg/libcontainer/network/network.go b/pkg/libcontainer/network/network.go index 31c5d32492..8c7a4b618e 100644 --- a/pkg/libcontainer/network/network.go +++ b/pkg/libcontainer/network/network.go @@ -1,15 +1,10 @@ package network import ( - "errors" "github.com/dotcloud/docker/pkg/netlink" "net" ) -var ( - ErrNoDefaultRoute = errors.New("no default network route found") -) - func InterfaceUp(name string) error { iface, err := net.InterfaceByName(name) if err != nil { @@ -46,14 +41,6 @@ func SetInterfaceInNamespacePid(name string, nsPid int) error { return netlink.NetworkSetNsPid(iface, nsPid) } -func SetInterfaceInNamespaceFd(name string, fd int) error { - iface, err := net.InterfaceByName(name) - if err != nil { - return err - } - return netlink.NetworkSetNsFd(iface, fd) -} - func SetInterfaceMaster(name, master string) error { iface, err := net.InterfaceByName(name) if err != nil { @@ -89,16 +76,3 @@ func SetMtu(name string, mtu int) error { } return netlink.NetworkSetMTU(iface, mtu) } - -func GetDefaultMtu() (int, error) { - routes, err := netlink.NetworkGetRoutes() - if err != nil { - return -1, err - } - for _, r := range routes { - if r.Default { - return 
r.Iface.MTU, nil - } - } - return -1, ErrNoDefaultRoute -} diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index e0324074c5..4ac070db08 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -8,65 +8,54 @@ import ( "github.com/dotcloud/docker/pkg/system" "github.com/dotcloud/docker/pkg/term" "io" - "log" + "io/ioutil" "os" "os/exec" "syscall" ) -func execCommand(container *libcontainer.Container) (pid int, err error) { +func execCommand(container *libcontainer.Container) (int, error) { master, console, err := createMasterAndConsole() if err != nil { return -1, err } - // we need CLONE_VFORK so we can wait on the child - flag := uintptr(getNamespaceFlags(container.Namespaces) | CLONE_VFORK) - - command := exec.Command("nsinit", console) + command := exec.Command("nsinit", "init", console) command.SysProcAttr = &syscall.SysProcAttr{ - Cloneflags: flag, + Cloneflags: uintptr(getNamespaceFlags(container.Namespaces) | syscall.CLONE_VFORK), // we need CLONE_VFORK so we can wait on the child } inPipe, err := command.StdinPipe() if err != nil { return -1, err } - if err := command.Start(); err != nil { return -1, err } - pid = command.Process.Pid + if err := writePidFile(command); err != nil { + return -1, err + } if container.Network != nil { name1, name2, err := createVethPair() if err != nil { - log.Fatal(err) + return -1, err } if err := network.SetInterfaceMaster(name1, container.Network.Bridge); err != nil { - log.Fatal(err) + return -1, err } if err := network.InterfaceUp(name1); err != nil { - log.Fatal(err) + return -1, err } - if err := network.SetInterfaceInNamespacePid(name2, pid); err != nil { - log.Fatal(err) + if err := network.SetInterfaceInNamespacePid(name2, command.Process.Pid); err != nil { + return -1, err } fmt.Fprint(inPipe, name2) inPipe.Close() } - go func() { - if _, err := io.Copy(os.Stdout, master); err != nil { - log.Println(err) - } - }() - - go func() { - if _, err := 
io.Copy(master, os.Stdin); err != nil { - log.Println(err) - } - }() + go io.Copy(os.Stdout, master) + go io.Copy(master, os.Stdin) ws, err := term.GetWinsize(os.Stdin.Fd()) if err != nil { @@ -83,9 +72,11 @@ func execCommand(container *libcontainer.Container) (pid int, err error) { defer term.RestoreTerminal(os.Stdin.Fd(), state) if err := command.Wait(); err != nil { - return pid, err + if _, ok := err.(*exec.ExitError); !ok { + return -1, err + } } - return pid, nil + return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil } func createMasterAndConsole() (*os.File, string, error) { @@ -93,12 +84,10 @@ func createMasterAndConsole() (*os.File, string, error) { if err != nil { return nil, "", err } - console, err := system.Ptsname(master) if err != nil { return nil, "", err } - if err := system.Unlockpt(master); err != nil { return nil, "", err } @@ -119,3 +108,7 @@ func createVethPair() (name1 string, name2 string, err error) { } return } + +func writePidFile(command *exec.Cmd) error { + return ioutil.WriteFile(".nspid", []byte(fmt.Sprint(command.Process.Pid)), 0655) +} diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index fe8fd4b4db..16a30812f9 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -1,7 +1,6 @@ package main import ( - "encoding/json" "fmt" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/capabilities" @@ -14,49 +13,21 @@ import ( "syscall" ) -func loadContainer() (*libcontainer.Container, error) { - f, err := os.Open("container.json") - if err != nil { - return nil, err - } - defer f.Close() - - var container *libcontainer.Container - if err := json.NewDecoder(f).Decode(&container); err != nil { - return nil, err - } - return container, nil -} - -func main() { - container, err := loadContainer() - if err != nil { - log.Fatal(err) - } - - if os.Args[1] == "exec" { - _, err := execCommand(container) - if err != nil { - 
log.Fatal(err) - } - os.Exit(0) - } - console := os.Args[1] - +func initCommand(container *libcontainer.Container, console string) error { if err := setLogFile(container); err != nil { - log.Fatal(err) + return err } rootfs, err := resolveRootfs() if err != nil { - log.Fatal(err) + return err } var tempVethName string if container.Network != nil { data, err := ioutil.ReadAll(os.Stdin) if err != nil { - log.Fatalf("error reading from stdin %s", err) + return fmt.Errorf("error reading from stdin %s", err) } tempVethName = string(data) } @@ -68,48 +39,48 @@ func main() { slave, err := openTerminal(console, syscall.O_RDWR) if err != nil { - log.Fatalf("open terminal %s", err) + return fmt.Errorf("open terminal %s", err) } if slave.Fd() != 0 { - log.Fatalf("slave fd should be 0") + return fmt.Errorf("slave fd should be 0") } if err := dupSlave(slave); err != nil { - log.Fatalf("dup2 slave %s", err) + return fmt.Errorf("dup2 slave %s", err) } if _, err := system.Setsid(); err != nil { - log.Fatalf("setsid %s", err) + return fmt.Errorf("setsid %s", err) } if err := system.Setctty(); err != nil { - log.Fatalf("setctty %s", err) + return fmt.Errorf("setctty %s", err) } if err := system.ParentDeathSignal(); err != nil { - log.Fatalf("parent deth signal %s", err) + return fmt.Errorf("parent deth signal %s", err) } if err := setupNewMountNamespace(rootfs, console, container.ReadonlyFs); err != nil { - log.Fatalf("setup mount namespace %s", err) + return fmt.Errorf("setup mount namespace %s", err) } if container.Network != nil { if err := setupNetworking(container.Network, tempVethName); err != nil { - log.Fatalf("setup networking %s", err) + return fmt.Errorf("setup networking %s", err) } } if err := system.Sethostname(container.ID); err != nil { - log.Fatalf("sethostname %s", err) + return fmt.Errorf("sethostname %s", err) } if err := capabilities.DropCapabilities(container); err != nil { - log.Fatalf("drop capabilities %s", err) + return fmt.Errorf("drop capabilities %s", 
err) } if err := setupUser(container); err != nil { - log.Fatalf("setup user %s", err) + return fmt.Errorf("setup user %s", err) } if container.WorkingDir != "" { if err := system.Chdir(container.WorkingDir); err != nil { - log.Fatalf("chdir to %s %s", container.WorkingDir, err) + return fmt.Errorf("chdir to %s %s", container.WorkingDir, err) } } if err := system.Exec(container.Command.Args[0], container.Command.Args[0:], container.Command.Env); err != nil { - log.Fatalf("exec %s", err) + return fmt.Errorf("exec %s", err) } panic("unreachable") } diff --git a/pkg/libcontainer/nsinit/main.go b/pkg/libcontainer/nsinit/main.go new file mode 100644 index 0000000000..47abcce0c5 --- /dev/null +++ b/pkg/libcontainer/nsinit/main.go @@ -0,0 +1,42 @@ +package main + +import ( + "encoding/json" + "github.com/dotcloud/docker/pkg/libcontainer" + "log" + "os" +) + +func main() { + container, err := loadContainer() + if err != nil { + log.Fatal(err) + } + + switch os.Args[1] { + case "exec": + exitCode, err := execCommand(container) + if err != nil { + log.Fatal(err) + } + os.Exit(exitCode) + case "init": + if err := initCommand(container, os.Args[2]); err != nil { + log.Fatal(err) + } + } +} + +func loadContainer() (*libcontainer.Container, error) { + f, err := os.Open("container.json") + if err != nil { + return nil, err + } + defer f.Close() + + var container *libcontainer.Container + if err := json.NewDecoder(f).Decode(&container); err != nil { + return nil, err + } + return container, nil +} diff --git a/pkg/libcontainer/nsinit/mount.go b/pkg/libcontainer/nsinit/mount.go index f9ee969636..13ee13e001 100644 --- a/pkg/libcontainer/nsinit/mount.go +++ b/pkg/libcontainer/nsinit/mount.go @@ -3,68 +3,47 @@ package main import ( "fmt" "github.com/dotcloud/docker/pkg/system" - "log" "os" "path/filepath" "syscall" ) -var ( - // default mount point options - defaults = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV -) +// default mount point options +const defaultMountFlags 
= syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV func setupNewMountNamespace(rootfs, console string, readonly bool) error { if err := system.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil { return fmt.Errorf("mounting / as slave %s", err) } - if err := system.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil { return fmt.Errorf("mouting %s as bind %s", rootfs, err) } - if readonly { if err := system.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, ""); err != nil { return fmt.Errorf("mounting %s as readonly %s", rootfs, err) } } - if err := mountSystem(rootfs); err != nil { return fmt.Errorf("mount system %s", err) } - if err := copyDevNodes(rootfs); err != nil { return fmt.Errorf("copy dev nodes %s", err) } - - ptmx := filepath.Join(rootfs, "dev/ptmx") - if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) { - return err - } - if err := os.Symlink("pts/ptmx", ptmx); err != nil { - return fmt.Errorf("symlink dev ptmx %s", err) - } - if err := setupDev(rootfs); err != nil { return err } - - if err := setupConsole(rootfs, console); err != nil { + if err := setupPtmx(rootfs, console); err != nil { return err } - if err := system.Chdir(rootfs); err != nil { return fmt.Errorf("chdir into %s %s", rootfs, err) } - if err := system.Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil { return fmt.Errorf("mount move %s into / %s", rootfs, err) } - if err := system.Chroot("."); err != nil { return fmt.Errorf("chroot . 
%s", err) } - if err := system.Chdir("/"); err != nil { return fmt.Errorf("chdir / %s", err) } @@ -90,13 +69,10 @@ func copyDevNodes(rootfs string) error { if err != nil { return err } - var ( dest = filepath.Join(rootfs, "dev", node) st = stat.Sys().(*syscall.Stat_t) ) - - log.Printf("copy %s to %s %d\n", node, dest, st.Rdev) if err := system.Mknod(dest, st.Mode, int(st.Rdev)); err != nil && !os.IsExist(err) { return fmt.Errorf("copy %s %s", node, err) } @@ -134,24 +110,22 @@ func setupConsole(rootfs, console string) error { if err != nil { return fmt.Errorf("stat console %s %s", console, err) } - st := stat.Sys().(*syscall.Stat_t) - - dest := filepath.Join(rootfs, "dev/console") + var ( + st = stat.Sys().(*syscall.Stat_t) + dest = filepath.Join(rootfs, "dev/console") + ) if err := os.Remove(dest); err != nil && !os.IsNotExist(err) { return fmt.Errorf("remove %s %s", dest, err) } - if err := os.Chmod(console, 0600); err != nil { return err } if err := os.Chown(console, 0, 0); err != nil { return err } - if err := system.Mknod(dest, (st.Mode&^07777)|0600, int(st.Rdev)); err != nil { return fmt.Errorf("mknod %s %s", dest, err) } - if err := system.Mount(console, dest, "bind", syscall.MS_BIND, ""); err != nil { return fmt.Errorf("bind %s to %s %s", console, dest, err) } @@ -168,10 +142,10 @@ func mountSystem(rootfs string) error { flags int data string }{ - {source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaults}, - {source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaults}, + {source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags}, + {source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags}, {source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: "mode=755"}, - {source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaults, data: 
"mode=1777"}, + {source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: "mode=1777"}, {source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: "newinstance,ptmxmode=0666,mode=620,gid=5"}, {source: "tmpfs", path: filepath.Join(rootfs, "run"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_STRICTATIME, data: "mode=755"}, } { @@ -189,7 +163,7 @@ func remountProc() error { if err := system.Unmount("/proc", syscall.MNT_DETACH); err != nil { return err } - if err := system.Mount("proc", "/proc", "proc", uintptr(defaults), ""); err != nil { + if err := system.Mount("proc", "/proc", "proc", uintptr(defaultMountFlags), ""); err != nil { return err } return nil @@ -201,9 +175,20 @@ func remountSys() error { return err } } else { - if err := system.Mount("sysfs", "/sys", "sysfs", uintptr(defaults), ""); err != nil { + if err := system.Mount("sysfs", "/sys", "sysfs", uintptr(defaultMountFlags), ""); err != nil { return err } } return nil } + +func setupPtmx(rootfs, console string) error { + ptmx := filepath.Join(rootfs, "dev/ptmx") + if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) { + return err + } + if err := os.Symlink("pts/ptmx", ptmx); err != nil { + return fmt.Errorf("symlink dev ptmx %s", err) + } + return setupConsole(rootfs, console) +} diff --git a/pkg/libcontainer/nsinit/ns_linux.go b/pkg/libcontainer/nsinit/ns_linux.go index b54bc2b993..2392ffd770 100644 --- a/pkg/libcontainer/nsinit/ns_linux.go +++ b/pkg/libcontainer/nsinit/ns_linux.go @@ -2,27 +2,16 @@ package main import ( "github.com/dotcloud/docker/pkg/libcontainer" -) - -const ( - SIGCHLD = 0x14 - CLONE_VFORK = 0x00004000 - CLONE_NEWNS = 0x00020000 - CLONE_NEWUTS = 0x04000000 - CLONE_NEWIPC = 0x08000000 - CLONE_NEWUSER = 0x10000000 - CLONE_NEWPID = 0x20000000 - CLONE_NEWNET = 0x40000000 + "syscall" ) var namespaceMap = 
map[libcontainer.Namespace]int{ - "": 0, - libcontainer.CLONE_NEWNS: CLONE_NEWNS, - libcontainer.CLONE_NEWUTS: CLONE_NEWUTS, - libcontainer.CLONE_NEWIPC: CLONE_NEWIPC, - libcontainer.CLONE_NEWUSER: CLONE_NEWUSER, - libcontainer.CLONE_NEWPID: CLONE_NEWPID, - libcontainer.CLONE_NEWNET: CLONE_NEWNET, + libcontainer.CLONE_NEWNS: syscall.CLONE_NEWNS, + libcontainer.CLONE_NEWUTS: syscall.CLONE_NEWUTS, + libcontainer.CLONE_NEWIPC: syscall.CLONE_NEWIPC, + libcontainer.CLONE_NEWUSER: syscall.CLONE_NEWUSER, + libcontainer.CLONE_NEWPID: syscall.CLONE_NEWPID, + libcontainer.CLONE_NEWNET: syscall.CLONE_NEWNET, } // getNamespaceFlags parses the container's Namespaces options to set the correct From f3c48ec584707a9acaf9d79c2e161dadf1d2c99b Mon Sep 17 00:00:00 2001 From: "Guillaume J. Charmes" Date: Wed, 19 Feb 2014 16:50:10 -0800 Subject: [PATCH 12/81] OSX compilation Docker-DCO-1.1-Signed-off-by: Guillaume J. Charmes (github: creack) --- pkg/libcontainer/nsinit/exec.go | 2 ++ pkg/libcontainer/nsinit/init.go | 2 ++ pkg/libcontainer/nsinit/main.go | 13 +++++++++++++ pkg/libcontainer/nsinit/mount.go | 2 ++ 4 files changed, 19 insertions(+) diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index 4ac070db08..5b53be259c 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -1,3 +1,5 @@ +// +build linux + package main import ( diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index 16a30812f9..1c90ecca23 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -1,3 +1,5 @@ +// +build linux + package main import ( diff --git a/pkg/libcontainer/nsinit/main.go b/pkg/libcontainer/nsinit/main.go index 47abcce0c5..c9f9d7bc8f 100644 --- a/pkg/libcontainer/nsinit/main.go +++ b/pkg/libcontainer/nsinit/main.go @@ -2,17 +2,27 @@ package main import ( "encoding/json" + "errors" "github.com/dotcloud/docker/pkg/libcontainer" "log" "os" ) +var ( + ErrUnsupported = 
errors.New("Unsupported method") + ErrWrongArguments = errors.New("Wrong argument count") +) + func main() { container, err := loadContainer() if err != nil { log.Fatal(err) } + argc := len(os.Args) + if argc < 2 { + log.Fatal(ErrWrongArguments) + } switch os.Args[1] { case "exec": exitCode, err := execCommand(container) @@ -21,6 +31,9 @@ func main() { } os.Exit(exitCode) case "init": + if argc != 3 { + log.Fatal(ErrWrongArguments) + } if err := initCommand(container, os.Args[2]); err != nil { log.Fatal(err) } diff --git a/pkg/libcontainer/nsinit/mount.go b/pkg/libcontainer/nsinit/mount.go index 13ee13e001..baa850f0fb 100644 --- a/pkg/libcontainer/nsinit/mount.go +++ b/pkg/libcontainer/nsinit/mount.go @@ -1,3 +1,5 @@ +// +build linux + package main import ( From 5d62916c48cb97320b37640592805d97badfd8ff Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 19 Feb 2014 19:14:31 -0800 Subject: [PATCH 13/81] Refactor large funcs Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/container.go | 5 +- pkg/libcontainer/container.json | 3 +- pkg/libcontainer/nsinit/exec.go | 60 +++++++++++++++-------- pkg/libcontainer/nsinit/init.go | 87 +++++++++++++++------------------ pkg/libcontainer/types.go | 48 +++++++++--------- 5 files changed, 107 insertions(+), 96 deletions(-) diff --git a/pkg/libcontainer/container.go b/pkg/libcontainer/container.go index 3f3961d496..c8dbdd668f 100644 --- a/pkg/libcontainer/container.go +++ b/pkg/libcontainer/container.go @@ -1,14 +1,13 @@ package libcontainer type Container struct { - ID string `json:"id,omitempty"` - Command *Command `json:"command,omitempty"` + Hostname string `json:"hostname,omitempty"` ReadonlyFs bool `json:"readonly_fs,omitempty"` User string `json:"user,omitempty"` WorkingDir string `json:"working_dir,omitempty"` + Command *Command `json:"command,omitempty"` Namespaces Namespaces `json:"namespaces,omitempty"` Capabilities Capabilities `json:"capabilities,omitempty"` - LogFile 
string `json:"log_file,omitempty"` Network *Network `json:"network,omitempty"` } diff --git a/pkg/libcontainer/container.json b/pkg/libcontainer/container.json index 8731170c2a..2abf01adb9 100644 --- a/pkg/libcontainer/container.json +++ b/pkg/libcontainer/container.json @@ -1,6 +1,5 @@ { "id": "koye", - "log_file": "/root/logs", "command": { "args": [ "/bin/bash" @@ -9,7 +8,7 @@ "HOME=/", "PATH=PATH=$PATH:/bin:/usr/bin:/sbin:/usr/sbin", "container=docker", - "TERM=xterm" + "TERM=xterm-256color" ] }, "namespaces": [ diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index 5b53be259c..4abebd2941 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -27,6 +27,8 @@ func execCommand(container *libcontainer.Container) (int, error) { Cloneflags: uintptr(getNamespaceFlags(container.Namespaces) | syscall.CLONE_VFORK), // we need CLONE_VFORK so we can wait on the child } + // create a pipe so that we can syncronize with the namespaced process and + // pass the veth name to the child inPipe, err := command.StdinPipe() if err != nil { return -1, err @@ -39,34 +41,17 @@ func execCommand(container *libcontainer.Container) (int, error) { } if container.Network != nil { - name1, name2, err := createVethPair() + vethPair, err := setupVeth(container.Network.Bridge, command.Process.Pid) if err != nil { return -1, err } - if err := network.SetInterfaceMaster(name1, container.Network.Bridge); err != nil { - return -1, err - } - if err := network.InterfaceUp(name1); err != nil { - return -1, err - } - if err := network.SetInterfaceInNamespacePid(name2, command.Process.Pid); err != nil { - return -1, err - } - fmt.Fprint(inPipe, name2) - inPipe.Close() + sendVethName(vethPair, inPipe) } go io.Copy(os.Stdout, master) go io.Copy(master, os.Stdin) - ws, err := term.GetWinsize(os.Stdin.Fd()) - if err != nil { - return -1, err - } - if err := term.SetWinsize(master.Fd(), ws); err != nil { - return -1, err - } - state, err := 
term.SetRawTerminal(os.Stdin.Fd()) + state, err := setupWindow(master) if err != nil { command.Process.Kill() return -1, err @@ -81,6 +66,41 @@ func execCommand(container *libcontainer.Container) (int, error) { return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil } +func sendVethName(name string, pipe io.WriteCloser) { + // write the veth pair name to the child's stdin then close the + // pipe so that the child stops waiting + fmt.Fprint(pipe, name) + pipe.Close() +} + +func setupVeth(bridge string, nspid int) (string, error) { + name1, name2, err := createVethPair() + if err != nil { + return "", err + } + if err := network.SetInterfaceMaster(name1, bridge); err != nil { + return "", err + } + if err := network.InterfaceUp(name1); err != nil { + return "", err + } + if err := network.SetInterfaceInNamespacePid(name2, nspid); err != nil { + return "", err + } + return name2, nil +} + +func setupWindow(master *os.File) (*term.State, error) { + ws, err := term.GetWinsize(os.Stdin.Fd()) + if err != nil { + return nil, err + } + if err := term.SetWinsize(master.Fd(), ws); err != nil { + return nil, err + } + return term.SetRawTerminal(os.Stdin.Fd()) +} + func createMasterAndConsole() (*os.File, string, error) { master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) if err != nil { diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index 1c90ecca23..d853a32d03 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -9,17 +9,12 @@ import ( "github.com/dotcloud/docker/pkg/libcontainer/network" "github.com/dotcloud/docker/pkg/system" "io/ioutil" - "log" "os" "path/filepath" "syscall" ) func initCommand(container *libcontainer.Container, console string) error { - if err := setLogFile(container); err != nil { - return err - } - rootfs, err := resolveRootfs() if err != nil { return err @@ -27,11 +22,10 @@ func initCommand(container *libcontainer.Container, 
console string) error { var tempVethName string if container.Network != nil { - data, err := ioutil.ReadAll(os.Stdin) + tempVethName, err = getVethName() if err != nil { - return fmt.Errorf("error reading from stdin %s", err) + return err } - tempVethName = string(data) } // close pipes so that we can replace it with the pty @@ -61,13 +55,10 @@ func initCommand(container *libcontainer.Container, console string) error { if err := setupNewMountNamespace(rootfs, console, container.ReadonlyFs); err != nil { return fmt.Errorf("setup mount namespace %s", err) } - if container.Network != nil { - if err := setupNetworking(container.Network, tempVethName); err != nil { - return fmt.Errorf("setup networking %s", err) - } + if err := setupNetworking(container.Network, tempVethName); err != nil { + return fmt.Errorf("setup networking %s", err) } - - if err := system.Sethostname(container.ID); err != nil { + if err := system.Sethostname(container.Hostname); err != nil { return fmt.Errorf("sethostname %s", err) } if err := capabilities.DropCapabilities(container); err != nil { @@ -136,43 +127,45 @@ func openTerminal(name string, flag int) (*os.File, error) { return os.NewFile(uintptr(r), name), nil } -func setLogFile(container *libcontainer.Container) error { - if container.LogFile != "" { - f, err := os.OpenFile(container.LogFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0655) - if err != nil { - return err +func setupNetworking(config *libcontainer.Network, tempVethName string) error { + if config != nil { + if err := network.InterfaceDown(tempVethName); err != nil { + return fmt.Errorf("interface down %s %s", tempVethName, err) + } + if err := network.ChangeInterfaceName(tempVethName, "eth0"); err != nil { + return fmt.Errorf("change %s to eth0 %s", tempVethName, err) + } + if err := network.SetInterfaceIp("eth0", config.IP); err != nil { + return fmt.Errorf("set eth0 ip %s", err) + } + if err := network.SetMtu("eth0", config.Mtu); err != nil { + return fmt.Errorf("set eth0 mtu to 
%d %s", config.Mtu, err) + } + if err := network.InterfaceUp("eth0"); err != nil { + return fmt.Errorf("eth0 up %s", err) + } + if err := network.SetMtu("lo", config.Mtu); err != nil { + return fmt.Errorf("set lo mtu to %d %s", config.Mtu, err) + } + if err := network.InterfaceUp("lo"); err != nil { + return fmt.Errorf("lo up %s", err) + } + if config.Gateway != "" { + if err := network.SetDefaultGateway(config.Gateway); err != nil { + return fmt.Errorf("set gateway to %s %s", config.Gateway, err) + } } - log.SetOutput(f) } return nil } -func setupNetworking(config *libcontainer.Network, tempVethName string) error { - if err := network.InterfaceDown(tempVethName); err != nil { - return fmt.Errorf("interface down %s %s", tempVethName, err) +// getVethName reads from Stdin the temp veth name +// sent by the parent processes after the veth pair +// has been created and setup +func getVethName() (string, error) { + data, err := ioutil.ReadAll(os.Stdin) + if err != nil { + return "", fmt.Errorf("error reading from stdin %s", err) } - if err := network.ChangeInterfaceName(tempVethName, "eth0"); err != nil { - return fmt.Errorf("change %s to eth0 %s", tempVethName, err) - } - if err := network.SetInterfaceIp("eth0", config.IP); err != nil { - return fmt.Errorf("set eth0 ip %s", err) - } - if err := network.SetMtu("eth0", config.Mtu); err != nil { - return fmt.Errorf("set eth0 mtu to %d %s", config.Mtu, err) - } - if err := network.InterfaceUp("eth0"); err != nil { - return fmt.Errorf("eth0 up %s", err) - } - if err := network.SetMtu("lo", config.Mtu); err != nil { - return fmt.Errorf("set lo mtu to %d %s", config.Mtu, err) - } - if err := network.InterfaceUp("lo"); err != nil { - return fmt.Errorf("lo up %s", err) - } - if config.Gateway != "" { - if err := network.SetDefaultGateway(config.Gateway); err != nil { - return fmt.Errorf("set gateway to %s %s", config.Gateway, err) - } - } - return nil + return string(data), nil } diff --git a/pkg/libcontainer/types.go 
b/pkg/libcontainer/types.go index db1c3b9738..b5d9932671 100644 --- a/pkg/libcontainer/types.go +++ b/pkg/libcontainer/types.go @@ -1,29 +1,5 @@ package libcontainer -type Namespace string -type Namespaces []Namespace - -func (n Namespaces) Contains(ns Namespace) bool { - for _, nns := range n { - if nns == ns { - return true - } - } - return false -} - -type Capability string -type Capabilities []Capability - -func (c Capabilities) Contains(capp Capability) bool { - for _, cc := range c { - if cc == capp { - return true - } - } - return false -} - const ( CAP_SETPCAP Capability = "SETPCAP" CAP_SYS_MODULE Capability = "SYS_MODULE" @@ -47,3 +23,27 @@ const ( CLONE_NEWPID Namespace = "NEWPID" // pid CLONE_NEWNET Namespace = "NEWNET" // network ) + +type Namespace string +type Namespaces []Namespace + +func (n Namespaces) Contains(ns Namespace) bool { + for _, nns := range n { + if nns == ns { + return true + } + } + return false +} + +type Capability string +type Capabilities []Capability + +func (c Capabilities) Contains(capp Capability) bool { + for _, cc := range c { + if cc == capp { + return true + } + } + return false +} From 420b5eb211f877baac9622e7bedde2948c043619 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 19 Feb 2014 19:53:25 -0800 Subject: [PATCH 14/81] Add execin function to running a process in a namespace Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/container.json | 2 +- pkg/libcontainer/nsinit/execin.go | 115 ++++++++++++++++++++++++++++ pkg/libcontainer/nsinit/main.go | 8 ++ pkg/libcontainer/nsinit/ns_linux.go | 9 +++ 4 files changed, 133 insertions(+), 1 deletion(-) create mode 100644 pkg/libcontainer/nsinit/execin.go diff --git a/pkg/libcontainer/container.json b/pkg/libcontainer/container.json index 2abf01adb9..c5807a7b28 100644 --- a/pkg/libcontainer/container.json +++ b/pkg/libcontainer/container.json @@ -1,5 +1,5 @@ { - "id": "koye", + "hostname": "koye", "command": { "args": [ 
"/bin/bash" diff --git a/pkg/libcontainer/nsinit/execin.go b/pkg/libcontainer/nsinit/execin.go new file mode 100644 index 0000000000..362cf5afd5 --- /dev/null +++ b/pkg/libcontainer/nsinit/execin.go @@ -0,0 +1,115 @@ +package main + +import ( + "fmt" + "github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/pkg/libcontainer/capabilities" + "github.com/dotcloud/docker/pkg/system" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "syscall" +) + +func execinCommand(container *libcontainer.Container) (int, error) { + nspid, err := readPid() + if err != nil { + return -1, err + } + + for _, ns := range container.Namespaces { + if err := system.Unshare(namespaceMap[ns]); err != nil { + return -1, err + } + } + fds, err := getNsFds(nspid, container) + closeFds := func() { + for _, f := range fds { + system.Closefd(f) + } + } + if err != nil { + closeFds() + return -1, err + } + + for _, fd := range fds { + if fd > 0 { + if err := system.Setns(fd, 0); err != nil { + closeFds() + return -1, fmt.Errorf("setns %s", err) + } + } + system.Closefd(fd) + } + + // if the container has a new pid and mount namespace we need to + // remount proc and sys to pick up the changes + if container.Namespaces.Contains(libcontainer.CLONE_NEWNS) && + container.Namespaces.Contains(libcontainer.CLONE_NEWPID) { + + pid, err := system.Fork() + if err != nil { + return -1, err + } + if pid == 0 { + // TODO: make all raw syscalls to be fork safe + if err := system.Unshare(syscall.CLONE_NEWNS); err != nil { + return -1, err + } + if err := remountProc(); err != nil { + return -1, fmt.Errorf("remount proc %s", err) + } + if err := remountSys(); err != nil { + return -1, fmt.Errorf("remount sys %s", err) + } + if err := capabilities.DropCapabilities(container); err != nil { + return -1, fmt.Errorf("drop capabilities %s", err) + } + if err := system.Exec(container.Command.Args[0], container.Command.Args[0:], container.Command.Env); err != nil { + return -1, err + } + } + proc, err 
:= os.FindProcess(pid) + if err != nil { + return -1, err + } + state, err := proc.Wait() + if err != nil { + return -1, err + } + os.Exit(state.Sys().(syscall.WaitStatus).ExitStatus()) + } + if err := capabilities.DropCapabilities(container); err != nil { + return -1, fmt.Errorf("drop capabilities %s", err) + } + if err := system.Exec(container.Command.Args[0], container.Command.Args[0:], container.Command.Env); err != nil { + return -1, err + } + panic("unreachable") +} + +func readPid() (int, error) { + data, err := ioutil.ReadFile(".nspid") + if err != nil { + return -1, err + } + pid, err := strconv.Atoi(string(data)) + if err != nil { + return -1, err + } + return pid, nil +} + +func getNsFds(pid int, container *libcontainer.Container) ([]uintptr, error) { + fds := make([]uintptr, len(container.Namespaces)) + for i, ns := range container.Namespaces { + f, err := os.OpenFile(filepath.Join("/proc/", strconv.Itoa(pid), "ns", namespaceFileMap[ns]), os.O_RDONLY, 0) + if err != nil { + return fds, err + } + fds[i] = f.Fd() + } + return fds, nil +} diff --git a/pkg/libcontainer/nsinit/main.go b/pkg/libcontainer/nsinit/main.go index c9f9d7bc8f..8fe700e064 100644 --- a/pkg/libcontainer/nsinit/main.go +++ b/pkg/libcontainer/nsinit/main.go @@ -37,6 +37,14 @@ func main() { if err := initCommand(container, os.Args[2]); err != nil { log.Fatal(err) } + case "execin": + exitCode, err := execinCommand(container) + if err != nil { + log.Fatal(err) + } + os.Exit(exitCode) + default: + log.Fatalf("command not supported for nsinit %s", os.Args[1]) } } diff --git a/pkg/libcontainer/nsinit/ns_linux.go b/pkg/libcontainer/nsinit/ns_linux.go index 2392ffd770..a2809eb199 100644 --- a/pkg/libcontainer/nsinit/ns_linux.go +++ b/pkg/libcontainer/nsinit/ns_linux.go @@ -14,6 +14,15 @@ var namespaceMap = map[libcontainer.Namespace]int{ libcontainer.CLONE_NEWNET: syscall.CLONE_NEWNET, } +var namespaceFileMap = map[libcontainer.Namespace]string{ + libcontainer.CLONE_NEWNS: "mnt", + 
libcontainer.CLONE_NEWUTS: "uts", + libcontainer.CLONE_NEWIPC: "ipc", + libcontainer.CLONE_NEWUSER: "user", + libcontainer.CLONE_NEWPID: "pid", + libcontainer.CLONE_NEWNET: "net", +} + // getNamespaceFlags parses the container's Namespaces options to set the correct // flags on clone, unshare, and setns func getNamespaceFlags(namespaces libcontainer.Namespaces) (flag int) { From d84feb8fe5e40838c81321249189f1f0a02825bb Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 19 Feb 2014 20:35:04 -0800 Subject: [PATCH 15/81] Refactor to remove cmd from container Pass the container's command via args Remove execin function and just look for an existing nspid file to join the namespace Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/container.go | 7 +----- pkg/libcontainer/container.json | 17 +++++--------- pkg/libcontainer/nsinit/exec.go | 21 +++++++++++++----- pkg/libcontainer/nsinit/execin.go | 24 +++----------------- pkg/libcontainer/nsinit/init.go | 4 ++-- pkg/libcontainer/nsinit/main.go | 37 +++++++++++++++++++++++-------- 6 files changed, 55 insertions(+), 55 deletions(-) diff --git a/pkg/libcontainer/container.go b/pkg/libcontainer/container.go index c8dbdd668f..763526f66b 100644 --- a/pkg/libcontainer/container.go +++ b/pkg/libcontainer/container.go @@ -5,17 +5,12 @@ type Container struct { ReadonlyFs bool `json:"readonly_fs,omitempty"` User string `json:"user,omitempty"` WorkingDir string `json:"working_dir,omitempty"` - Command *Command `json:"command,omitempty"` + Env []string `json:"environment,omitempty"` Namespaces Namespaces `json:"namespaces,omitempty"` Capabilities Capabilities `json:"capabilities,omitempty"` Network *Network `json:"network,omitempty"` } -type Command struct { - Args []string `json:"args,omitempty"` - Env []string `json:"environment,omitempty"` -} - type Network struct { IP string `json:"ip,omitempty"` Gateway string `json:"gateway,omitempty"` diff --git a/pkg/libcontainer/container.json 
b/pkg/libcontainer/container.json index c5807a7b28..ccc9abb041 100644 --- a/pkg/libcontainer/container.json +++ b/pkg/libcontainer/container.json @@ -1,16 +1,11 @@ { "hostname": "koye", - "command": { - "args": [ - "/bin/bash" - ], - "environment": [ - "HOME=/", - "PATH=PATH=$PATH:/bin:/usr/bin:/sbin:/usr/sbin", - "container=docker", - "TERM=xterm-256color" - ] - }, + "environment": [ + "HOME=/", + "PATH=PATH=$PATH:/bin:/usr/bin:/sbin:/usr/sbin", + "container=docker", + "TERM=xterm-256color" + ], "namespaces": [ "NEWIPC", "NEWNS", diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index 4abebd2941..67f907af53 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -16,17 +16,13 @@ import ( "syscall" ) -func execCommand(container *libcontainer.Container) (int, error) { +func execCommand(container *libcontainer.Container, args []string) (int, error) { master, console, err := createMasterAndConsole() if err != nil { return -1, err } - command := exec.Command("nsinit", "init", console) - command.SysProcAttr = &syscall.SysProcAttr{ - Cloneflags: uintptr(getNamespaceFlags(container.Namespaces) | syscall.CLONE_VFORK), // we need CLONE_VFORK so we can wait on the child - } - + command := createCommand(container, console, args) // create a pipe so that we can syncronize with the namespaced process and // pass the veth name to the child inPipe, err := command.StdinPipe() @@ -39,6 +35,7 @@ func execCommand(container *libcontainer.Container) (int, error) { if err := writePidFile(command); err != nil { return -1, err } + defer deletePidFile() if container.Network != nil { vethPair, err := setupVeth(container.Network.Bridge, command.Process.Pid) @@ -134,3 +131,15 @@ func createVethPair() (name1 string, name2 string, err error) { func writePidFile(command *exec.Cmd) error { return ioutil.WriteFile(".nspid", []byte(fmt.Sprint(command.Process.Pid)), 0655) } + +func deletePidFile() error { + return os.Remove(".nspid") +} + 
+func createCommand(container *libcontainer.Container, console string, args []string) *exec.Cmd { + command := exec.Command("nsinit", append([]string{"init", console}, args...)...) + command.SysProcAttr = &syscall.SysProcAttr{ + Cloneflags: uintptr(getNamespaceFlags(container.Namespaces) | syscall.CLONE_VFORK), // we need CLONE_VFORK so we can wait on the child + } + return command +} diff --git a/pkg/libcontainer/nsinit/execin.go b/pkg/libcontainer/nsinit/execin.go index 362cf5afd5..7f32620cb8 100644 --- a/pkg/libcontainer/nsinit/execin.go +++ b/pkg/libcontainer/nsinit/execin.go @@ -5,19 +5,13 @@ import ( "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/capabilities" "github.com/dotcloud/docker/pkg/system" - "io/ioutil" "os" "path/filepath" "strconv" "syscall" ) -func execinCommand(container *libcontainer.Container) (int, error) { - nspid, err := readPid() - if err != nil { - return -1, err - } - +func execinCommand(container *libcontainer.Container, nspid int, args []string) (int, error) { for _, ns := range container.Namespaces { if err := system.Unshare(namespaceMap[ns]); err != nil { return -1, err @@ -67,7 +61,7 @@ func execinCommand(container *libcontainer.Container) (int, error) { if err := capabilities.DropCapabilities(container); err != nil { return -1, fmt.Errorf("drop capabilities %s", err) } - if err := system.Exec(container.Command.Args[0], container.Command.Args[0:], container.Command.Env); err != nil { + if err := system.Exec(args[0], args[0:], container.Env); err != nil { return -1, err } } @@ -84,24 +78,12 @@ func execinCommand(container *libcontainer.Container) (int, error) { if err := capabilities.DropCapabilities(container); err != nil { return -1, fmt.Errorf("drop capabilities %s", err) } - if err := system.Exec(container.Command.Args[0], container.Command.Args[0:], container.Command.Env); err != nil { + if err := system.Exec(args[0], args[0:], container.Env); err != nil { return -1, err } 
panic("unreachable") } -func readPid() (int, error) { - data, err := ioutil.ReadFile(".nspid") - if err != nil { - return -1, err - } - pid, err := strconv.Atoi(string(data)) - if err != nil { - return -1, err - } - return pid, nil -} - func getNsFds(pid int, container *libcontainer.Container) ([]uintptr, error) { fds := make([]uintptr, len(container.Namespaces)) for i, ns := range container.Namespaces { diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index d853a32d03..82706fdadd 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -14,7 +14,7 @@ import ( "syscall" ) -func initCommand(container *libcontainer.Container, console string) error { +func initCommand(container *libcontainer.Container, console string, args []string) error { rootfs, err := resolveRootfs() if err != nil { return err @@ -72,7 +72,7 @@ func initCommand(container *libcontainer.Container, console string) error { return fmt.Errorf("chdir to %s %s", container.WorkingDir, err) } } - if err := system.Exec(container.Command.Args[0], container.Command.Args[0:], container.Command.Env); err != nil { + if err := system.Exec(args[0], args[0:], container.Env); err != nil { return fmt.Errorf("exec %s", err) } panic("unreachable") diff --git a/pkg/libcontainer/nsinit/main.go b/pkg/libcontainer/nsinit/main.go index 8fe700e064..30c8b064e4 100644 --- a/pkg/libcontainer/nsinit/main.go +++ b/pkg/libcontainer/nsinit/main.go @@ -4,8 +4,10 @@ import ( "encoding/json" "errors" "github.com/dotcloud/docker/pkg/libcontainer" + "io/ioutil" "log" "os" + "strconv" ) var ( @@ -25,24 +27,29 @@ func main() { } switch os.Args[1] { case "exec": - exitCode, err := execCommand(container) + var exitCode int + nspid, err := readPid() + if err != nil { + if !os.IsNotExist(err) { + log.Fatal(err) + } + } + if nspid > 0 { + exitCode, err = execinCommand(container, nspid, os.Args[2:]) + } else { + exitCode, err = execCommand(container, os.Args[2:]) + } if err != nil { 
log.Fatal(err) } os.Exit(exitCode) case "init": - if argc != 3 { + if argc < 3 { log.Fatal(ErrWrongArguments) } - if err := initCommand(container, os.Args[2]); err != nil { + if err := initCommand(container, os.Args[2], os.Args[3:]); err != nil { log.Fatal(err) } - case "execin": - exitCode, err := execinCommand(container) - if err != nil { - log.Fatal(err) - } - os.Exit(exitCode) default: log.Fatalf("command not supported for nsinit %s", os.Args[1]) } @@ -61,3 +68,15 @@ func loadContainer() (*libcontainer.Container, error) { } return container, nil } + +func readPid() (int, error) { + data, err := ioutil.ReadFile(".nspid") + if err != nil { + return -1, err + } + pid, err := strconv.Atoi(string(data)) + if err != nil { + return -1, err + } + return pid, nil +} From 3a97fe27d8f5a9bbcf4992cc9efe33880e73f274 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 19 Feb 2014 21:15:44 -0800 Subject: [PATCH 16/81] Update readme and add TODO Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/README.md | 75 ++++++++++++++++++++++---------------- pkg/libcontainer/TODO.md | 17 +++++++++ 2 files changed, 61 insertions(+), 31 deletions(-) create mode 100644 pkg/libcontainer/TODO.md diff --git a/pkg/libcontainer/README.md b/pkg/libcontainer/README.md index 91d747863c..07fe4f7b2d 100644 --- a/pkg/libcontainer/README.md +++ b/pkg/libcontainer/README.md @@ -1,39 +1,34 @@ ## libcontainer - reference implementation for containers -#### playground +#### background + +libcontainer specifies configuration options for what a container is. It provides a native Go implementation +for using linux namespaces with no external dependencies. libcontainer provides many convience functions for working with namespaces, networking, and management. -Use the cli package to test out functionality - -First setup a container configuration. You will need a root fs, better go the path to a -stopped docker container and use that. 
- +#### container +A container is a self contained directory that is able to run one or more processes inside without +affecting the host system. The directory is usually a full system tree. Inside the directory +a `container.json` file must be placed with the runtime configuration for how the process +should be contained and run. Environment, networking, and different capabilities for the +process are specified in this file. +Sample `container.json` file: ```json { - "id": "koye", - "namespace_pid": 12265, - "command": { - "args": [ - "/bin/bash" - ], - "environment": [ - "HOME=/", - "PATH=PATH=$PATH:/bin:/usr/bin:/sbin:/usr/sbin", - "container=docker", - "TERM=xterm" - ] - }, - "rootfs": "/root/development/gocode/src/github.com/docker/libcontainer/namespaces/ubuntu", - "network": null, - "user": "", - "working_dir": "", + "hostname": "koye", + "environment": [ + "HOME=/", + "PATH=PATH=$PATH:/bin:/usr/bin:/sbin:/usr/sbin", + "container=docker", + "TERM=xterm-256color" + ], "namespaces": [ - "NEWNET", "NEWIPC", "NEWNS", "NEWPID", - "NEWUTS" + "NEWUTS", + "NEWNET" ], "capabilities": [ "SETPCAP", @@ -50,14 +45,32 @@ stopped docker container and use that. "AUDIT_CONTROL", "MAC_OVERRIDE", "MAC_ADMIN" - ] + ], + "network": { + "ip": "172.17.0.100/16", + "gateway": "172.17.42.1", + "bridge": "docker0", + "mtu": 1500 + } } ``` -After you have a json file and a rootfs path to use just run: -`./cli exec container.json` +Using this configuration and the current directory holding the rootfs for a process to live, one can use libcontainer to exec the container. During the life of the namespace a `.nspid` file +is written to the current directory to expose the pid of the namespace'd process to the external world. A client can use this pid to wait, kill, or perform other operations on the container. If a user tries to run a new process inside an existing container with a live namespace, the namespace will be joined by the new process.
-If you want to attach to an existing namespace just use the same json -file with the container still running and do: -`./cli execin container.json` +#### nsinit + +`nsinit` is a cli application used as the reference implementation of libcontainer. It is able to +spawn or join new containers given the current directory. To use `nsinit` cd into a linux +rootfs and copy a `container.json` file into the directory with your specified configuration. + +To execute `/bin/bash` in the current directory as a container just run: +```bash +nsinit exec /bin/bash +``` + +If you wish to spawn another process inside the container while your current bash session is +running just run the exact same command again to get another bash shell or change the command. If the original process dies, PID 1, all other processes spawned inside the container will also be killed and the namespace will be removed. + +You can identify if a process is running in a container by looking to see if `.nspid` is in the root of the directory.
diff --git a/pkg/libcontainer/TODO.md b/pkg/libcontainer/TODO.md new file mode 100644 index 0000000000..f18c0b4c51 --- /dev/null +++ b/pkg/libcontainer/TODO.md @@ -0,0 +1,17 @@ +#### goals +* small and simple - line count is not everything but less code is better +* clean lines between what we do in the pkg +* provide primitives for working with namespaces not cater to every option +* extend via configuration not by features - host networking, no networking, veth network can be accomplished via adjusting the container.json, nothing to do with code + +#### tasks +* proper tty for a new process in an existing container +* use exec or raw syscalls for new process in existing container +* setup proper user in namespace if specified +* implement hook or clean interface for cgroups +* example configs for different setups (host networking, boot init) +* improve pkg documentation with comments +* testing - this is hard in a low level pkg but we could do some, maybe +* pivot root +* selinux +* apparmor From e0ff0f4dd6612e331459a2dec69adc728bc360fe Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 19 Feb 2014 21:21:49 -0800 Subject: [PATCH 17/81] Add CAP_NET_ADMIN Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/capabilities/capabilities.go | 1 + pkg/libcontainer/types.go | 1 + 2 files changed, 2 insertions(+) diff --git a/pkg/libcontainer/capabilities/capabilities.go b/pkg/libcontainer/capabilities/capabilities.go index 3301e10f7f..c19b719564 100644 --- a/pkg/libcontainer/capabilities/capabilities.go +++ b/pkg/libcontainer/capabilities/capabilities.go @@ -21,6 +21,7 @@ var capMap = map[libcontainer.Capability]capability.Cap{ libcontainer.CAP_AUDIT_CONTROL: capability.CAP_AUDIT_CONTROL, libcontainer.CAP_MAC_OVERRIDE: capability.CAP_MAC_OVERRIDE, libcontainer.CAP_MAC_ADMIN: capability.CAP_MAC_ADMIN, + libcontainer.CAP_NET_ADMIN: capability.CAP_NET_ADMIN, } // DropCapabilities drops capabilities for the current process based diff 
--git a/pkg/libcontainer/types.go b/pkg/libcontainer/types.go index b5d9932671..fcd00fd4f1 100644 --- a/pkg/libcontainer/types.go +++ b/pkg/libcontainer/types.go @@ -15,6 +15,7 @@ const ( CAP_AUDIT_CONTROL Capability = "AUDIT_CONTROL" CAP_MAC_OVERRIDE Capability = "MAC_OVERRIDE" CAP_MAC_ADMIN Capability = "MAC_ADMIN" + CAP_NET_ADMIN Capability = "NET_ADMIN" CLONE_NEWNS Namespace = "NEWNS" // mount CLONE_NEWUTS Namespace = "NEWUTS" // utsname From 70593be139647cdedca0130250626ea6e0b8a277 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 19 Feb 2014 22:43:40 -0800 Subject: [PATCH 18/81] Add comments to many functions Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/capabilities/capabilities.go | 1 + pkg/libcontainer/container.go | 22 ++++++---- pkg/libcontainer/nsinit/exec.go | 23 +++++++++-- pkg/libcontainer/nsinit/execin.go | 10 ++--- pkg/libcontainer/nsinit/init.go | 18 ++++---- pkg/libcontainer/nsinit/main.go | 4 +- pkg/libcontainer/nsinit/mount.go | 41 +++++++++++++------ pkg/libcontainer/nsinit/ns_linux.go | 3 ++ pkg/libcontainer/types.go | 18 +++++--- pkg/libcontainer/utils/utils.go | 2 + 10 files changed, 97 insertions(+), 45 deletions(-) diff --git a/pkg/libcontainer/capabilities/capabilities.go b/pkg/libcontainer/capabilities/capabilities.go index c19b719564..65fd455c26 100644 --- a/pkg/libcontainer/capabilities/capabilities.go +++ b/pkg/libcontainer/capabilities/capabilities.go @@ -41,6 +41,7 @@ func DropCapabilities(container *libcontainer.Container) error { return nil } +// getCapabilities returns the specific cap values for the libcontainer types func getCapabilities(container *libcontainer.Container) []capability.Cap { drop := []capability.Cap{} for _, c := range container.Capabilities { diff --git a/pkg/libcontainer/container.go b/pkg/libcontainer/container.go index 763526f66b..a6a57dab77 100644 --- a/pkg/libcontainer/container.go +++ b/pkg/libcontainer/container.go @@ -1,16 +1,22 @@ package libcontainer 
+// Container defines configuration options for how a +// container is setup inside a directory and how a process should be executed type Container struct { - Hostname string `json:"hostname,omitempty"` - ReadonlyFs bool `json:"readonly_fs,omitempty"` - User string `json:"user,omitempty"` - WorkingDir string `json:"working_dir,omitempty"` - Env []string `json:"environment,omitempty"` - Namespaces Namespaces `json:"namespaces,omitempty"` - Capabilities Capabilities `json:"capabilities,omitempty"` - Network *Network `json:"network,omitempty"` + Hostname string `json:"hostname,omitempty"` // hostname + ReadonlyFs bool `json:"readonly_fs,omitempty"` // set the containers rootfs as readonly + User string `json:"user,omitempty"` // user to execute the process as + WorkingDir string `json:"working_dir,omitempty"` // current working directory + Env []string `json:"environment,omitempty"` // environment to set + Namespaces Namespaces `json:"namespaces,omitempty"` // namespaces to apply + Capabilities Capabilities `json:"capabilities,omitempty"` // capabilities to drop + Network *Network `json:"network,omitempty"` // nil for host's network stack } +// Network defines configuration for a container's networking stack +// +// The network configuration can be omited from a container causing the +// container to be setup with the host's networking stack type Network struct { IP string `json:"ip,omitempty"` Gateway string `json:"gateway,omitempty"` diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index 67f907af53..202cfcab5e 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -38,7 +38,7 @@ func execCommand(container *libcontainer.Container, args []string) (int, error) defer deletePidFile() if container.Network != nil { - vethPair, err := setupVeth(container.Network.Bridge, command.Process.Pid) + vethPair, err := initializeContainerVeth(container.Network.Bridge, command.Process.Pid) if err != nil { return -1, err } @@ 
-63,14 +63,21 @@ func execCommand(container *libcontainer.Container, args []string) (int, error) return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil } +// sendVethName writes the veth pair name to the child's stdin then closes the +// pipe so that the child stops waiting for more data func sendVethName(name string, pipe io.WriteCloser) { - // write the veth pair name to the child's stdin then close the - // pipe so that the child stops waiting fmt.Fprint(pipe, name) pipe.Close() } -func setupVeth(bridge string, nspid int) (string, error) { +// initializeContainerVeth will create a veth pair and setup the host's +// side of the pair by setting the specified bridge as the master and bringing +// up the interface. +// +// Then will with set the other side of the veth pair into the container's namespaced +// using the pid and returns the veth's interface name to provide to the container to +// finish setting up the interface inside the namespace +func initializeContainerVeth(bridge string, nspid int) (string, error) { name1, name2, err := createVethPair() if err != nil { return "", err @@ -98,6 +105,8 @@ func setupWindow(master *os.File) (*term.State, error) { return term.SetRawTerminal(os.Stdin.Fd()) } +// createMasterAndConsole will open /dev/ptmx on the host and retreive the +// pts name for use as the pty slave inside the container func createMasterAndConsole() (*os.File, string, error) { master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) if err != nil { @@ -113,6 +122,8 @@ func createMasterAndConsole() (*os.File, string, error) { return master, console, nil } +// createVethPair will automatically generage two random names for +// the veth pair and ensure that they have been created func createVethPair() (name1 string, name2 string, err error) { name1, err = utils.GenerateRandomName("dock", 4) if err != nil { @@ -128,6 +139,7 @@ func createVethPair() (name1 string, name2 string, err error) { return } 
+// writePidFile writes the namespaced processes pid to .nspid in the rootfs for the container func writePidFile(command *exec.Cmd) error { return ioutil.WriteFile(".nspid", []byte(fmt.Sprint(command.Process.Pid)), 0655) } @@ -136,6 +148,9 @@ func deletePidFile() error { return os.Remove(".nspid") } +// createCommand will return an exec.Cmd with the Cloneflags set to the proper namespaces +// defined on the container's configuration and use the current binary as the init with the +// args provided func createCommand(container *libcontainer.Container, console string, args []string) *exec.Cmd { command := exec.Command("nsinit", append([]string{"init", console}, args...)...) command.SysProcAttr = &syscall.SysProcAttr{ diff --git a/pkg/libcontainer/nsinit/execin.go b/pkg/libcontainer/nsinit/execin.go index 7f32620cb8..d6224f95e6 100644 --- a/pkg/libcontainer/nsinit/execin.go +++ b/pkg/libcontainer/nsinit/execin.go @@ -28,6 +28,7 @@ func execinCommand(container *libcontainer.Container, nspid int, args []string) return -1, err } + // foreach namespace fd, use setns to join an existing container's namespaces for _, fd := range fds { if fd > 0 { if err := system.Setns(fd, 0); err != nil { @@ -42,7 +43,6 @@ func execinCommand(container *libcontainer.Container, nspid int, args []string) // remount proc and sys to pick up the changes if container.Namespaces.Contains(libcontainer.CLONE_NEWNS) && container.Namespaces.Contains(libcontainer.CLONE_NEWPID) { - pid, err := system.Fork() if err != nil { return -1, err @@ -58,12 +58,7 @@ func execinCommand(container *libcontainer.Container, nspid int, args []string) if err := remountSys(); err != nil { return -1, fmt.Errorf("remount sys %s", err) } - if err := capabilities.DropCapabilities(container); err != nil { - return -1, fmt.Errorf("drop capabilities %s", err) - } - if err := system.Exec(args[0], args[0:], container.Env); err != nil { - return -1, err - } + goto dropAndExec } proc, err := os.FindProcess(pid) if err != nil { @@ 
-75,6 +70,7 @@ func execinCommand(container *libcontainer.Container, nspid int, args []string) } os.Exit(state.Sys().(syscall.WaitStatus).ExitStatus()) } +dropAndExec: if err := capabilities.DropCapabilities(container); err != nil { return -1, fmt.Errorf("drop capabilities %s", err) } diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index 82706fdadd..c77fd90447 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -37,9 +37,6 @@ func initCommand(container *libcontainer.Container, console string, args []strin if err != nil { return fmt.Errorf("open terminal %s", err) } - if slave.Fd() != 0 { - return fmt.Errorf("slave fd should be 0") - } if err := dupSlave(slave); err != nil { return fmt.Errorf("dup2 slave %s", err) } @@ -55,7 +52,7 @@ func initCommand(container *libcontainer.Container, console string, args []strin if err := setupNewMountNamespace(rootfs, console, container.ReadonlyFs); err != nil { return fmt.Errorf("setup mount namespace %s", err) } - if err := setupNetworking(container.Network, tempVethName); err != nil { + if err := setupVethNetwork(container.Network, tempVethName); err != nil { return fmt.Errorf("setup networking %s", err) } if err := system.Sethostname(container.Hostname); err != nil { @@ -78,6 +75,8 @@ func initCommand(container *libcontainer.Container, console string, args []strin panic("unreachable") } +// resolveRootfs ensures that the current working directory is +// not a symlink and returns the absolute path to the rootfs func resolveRootfs() (string, error) { cwd, err := os.Getwd() if err != nil { @@ -104,8 +103,9 @@ func setupUser(container *libcontainer.Container) error { return nil } +// dupSlave dup2 the pty slave's fd into stdout and stdin and ensures that +// the slave's fd is 0, or stdin func dupSlave(slave *os.File) error { - // we close Stdin,etc so our pty slave should have fd 0 if slave.Fd() != 0 { return fmt.Errorf("slave fd not 0 %d", slave.Fd()) } @@ -118,7 
+118,8 @@ func dupSlave(slave *os.File) error { return nil } -// openTerminal is a clone of os.OpenFile without the O_CLOEXEC addition. +// openTerminal is a clone of os.OpenFile without the O_CLOEXEC +// used to open the pty slave inside the container namespace func openTerminal(name string, flag int) (*os.File, error) { r, e := syscall.Open(name, flag, 0) if e != nil { @@ -127,7 +128,10 @@ func openTerminal(name string, flag int) (*os.File, error) { return os.NewFile(uintptr(r), name), nil } -func setupNetworking(config *libcontainer.Network, tempVethName string) error { +// setupVethNetwork uses the Network config if it is not nil to initialize +// the new veth interface inside the container for use by changing the name to eth0 +// setting the MTU and IP address along with the default gateway +func setupVethNetwork(config *libcontainer.Network, tempVethName string) error { if config != nil { if err := network.InterfaceDown(tempVethName); err != nil { return fmt.Errorf("interface down %s %s", tempVethName, err) diff --git a/pkg/libcontainer/nsinit/main.go b/pkg/libcontainer/nsinit/main.go index 30c8b064e4..f45fe55689 100644 --- a/pkg/libcontainer/nsinit/main.go +++ b/pkg/libcontainer/nsinit/main.go @@ -26,7 +26,7 @@ func main() { log.Fatal(ErrWrongArguments) } switch os.Args[1] { - case "exec": + case "exec": // this is executed outside of the namespace in the cwd var exitCode int nspid, err := readPid() if err != nil { @@ -43,7 +43,7 @@ func main() { log.Fatal(err) } os.Exit(exitCode) - case "init": + case "init": // this is executed inside of the namespace to setup the container if argc < 3 { log.Fatal(ErrWrongArguments) } diff --git a/pkg/libcontainer/nsinit/mount.go b/pkg/libcontainer/nsinit/mount.go index baa850f0fb..6eb2e09060 100644 --- a/pkg/libcontainer/nsinit/mount.go +++ b/pkg/libcontainer/nsinit/mount.go @@ -10,10 +10,16 @@ import ( "syscall" ) -// default mount point options +// default mount point flags const defaultMountFlags = syscall.MS_NOEXEC | 
syscall.MS_NOSUID | syscall.MS_NODEV +// setupNewMountNamespace is used to initialize a new mount namespace for an new +// container in the rootfs that is specified. +// +// There is no need to unmount the new mounts because as soon as the mount namespace +// is no longer in use, the mounts will be removed automatically func setupNewMountNamespace(rootfs, console string, readonly bool) error { + // mount as slave so that the new mounts do not propagate to the host if err := system.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil { return fmt.Errorf("mounting / as slave %s", err) } @@ -55,6 +61,7 @@ func setupNewMountNamespace(rootfs, console string, readonly bool) error { return nil } +// copyDevNodes mknods the hosts devices so the new container has access to them func copyDevNodes(rootfs string) error { oldMask := system.Umask(0000) defer system.Umask(oldMask) @@ -82,6 +89,8 @@ func copyDevNodes(rootfs string) error { return nil } +// setupDev symlinks the current processes pipes into the +// appropriate destination on the containers rootfs func setupDev(rootfs string) error { for _, link := range []struct { from string @@ -104,6 +113,7 @@ func setupDev(rootfs string) error { return nil } +// setupConsole ensures that the container has a proper /dev/console setup func setupConsole(rootfs, console string) error { oldMask := system.Umask(0000) defer system.Umask(oldMask) @@ -161,6 +171,24 @@ func mountSystem(rootfs string) error { return nil } +// setupPtmx adds a symlink to pts/ptmx for /dev/ptmx and +// finishes setting up /dev/console +func setupPtmx(rootfs, console string) error { + ptmx := filepath.Join(rootfs, "dev/ptmx") + if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) { + return err + } + if err := os.Symlink("pts/ptmx", ptmx); err != nil { + return fmt.Errorf("symlink dev ptmx %s", err) + } + if err := setupConsole(rootfs, console); err != nil { + return err + } + return nil +} + +// remountProc is used to detach and 
remount the proc filesystem +// commonly needed with running a new process inside an existing container func remountProc() error { if err := system.Unmount("/proc", syscall.MNT_DETACH); err != nil { return err @@ -183,14 +211,3 @@ func remountSys() error { } return nil } - -func setupPtmx(rootfs, console string) error { - ptmx := filepath.Join(rootfs, "dev/ptmx") - if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) { - return err - } - if err := os.Symlink("pts/ptmx", ptmx); err != nil { - return fmt.Errorf("symlink dev ptmx %s", err) - } - return setupConsole(rootfs, console) -} diff --git a/pkg/libcontainer/nsinit/ns_linux.go b/pkg/libcontainer/nsinit/ns_linux.go index a2809eb199..481bdf79df 100644 --- a/pkg/libcontainer/nsinit/ns_linux.go +++ b/pkg/libcontainer/nsinit/ns_linux.go @@ -14,6 +14,9 @@ var namespaceMap = map[libcontainer.Namespace]int{ libcontainer.CLONE_NEWNET: syscall.CLONE_NEWNET, } +// namespaceFileMap is used to convert the libcontainer types +// into the names of the files located in /proc//ns/* for +// each namespace var namespaceFileMap = map[libcontainer.Namespace]string{ libcontainer.CLONE_NEWNS: "mnt", libcontainer.CLONE_NEWUTS: "uts", diff --git a/pkg/libcontainer/types.go b/pkg/libcontainer/types.go index fcd00fd4f1..bb54ff5130 100644 --- a/pkg/libcontainer/types.go +++ b/pkg/libcontainer/types.go @@ -1,5 +1,8 @@ package libcontainer +// These constants are defined as string types so that +// it is clear when adding the configuration in config files +// instead of using ints or other types const ( CAP_SETPCAP Capability = "SETPCAP" CAP_SYS_MODULE Capability = "SYS_MODULE" @@ -25,9 +28,15 @@ const ( CLONE_NEWNET Namespace = "NEWNET" // network ) -type Namespace string -type Namespaces []Namespace +type ( + Namespace string + Namespaces []Namespace + Capability string + Capabilities []Capability +) +// Contains returns true if the specified Namespace is +// in the slice func (n Namespaces) Contains(ns Namespace) bool { for _, nns 
:= range n { if nns == ns { @@ -37,9 +46,8 @@ func (n Namespaces) Contains(ns Namespace) bool { return false } -type Capability string -type Capabilities []Capability - +// Contains returns true if the specified Capability is +// in the slice func (c Capabilities) Contains(capp Capability) bool { for _, cc := range c { if cc == capp { diff --git a/pkg/libcontainer/utils/utils.go b/pkg/libcontainer/utils/utils.go index d3223c3e4d..5050997ffd 100644 --- a/pkg/libcontainer/utils/utils.go +++ b/pkg/libcontainer/utils/utils.go @@ -6,6 +6,8 @@ import ( "io" ) +// GenerateRandomName returns a new name joined with a prefix. This size +// specified is used to truncate the randomly generated value func GenerateRandomName(prefix string, size int) (string, error) { id := make([]byte, 32) if _, err := io.ReadFull(rand.Reader, id); err != nil { From e133d895a6934e650f64f391f9f26b29b0379457 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 19 Feb 2014 22:46:02 -0800 Subject: [PATCH 19/81] Remove privileged.json config Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/privileged.json | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 pkg/libcontainer/privileged.json diff --git a/pkg/libcontainer/privileged.json b/pkg/libcontainer/privileged.json deleted file mode 100644 index be877ad335..0000000000 --- a/pkg/libcontainer/privileged.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "id": "koye", - "namespace_pid": 3745, - "command": { - "args": [ - "/usr/lib/systemd/systemd" - ], - "environment": [ - "HOME=/", - "PATH=PATH=$PATH:/bin:/usr/bin:/sbin:/usr/sbin", - "container=docker", - "TERM=" - ] - }, - "rootfs": "/root/main/mycontainer", - "namespaces": [ - "NEWIPC", - "NEWNS", - "NEWPID", - "NEWUTS" - ] -} From f0b4dd6e5883a65dc23121934b6eed7e70ac2515 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Thu, 20 Feb 2014 12:00:54 -0800 Subject: [PATCH 20/81] WIP for setup kmsg Docker-DCO-1.1-Signed-off-by: Michael 
Crosby (github: crosbymichael) --- pkg/libcontainer/nsinit/mount.go | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/pkg/libcontainer/nsinit/mount.go b/pkg/libcontainer/nsinit/mount.go index 6eb2e09060..67f9020350 100644 --- a/pkg/libcontainer/nsinit/mount.go +++ b/pkg/libcontainer/nsinit/mount.go @@ -43,6 +43,9 @@ func setupNewMountNamespace(rootfs, console string, readonly bool) error { if err := setupPtmx(rootfs, console); err != nil { return err } + if err := setupKmsg(rootfs); err != nil { + return err + } if err := system.Chdir(rootfs); err != nil { return fmt.Errorf("chdir into %s %s", rootfs, err) } @@ -211,3 +214,32 @@ func remountSys() error { } return nil } + +func setupKmsg(rootfs string) error { + oldMask := system.Umask(0000) + defer system.Umask(oldMask) + + var ( + source = filepath.Join(rootfs, "dev/kmsg") + dest = filepath.Join(rootfs, "proc/kmsg") + ) + + if err := system.Mkfifo(source, 0600); err != nil { + return err + } + + os.Chmod(source, 0600) + os.Chown(source, 0, 0) + + if err := system.Mount(source, dest, "bind", syscall.MS_BIND, ""); err != nil { + return err + } + _, err := os.OpenFile(source, syscall.O_RDWR|syscall.O_NDELAY|syscall.O_CLOEXEC, 0) + if err != nil { + return err + } + if err := syscall.Unlink(source); err != nil { + return err + } + return nil +} From 664fc54e65ebc14ca9dd5bfc55e3dfe1796e51c8 Mon Sep 17 00:00:00 2001 From: Alexander Larsson Date: Thu, 20 Feb 2014 23:12:08 +0100 Subject: [PATCH 21/81] libcontainer: Initial version of cgroups support This is a minimal version of raw cgroup support for libcontainer. It has only enough for what docker needs, and it has no support for systemd yet. 
Docker-DCO-1.1-Signed-off-by: Alexander Larsson (github: alexlarsson) --- pkg/cgroups/cgroups.go | 16 ++- pkg/libcontainer/cgroup/cgroup.go | 177 ++++++++++++++++++++++++++++++ pkg/libcontainer/container.go | 7 ++ pkg/libcontainer/container.json | 5 +- pkg/libcontainer/nsinit/exec.go | 13 ++- pkg/libcontainer/nsinit/init.go | 10 +- 6 files changed, 218 insertions(+), 10 deletions(-) create mode 100644 pkg/libcontainer/cgroup/cgroup.go diff --git a/pkg/cgroups/cgroups.go b/pkg/cgroups/cgroups.go index 91ac3842ac..b9318f99e7 100644 --- a/pkg/cgroups/cgroups.go +++ b/pkg/cgroups/cgroups.go @@ -40,6 +40,16 @@ func GetThisCgroupDir(subsystem string) (string, error) { return parseCgroupFile(subsystem, f) } +func GetInitCgroupDir(subsystem string) (string, error) { + f, err := os.Open("/proc/1/cgroup") + if err != nil { + return "", err + } + defer f.Close() + + return parseCgroupFile(subsystem, f) +} + func parseCgroupFile(subsystem string, r io.Reader) (string, error) { s := bufio.NewScanner(r) @@ -49,8 +59,10 @@ func parseCgroupFile(subsystem string, r io.Reader) (string, error) { } text := s.Text() parts := strings.Split(text, ":") - if parts[1] == subsystem { - return parts[2], nil + for _, subs := range strings.Split(parts[1], ",") { + if subs == subsystem { + return parts[2], nil + } } } return "", fmt.Errorf("cgroup '%s' not found in /proc/self/cgroup", subsystem) diff --git a/pkg/libcontainer/cgroup/cgroup.go b/pkg/libcontainer/cgroup/cgroup.go new file mode 100644 index 0000000000..e30262ca50 --- /dev/null +++ b/pkg/libcontainer/cgroup/cgroup.go @@ -0,0 +1,177 @@ +package cgroup + +import ( + "fmt" + "github.com/dotcloud/docker/pkg/cgroups" + "github.com/dotcloud/docker/pkg/libcontainer" + "io/ioutil" + "os" + "path/filepath" + "strconv" +) + +// We have two implementation of cgroups support, one is based on +// systemd and the dbus api, and one is based on raw cgroup fs operations +// following the pre-single-writer model docs at: +// 
http://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups/ +const ( + cgroupRoot = "/sys/fs/cgroup" +) + +func useSystemd() bool { + return false +} + +func applyCgroupSystemd(container *libcontainer.Container, pid int) error { + return fmt.Errorf("not supported yet") +} + +func writeFile(dir, file, data string) error { + return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700) +} + +func getCgroup(subsystem string, container *libcontainer.Container) (string, error) { + cgroup := container.CgroupName + if container.CgroupParent != "" { + cgroup = filepath.Join(container.CgroupParent, cgroup) + } + + initPath, err := cgroups.GetInitCgroupDir(subsystem) + if err != nil { + return "", err + } + + path := filepath.Join(cgroupRoot, subsystem, initPath, cgroup) + + return path, nil +} + +func joinCgroup(subsystem string, container *libcontainer.Container, pid int) (string, error) { + path, err := getCgroup(subsystem, container) + if err != nil { + return "", err + } + + if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { + return "", err + } + + if err := writeFile(path, "tasks", strconv.Itoa(pid)); err != nil { + return "", err + } + + return path, nil +} + +func applyCgroupRaw(container *libcontainer.Container, pid int) (retErr error) { + if _, err := os.Stat(cgroupRoot); err != nil { + return fmt.Errorf("cgroups fs not found") + } + + if !container.DeviceAccess { + dir, err := joinCgroup("devices", container, pid) + if err != nil { + return err + } + defer func() { + if retErr != nil { + os.RemoveAll(dir) + } + }() + + if err := writeFile(dir, "devices.deny", "a"); err != nil { + return err + } + + allow := []string{ + // /dev/null, zero, full + "c 1:3 rwm", + "c 1:5 rwm", + "c 1:7 rwm", + + // consoles + "c 5:1 rwm", + "c 5:0 rwm", + "c 4:0 rwm", + "c 4:1 rwm", + + // /dev/urandom,/dev/random + "c 1:9 rwm", + "c 1:8 rwm", + + // /dev/pts/ - pts namespaces are "coming soon" + "c 136:* rwm", + "c 5:2 rwm", + + // tuntap + "c 10:200 
rwm", + } + + for _, val := range allow { + if err := writeFile(dir, "devices.allow", val); err != nil { + return err + } + } + } + + if container.Memory != 0 || container.MemorySwap != 0 { + dir, err := joinCgroup("memory", container, pid) + if err != nil { + return err + } + defer func() { + if retErr != nil { + os.RemoveAll(dir) + } + }() + + if container.Memory != 0 { + if err := writeFile(dir, "memory.limit_in_bytes", strconv.FormatInt(container.Memory, 10)); err != nil { + return err + } + if err := writeFile(dir, "memory.soft_limit_in_bytes", strconv.FormatInt(container.Memory, 10)); err != nil { + return err + } + } + if container.MemorySwap != 0 { + if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(container.MemorySwap, 10)); err != nil { + return err + } + } + } + + // We always want to join the cpu group, to allow fair cpu scheduling + // on a container basis + dir, err := joinCgroup("cpu", container, pid) + if err != nil { + return err + } + if container.CpuShares != 0 { + if err := writeFile(dir, "cpu.shares", strconv.FormatInt(container.CpuShares, 10)); err != nil { + return err + } + } + return nil +} + +func CleanupCgroup(container *libcontainer.Container) error { + path, _ := getCgroup("memory", container) + os.RemoveAll(path) + path, _ = getCgroup("devices", container) + os.RemoveAll(path) + path, _ = getCgroup("cpu", container) + os.RemoveAll(path) + return nil +} + +func ApplyCgroup(container *libcontainer.Container, pid int) error { + if container.CgroupName == "" { + return nil + } + + if useSystemd() { + return applyCgroupSystemd(container, pid) + } else { + return applyCgroupRaw(container, pid) + } +} diff --git a/pkg/libcontainer/container.go b/pkg/libcontainer/container.go index a6a57dab77..b34ac8b351 100644 --- a/pkg/libcontainer/container.go +++ b/pkg/libcontainer/container.go @@ -11,6 +11,13 @@ type Container struct { Namespaces Namespaces `json:"namespaces,omitempty"` // namespaces to apply Capabilities 
Capabilities `json:"capabilities,omitempty"` // capabilities to drop Network *Network `json:"network,omitempty"` // nil for host's network stack + + CgroupName string `json:"cgroup_name,omitempty"` // name of cgroup + CgroupParent string `json:"cgroup_parent,omitempty"` // name of parent cgroup or slice + DeviceAccess bool `json:"device_access,omitempty"` // name of parent cgroup or slice + Memory int64 `json:"memory,omitempty"` // Memory limit (in bytes) + MemorySwap int64 `json:"memory_swap,omitempty"` // Total memory usage (memory + swap); set `-1' to disable swap + CpuShares int64 `json:"cpu_shares,omitempty"` // CPU shares (relative weight vs. other containers) } // Network defines configuration for a container's networking stack diff --git a/pkg/libcontainer/container.json b/pkg/libcontainer/container.json index ccc9abb041..3e23600630 100644 --- a/pkg/libcontainer/container.json +++ b/pkg/libcontainer/container.json @@ -34,5 +34,8 @@ "gateway": "172.17.42.1", "bridge": "docker0", "mtu": 1500 - } + }, + "cgroup_name": "docker-koye", + "cgroup_parent": "docker", + "memory": 524800 } diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index 202cfcab5e..acff647c61 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -5,6 +5,7 @@ package main import ( "fmt" "github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/pkg/libcontainer/cgroup" "github.com/dotcloud/docker/pkg/libcontainer/network" "github.com/dotcloud/docker/pkg/libcontainer/utils" "github.com/dotcloud/docker/pkg/system" @@ -33,10 +34,18 @@ func execCommand(container *libcontainer.Container, args []string) (int, error) return -1, err } if err := writePidFile(command); err != nil { + command.Process.Kill() return -1, err } defer deletePidFile() + // Do this before syncing with child so that no children + // can escape the cgroup + if err := cgroup.ApplyCgroup(container, command.Process.Pid); err != nil { + 
command.Process.Kill() + return -1, err + } + if container.Network != nil { vethPair, err := initializeContainerVeth(container.Network.Bridge, command.Process.Pid) if err != nil { @@ -45,6 +54,9 @@ func execCommand(container *libcontainer.Container, args []string) (int, error) sendVethName(vethPair, inPipe) } + // Sync with child + inPipe.Close() + go io.Copy(os.Stdout, master) go io.Copy(master, os.Stdin) @@ -67,7 +79,6 @@ func execCommand(container *libcontainer.Container, args []string) (int, error) // pipe so that the child stops waiting for more data func sendVethName(name string, pipe io.WriteCloser) { fmt.Fprint(pipe, name) - pipe.Close() } // initializeContainerVeth will create a veth pair and setup the host's diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index c77fd90447..f619276e60 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -20,12 +20,10 @@ func initCommand(container *libcontainer.Container, console string, args []strin return err } - var tempVethName string - if container.Network != nil { - tempVethName, err = getVethName() - if err != nil { - return err - } + // We always read this as it is a way to sync with the parent as well + tempVethName, err := getVethName() + if err != nil { + return err } // close pipes so that we can replace it with the pty From 5f84738ef139f696e339afb8280eb74917f2167c Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Thu, 20 Feb 2014 14:40:00 -0800 Subject: [PATCH 22/81] Revert "WIP for setup kmsg" This reverts commit 80db9a918337c4ae80ffa9a001da13bd24e848c8. 
Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/nsinit/mount.go | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/pkg/libcontainer/nsinit/mount.go b/pkg/libcontainer/nsinit/mount.go index 67f9020350..6eb2e09060 100644 --- a/pkg/libcontainer/nsinit/mount.go +++ b/pkg/libcontainer/nsinit/mount.go @@ -43,9 +43,6 @@ func setupNewMountNamespace(rootfs, console string, readonly bool) error { if err := setupPtmx(rootfs, console); err != nil { return err } - if err := setupKmsg(rootfs); err != nil { - return err - } if err := system.Chdir(rootfs); err != nil { return fmt.Errorf("chdir into %s %s", rootfs, err) } @@ -214,32 +211,3 @@ func remountSys() error { } return nil } - -func setupKmsg(rootfs string) error { - oldMask := system.Umask(0000) - defer system.Umask(oldMask) - - var ( - source = filepath.Join(rootfs, "dev/kmsg") - dest = filepath.Join(rootfs, "proc/kmsg") - ) - - if err := system.Mkfifo(source, 0600); err != nil { - return err - } - - os.Chmod(source, 0600) - os.Chown(source, 0, 0) - - if err := system.Mount(source, dest, "bind", syscall.MS_BIND, ""); err != nil { - return err - } - _, err := os.OpenFile(source, syscall.O_RDWR|syscall.O_NDELAY|syscall.O_CLOEXEC, 0) - if err != nil { - return err - } - if err := syscall.Unlink(source); err != nil { - return err - } - return nil -} From f00f37413826e31e9eb87096b67c609fdfa457b9 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Thu, 20 Feb 2014 14:40:36 -0800 Subject: [PATCH 23/81] Remove clone_vfork Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/nsinit/exec.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index acff647c61..f73ad3281e 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -165,7 +165,7 @@ func deletePidFile() error { func createCommand(container 
*libcontainer.Container, console string, args []string) *exec.Cmd { command := exec.Command("nsinit", append([]string{"init", console}, args...)...) command.SysProcAttr = &syscall.SysProcAttr{ - Cloneflags: uintptr(getNamespaceFlags(container.Namespaces) | syscall.CLONE_VFORK), // we need CLONE_VFORK so we can wait on the child + Cloneflags: uintptr(getNamespaceFlags(container.Namespaces)), } return command } From c44258630575f70231b11fb55bc4edc3fb677cab Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Thu, 20 Feb 2014 15:48:48 -0800 Subject: [PATCH 24/81] Refactory cgroups into general pkg Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/cgroups/cgroups.go | 61 +++++++++++++- pkg/libcontainer/cgroup/cgroup.go | 131 +++++++++++------------------- pkg/libcontainer/container.go | 28 +++---- pkg/libcontainer/container.json | 8 +- 4 files changed, 124 insertions(+), 104 deletions(-) diff --git a/pkg/cgroups/cgroups.go b/pkg/cgroups/cgroups.go index b9318f99e7..1e96caa7e3 100644 --- a/pkg/cgroups/cgroups.go +++ b/pkg/cgroups/cgroups.go @@ -5,10 +5,23 @@ import ( "fmt" "github.com/dotcloud/docker/pkg/mount" "io" + "io/ioutil" "os" + "path/filepath" + "strconv" "strings" ) +type Cgroup struct { + Name string `json:"name,omitempty"` + Parent string `json:"parent,omitempty"` + + DeviceAccess bool `json:"device_access,omitempty"` // name of parent cgroup or slice + Memory int64 `json:"memory,omitempty"` // Memory limit (in bytes) + MemorySwap int64 `json:"memory_swap,omitempty"` // Total memory usage (memory + swap); set `-1' to disable swap + CpuShares int64 `json:"cpu_shares,omitempty"` // CPU shares (relative weight vs. 
other containers) +} + // https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt func FindCgroupMountpoint(subsystem string) (string, error) { mounts, err := mount.GetMounts() @@ -25,7 +38,6 @@ func FindCgroupMountpoint(subsystem string) (string, error) { } } } - return "", fmt.Errorf("cgroup mountpoint not found for %s", subsystem) } @@ -50,9 +62,50 @@ func GetInitCgroupDir(subsystem string) (string, error) { return parseCgroupFile(subsystem, f) } +func (c *Cgroup) Path(root, subsystem string) (string, error) { + cgroup := c.Name + if c.Parent != "" { + cgroup = filepath.Join(c.Parent, cgroup) + } + initPath, err := GetInitCgroupDir(subsystem) + if err != nil { + return "", err + } + return filepath.Join(root, subsystem, initPath, cgroup), nil +} + +func (c *Cgroup) Join(root, subsystem string, pid int) (string, error) { + path, err := c.Path(root, subsystem) + if err != nil { + return "", err + } + if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { + return "", err + } + if err := writeFile(path, "tasks", strconv.Itoa(pid)); err != nil { + return "", err + } + return path, nil +} + +func (c *Cgroup) Cleanup(root string) error { + get := func(subsystem string) string { + path, _ := c.Path(root, subsystem) + return path + } + + for _, path := range []string{ + get("memory"), + get("devices"), + get("cpu"), + } { + os.RemoveAll(path) + } + return nil +} + func parseCgroupFile(subsystem string, r io.Reader) (string, error) { s := bufio.NewScanner(r) - for s.Scan() { if err := s.Err(); err != nil { return "", err @@ -67,3 +120,7 @@ func parseCgroupFile(subsystem string, r io.Reader) (string, error) { } return "", fmt.Errorf("cgroup '%s' not found in /proc/self/cgroup", subsystem) } + +func writeFile(dir, file, data string) error { + return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700) +} diff --git a/pkg/libcontainer/cgroup/cgroup.go b/pkg/libcontainer/cgroup/cgroup.go index e30262ca50..5f27ac3ffb 100644 --- 
a/pkg/libcontainer/cgroup/cgroup.go +++ b/pkg/libcontainer/cgroup/cgroup.go @@ -10,71 +10,46 @@ import ( "strconv" ) -// We have two implementation of cgroups support, one is based on -// systemd and the dbus api, and one is based on raw cgroup fs operations -// following the pre-single-writer model docs at: -// http://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups/ -const ( - cgroupRoot = "/sys/fs/cgroup" -) - -func useSystemd() bool { - return false -} - -func applyCgroupSystemd(container *libcontainer.Container, pid int) error { - return fmt.Errorf("not supported yet") -} - -func writeFile(dir, file, data string) error { - return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700) -} - -func getCgroup(subsystem string, container *libcontainer.Container) (string, error) { - cgroup := container.CgroupName - if container.CgroupParent != "" { - cgroup = filepath.Join(container.CgroupParent, cgroup) +func ApplyCgroup(container *libcontainer.Container, pid int) (err error) { + if container.Cgroups == nil { + return nil } - initPath, err := cgroups.GetInitCgroupDir(subsystem) + // We have two implementation of cgroups support, one is based on + // systemd and the dbus api, and one is based on raw cgroup fs operations + // following the pre-single-writer model docs at: + // http://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups/ + // + // we can pick any subsystem to find the root + cgroupRoot, err := cgroups.FindCgroupMountpoint("memory") if err != nil { - return "", err + return err } - - path := filepath.Join(cgroupRoot, subsystem, initPath, cgroup) - - return path, nil -} - -func joinCgroup(subsystem string, container *libcontainer.Container, pid int) (string, error) { - path, err := getCgroup(subsystem, container) - if err != nil { - return "", err - } - - if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { - return "", err - } - - if err := writeFile(path, "tasks", strconv.Itoa(pid)); err != nil { - return "", err 
- } - - return path, nil -} - -func applyCgroupRaw(container *libcontainer.Container, pid int) (retErr error) { + cgroupRoot = filepath.Dir(cgroupRoot) if _, err := os.Stat(cgroupRoot); err != nil { return fmt.Errorf("cgroups fs not found") } + if err := setupDevices(container, cgroupRoot, pid); err != nil { + return err + } + if err := setupMemory(container, cgroupRoot, pid); err != nil { + return err + } + if err := setupCpu(container, cgroupRoot, pid); err != nil { + return err + } + return nil +} - if !container.DeviceAccess { - dir, err := joinCgroup("devices", container, pid) +func setupDevices(container *libcontainer.Container, cgroupRoot string, pid int) (err error) { + if !container.Cgroups.DeviceAccess { + dir, err := container.Cgroups.Join(cgroupRoot, "devices", pid) if err != nil { return err } + defer func() { - if retErr != nil { + if err != nil { os.RemoveAll(dir) } }() @@ -113,65 +88,53 @@ func applyCgroupRaw(container *libcontainer.Container, pid int) (retErr error) { } } } + return nil +} - if container.Memory != 0 || container.MemorySwap != 0 { - dir, err := joinCgroup("memory", container, pid) +func setupMemory(container *libcontainer.Container, cgroupRoot string, pid int) (err error) { + if container.Cgroups.Memory != 0 || container.Cgroups.MemorySwap != 0 { + dir, err := container.Cgroups.Join(cgroupRoot, "memory", pid) if err != nil { return err } defer func() { - if retErr != nil { + if err != nil { os.RemoveAll(dir) } }() - if container.Memory != 0 { - if err := writeFile(dir, "memory.limit_in_bytes", strconv.FormatInt(container.Memory, 10)); err != nil { + if container.Cgroups.Memory != 0 { + if err := writeFile(dir, "memory.limit_in_bytes", strconv.FormatInt(container.Cgroups.Memory, 10)); err != nil { return err } - if err := writeFile(dir, "memory.soft_limit_in_bytes", strconv.FormatInt(container.Memory, 10)); err != nil { + if err := writeFile(dir, "memory.soft_limit_in_bytes", strconv.FormatInt(container.Cgroups.Memory, 10)); err != 
nil { return err } } - if container.MemorySwap != 0 { - if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(container.MemorySwap, 10)); err != nil { + if container.Cgroups.MemorySwap != 0 { + if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(container.Cgroups.MemorySwap, 10)); err != nil { return err } } } + return nil +} +func setupCpu(container *libcontainer.Container, cgroupRoot string, pid int) (err error) { // We always want to join the cpu group, to allow fair cpu scheduling // on a container basis - dir, err := joinCgroup("cpu", container, pid) + dir, err := container.Cgroups.Join(cgroupRoot, "cpu", pid) if err != nil { return err } - if container.CpuShares != 0 { - if err := writeFile(dir, "cpu.shares", strconv.FormatInt(container.CpuShares, 10)); err != nil { + if container.Cgroups.CpuShares != 0 { + if err := writeFile(dir, "cpu.shares", strconv.FormatInt(container.Cgroups.CpuShares, 10)); err != nil { return err } } return nil } -func CleanupCgroup(container *libcontainer.Container) error { - path, _ := getCgroup("memory", container) - os.RemoveAll(path) - path, _ = getCgroup("devices", container) - os.RemoveAll(path) - path, _ = getCgroup("cpu", container) - os.RemoveAll(path) - return nil -} - -func ApplyCgroup(container *libcontainer.Container, pid int) error { - if container.CgroupName == "" { - return nil - } - - if useSystemd() { - return applyCgroupSystemd(container, pid) - } else { - return applyCgroupRaw(container, pid) - } +func writeFile(dir, file, data string) error { + return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700) } diff --git a/pkg/libcontainer/container.go b/pkg/libcontainer/container.go index b34ac8b351..4c0e39a798 100644 --- a/pkg/libcontainer/container.go +++ b/pkg/libcontainer/container.go @@ -1,23 +1,21 @@ package libcontainer +import ( + "github.com/dotcloud/docker/pkg/cgroups" +) + // Container defines configuration options for how a // container is setup inside a 
directory and how a process should be executed type Container struct { - Hostname string `json:"hostname,omitempty"` // hostname - ReadonlyFs bool `json:"readonly_fs,omitempty"` // set the containers rootfs as readonly - User string `json:"user,omitempty"` // user to execute the process as - WorkingDir string `json:"working_dir,omitempty"` // current working directory - Env []string `json:"environment,omitempty"` // environment to set - Namespaces Namespaces `json:"namespaces,omitempty"` // namespaces to apply - Capabilities Capabilities `json:"capabilities,omitempty"` // capabilities to drop - Network *Network `json:"network,omitempty"` // nil for host's network stack - - CgroupName string `json:"cgroup_name,omitempty"` // name of cgroup - CgroupParent string `json:"cgroup_parent,omitempty"` // name of parent cgroup or slice - DeviceAccess bool `json:"device_access,omitempty"` // name of parent cgroup or slice - Memory int64 `json:"memory,omitempty"` // Memory limit (in bytes) - MemorySwap int64 `json:"memory_swap,omitempty"` // Total memory usage (memory + swap); set `-1' to disable swap - CpuShares int64 `json:"cpu_shares,omitempty"` // CPU shares (relative weight vs. 
other containers) + Hostname string `json:"hostname,omitempty"` // hostname + ReadonlyFs bool `json:"readonly_fs,omitempty"` // set the containers rootfs as readonly + User string `json:"user,omitempty"` // user to execute the process as + WorkingDir string `json:"working_dir,omitempty"` // current working directory + Env []string `json:"environment,omitempty"` // environment to set + Namespaces Namespaces `json:"namespaces,omitempty"` // namespaces to apply + Capabilities Capabilities `json:"capabilities,omitempty"` // capabilities to drop + Network *Network `json:"network,omitempty"` // nil for host's network stack + Cgroups *cgroups.Cgroup `json:"cgroups,omitempty"` } // Network defines configuration for a container's networking stack diff --git a/pkg/libcontainer/container.json b/pkg/libcontainer/container.json index 3e23600630..2207543bd0 100644 --- a/pkg/libcontainer/container.json +++ b/pkg/libcontainer/container.json @@ -35,7 +35,9 @@ "bridge": "docker0", "mtu": 1500 }, - "cgroup_name": "docker-koye", - "cgroup_parent": "docker", - "memory": 524800 + "cgroups": { + "name": "docker-koye", + "parent": "docker", + "memory": 524800 + } } From 3cb698125da7b55a7d7ec43b33858f35844a6143 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Thu, 20 Feb 2014 15:50:55 -0800 Subject: [PATCH 25/81] Change IP to address because it includes the subnet Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/README.md | 7 ++++++- pkg/libcontainer/container.go | 2 +- pkg/libcontainer/container.json | 2 +- pkg/libcontainer/nsinit/init.go | 2 +- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/pkg/libcontainer/README.md b/pkg/libcontainer/README.md index 07fe4f7b2d..163161c178 100644 --- a/pkg/libcontainer/README.md +++ b/pkg/libcontainer/README.md @@ -47,10 +47,15 @@ Sample `container.json` file: "MAC_ADMIN" ], "network": { - "ip": "172.17.0.100/16", + "address": "172.17.0.100/16", "gateway": "172.17.42.1", "bridge": "docker0", 
"mtu": 1500 + }, + "cgroups": { + "name": "docker-koye", + "parent": "docker", + "memory": 524800 } } ``` diff --git a/pkg/libcontainer/container.go b/pkg/libcontainer/container.go index 4c0e39a798..e6e4b4747e 100644 --- a/pkg/libcontainer/container.go +++ b/pkg/libcontainer/container.go @@ -23,7 +23,7 @@ type Container struct { // The network configuration can be omited from a container causing the // container to be setup with the host's networking stack type Network struct { - IP string `json:"ip,omitempty"` + Address string `json:"address,omitempty"` Gateway string `json:"gateway,omitempty"` Bridge string `json:"bridge,omitempty"` Mtu int `json:"mtu,omitempty"` diff --git a/pkg/libcontainer/container.json b/pkg/libcontainer/container.json index 2207543bd0..c1a07dc55b 100644 --- a/pkg/libcontainer/container.json +++ b/pkg/libcontainer/container.json @@ -30,7 +30,7 @@ "MAC_ADMIN" ], "network": { - "ip": "172.17.0.100/16", + "address": "172.17.0.100/16", "gateway": "172.17.42.1", "bridge": "docker0", "mtu": 1500 diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index f619276e60..f89e53982c 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -137,7 +137,7 @@ func setupVethNetwork(config *libcontainer.Network, tempVethName string) error { if err := network.ChangeInterfaceName(tempVethName, "eth0"); err != nil { return fmt.Errorf("change %s to eth0 %s", tempVethName, err) } - if err := network.SetInterfaceIp("eth0", config.IP); err != nil { + if err := network.SetInterfaceIp("eth0", config.Address); err != nil { return fmt.Errorf("set eth0 ip %s", err) } if err := network.SetMtu("eth0", config.Mtu); err != nil { From 7020e208c70dfca5ebc97d699553e4bf1c6ab0bb Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Thu, 20 Feb 2014 16:11:22 -0800 Subject: [PATCH 26/81] Move rest of cgroups functions into cgroups pkg Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/cgroups/cgroups.go | 
122 ++++++++++++++++++++++++++ pkg/libcontainer/cgroup/cgroup.go | 140 ------------------------------ pkg/libcontainer/nsinit/exec.go | 9 +- 3 files changed, 127 insertions(+), 144 deletions(-) delete mode 100644 pkg/libcontainer/cgroup/cgroup.go diff --git a/pkg/cgroups/cgroups.go b/pkg/cgroups/cgroups.go index 1e96caa7e3..96002f0af9 100644 --- a/pkg/cgroups/cgroups.go +++ b/pkg/cgroups/cgroups.go @@ -124,3 +124,125 @@ func parseCgroupFile(subsystem string, r io.Reader) (string, error) { func writeFile(dir, file, data string) error { return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700) } + +func (c *Cgroup) Apply(pid int) error { + // We have two implementation of cgroups support, one is based on + // systemd and the dbus api, and one is based on raw cgroup fs operations + // following the pre-single-writer model docs at: + // http://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups/ + // + // we can pick any subsystem to find the root + cgroupRoot, err := FindCgroupMountpoint("memory") + if err != nil { + return err + } + cgroupRoot = filepath.Dir(cgroupRoot) + + if _, err := os.Stat(cgroupRoot); err != nil { + return fmt.Errorf("cgroups fs not found") + } + if err := c.setupDevices(cgroupRoot, pid); err != nil { + return err + } + if err := c.setupMemory(cgroupRoot, pid); err != nil { + return err + } + if err := c.setupCpu(cgroupRoot, pid); err != nil { + return err + } + return nil +} + +func (c *Cgroup) setupDevices(cgroupRoot string, pid int) (err error) { + if !c.DeviceAccess { + dir, err := c.Join(cgroupRoot, "devices", pid) + if err != nil { + return err + } + + defer func() { + if err != nil { + os.RemoveAll(dir) + } + }() + + if err := writeFile(dir, "devices.deny", "a"); err != nil { + return err + } + + allow := []string{ + // /dev/null, zero, full + "c 1:3 rwm", + "c 1:5 rwm", + "c 1:7 rwm", + + // consoles + "c 5:1 rwm", + "c 5:0 rwm", + "c 4:0 rwm", + "c 4:1 rwm", + + // /dev/urandom,/dev/random + "c 1:9 rwm", + "c 1:8 
rwm", + + // /dev/pts/ - pts namespaces are "coming soon" + "c 136:* rwm", + "c 5:2 rwm", + + // tuntap + "c 10:200 rwm", + } + + for _, val := range allow { + if err := writeFile(dir, "devices.allow", val); err != nil { + return err + } + } + } + return nil +} + +func (c *Cgroup) setupMemory(cgroupRoot string, pid int) (err error) { + if c.Memory != 0 || c.MemorySwap != 0 { + dir, err := c.Join(cgroupRoot, "memory", pid) + if err != nil { + return err + } + defer func() { + if err != nil { + os.RemoveAll(dir) + } + }() + + if c.Memory != 0 { + if err := writeFile(dir, "memory.limit_in_bytes", strconv.FormatInt(c.Memory, 10)); err != nil { + return err + } + if err := writeFile(dir, "memory.soft_limit_in_bytes", strconv.FormatInt(c.Memory, 10)); err != nil { + return err + } + } + if c.MemorySwap != 0 { + if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(c.MemorySwap, 10)); err != nil { + return err + } + } + } + return nil +} + +func (c *Cgroup) setupCpu(cgroupRoot string, pid int) (err error) { + // We always want to join the cpu group, to allow fair cpu scheduling + // on a container basis + dir, err := c.Join(cgroupRoot, "cpu", pid) + if err != nil { + return err + } + if c.CpuShares != 0 { + if err := writeFile(dir, "cpu.shares", strconv.FormatInt(c.CpuShares, 10)); err != nil { + return err + } + } + return nil +} diff --git a/pkg/libcontainer/cgroup/cgroup.go b/pkg/libcontainer/cgroup/cgroup.go deleted file mode 100644 index 5f27ac3ffb..0000000000 --- a/pkg/libcontainer/cgroup/cgroup.go +++ /dev/null @@ -1,140 +0,0 @@ -package cgroup - -import ( - "fmt" - "github.com/dotcloud/docker/pkg/cgroups" - "github.com/dotcloud/docker/pkg/libcontainer" - "io/ioutil" - "os" - "path/filepath" - "strconv" -) - -func ApplyCgroup(container *libcontainer.Container, pid int) (err error) { - if container.Cgroups == nil { - return nil - } - - // We have two implementation of cgroups support, one is based on - // systemd and the dbus api, and one is 
based on raw cgroup fs operations - // following the pre-single-writer model docs at: - // http://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups/ - // - // we can pick any subsystem to find the root - cgroupRoot, err := cgroups.FindCgroupMountpoint("memory") - if err != nil { - return err - } - cgroupRoot = filepath.Dir(cgroupRoot) - if _, err := os.Stat(cgroupRoot); err != nil { - return fmt.Errorf("cgroups fs not found") - } - if err := setupDevices(container, cgroupRoot, pid); err != nil { - return err - } - if err := setupMemory(container, cgroupRoot, pid); err != nil { - return err - } - if err := setupCpu(container, cgroupRoot, pid); err != nil { - return err - } - return nil -} - -func setupDevices(container *libcontainer.Container, cgroupRoot string, pid int) (err error) { - if !container.Cgroups.DeviceAccess { - dir, err := container.Cgroups.Join(cgroupRoot, "devices", pid) - if err != nil { - return err - } - - defer func() { - if err != nil { - os.RemoveAll(dir) - } - }() - - if err := writeFile(dir, "devices.deny", "a"); err != nil { - return err - } - - allow := []string{ - // /dev/null, zero, full - "c 1:3 rwm", - "c 1:5 rwm", - "c 1:7 rwm", - - // consoles - "c 5:1 rwm", - "c 5:0 rwm", - "c 4:0 rwm", - "c 4:1 rwm", - - // /dev/urandom,/dev/random - "c 1:9 rwm", - "c 1:8 rwm", - - // /dev/pts/ - pts namespaces are "coming soon" - "c 136:* rwm", - "c 5:2 rwm", - - // tuntap - "c 10:200 rwm", - } - - for _, val := range allow { - if err := writeFile(dir, "devices.allow", val); err != nil { - return err - } - } - } - return nil -} - -func setupMemory(container *libcontainer.Container, cgroupRoot string, pid int) (err error) { - if container.Cgroups.Memory != 0 || container.Cgroups.MemorySwap != 0 { - dir, err := container.Cgroups.Join(cgroupRoot, "memory", pid) - if err != nil { - return err - } - defer func() { - if err != nil { - os.RemoveAll(dir) - } - }() - - if container.Cgroups.Memory != 0 { - if err := writeFile(dir, 
"memory.limit_in_bytes", strconv.FormatInt(container.Cgroups.Memory, 10)); err != nil { - return err - } - if err := writeFile(dir, "memory.soft_limit_in_bytes", strconv.FormatInt(container.Cgroups.Memory, 10)); err != nil { - return err - } - } - if container.Cgroups.MemorySwap != 0 { - if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(container.Cgroups.MemorySwap, 10)); err != nil { - return err - } - } - } - return nil -} - -func setupCpu(container *libcontainer.Container, cgroupRoot string, pid int) (err error) { - // We always want to join the cpu group, to allow fair cpu scheduling - // on a container basis - dir, err := container.Cgroups.Join(cgroupRoot, "cpu", pid) - if err != nil { - return err - } - if container.Cgroups.CpuShares != 0 { - if err := writeFile(dir, "cpu.shares", strconv.FormatInt(container.Cgroups.CpuShares, 10)); err != nil { - return err - } - } - return nil -} - -func writeFile(dir, file, data string) error { - return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700) -} diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index f73ad3281e..f04e9bee20 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -5,7 +5,6 @@ package main import ( "fmt" "github.com/dotcloud/docker/pkg/libcontainer" - "github.com/dotcloud/docker/pkg/libcontainer/cgroup" "github.com/dotcloud/docker/pkg/libcontainer/network" "github.com/dotcloud/docker/pkg/libcontainer/utils" "github.com/dotcloud/docker/pkg/system" @@ -41,9 +40,11 @@ func execCommand(container *libcontainer.Container, args []string) (int, error) // Do this before syncing with child so that no children // can escape the cgroup - if err := cgroup.ApplyCgroup(container, command.Process.Pid); err != nil { - command.Process.Kill() - return -1, err + if container.Cgroups != nil { + if err := container.Cgroups.Apply(command.Process.Pid); err != nil { + command.Process.Kill() + return -1, err + } } if 
container.Network != nil { From b519d3ea5a50ad7c15d576a89ec9846c4fc123fa Mon Sep 17 00:00:00 2001 From: "Guillaume J. Charmes" Date: Thu, 20 Feb 2014 17:53:50 -0800 Subject: [PATCH 27/81] Use flag for init Docker-DCO-1.1-Signed-off-by: Guillaume J. Charmes (github: creack) --- pkg/libcontainer/nsinit/exec.go | 2 +- pkg/libcontainer/nsinit/main.go | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index f04e9bee20..6d87f3b66d 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -164,7 +164,7 @@ func deletePidFile() error { // defined on the container's configuration and use the current binary as the init with the // args provided func createCommand(container *libcontainer.Container, console string, args []string) *exec.Cmd { - command := exec.Command("nsinit", append([]string{"init", console}, args...)...) + command := exec.Command("nsinit", append([]string{"-console", console, "init"}, args...)...) 
command.SysProcAttr = &syscall.SysProcAttr{ Cloneflags: uintptr(getNamespaceFlags(container.Namespaces)), } diff --git a/pkg/libcontainer/nsinit/main.go b/pkg/libcontainer/nsinit/main.go index f45fe55689..e7240df041 100644 --- a/pkg/libcontainer/nsinit/main.go +++ b/pkg/libcontainer/nsinit/main.go @@ -3,6 +3,7 @@ package main import ( "encoding/json" "errors" + "flag" "github.com/dotcloud/docker/pkg/libcontainer" "io/ioutil" "log" @@ -16,16 +17,18 @@ var ( ) func main() { + console := flag.String("console", "", "Console (pty slave) name") + flag.Parse() + container, err := loadContainer() if err != nil { log.Fatal(err) } - argc := len(os.Args) - if argc < 2 { + if flag.NArg() < 1 { log.Fatal(ErrWrongArguments) } - switch os.Args[1] { + switch flag.Arg(0) { case "exec": // this is executed outside of the namespace in the cwd var exitCode int nspid, err := readPid() @@ -35,23 +38,23 @@ func main() { } } if nspid > 0 { - exitCode, err = execinCommand(container, nspid, os.Args[2:]) + exitCode, err = execinCommand(container, nspid, flag.Args()[1:]) } else { - exitCode, err = execCommand(container, os.Args[2:]) + exitCode, err = execCommand(container, flag.Args()[1:]) } if err != nil { log.Fatal(err) } os.Exit(exitCode) case "init": // this is executed inside of the namespace to setup the container - if argc < 3 { + if flag.NArg() < 2 { log.Fatal(ErrWrongArguments) } - if err := initCommand(container, os.Args[2], os.Args[3:]); err != nil { + if err := initCommand(container, *console, flag.Args()[1:]); err != nil { log.Fatal(err) } default: - log.Fatalf("command not supported for nsinit %s", os.Args[1]) + log.Fatalf("command not supported for nsinit %s", flag.Arg(0)) } } From 8dec4adcb3fd905eb05f07678fa7f5bb47d8242f Mon Sep 17 00:00:00 2001 From: "Guillaume J. Charmes" Date: Thu, 20 Feb 2014 17:58:13 -0800 Subject: [PATCH 28/81] Use a custom pipe instead of stdin for sync net namespace Docker-DCO-1.1-Signed-off-by: Guillaume J. 
Charmes (github: creack) --- pkg/libcontainer/nsinit/exec.go | 16 ++++++++++------ pkg/libcontainer/nsinit/init.go | 11 +++++++---- pkg/libcontainer/nsinit/main.go | 3 ++- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index 6d87f3b66d..8007ed4691 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -22,16 +22,20 @@ func execCommand(container *libcontainer.Container, args []string) (int, error) return -1, err } - command := createCommand(container, console, args) // create a pipe so that we can syncronize with the namespaced process and // pass the veth name to the child - inPipe, err := command.StdinPipe() + r, w, err := os.Pipe() if err != nil { return -1, err } + system.UsetCloseOnExec(r.Fd()) + + command := createCommand(container, console, r.Fd(), args) + if err := command.Start(); err != nil { return -1, err } + if err := writePidFile(command); err != nil { command.Process.Kill() return -1, err @@ -52,11 +56,11 @@ func execCommand(container *libcontainer.Container, args []string) (int, error) if err != nil { return -1, err } - sendVethName(vethPair, inPipe) + sendVethName(vethPair, w) } // Sync with child - inPipe.Close() + w.Close() go io.Copy(os.Stdout, master) go io.Copy(master, os.Stdin) @@ -163,8 +167,8 @@ func deletePidFile() error { // createCommand will return an exec.Cmd with the Cloneflags set to the proper namespaces // defined on the container's configuration and use the current binary as the init with the // args provided -func createCommand(container *libcontainer.Container, console string, args []string) *exec.Cmd { - command := exec.Command("nsinit", append([]string{"-console", console, "init"}, args...)...) 
+func createCommand(container *libcontainer.Container, console string, pipe uintptr, args []string) *exec.Cmd { + command := exec.Command("nsinit", append([]string{"-console", console, "-pipe", fmt.Sprint(pipe), "init"}, args...)...) command.SysProcAttr = &syscall.SysProcAttr{ Cloneflags: uintptr(getNamespaceFlags(container.Namespaces)), } diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index f89e53982c..a0815eef1b 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -8,20 +8,21 @@ import ( "github.com/dotcloud/docker/pkg/libcontainer/capabilities" "github.com/dotcloud/docker/pkg/libcontainer/network" "github.com/dotcloud/docker/pkg/system" + "io" "io/ioutil" "os" "path/filepath" "syscall" ) -func initCommand(container *libcontainer.Container, console string, args []string) error { +func initCommand(container *libcontainer.Container, console string, pipe io.ReadCloser, args []string) error { rootfs, err := resolveRootfs() if err != nil { return err } // We always read this as it is a way to sync with the parent as well - tempVethName, err := getVethName() + tempVethName, err := getVethName(pipe) if err != nil { return err } @@ -164,8 +165,10 @@ func setupVethNetwork(config *libcontainer.Network, tempVethName string) error { // getVethName reads from Stdin the temp veth name // sent by the parent processes after the veth pair // has been created and setup -func getVethName() (string, error) { - data, err := ioutil.ReadAll(os.Stdin) +func getVethName(pipe io.ReadCloser) (string, error) { + defer pipe.Close() + + data, err := ioutil.ReadAll(pipe) if err != nil { return "", fmt.Errorf("error reading from stdin %s", err) } diff --git a/pkg/libcontainer/nsinit/main.go b/pkg/libcontainer/nsinit/main.go index e7240df041..6f2825b25b 100644 --- a/pkg/libcontainer/nsinit/main.go +++ b/pkg/libcontainer/nsinit/main.go @@ -18,6 +18,7 @@ var ( func main() { console := flag.String("console", "", "Console (pty slave) 
name") + pipeFd := flag.Int("pipe", 0, "sync pipe fd") flag.Parse() container, err := loadContainer() @@ -50,7 +51,7 @@ func main() { if flag.NArg() < 2 { log.Fatal(ErrWrongArguments) } - if err := initCommand(container, *console, flag.Args()[1:]); err != nil { + if err := initCommand(container, *console, os.NewFile(uintptr(*pipeFd), "pipe"), flag.Args()[1:]); err != nil { log.Fatal(err) } default: From 83dfdd1d9587a7335bbf3a4656572baefae4f28d Mon Sep 17 00:00:00 2001 From: "Guillaume J. Charmes" Date: Thu, 20 Feb 2014 17:59:08 -0800 Subject: [PATCH 29/81] Minor cleanup Docker-DCO-1.1-Signed-off-by: Guillaume J. Charmes (github: creack) --- pkg/libcontainer/nsinit/exec.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index 8007ed4691..7f552c2961 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -56,11 +56,12 @@ func execCommand(container *libcontainer.Container, args []string) (int, error) if err != nil { return -1, err } - sendVethName(vethPair, w) + sendVethName(w, vethPair) } // Sync with child w.Close() + r.Close() go io.Copy(os.Stdout, master) go io.Copy(master, os.Stdin) @@ -82,7 +83,7 @@ func execCommand(container *libcontainer.Container, args []string) (int, error) // sendVethName writes the veth pair name to the child's stdin then closes the // pipe so that the child stops waiting for more data -func sendVethName(name string, pipe io.WriteCloser) { +func sendVethName(pipe io.Writer, name string) { fmt.Fprint(pipe, name) } From 1a4fb0921919720ab379bc82b7508580057770ee Mon Sep 17 00:00:00 2001 From: "Guillaume J. Charmes" Date: Thu, 20 Feb 2014 18:05:40 -0800 Subject: [PATCH 30/81] Handle non-tty mode Docker-DCO-1.1-Signed-off-by: Guillaume J. 
Charmes (github: creack) --- pkg/libcontainer/nsinit/exec.go | 55 +++++++++++++++++++++++++------- pkg/libcontainer/nsinit/init.go | 30 +++++++++-------- pkg/libcontainer/nsinit/main.go | 9 ++++-- pkg/libcontainer/nsinit/mount.go | 6 ++-- 4 files changed, 70 insertions(+), 30 deletions(-) diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index 7f552c2961..b290ace3ce 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -16,10 +16,21 @@ import ( "syscall" ) -func execCommand(container *libcontainer.Container, args []string) (int, error) { - master, console, err := createMasterAndConsole() - if err != nil { - return -1, err +func execCommand(container *libcontainer.Container, tty bool, args []string) (int, error) { + var ( + master *os.File + console string + err error + + inPipe io.WriteCloser + outPipe, errPipe io.ReadCloser + ) + + if tty { + master, console, err = createMasterAndConsole() + if err != nil { + return -1, err + } } // create a pipe so that we can syncronize with the namespaced process and @@ -32,6 +43,21 @@ func execCommand(container *libcontainer.Container, args []string) (int, error) command := createCommand(container, console, r.Fd(), args) + if !tty { + inPipe, err = command.StdinPipe() + if err != nil { + return -1, err + } + outPipe, err = command.StdoutPipe() + if err != nil { + return -1, err + } + errPipe, err = command.StderrPipe() + if err != nil { + return -1, err + } + } + if err := command.Start(); err != nil { return -1, err } @@ -63,15 +89,20 @@ func execCommand(container *libcontainer.Container, args []string) (int, error) w.Close() r.Close() - go io.Copy(os.Stdout, master) - go io.Copy(master, os.Stdin) - - state, err := setupWindow(master) - if err != nil { - command.Process.Kill() - return -1, err + if tty { + go io.Copy(os.Stdout, master) + go io.Copy(master, os.Stdin) + state, err := setupWindow(master) + if err != nil { + command.Process.Kill() + return -1, err + } + 
defer term.RestoreTerminal(os.Stdin.Fd(), state) + } else { + go io.Copy(inPipe, os.Stdin) + go io.Copy(os.Stdout, outPipe) + go io.Copy(os.Stderr, errPipe) } - defer term.RestoreTerminal(os.Stdin.Fd(), state) if err := command.Wait(); err != nil { if _, ok := err.(*exec.ExitError); !ok { diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index a0815eef1b..ef7fc4e44c 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -27,23 +27,27 @@ func initCommand(container *libcontainer.Container, console string, pipe io.Read return err } - // close pipes so that we can replace it with the pty - os.Stdin.Close() - os.Stdout.Close() - os.Stderr.Close() + if console != "" { + // close pipes so that we can replace it with the pty + os.Stdin.Close() + os.Stdout.Close() + os.Stderr.Close() + slave, err := openTerminal(console, syscall.O_RDWR) + if err != nil { + return fmt.Errorf("open terminal %s", err) + } + if err := dupSlave(slave); err != nil { + return fmt.Errorf("dup2 slave %s", err) + } + } - slave, err := openTerminal(console, syscall.O_RDWR) - if err != nil { - return fmt.Errorf("open terminal %s", err) - } - if err := dupSlave(slave); err != nil { - return fmt.Errorf("dup2 slave %s", err) - } if _, err := system.Setsid(); err != nil { return fmt.Errorf("setsid %s", err) } - if err := system.Setctty(); err != nil { - return fmt.Errorf("setctty %s", err) + if console != "" { + if err := system.Setctty(); err != nil { + return fmt.Errorf("setctty %s", err) + } } if err := system.ParentDeathSignal(); err != nil { return fmt.Errorf("parent deth signal %s", err) diff --git a/pkg/libcontainer/nsinit/main.go b/pkg/libcontainer/nsinit/main.go index 6f2825b25b..f66ff0d855 100644 --- a/pkg/libcontainer/nsinit/main.go +++ b/pkg/libcontainer/nsinit/main.go @@ -17,8 +17,11 @@ var ( ) func main() { - console := flag.String("console", "", "Console (pty slave) name") - pipeFd := flag.Int("pipe", 0, "sync pipe fd") + var ( + 
console = flag.String("console", "", "Console (pty slave) name") + tty = flag.Bool("tty", false, "Create a tty") + pipeFd = flag.Int("pipe", 0, "sync pipe fd") + ) flag.Parse() container, err := loadContainer() @@ -41,7 +44,7 @@ func main() { if nspid > 0 { exitCode, err = execinCommand(container, nspid, flag.Args()[1:]) } else { - exitCode, err = execCommand(container, flag.Args()[1:]) + exitCode, err = execCommand(container, *tty, flag.Args()[1:]) } if err != nil { log.Fatal(err) diff --git a/pkg/libcontainer/nsinit/mount.go b/pkg/libcontainer/nsinit/mount.go index 6eb2e09060..9cf69f4184 100644 --- a/pkg/libcontainer/nsinit/mount.go +++ b/pkg/libcontainer/nsinit/mount.go @@ -40,8 +40,10 @@ func setupNewMountNamespace(rootfs, console string, readonly bool) error { if err := setupDev(rootfs); err != nil { return err } - if err := setupPtmx(rootfs, console); err != nil { - return err + if console != "" { + if err := setupPtmx(rootfs, console); err != nil { + return err + } } if err := system.Chdir(rootfs); err != nil { return fmt.Errorf("chdir into %s %s", rootfs, err) From 66baa0653b636180b8b5c57c58f4bbc805aca8c5 Mon Sep 17 00:00:00 2001 From: "Guillaume J. Charmes" Date: Thu, 20 Feb 2014 18:10:30 -0800 Subject: [PATCH 31/81] Make sure to close the pipe upon ctrl-d Docker-DCO-1.1-Signed-off-by: Guillaume J. 
Charmes (github: creack) --- pkg/libcontainer/nsinit/exec.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index b290ace3ce..44d9aff5f7 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -99,7 +99,10 @@ func execCommand(container *libcontainer.Container, tty bool, args []string) (in } defer term.RestoreTerminal(os.Stdin.Fd(), state) } else { - go io.Copy(inPipe, os.Stdin) + go func() { + defer inPipe.Close() + io.Copy(inPipe, os.Stdin) + }() go io.Copy(os.Stdout, outPipe) go io.Copy(os.Stderr, errPipe) } @@ -109,6 +112,7 @@ func execCommand(container *libcontainer.Container, tty bool, args []string) (in return -1, err } } + return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil } From 1316007e54e0c5a25f0d67675df7dec40286f5e8 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Thu, 20 Feb 2014 18:27:42 -0800 Subject: [PATCH 32/81] Make nsinit a proper go pkg and add the main in another dir Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/README.md | 4 +++- pkg/libcontainer/nsinit/exec.go | 6 ++++-- pkg/libcontainer/nsinit/execin.go | 5 +++-- pkg/libcontainer/nsinit/init.go | 6 ++++-- pkg/libcontainer/nsinit/mount.go | 2 +- pkg/libcontainer/nsinit/ns_linux.go | 2 +- pkg/libcontainer/nsinit/{ => nsinit}/main.go | 7 ++++--- 7 files changed, 20 insertions(+), 12 deletions(-) rename pkg/libcontainer/nsinit/{ => nsinit}/main.go (87%) diff --git a/pkg/libcontainer/README.md b/pkg/libcontainer/README.md index 163161c178..3a2a843b69 100644 --- a/pkg/libcontainer/README.md +++ b/pkg/libcontainer/README.md @@ -72,9 +72,11 @@ rootfs and copy a `container.json` file into the directory with your specified c To execution `/bin/bash` in the current directory as a container just run: ```bash -nsinit exec /bin/bash +nsinit -tty exec /bin/bash ``` +If you want a proper tty setup inside the new container 
you must use the `-tty` flag when running nsinit. + If you wish to spawn another process inside the container while your current bash session is running just run the exact same command again to get another bash shell or change the command. If the original process dies, PID 1, all other processes spawned inside the container will also be killed and the namespace will be removed. diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index 44d9aff5f7..9d0f7fff4e 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -1,6 +1,6 @@ // +build linux -package main +package nsinit import ( "fmt" @@ -16,7 +16,9 @@ import ( "syscall" ) -func execCommand(container *libcontainer.Container, tty bool, args []string) (int, error) { +// Exec performes setup outside of a namespace so that a container can be +// executed. Exec is a high level function for working with container namespaces. +func Exec(container *libcontainer.Container, tty bool, args []string) (int, error) { var ( master *os.File console string diff --git a/pkg/libcontainer/nsinit/execin.go b/pkg/libcontainer/nsinit/execin.go index d6224f95e6..85a89905c1 100644 --- a/pkg/libcontainer/nsinit/execin.go +++ b/pkg/libcontainer/nsinit/execin.go @@ -1,4 +1,4 @@ -package main +package nsinit import ( "fmt" @@ -11,7 +11,8 @@ import ( "syscall" ) -func execinCommand(container *libcontainer.Container, nspid int, args []string) (int, error) { +// ExecIn uses an existing pid and joins the pid's namespaces with the new command. 
+func ExecIn(container *libcontainer.Container, nspid int, args []string) (int, error) { for _, ns := range container.Namespaces { if err := system.Unshare(namespaceMap[ns]); err != nil { return -1, err diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index ef7fc4e44c..f80d785bc4 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -1,6 +1,6 @@ // +build linux -package main +package nsinit import ( "fmt" @@ -15,7 +15,9 @@ import ( "syscall" ) -func initCommand(container *libcontainer.Container, console string, pipe io.ReadCloser, args []string) error { +// Init is the init process that first runs inside a new namespace to setup mounts, users, networking, +// and other options required for the new container. +func Init(container *libcontainer.Container, console string, pipe io.ReadCloser, args []string) error { rootfs, err := resolveRootfs() if err != nil { return err diff --git a/pkg/libcontainer/nsinit/mount.go b/pkg/libcontainer/nsinit/mount.go index 9cf69f4184..a73e97e375 100644 --- a/pkg/libcontainer/nsinit/mount.go +++ b/pkg/libcontainer/nsinit/mount.go @@ -1,6 +1,6 @@ // +build linux -package main +package nsinit import ( "fmt" diff --git a/pkg/libcontainer/nsinit/ns_linux.go b/pkg/libcontainer/nsinit/ns_linux.go index 481bdf79df..e42d4b88d7 100644 --- a/pkg/libcontainer/nsinit/ns_linux.go +++ b/pkg/libcontainer/nsinit/ns_linux.go @@ -1,4 +1,4 @@ -package main +package nsinit import ( "github.com/dotcloud/docker/pkg/libcontainer" diff --git a/pkg/libcontainer/nsinit/main.go b/pkg/libcontainer/nsinit/nsinit/main.go similarity index 87% rename from pkg/libcontainer/nsinit/main.go rename to pkg/libcontainer/nsinit/nsinit/main.go index f66ff0d855..9d3c201aa6 100644 --- a/pkg/libcontainer/nsinit/main.go +++ b/pkg/libcontainer/nsinit/nsinit/main.go @@ -5,6 +5,7 @@ import ( "errors" "flag" "github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/pkg/libcontainer/nsinit" "io/ioutil" 
"log" "os" @@ -42,9 +43,9 @@ func main() { } } if nspid > 0 { - exitCode, err = execinCommand(container, nspid, flag.Args()[1:]) + exitCode, err = nsinit.ExecIn(container, nspid, flag.Args()[1:]) } else { - exitCode, err = execCommand(container, *tty, flag.Args()[1:]) + exitCode, err = nsinit.Exec(container, *tty, flag.Args()[1:]) } if err != nil { log.Fatal(err) @@ -54,7 +55,7 @@ func main() { if flag.NArg() < 2 { log.Fatal(ErrWrongArguments) } - if err := initCommand(container, *console, os.NewFile(uintptr(*pipeFd), "pipe"), flag.Args()[1:]); err != nil { + if err := nsinit.Init(container, *console, os.NewFile(uintptr(*pipeFd), "pipe"), flag.Args()[1:]); err != nil { log.Fatal(err) } default: From 6b2e963ce0aef802e60eafe0e895f24abb294a07 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Thu, 20 Feb 2014 18:38:28 -0800 Subject: [PATCH 33/81] Refactor the flag management for main Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/nsinit/exec.go | 13 ++++--------- pkg/libcontainer/nsinit/init.go | 1 - pkg/libcontainer/nsinit/nsinit/main.go | 27 ++++++++++++++++---------- 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index 9d0f7fff4e..d2b87b66ba 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -44,18 +44,14 @@ func Exec(container *libcontainer.Container, tty bool, args []string) (int, erro system.UsetCloseOnExec(r.Fd()) command := createCommand(container, console, r.Fd(), args) - if !tty { - inPipe, err = command.StdinPipe() - if err != nil { + if inPipe, err = command.StdinPipe(); err != nil { return -1, err } - outPipe, err = command.StdoutPipe() - if err != nil { + if outPipe, err = command.StdoutPipe(); err != nil { return -1, err } - errPipe, err = command.StderrPipe() - if err != nil { + if errPipe, err = command.StderrPipe(); err != nil { return -1, err } } @@ -63,7 +59,6 @@ func Exec(container 
*libcontainer.Container, tty bool, args []string) (int, erro if err := command.Start(); err != nil { return -1, err } - if err := writePidFile(command); err != nil { command.Process.Kill() return -1, err @@ -94,6 +89,7 @@ func Exec(container *libcontainer.Container, tty bool, args []string) (int, erro if tty { go io.Copy(os.Stdout, master) go io.Copy(master, os.Stdin) + state, err := setupWindow(master) if err != nil { command.Process.Kill() @@ -114,7 +110,6 @@ func Exec(container *libcontainer.Container, tty bool, args []string) (int, erro return -1, err } } - return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil } diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index f80d785bc4..88a5c3c5d5 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -173,7 +173,6 @@ func setupVethNetwork(config *libcontainer.Network, tempVethName string) error { // has been created and setup func getVethName(pipe io.ReadCloser) (string, error) { defer pipe.Close() - data, err := ioutil.ReadAll(pipe) if err != nil { return "", fmt.Errorf("error reading from stdin %s", err) diff --git a/pkg/libcontainer/nsinit/nsinit/main.go b/pkg/libcontainer/nsinit/nsinit/main.go index 9d3c201aa6..33a7747594 100644 --- a/pkg/libcontainer/nsinit/nsinit/main.go +++ b/pkg/libcontainer/nsinit/nsinit/main.go @@ -12,27 +12,34 @@ import ( "strconv" ) +var ( + console string + tty bool + pipeFd int +) + var ( ErrUnsupported = errors.New("Unsupported method") ErrWrongArguments = errors.New("Wrong argument count") ) -func main() { - var ( - console = flag.String("console", "", "Console (pty slave) name") - tty = flag.Bool("tty", false, "Create a tty") - pipeFd = flag.Int("pipe", 0, "sync pipe fd") - ) - flag.Parse() +func init() { + flag.StringVar(&console, "console", "", "console (pty slave) path") + flag.BoolVar(&tty, "tty", false, "create a tty") + flag.IntVar(&pipeFd, "pipe", 0, "sync pipe fd") + flag.Parse() +} + +func main() { 
container, err := loadContainer() if err != nil { log.Fatal(err) } - if flag.NArg() < 1 { log.Fatal(ErrWrongArguments) } + switch flag.Arg(0) { case "exec": // this is executed outside of the namespace in the cwd var exitCode int @@ -45,7 +52,7 @@ func main() { if nspid > 0 { exitCode, err = nsinit.ExecIn(container, nspid, flag.Args()[1:]) } else { - exitCode, err = nsinit.Exec(container, *tty, flag.Args()[1:]) + exitCode, err = nsinit.Exec(container, tty, flag.Args()[1:]) } if err != nil { log.Fatal(err) @@ -55,7 +62,7 @@ func main() { if flag.NArg() < 2 { log.Fatal(ErrWrongArguments) } - if err := nsinit.Init(container, *console, os.NewFile(uintptr(*pipeFd), "pipe"), flag.Args()[1:]); err != nil { + if err := nsinit.Init(container, console, os.NewFile(uintptr(pipeFd), "pipe"), flag.Args()[1:]); err != nil { log.Fatal(err) } default: From 7f247e7006761ac8922a58651a76b194a4655ffa Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 21 Feb 2014 13:53:11 -0800 Subject: [PATCH 34/81] Move tty into container.json Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/README.md | 7 +++---- pkg/libcontainer/container.go | 1 + pkg/libcontainer/container.json | 3 ++- pkg/libcontainer/nsinit/exec.go | 8 ++++---- pkg/libcontainer/nsinit/nsinit/main.go | 4 +--- 5 files changed, 11 insertions(+), 12 deletions(-) diff --git a/pkg/libcontainer/README.md b/pkg/libcontainer/README.md index 3a2a843b69..89a4ec0c48 100644 --- a/pkg/libcontainer/README.md +++ b/pkg/libcontainer/README.md @@ -17,6 +17,7 @@ Sample `container.json` file: ```json { "hostname": "koye", + "tty": true, "environment": [ "HOME=/", "PATH=PATH=$PATH:/bin:/usr/bin:/sbin:/usr/sbin", @@ -55,7 +56,7 @@ Sample `container.json` file: "cgroups": { "name": "docker-koye", "parent": "docker", - "memory": 524800 + "memory": 5248000 } } ``` @@ -72,11 +73,9 @@ rootfs and copy a `container.json` file into the directory with your specified c To execution `/bin/bash` in the current 
directory as a container just run: ```bash -nsinit -tty exec /bin/bash +nsinit exec /bin/bash ``` -If you want a proper tty setup inside the new container you must use the `-tty` flag when running nsinit. - If you wish to spawn another process inside the container while your current bash session is running just run the exact same command again to get another bash shell or change the command. If the original process dies, PID 1, all other processes spawned inside the container will also be killed and the namespace will be removed. diff --git a/pkg/libcontainer/container.go b/pkg/libcontainer/container.go index e6e4b4747e..3c1b62b65a 100644 --- a/pkg/libcontainer/container.go +++ b/pkg/libcontainer/container.go @@ -12,6 +12,7 @@ type Container struct { User string `json:"user,omitempty"` // user to execute the process as WorkingDir string `json:"working_dir,omitempty"` // current working directory Env []string `json:"environment,omitempty"` // environment to set + Tty bool `json:"tty,omitempty"` // setup a proper tty or not Namespaces Namespaces `json:"namespaces,omitempty"` // namespaces to apply Capabilities Capabilities `json:"capabilities,omitempty"` // capabilities to drop Network *Network `json:"network,omitempty"` // nil for host's network stack diff --git a/pkg/libcontainer/container.json b/pkg/libcontainer/container.json index c1a07dc55b..07e52df428 100644 --- a/pkg/libcontainer/container.json +++ b/pkg/libcontainer/container.json @@ -1,5 +1,6 @@ { "hostname": "koye", + "tty": true, "environment": [ "HOME=/", "PATH=PATH=$PATH:/bin:/usr/bin:/sbin:/usr/sbin", @@ -38,6 +39,6 @@ "cgroups": { "name": "docker-koye", "parent": "docker", - "memory": 524800 + "memory": 5248000 } } diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index d2b87b66ba..e2adf3d4f2 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -18,7 +18,7 @@ import ( // Exec performes setup outside of a namespace so that a container can be 
// executed. Exec is a high level function for working with container namespaces. -func Exec(container *libcontainer.Container, tty bool, args []string) (int, error) { +func Exec(container *libcontainer.Container, args []string) (int, error) { var ( master *os.File console string @@ -28,7 +28,7 @@ func Exec(container *libcontainer.Container, tty bool, args []string) (int, erro outPipe, errPipe io.ReadCloser ) - if tty { + if container.Tty { master, console, err = createMasterAndConsole() if err != nil { return -1, err @@ -44,7 +44,7 @@ func Exec(container *libcontainer.Container, tty bool, args []string) (int, erro system.UsetCloseOnExec(r.Fd()) command := createCommand(container, console, r.Fd(), args) - if !tty { + if !container.Tty { if inPipe, err = command.StdinPipe(); err != nil { return -1, err } @@ -86,7 +86,7 @@ func Exec(container *libcontainer.Container, tty bool, args []string) (int, erro w.Close() r.Close() - if tty { + if container.Tty { go io.Copy(os.Stdout, master) go io.Copy(master, os.Stdin) diff --git a/pkg/libcontainer/nsinit/nsinit/main.go b/pkg/libcontainer/nsinit/nsinit/main.go index 33a7747594..6508a3e9dd 100644 --- a/pkg/libcontainer/nsinit/nsinit/main.go +++ b/pkg/libcontainer/nsinit/nsinit/main.go @@ -14,7 +14,6 @@ import ( var ( console string - tty bool pipeFd int ) @@ -25,7 +24,6 @@ var ( func init() { flag.StringVar(&console, "console", "", "console (pty slave) path") - flag.BoolVar(&tty, "tty", false, "create a tty") flag.IntVar(&pipeFd, "pipe", 0, "sync pipe fd") flag.Parse() @@ -52,7 +50,7 @@ func main() { if nspid > 0 { exitCode, err = nsinit.ExecIn(container, nspid, flag.Args()[1:]) } else { - exitCode, err = nsinit.Exec(container, tty, flag.Args()[1:]) + exitCode, err = nsinit.Exec(container, flag.Args()[1:]) } if err != nil { log.Fatal(err) From 50c752fcb06497e9e597049a1007c53d77032d17 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 21 Feb 2014 14:49:55 -0800 Subject: [PATCH 35/81] Add good logging support to both 
sides Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/nsinit/exec.go | 35 +++++++++++++++++++---- pkg/libcontainer/nsinit/init.go | 23 ++++++++------- pkg/libcontainer/nsinit/nsinit/main.go | 39 ++++++++++++++++++++++---- 3 files changed, 76 insertions(+), 21 deletions(-) diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index e2adf3d4f2..24e722a22f 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -11,6 +11,7 @@ import ( "github.com/dotcloud/docker/pkg/term" "io" "io/ioutil" + "log" "os" "os/exec" "syscall" @@ -18,7 +19,7 @@ import ( // Exec performes setup outside of a namespace so that a container can be // executed. Exec is a high level function for working with container namespaces. -func Exec(container *libcontainer.Container, args []string) (int, error) { +func Exec(container *libcontainer.Container, logFile string, args []string) (int, error) { var ( master *os.File console string @@ -29,6 +30,7 @@ func Exec(container *libcontainer.Container, args []string) (int, error) { ) if container.Tty { + log.Printf("setting up master and console") master, console, err = createMasterAndConsole() if err != nil { return -1, err @@ -43,8 +45,9 @@ func Exec(container *libcontainer.Container, args []string) (int, error) { } system.UsetCloseOnExec(r.Fd()) - command := createCommand(container, console, r.Fd(), args) + command := createCommand(container, console, logFile, r.Fd(), args) if !container.Tty { + log.Printf("opening pipes on command") if inPipe, err = command.StdinPipe(); err != nil { return -1, err } @@ -56,9 +59,11 @@ func Exec(container *libcontainer.Container, args []string) (int, error) { } } + log.Printf("staring init") if err := command.Start(); err != nil { return -1, err } + log.Printf("writting state file") if err := writePidFile(command); err != nil { command.Process.Kill() return -1, err @@ -68,6 +73,7 @@ func Exec(container 
*libcontainer.Container, args []string) (int, error) { // Do this before syncing with child so that no children // can escape the cgroup if container.Cgroups != nil { + log.Printf("setting up cgroups") if err := container.Cgroups.Apply(command.Process.Pid); err != nil { command.Process.Kill() return -1, err @@ -75,18 +81,22 @@ func Exec(container *libcontainer.Container, args []string) (int, error) { } if container.Network != nil { - vethPair, err := initializeContainerVeth(container.Network.Bridge, command.Process.Pid) + log.Printf("creating veth pair") + vethPair, err := initializeContainerVeth(container.Network.Bridge, container.Network.Mtu, command.Process.Pid) if err != nil { return -1, err } + log.Printf("sending %s as veth pair name", vethPair) sendVethName(w, vethPair) } // Sync with child + log.Printf("closing sync pipes") w.Close() r.Close() if container.Tty { + log.Printf("starting copy for tty") go io.Copy(os.Stdout, master) go io.Copy(master, os.Stdin) @@ -97,6 +107,7 @@ func Exec(container *libcontainer.Container, args []string) (int, error) { } defer term.RestoreTerminal(os.Stdin.Fd(), state) } else { + log.Printf("starting copy for std pipes") go func() { defer inPipe.Close() io.Copy(inPipe, os.Stdin) @@ -105,11 +116,13 @@ func Exec(container *libcontainer.Container, args []string) (int, error) { go io.Copy(os.Stderr, errPipe) } + log.Printf("waiting on process") if err := command.Wait(); err != nil { if _, ok := err.(*exec.ExitError); !ok { return -1, err } } + log.Printf("process ended") return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil } @@ -126,17 +139,22 @@ func sendVethName(pipe io.Writer, name string) { // Then will with set the other side of the veth pair into the container's namespaced // using the pid and returns the veth's interface name to provide to the container to // finish setting up the interface inside the namespace -func initializeContainerVeth(bridge string, nspid int) (string, error) { +func 
initializeContainerVeth(bridge string, mtu, nspid int) (string, error) { name1, name2, err := createVethPair() if err != nil { return "", err } + log.Printf("veth pair created %s <> %s", name1, name2) if err := network.SetInterfaceMaster(name1, bridge); err != nil { return "", err } + if err := network.SetMtu(name1, mtu); err != nil { + return "", err + } if err := network.InterfaceUp(name1); err != nil { return "", err } + log.Printf("setting %s inside %d namespace", name2, nspid) if err := network.SetInterfaceInNamespacePid(name2, nspid); err != nil { return "", err } @@ -200,8 +218,13 @@ func deletePidFile() error { // createCommand will return an exec.Cmd with the Cloneflags set to the proper namespaces // defined on the container's configuration and use the current binary as the init with the // args provided -func createCommand(container *libcontainer.Container, console string, pipe uintptr, args []string) *exec.Cmd { - command := exec.Command("nsinit", append([]string{"-console", console, "-pipe", fmt.Sprint(pipe), "init"}, args...)...) +func createCommand(container *libcontainer.Container, console, logFile string, pipe uintptr, args []string) *exec.Cmd { + command := exec.Command("nsinit", append([]string{ + "-console", console, + "-pipe", fmt.Sprint(pipe), + "-log", logFile, + "init"}, args...)...) + command.SysProcAttr = &syscall.SysProcAttr{ Cloneflags: uintptr(getNamespaceFlags(container.Namespaces)), } diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index 88a5c3c5d5..8fc5f3d05c 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -10,6 +10,7 @@ import ( "github.com/dotcloud/docker/pkg/system" "io" "io/ioutil" + "log" "os" "path/filepath" "syscall" @@ -17,19 +18,23 @@ import ( // Init is the init process that first runs inside a new namespace to setup mounts, users, networking, // and other options required for the new container. 
-func Init(container *libcontainer.Container, console string, pipe io.ReadCloser, args []string) error { - rootfs, err := resolveRootfs() +func Init(container *libcontainer.Container, uncleanRootfs, console string, pipe io.ReadCloser, args []string) error { + rootfs, err := resolveRootfs(uncleanRootfs) if err != nil { return err } + log.Printf("initializing namespace at %s", rootfs) // We always read this as it is a way to sync with the parent as well tempVethName, err := getVethName(pipe) if err != nil { return err } - + if tempVethName != "" { + log.Printf("received veth name %s", tempVethName) + } if console != "" { + log.Printf("setting up console for %s", console) // close pipes so that we can replace it with the pty os.Stdin.Close() os.Stdout.Close() @@ -42,7 +47,6 @@ func Init(container *libcontainer.Container, console string, pipe io.ReadCloser, return fmt.Errorf("dup2 slave %s", err) } } - if _, err := system.Setsid(); err != nil { return fmt.Errorf("setsid %s", err) } @@ -63,9 +67,11 @@ func Init(container *libcontainer.Container, console string, pipe io.ReadCloser, if err := system.Sethostname(container.Hostname); err != nil { return fmt.Errorf("sethostname %s", err) } + log.Printf("dropping capabilities") if err := capabilities.DropCapabilities(container); err != nil { return fmt.Errorf("drop capabilities %s", err) } + log.Printf("setting user in namespace") if err := setupUser(container); err != nil { return fmt.Errorf("setup user %s", err) } @@ -74,6 +80,7 @@ func Init(container *libcontainer.Container, console string, pipe io.ReadCloser, return fmt.Errorf("chdir to %s %s", container.WorkingDir, err) } } + log.Printf("execing %s goodbye", args[0]) if err := system.Exec(args[0], args[0:], container.Env); err != nil { return fmt.Errorf("exec %s", err) } @@ -82,12 +89,8 @@ func Init(container *libcontainer.Container, console string, pipe io.ReadCloser, // resolveRootfs ensures that the current working directory is // not a symlink and returns the 
absolute path to the rootfs -func resolveRootfs() (string, error) { - cwd, err := os.Getwd() - if err != nil { - return "", err - } - rootfs, err := filepath.Abs(cwd) +func resolveRootfs(uncleanRootfs string) (string, error) { + rootfs, err := filepath.Abs(uncleanRootfs) if err != nil { return "", err } diff --git a/pkg/libcontainer/nsinit/nsinit/main.go b/pkg/libcontainer/nsinit/nsinit/main.go index 6508a3e9dd..0873c09fe0 100644 --- a/pkg/libcontainer/nsinit/nsinit/main.go +++ b/pkg/libcontainer/nsinit/nsinit/main.go @@ -6,6 +6,7 @@ import ( "flag" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/nsinit" + "io" "io/ioutil" "log" "os" @@ -15,6 +16,7 @@ import ( var ( console string pipeFd int + logFile string ) var ( @@ -24,22 +26,27 @@ var ( func init() { flag.StringVar(&console, "console", "", "console (pty slave) path") + flag.StringVar(&logFile, "log", "none", "log options (none, stderr, or a file path)") flag.IntVar(&pipeFd, "pipe", 0, "sync pipe fd") flag.Parse() } func main() { + if flag.NArg() < 1 { + log.Fatal(ErrWrongArguments) + } container, err := loadContainer() if err != nil { log.Fatal(err) } - if flag.NArg() < 1 { - log.Fatal(ErrWrongArguments) + if err := setupLogging(); err != nil { + log.Fatal(err) } - switch flag.Arg(0) { case "exec": // this is executed outside of the namespace in the cwd + log.SetPrefix("[nsinit exec] ") + var exitCode int nspid, err := readPid() if err != nil { @@ -50,17 +57,22 @@ func main() { if nspid > 0 { exitCode, err = nsinit.ExecIn(container, nspid, flag.Args()[1:]) } else { - exitCode, err = nsinit.Exec(container, flag.Args()[1:]) + exitCode, err = nsinit.Exec(container, logFile, flag.Args()[1:]) } if err != nil { log.Fatal(err) } os.Exit(exitCode) case "init": // this is executed inside of the namespace to setup the container + log.SetPrefix("[nsinit init] ") + cwd, err := os.Getwd() + if err != nil { + log.Fatal(err) + } if flag.NArg() < 2 { log.Fatal(ErrWrongArguments) } - 
if err := nsinit.Init(container, console, os.NewFile(uintptr(pipeFd), "pipe"), flag.Args()[1:]); err != nil { + if err := nsinit.Init(container, cwd, console, os.NewFile(uintptr(pipeFd), "pipe"), flag.Args()[1:]); err != nil { log.Fatal(err) } default: @@ -93,3 +105,20 @@ func readPid() (int, error) { } return pid, nil } + +func setupLogging() (err error) { + var writer io.Writer + switch logFile { + case "stderr": + writer = os.Stderr + case "none", "": + writer = ioutil.Discard + default: + writer, err = os.OpenFile(logFile, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0755) + if err != nil { + return err + } + } + log.SetOutput(writer) + return nil +} From ba025cb75cceaa8536d0d512889ea86f13349950 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 21 Feb 2014 15:32:50 -0800 Subject: [PATCH 36/81] User os.Args[0] as name to reexec Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/nsinit/exec.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index 24e722a22f..ba548a2bd7 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -219,7 +219,9 @@ func deletePidFile() error { // defined on the container's configuration and use the current binary as the init with the // args provided func createCommand(container *libcontainer.Container, console, logFile string, pipe uintptr, args []string) *exec.Cmd { - command := exec.Command("nsinit", append([]string{ + // get our binary name so we can always reexec ourself + name := os.Args[0] + command := exec.Command(name, append([]string{ "-console", console, "-pipe", fmt.Sprint(pipe), "-log", logFile, From a352ecb01a788eff3446fe12191ca0434fce1eed Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 21 Feb 2014 16:17:18 -0800 Subject: [PATCH 37/81] Use lookup path for init Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/nsinit/exec.go | 
1 + pkg/libcontainer/nsinit/init.go | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index ba548a2bd7..80fe8495ff 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -230,5 +230,6 @@ func createCommand(container *libcontainer.Container, console, logFile string, p command.SysProcAttr = &syscall.SysProcAttr{ Cloneflags: uintptr(getNamespaceFlags(container.Namespaces)), } + command.Env = container.Env return command } diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index 8fc5f3d05c..04716ba645 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -12,6 +12,7 @@ import ( "io/ioutil" "log" "os" + "os/exec" "path/filepath" "syscall" ) @@ -80,8 +81,13 @@ func Init(container *libcontainer.Container, uncleanRootfs, console string, pipe return fmt.Errorf("chdir to %s %s", container.WorkingDir, err) } } - log.Printf("execing %s goodbye", args[0]) - if err := system.Exec(args[0], args[0:], container.Env); err != nil { + name, err := exec.LookPath(args[0]) + if err != nil { + return err + } + + log.Printf("execing %s goodbye", name) + if err := system.Exec(name, args[0:], container.Env); err != nil { return fmt.Errorf("exec %s", err) } panic("unreachable") From c8fd81c27821576f339ccf4fd85c47375ba34042 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 21 Feb 2014 16:28:43 -0800 Subject: [PATCH 38/81] Pass pipes into Exec function Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/nsinit/exec.go | 14 +++++++------- pkg/libcontainer/nsinit/nsinit/main.go | 4 +++- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index 80fe8495ff..98f5209f03 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -19,7 +19,7 @@ import ( // Exec performes 
setup outside of a namespace so that a container can be // executed. Exec is a high level function for working with container namespaces. -func Exec(container *libcontainer.Container, logFile string, args []string) (int, error) { +func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io.Writer, logFile string, args []string) (int, error) { var ( master *os.File console string @@ -97,23 +97,23 @@ func Exec(container *libcontainer.Container, logFile string, args []string) (int if container.Tty { log.Printf("starting copy for tty") - go io.Copy(os.Stdout, master) - go io.Copy(master, os.Stdin) + go io.Copy(stdout, master) + go io.Copy(master, stdin) state, err := setupWindow(master) if err != nil { command.Process.Kill() return -1, err } - defer term.RestoreTerminal(os.Stdin.Fd(), state) + defer term.RestoreTerminal(uintptr(syscall.Stdin), state) } else { log.Printf("starting copy for std pipes") go func() { defer inPipe.Close() - io.Copy(inPipe, os.Stdin) + io.Copy(inPipe, stdin) }() - go io.Copy(os.Stdout, outPipe) - go io.Copy(os.Stderr, errPipe) + go io.Copy(stdout, outPipe) + go io.Copy(stderr, errPipe) } log.Printf("waiting on process") diff --git a/pkg/libcontainer/nsinit/nsinit/main.go b/pkg/libcontainer/nsinit/nsinit/main.go index 0873c09fe0..e6e3827713 100644 --- a/pkg/libcontainer/nsinit/nsinit/main.go +++ b/pkg/libcontainer/nsinit/nsinit/main.go @@ -57,7 +57,9 @@ func main() { if nspid > 0 { exitCode, err = nsinit.ExecIn(container, nspid, flag.Args()[1:]) } else { - exitCode, err = nsinit.Exec(container, logFile, flag.Args()[1:]) + exitCode, err = nsinit.Exec(container, + os.Stdin, os.Stdout, os.Stderr, + logFile, flag.Args()[1:]) } if err != nil { log.Fatal(err) From 332755b99d345a8ffbf4fb636ca8fed604a233c0 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 21 Feb 2014 16:40:32 -0800 Subject: [PATCH 39/81] Pass tty master to Exec Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- 
pkg/libcontainer/nsinit/exec.go | 3 +-- pkg/libcontainer/nsinit/nsinit/main.go | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index 98f5209f03..3622196b78 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -19,9 +19,8 @@ import ( // Exec performes setup outside of a namespace so that a container can be // executed. Exec is a high level function for working with container namespaces. -func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io.Writer, logFile string, args []string) (int, error) { +func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io.Writer, master *os.File, logFile string, args []string) (int, error) { var ( - master *os.File console string err error diff --git a/pkg/libcontainer/nsinit/nsinit/main.go b/pkg/libcontainer/nsinit/nsinit/main.go index e6e3827713..28d42d4643 100644 --- a/pkg/libcontainer/nsinit/nsinit/main.go +++ b/pkg/libcontainer/nsinit/nsinit/main.go @@ -58,7 +58,7 @@ func main() { exitCode, err = nsinit.ExecIn(container, nspid, flag.Args()[1:]) } else { exitCode, err = nsinit.Exec(container, - os.Stdin, os.Stdout, os.Stderr, + os.Stdin, os.Stdout, os.Stderr, nil, logFile, flag.Args()[1:]) } if err != nil { From 2419e63d243255ef38f16799ffdc64084aa18fe4 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 21 Feb 2014 17:11:57 -0800 Subject: [PATCH 40/81] Initial commit of libcontainer running docker Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- container.go | 1 + execdriver/namespaces/default_template.go | 41 +++ execdriver/namespaces/driver.go | 349 ++++++++++++++++++++++ execdriver/namespaces/term.go | 26 ++ pkg/libcontainer/nsinit/exec.go | 2 +- pkg/libcontainer/nsinit/ns_linux.go | 2 +- runtime.go | 5 +- 7 files changed, 422 insertions(+), 4 deletions(-) create mode 100644 execdriver/namespaces/default_template.go create mode 100644 
execdriver/namespaces/driver.go create mode 100644 execdriver/namespaces/term.go diff --git a/container.go b/container.go index ca53bb57c7..76e51cdad3 100644 --- a/container.go +++ b/container.go @@ -530,6 +530,7 @@ func (container *Container) Start() (err error) { } populateCommand(container) + container.command.Env = env // Setup logging of stdout and stderr to disk if err := container.runtime.LogToDisk(container.stdout, container.logPath("json"), "stdout"); err != nil { diff --git a/execdriver/namespaces/default_template.go b/execdriver/namespaces/default_template.go new file mode 100644 index 0000000000..79b6ac1c11 --- /dev/null +++ b/execdriver/namespaces/default_template.go @@ -0,0 +1,41 @@ +package namespaces + +import ( + "github.com/dotcloud/docker/pkg/cgroups" + "github.com/dotcloud/docker/pkg/libcontainer" +) + +// getDefaultTemplate returns the docker default for +// the libcontainer configuration file +func getDefaultTemplate() *libcontainer.Container { + return &libcontainer.Container{ + Capabilities: libcontainer.Capabilities{ + libcontainer.CAP_SETPCAP, + libcontainer.CAP_SYS_MODULE, + libcontainer.CAP_SYS_RAWIO, + libcontainer.CAP_SYS_PACCT, + libcontainer.CAP_SYS_ADMIN, + libcontainer.CAP_SYS_NICE, + libcontainer.CAP_SYS_RESOURCE, + libcontainer.CAP_SYS_TIME, + libcontainer.CAP_SYS_TTY_CONFIG, + libcontainer.CAP_MKNOD, + libcontainer.CAP_AUDIT_WRITE, + libcontainer.CAP_AUDIT_CONTROL, + libcontainer.CAP_MAC_ADMIN, + libcontainer.CAP_MAC_OVERRIDE, + libcontainer.CAP_NET_ADMIN, + }, + Namespaces: libcontainer.Namespaces{ + libcontainer.CLONE_NEWIPC, + libcontainer.CLONE_NEWNET, + libcontainer.CLONE_NEWNS, + libcontainer.CLONE_NEWPID, + libcontainer.CLONE_NEWUTS, + }, + Cgroups: &cgroups.Cgroup{ + Name: "docker", + DeviceAccess: false, + }, + } +} diff --git a/execdriver/namespaces/driver.go b/execdriver/namespaces/driver.go new file mode 100644 index 0000000000..e243c64703 --- /dev/null +++ b/execdriver/namespaces/driver.go @@ -0,0 +1,349 @@ +package 
namespaces + +import ( + "encoding/json" + "errors" + "fmt" + "github.com/dotcloud/docker/execdriver" + "github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/pkg/libcontainer/network" + "github.com/dotcloud/docker/pkg/libcontainer/nsinit" + "github.com/dotcloud/docker/pkg/libcontainer/utils" + "github.com/dotcloud/docker/pkg/system" + "github.com/dotcloud/docker/pkg/term" + "io" + "io/ioutil" + "log" + "os" + "os/exec" + "path/filepath" + "strings" + "syscall" +) + +const ( + DriverName = "namespaces" + Version = "0.1" +) + +var ( + ErrNotSupported = errors.New("not supported") +) + +func init() { + execdriver.RegisterInitFunc(DriverName, func(args *execdriver.InitArgs) error { + return nil + }) +} + +type driver struct { +} + +func NewDriver() (*driver, error) { + return &driver{}, nil +} + +func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) { + container := createContainer(c) + if err := writeContainerFile(container, c.Rootfs); err != nil { + return -1, err + } + + var ( + console string + master *os.File + err error + + inPipe io.WriteCloser + outPipe, errPipe io.ReadCloser + ) + + if container.Tty { + log.Printf("setting up master and console") + master, console, err = createMasterAndConsole() + if err != nil { + return -1, err + } + } + c.Terminal = NewTerm(pipes, master) + + // create a pipe so that we can syncronize with the namespaced process and + // pass the veth name to the child + r, w, err := os.Pipe() + if err != nil { + return -1, err + } + system.UsetCloseOnExec(r.Fd()) + + args := append([]string{c.Entrypoint}, c.Arguments...) 
+ createCommand(c, container, console, "/nsinit.logs", r.Fd(), args) + command := c + + if !container.Tty { + log.Printf("opening pipes on command") + if inPipe, err = command.StdinPipe(); err != nil { + return -1, err + } + if outPipe, err = command.StdoutPipe(); err != nil { + return -1, err + } + if errPipe, err = command.StderrPipe(); err != nil { + return -1, err + } + } + + log.Printf("staring init") + if err := command.Start(); err != nil { + return -1, err + } + log.Printf("writting state file") + if err := writePidFile(c.Rootfs, command.Process.Pid); err != nil { + command.Process.Kill() + return -1, err + } + defer deletePidFile(c.Rootfs) + + // Do this before syncing with child so that no children + // can escape the cgroup + if container.Cgroups != nil { + log.Printf("setting up cgroups") + if err := container.Cgroups.Apply(command.Process.Pid); err != nil { + command.Process.Kill() + return -1, err + } + } + + if container.Network != nil { + log.Printf("creating veth pair") + vethPair, err := initializeContainerVeth(container.Network.Bridge, container.Network.Mtu, command.Process.Pid) + if err != nil { + return -1, err + } + log.Printf("sending %s as veth pair name", vethPair) + sendVethName(w, vethPair) + } + + // Sync with child + log.Printf("closing sync pipes") + w.Close() + r.Close() + + if container.Tty { + log.Printf("starting copy for tty") + go io.Copy(pipes.Stdout, master) + if pipes.Stdin != nil { + go io.Copy(master, pipes.Stdin) + } + + /* + state, err := setupWindow(master) + if err != nil { + command.Process.Kill() + return -1, err + } + defer term.RestoreTerminal(uintptr(syscall.Stdin), state) + */ + } else { + log.Printf("starting copy for std pipes") + if pipes.Stdin != nil { + go func() { + defer inPipe.Close() + io.Copy(inPipe, pipes.Stdin) + }() + } + go io.Copy(pipes.Stdout, outPipe) + go io.Copy(pipes.Stderr, errPipe) + } + + if startCallback != nil { + startCallback(c) + } + + log.Printf("waiting on process") + if err := 
command.Wait(); err != nil { + if _, ok := err.(*exec.ExitError); !ok { + return -1, err + } + } + log.Printf("process ended") + return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil +} + +func (d *driver) Kill(p *execdriver.Command, sig int) error { + return p.Process.Kill() +} + +func (d *driver) Restore(c *execdriver.Command) error { + return ErrNotSupported +} + +func (d *driver) Info(id string) execdriver.Info { + return nil +} + +func (d *driver) Name() string { + return fmt.Sprintf("%s-%s", DriverName, Version) +} + +func (d *driver) GetPidsForContainer(id string) ([]int, error) { + return nil, ErrNotSupported +} + +func writeContainerFile(container *libcontainer.Container, rootfs string) error { + data, err := json.Marshal(container) + if err != nil { + return err + } + return ioutil.WriteFile(filepath.Join(rootfs, "container.json"), data, 0755) +} + +func getEnv(key string, env []string) string { + for _, pair := range env { + parts := strings.Split(pair, "=") + if parts[0] == key { + return parts[1] + } + } + return "" +} + +// sendVethName writes the veth pair name to the child's stdin then closes the +// pipe so that the child stops waiting for more data +func sendVethName(pipe io.Writer, name string) { + fmt.Fprint(pipe, name) +} + +// initializeContainerVeth will create a veth pair and setup the host's +// side of the pair by setting the specified bridge as the master and bringing +// up the interface. 
+// +// Then will with set the other side of the veth pair into the container's namespaced +// using the pid and returns the veth's interface name to provide to the container to +// finish setting up the interface inside the namespace +func initializeContainerVeth(bridge string, mtu, nspid int) (string, error) { + name1, name2, err := createVethPair() + if err != nil { + return "", err + } + log.Printf("veth pair created %s <> %s", name1, name2) + if err := network.SetInterfaceMaster(name1, bridge); err != nil { + return "", err + } + if err := network.SetMtu(name1, mtu); err != nil { + return "", err + } + if err := network.InterfaceUp(name1); err != nil { + return "", err + } + log.Printf("setting %s inside %d namespace", name2, nspid) + if err := network.SetInterfaceInNamespacePid(name2, nspid); err != nil { + return "", err + } + return name2, nil +} + +func setupWindow(master *os.File) (*term.State, error) { + ws, err := term.GetWinsize(os.Stdin.Fd()) + if err != nil { + return nil, err + } + if err := term.SetWinsize(master.Fd(), ws); err != nil { + return nil, err + } + return term.SetRawTerminal(os.Stdin.Fd()) +} + +// createMasterAndConsole will open /dev/ptmx on the host and retreive the +// pts name for use as the pty slave inside the container +func createMasterAndConsole() (*os.File, string, error) { + master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) + if err != nil { + return nil, "", err + } + console, err := system.Ptsname(master) + if err != nil { + return nil, "", err + } + if err := system.Unlockpt(master); err != nil { + return nil, "", err + } + return master, console, nil +} + +// createVethPair will automatically generage two random names for +// the veth pair and ensure that they have been created +func createVethPair() (name1 string, name2 string, err error) { + name1, err = utils.GenerateRandomName("dock", 4) + if err != nil { + return + } + name2, err = utils.GenerateRandomName("dock", 4) + if 
err != nil { + return + } + if err = network.CreateVethPair(name1, name2); err != nil { + return + } + return +} + +// writePidFile writes the namespaced processes pid to .nspid in the rootfs for the container +func writePidFile(rootfs string, pid int) error { + return ioutil.WriteFile(filepath.Join(rootfs, ".nspid"), []byte(fmt.Sprint(pid)), 0655) +} + +func deletePidFile(rootfs string) error { + return os.Remove(filepath.Join(rootfs, ".nspid")) +} + +// createCommand will return an exec.Cmd with the Cloneflags set to the proper namespaces +// defined on the container's configuration and use the current binary as the init with the +// args provided +func createCommand(c *execdriver.Command, container *libcontainer.Container, + console, logFile string, pipe uintptr, args []string) { + + aname, _ := exec.LookPath("nsinit") + c.Path = aname + c.Args = append([]string{ + aname, + "-console", console, + "-pipe", fmt.Sprint(pipe), + "-log", logFile, + "init", + }, args...) + c.SysProcAttr = &syscall.SysProcAttr{ + Cloneflags: uintptr(nsinit.GetNamespaceFlags(container.Namespaces)), + } + c.Env = container.Env + c.Dir = c.Rootfs +} + +func createContainer(c *execdriver.Command) *libcontainer.Container { + container := getDefaultTemplate() + + container.Hostname = getEnv("HOSTNAME", c.Env) + container.Tty = c.Tty + container.User = c.User + container.WorkingDir = c.WorkingDir + container.Env = c.Env + + container.Env = append(container.Env, "container=docker") + + if c.Network != nil { + container.Network = &libcontainer.Network{ + Mtu: c.Network.Mtu, + Address: fmt.Sprintf("%s/%d", c.Network.IPAddress, c.Network.IPPrefixLen), + Gateway: c.Network.Gateway, + Bridge: c.Network.Bridge, + } + } + if c.Privileged { + container.Capabilities = nil + } + if c.Resources != nil { + container.Cgroups.CpuShares = c.Resources.CpuShares + container.Cgroups.Memory = c.Resources.Memory + container.Cgroups.MemorySwap = c.Resources.MemorySwap + } + return container +} diff --git 
a/execdriver/namespaces/term.go b/execdriver/namespaces/term.go new file mode 100644 index 0000000000..682c6a27b1 --- /dev/null +++ b/execdriver/namespaces/term.go @@ -0,0 +1,26 @@ +package namespaces + +import ( + "github.com/dotcloud/docker/execdriver" + "github.com/dotcloud/docker/pkg/term" + "os" +) + +type NsinitTerm struct { + master *os.File +} + +func NewTerm(pipes *execdriver.Pipes, master *os.File) *NsinitTerm { + return &NsinitTerm{master} +} + +func (t *NsinitTerm) Close() error { + return t.master.Close() +} + +func (t *NsinitTerm) Resize(h, w int) error { + if t.master != nil { + return term.SetWinsize(t.master.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)}) + } + return nil +} diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index 3622196b78..6671ebe129 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -227,7 +227,7 @@ func createCommand(container *libcontainer.Container, console, logFile string, p "init"}, args...)...) 
command.SysProcAttr = &syscall.SysProcAttr{ - Cloneflags: uintptr(getNamespaceFlags(container.Namespaces)), + Cloneflags: uintptr(GetNamespaceFlags(container.Namespaces)), } command.Env = container.Env return command diff --git a/pkg/libcontainer/nsinit/ns_linux.go b/pkg/libcontainer/nsinit/ns_linux.go index e42d4b88d7..58af24798f 100644 --- a/pkg/libcontainer/nsinit/ns_linux.go +++ b/pkg/libcontainer/nsinit/ns_linux.go @@ -28,7 +28,7 @@ var namespaceFileMap = map[libcontainer.Namespace]string{ // getNamespaceFlags parses the container's Namespaces options to set the correct // flags on clone, unshare, and setns -func getNamespaceFlags(namespaces libcontainer.Namespaces) (flag int) { +func GetNamespaceFlags(namespaces libcontainer.Namespaces) (flag int) { for _, ns := range namespaces { flag |= namespaceMap[ns] } diff --git a/runtime.go b/runtime.go index a38109cca0..9f16d6213b 100644 --- a/runtime.go +++ b/runtime.go @@ -7,7 +7,8 @@ import ( "github.com/dotcloud/docker/dockerversion" "github.com/dotcloud/docker/engine" "github.com/dotcloud/docker/execdriver" - "github.com/dotcloud/docker/execdriver/lxc" + _ "github.com/dotcloud/docker/execdriver/lxc" + "github.com/dotcloud/docker/execdriver/namespaces" "github.com/dotcloud/docker/graphdriver" "github.com/dotcloud/docker/graphdriver/aufs" _ "github.com/dotcloud/docker/graphdriver/btrfs" @@ -703,7 +704,7 @@ func NewRuntimeFromDirectory(config *DaemonConfig, eng *engine.Engine) (*Runtime sysInfo := sysinfo.New(false) - ed, err := lxc.NewDriver(config.Root, sysInfo.AppArmor) + ed, err := namespaces.NewDriver() if err != nil { return nil, err } From 9876e5b8901199bad2ab424593131d574b582bf9 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 21 Feb 2014 21:14:21 -0800 Subject: [PATCH 41/81] Export functions of nsinit Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/nsinit/exec.go | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git 
a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index 6671ebe129..b2eaa0bc65 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -30,7 +30,7 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. if container.Tty { log.Printf("setting up master and console") - master, console, err = createMasterAndConsole() + master, console, err = CreateMasterAndConsole() if err != nil { return -1, err } @@ -44,7 +44,7 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. } system.UsetCloseOnExec(r.Fd()) - command := createCommand(container, console, logFile, r.Fd(), args) + command := CreateCommand(container, console, logFile, r.Fd(), args) if !container.Tty { log.Printf("opening pipes on command") if inPipe, err = command.StdinPipe(); err != nil { @@ -81,12 +81,12 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. if container.Network != nil { log.Printf("creating veth pair") - vethPair, err := initializeContainerVeth(container.Network.Bridge, container.Network.Mtu, command.Process.Pid) + vethPair, err := InitializeContainerVeth(container.Network.Bridge, container.Network.Mtu, command.Process.Pid) if err != nil { return -1, err } log.Printf("sending %s as veth pair name", vethPair) - sendVethName(w, vethPair) + SendVethName(w, vethPair) } // Sync with child @@ -99,7 +99,7 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. go io.Copy(stdout, master) go io.Copy(master, stdin) - state, err := setupWindow(master) + state, err := SetupWindow(master, os.Stdin) if err != nil { command.Process.Kill() return -1, err @@ -125,9 +125,9 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. 
return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil } -// sendVethName writes the veth pair name to the child's stdin then closes the +// SendVethName writes the veth pair name to the child's stdin then closes the // pipe so that the child stops waiting for more data -func sendVethName(pipe io.Writer, name string) { +func SendVethName(pipe io.Writer, name string) { fmt.Fprint(pipe, name) } @@ -138,7 +138,7 @@ func sendVethName(pipe io.Writer, name string) { // Then will with set the other side of the veth pair into the container's namespaced // using the pid and returns the veth's interface name to provide to the container to // finish setting up the interface inside the namespace -func initializeContainerVeth(bridge string, mtu, nspid int) (string, error) { +func InitializeContainerVeth(bridge string, mtu, nspid int) (string, error) { name1, name2, err := createVethPair() if err != nil { return "", err @@ -160,20 +160,22 @@ func initializeContainerVeth(bridge string, mtu, nspid int) (string, error) { return name2, nil } -func setupWindow(master *os.File) (*term.State, error) { - ws, err := term.GetWinsize(os.Stdin.Fd()) +// SetupWindow gets the parent window size and sets the master +// pty to the current size and set the parents mode to RAW +func SetupWindow(master, parent *os.File) (*term.State, error) { + ws, err := term.GetWinsize(parent.Fd()) if err != nil { return nil, err } if err := term.SetWinsize(master.Fd(), ws); err != nil { return nil, err } - return term.SetRawTerminal(os.Stdin.Fd()) + return term.SetRawTerminal(parent.Fd()) } -// createMasterAndConsole will open /dev/ptmx on the host and retreive the +// CreateMasterAndConsole will open /dev/ptmx on the host and retreive the // pts name for use as the pty slave inside the container -func createMasterAndConsole() (*os.File, string, error) { +func CreateMasterAndConsole() (*os.File, string, error) { master, err := os.OpenFile("/dev/ptmx", 
syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) if err != nil { return nil, "", err @@ -217,7 +219,7 @@ func deletePidFile() error { // createCommand will return an exec.Cmd with the Cloneflags set to the proper namespaces // defined on the container's configuration and use the current binary as the init with the // args provided -func createCommand(container *libcontainer.Container, console, logFile string, pipe uintptr, args []string) *exec.Cmd { +func CreateCommand(container *libcontainer.Container, console, logFile string, pipe uintptr, args []string) *exec.Cmd { // get our binary name so we can always reexec ourself name := os.Args[0] command := exec.Command(name, append([]string{ From 5a4069f3aacd0dc30ee7c5dd97f0dc9a6e416f35 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 21 Feb 2014 22:20:15 -0800 Subject: [PATCH 42/81] Refactor network creation and initialization into strategies Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/README.md | 9 ++- pkg/libcontainer/container.go | 13 ++-- pkg/libcontainer/container.json | 9 ++- pkg/libcontainer/network/strategy.go | 32 +++++++++ pkg/libcontainer/network/veth.go | 103 +++++++++++++++++++++++++++ pkg/libcontainer/nsinit/exec.go | 97 +++++++++---------------- pkg/libcontainer/nsinit/init.go | 55 +++++--------- 7 files changed, 211 insertions(+), 107 deletions(-) create mode 100644 pkg/libcontainer/network/strategy.go create mode 100644 pkg/libcontainer/network/veth.go diff --git a/pkg/libcontainer/README.md b/pkg/libcontainer/README.md index 89a4ec0c48..36553af5bc 100644 --- a/pkg/libcontainer/README.md +++ b/pkg/libcontainer/README.md @@ -45,12 +45,17 @@ Sample `container.json` file: "AUDIT_WRITE", "AUDIT_CONTROL", "MAC_OVERRIDE", - "MAC_ADMIN" + "MAC_ADMIN", + "NET_ADMIN" ], "network": { + "type": "veth", + "context": { + "bridge": "docker0", + "prefix": "dock" + }, "address": "172.17.0.100/16", "gateway": "172.17.42.1", - "bridge": "docker0", "mtu": 1500 }, 
"cgroups": { diff --git a/pkg/libcontainer/container.go b/pkg/libcontainer/container.go index 3c1b62b65a..4a47977334 100644 --- a/pkg/libcontainer/container.go +++ b/pkg/libcontainer/container.go @@ -4,6 +4,10 @@ import ( "github.com/dotcloud/docker/pkg/cgroups" ) +// Context is a generic key value pair that allows +// arbatrary data to be sent +type Context map[string]string + // Container defines configuration options for how a // container is setup inside a directory and how a process should be executed type Container struct { @@ -24,8 +28,9 @@ type Container struct { // The network configuration can be omited from a container causing the // container to be setup with the host's networking stack type Network struct { - Address string `json:"address,omitempty"` - Gateway string `json:"gateway,omitempty"` - Bridge string `json:"bridge,omitempty"` - Mtu int `json:"mtu,omitempty"` + Type string `json:"type,omitempty"` // type of networking to setup i.e. veth, macvlan, etc + Context Context `json:"context,omitempty"` // generic context for type specific networking options + Address string `json:"address,omitempty"` + Gateway string `json:"gateway,omitempty"` + Mtu int `json:"mtu,omitempty"` } diff --git a/pkg/libcontainer/container.json b/pkg/libcontainer/container.json index 07e52df428..c2b21f8609 100644 --- a/pkg/libcontainer/container.json +++ b/pkg/libcontainer/container.json @@ -28,12 +28,17 @@ "AUDIT_WRITE", "AUDIT_CONTROL", "MAC_OVERRIDE", - "MAC_ADMIN" + "MAC_ADMIN", + "NET_ADMIN" ], "network": { + "type": "veth", + "context": { + "bridge": "docker0", + "prefix": "dock" + }, "address": "172.17.0.100/16", "gateway": "172.17.42.1", - "bridge": "docker0", "mtu": 1500 }, "cgroups": { diff --git a/pkg/libcontainer/network/strategy.go b/pkg/libcontainer/network/strategy.go new file mode 100644 index 0000000000..8ecc11a24d --- /dev/null +++ b/pkg/libcontainer/network/strategy.go @@ -0,0 +1,32 @@ +package network + +import ( + "errors" + 
"github.com/dotcloud/docker/pkg/libcontainer" +) + +var ( + ErrNotValidStrategyType = errors.New("not a valid network strategy type") +) + +var strategies = map[string]NetworkStrategy{ + "veth": &Veth{}, +} + +// NetworkStrategy represends a specific network configuration for +// a containers networking stack +type NetworkStrategy interface { + Create(*libcontainer.Network, int) (libcontainer.Context, error) + Initialize(*libcontainer.Network, libcontainer.Context) error +} + +// GetStrategy returns the specific network strategy for the +// provided type. If no strategy is registered for the type an +// ErrNotValidStrategyType is returned. +func GetStrategy(tpe string) (NetworkStrategy, error) { + s, exists := strategies[tpe] + if !exists { + return nil, ErrNotValidStrategyType + } + return s, nil +} diff --git a/pkg/libcontainer/network/veth.go b/pkg/libcontainer/network/veth.go new file mode 100644 index 0000000000..61fec5500c --- /dev/null +++ b/pkg/libcontainer/network/veth.go @@ -0,0 +1,103 @@ +package network + +import ( + "fmt" + "github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/pkg/libcontainer/utils" + "log" +) + +type Veth struct { +} + +func (v *Veth) Create(n *libcontainer.Network, nspid int) (libcontainer.Context, error) { + log.Printf("creating veth network") + var ( + bridge string + prefix string + exists bool + ) + if bridge, exists = n.Context["bridge"]; !exists { + return nil, fmt.Errorf("bridge does not exist in network context") + } + if prefix, exists = n.Context["prefix"]; !exists { + return nil, fmt.Errorf("veth prefix does not exist in network context") + } + name1, name2, err := createVethPair(prefix) + if err != nil { + return nil, err + } + context := libcontainer.Context{ + "vethHost": name1, + "vethChild": name2, + } + log.Printf("veth pair created %s <> %s", name1, name2) + if err := SetInterfaceMaster(name1, bridge); err != nil { + return context, err + } + if err := SetMtu(name1, n.Mtu); err != nil { + 
return context, err + } + if err := InterfaceUp(name1); err != nil { + return context, err + } + log.Printf("setting %s inside %d namespace", name2, nspid) + if err := SetInterfaceInNamespacePid(name2, nspid); err != nil { + return context, err + } + return context, nil +} + +func (v *Veth) Initialize(config *libcontainer.Network, context libcontainer.Context) error { + var ( + vethChild string + exists bool + ) + if vethChild, exists = context["vethChild"]; !exists { + return fmt.Errorf("vethChild does not exist in network context") + } + if err := InterfaceDown(vethChild); err != nil { + return fmt.Errorf("interface down %s %s", vethChild, err) + } + if err := ChangeInterfaceName(vethChild, "eth0"); err != nil { + return fmt.Errorf("change %s to eth0 %s", vethChild, err) + } + if err := SetInterfaceIp("eth0", config.Address); err != nil { + return fmt.Errorf("set eth0 ip %s", err) + } + if err := SetMtu("eth0", config.Mtu); err != nil { + return fmt.Errorf("set eth0 mtu to %d %s", config.Mtu, err) + } + if err := InterfaceUp("eth0"); err != nil { + return fmt.Errorf("eth0 up %s", err) + } + if err := SetMtu("lo", config.Mtu); err != nil { + return fmt.Errorf("set lo mtu to %d %s", config.Mtu, err) + } + if err := InterfaceUp("lo"); err != nil { + return fmt.Errorf("lo up %s", err) + } + if config.Gateway != "" { + if err := SetDefaultGateway(config.Gateway); err != nil { + return fmt.Errorf("set gateway to %s %s", config.Gateway, err) + } + } + return nil +} + +// createVethPair will automatically generage two random names for +// the veth pair and ensure that they have been created +func createVethPair(prefix string) (name1 string, name2 string, err error) { + name1, err = utils.GenerateRandomName(prefix, 4) + if err != nil { + return + } + name2, err = utils.GenerateRandomName(prefix, 4) + if err != nil { + return + } + if err = CreateVethPair(name1, name2); err != nil { + return + } + return +} diff --git a/pkg/libcontainer/nsinit/exec.go 
b/pkg/libcontainer/nsinit/exec.go index b2eaa0bc65..6c4d7666a2 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -3,10 +3,10 @@ package nsinit import ( + "encoding/json" "fmt" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/network" - "github.com/dotcloud/docker/pkg/libcontainer/utils" "github.com/dotcloud/docker/pkg/system" "github.com/dotcloud/docker/pkg/term" "io" @@ -19,11 +19,11 @@ import ( // Exec performes setup outside of a namespace so that a container can be // executed. Exec is a high level function for working with container namespaces. -func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io.Writer, master *os.File, logFile string, args []string) (int, error) { +func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io.Writer, + master *os.File, logFile string, args []string) (int, error) { var ( - console string - err error - + console string + err error inPipe io.WriteCloser outPipe, errPipe io.ReadCloser ) @@ -46,7 +46,7 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. command := CreateCommand(container, console, logFile, r.Fd(), args) if !container.Tty { - log.Printf("opening pipes on command") + log.Printf("opening std pipes") if inPipe, err = command.StdinPipe(); err != nil { return -1, err } @@ -78,15 +78,9 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. 
return -1, err } } - - if container.Network != nil { - log.Printf("creating veth pair") - vethPair, err := InitializeContainerVeth(container.Network.Bridge, container.Network.Mtu, command.Process.Pid) - if err != nil { - return -1, err - } - log.Printf("sending %s as veth pair name", vethPair) - SendVethName(w, vethPair) + if err := InitializeNetworking(container, command.Process.Pid, w); err != nil { + command.Process.Kill() + return -1, err } // Sync with child @@ -104,7 +98,7 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. command.Process.Kill() return -1, err } - defer term.RestoreTerminal(uintptr(syscall.Stdin), state) + defer term.RestoreTerminal(os.Stdin.Fd(), state) } else { log.Printf("starting copy for std pipes") go func() { @@ -125,39 +119,34 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil } -// SendVethName writes the veth pair name to the child's stdin then closes the -// pipe so that the child stops waiting for more data -func SendVethName(pipe io.Writer, name string) { - fmt.Fprint(pipe, name) +func InitializeNetworking(container *libcontainer.Container, nspid int, pipe io.Writer) error { + if container.Network != nil { + log.Printf("creating host network configuration type %s", container.Network.Type) + strategy, err := network.GetStrategy(container.Network.Type) + if err != nil { + return err + } + networkContext, err := strategy.Create(container.Network, nspid) + if err != nil { + return err + } + log.Printf("sending %v as network context", networkContext) + if err := SendContext(pipe, networkContext); err != nil { + return err + } + } + return nil } -// initializeContainerVeth will create a veth pair and setup the host's -// side of the pair by setting the specified bridge as the master and bringing -// up the interface. 
-// -// Then will with set the other side of the veth pair into the container's namespaced -// using the pid and returns the veth's interface name to provide to the container to -// finish setting up the interface inside the namespace -func InitializeContainerVeth(bridge string, mtu, nspid int) (string, error) { - name1, name2, err := createVethPair() +// SendContext writes the veth pair name to the child's stdin then closes the +// pipe so that the child stops waiting for more data +func SendContext(pipe io.Writer, context libcontainer.Context) error { + data, err := json.Marshal(context) if err != nil { - return "", err + return err } - log.Printf("veth pair created %s <> %s", name1, name2) - if err := network.SetInterfaceMaster(name1, bridge); err != nil { - return "", err - } - if err := network.SetMtu(name1, mtu); err != nil { - return "", err - } - if err := network.InterfaceUp(name1); err != nil { - return "", err - } - log.Printf("setting %s inside %d namespace", name2, nspid) - if err := network.SetInterfaceInNamespacePid(name2, nspid); err != nil { - return "", err - } - return name2, nil + pipe.Write(data) + return nil } // SetupWindow gets the parent window size and sets the master @@ -190,29 +179,13 @@ func CreateMasterAndConsole() (*os.File, string, error) { return master, console, nil } -// createVethPair will automatically generage two random names for -// the veth pair and ensure that they have been created -func createVethPair() (name1 string, name2 string, err error) { - name1, err = utils.GenerateRandomName("dock", 4) - if err != nil { - return - } - name2, err = utils.GenerateRandomName("dock", 4) - if err != nil { - return - } - if err = network.CreateVethPair(name1, name2); err != nil { - return - } - return -} - // writePidFile writes the namespaced processes pid to .nspid in the rootfs for the container func writePidFile(command *exec.Cmd) error { return ioutil.WriteFile(".nspid", []byte(fmt.Sprint(command.Process.Pid)), 0655) } func 
deletePidFile() error { + log.Printf("removing .nspid file") return os.Remove(".nspid") } diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index 04716ba645..f530d4a52a 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -3,6 +3,7 @@ package nsinit import ( + "encoding/json" "fmt" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/capabilities" @@ -27,13 +28,10 @@ func Init(container *libcontainer.Container, uncleanRootfs, console string, pipe log.Printf("initializing namespace at %s", rootfs) // We always read this as it is a way to sync with the parent as well - tempVethName, err := getVethName(pipe) + context, err := GetContextFromParent(pipe) if err != nil { return err } - if tempVethName != "" { - log.Printf("received veth name %s", tempVethName) - } if console != "" { log.Printf("setting up console for %s", console) // close pipes so that we can replace it with the pty @@ -62,7 +60,7 @@ func Init(container *libcontainer.Container, uncleanRootfs, console string, pipe if err := setupNewMountNamespace(rootfs, console, container.ReadonlyFs); err != nil { return fmt.Errorf("setup mount namespace %s", err) } - if err := setupVethNetwork(container.Network, tempVethName); err != nil { + if err := setupNetwork(container.Network, context); err != nil { return fmt.Errorf("setup networking %s", err) } if err := system.Sethostname(container.Hostname); err != nil { @@ -145,46 +143,29 @@ func openTerminal(name string, flag int) (*os.File, error) { // setupVethNetwork uses the Network config if it is not nil to initialize // the new veth interface inside the container for use by changing the name to eth0 // setting the MTU and IP address along with the default gateway -func setupVethNetwork(config *libcontainer.Network, tempVethName string) error { +func setupNetwork(config *libcontainer.Network, context libcontainer.Context) error { if config != nil { - if err := 
network.InterfaceDown(tempVethName); err != nil { - return fmt.Errorf("interface down %s %s", tempVethName, err) - } - if err := network.ChangeInterfaceName(tempVethName, "eth0"); err != nil { - return fmt.Errorf("change %s to eth0 %s", tempVethName, err) - } - if err := network.SetInterfaceIp("eth0", config.Address); err != nil { - return fmt.Errorf("set eth0 ip %s", err) - } - if err := network.SetMtu("eth0", config.Mtu); err != nil { - return fmt.Errorf("set eth0 mtu to %d %s", config.Mtu, err) - } - if err := network.InterfaceUp("eth0"); err != nil { - return fmt.Errorf("eth0 up %s", err) - } - if err := network.SetMtu("lo", config.Mtu); err != nil { - return fmt.Errorf("set lo mtu to %d %s", config.Mtu, err) - } - if err := network.InterfaceUp("lo"); err != nil { - return fmt.Errorf("lo up %s", err) - } - if config.Gateway != "" { - if err := network.SetDefaultGateway(config.Gateway); err != nil { - return fmt.Errorf("set gateway to %s %s", config.Gateway, err) - } + strategy, err := network.GetStrategy(config.Type) + if err != nil { + return err } + return strategy.Initialize(config, context) } return nil } -// getVethName reads from Stdin the temp veth name -// sent by the parent processes after the veth pair -// has been created and setup -func getVethName(pipe io.ReadCloser) (string, error) { +func GetContextFromParent(pipe io.ReadCloser) (libcontainer.Context, error) { defer pipe.Close() data, err := ioutil.ReadAll(pipe) if err != nil { - return "", fmt.Errorf("error reading from stdin %s", err) + return nil, fmt.Errorf("error reading from stdin %s", err) } - return string(data), nil + var context libcontainer.Context + if len(data) > 0 { + if err := json.Unmarshal(data, &context); err != nil { + return nil, err + } + log.Printf("received context %v", context) + } + return context, nil } From dd59f7fb286f2abff6cee2699e62fff564425149 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 21 Feb 2014 22:37:09 -0800 Subject: [PATCH 43/81] Refactor exec 
method Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/nsinit/exec.go | 138 ++++++++++++++++++-------------- 1 file changed, 77 insertions(+), 61 deletions(-) diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index 6c4d7666a2..3cbe43ae7a 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -22,20 +22,10 @@ import ( func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io.Writer, master *os.File, logFile string, args []string) (int, error) { var ( - console string - err error - inPipe io.WriteCloser - outPipe, errPipe io.ReadCloser + console string + err error ) - if container.Tty { - log.Printf("setting up master and console") - master, console, err = CreateMasterAndConsole() - if err != nil { - return -1, err - } - } - // create a pipe so that we can syncronize with the namespaced process and // pass the veth name to the child r, w, err := os.Pipe() @@ -44,49 +34,15 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. 
} system.UsetCloseOnExec(r.Fd()) + if container.Tty { + log.Printf("setting up master and console") + master, console, err = CreateMasterAndConsole() + if err != nil { + return -1, err + } + } + command := CreateCommand(container, console, logFile, r.Fd(), args) - if !container.Tty { - log.Printf("opening std pipes") - if inPipe, err = command.StdinPipe(); err != nil { - return -1, err - } - if outPipe, err = command.StdoutPipe(); err != nil { - return -1, err - } - if errPipe, err = command.StderrPipe(); err != nil { - return -1, err - } - } - - log.Printf("staring init") - if err := command.Start(); err != nil { - return -1, err - } - log.Printf("writting state file") - if err := writePidFile(command); err != nil { - command.Process.Kill() - return -1, err - } - defer deletePidFile() - - // Do this before syncing with child so that no children - // can escape the cgroup - if container.Cgroups != nil { - log.Printf("setting up cgroups") - if err := container.Cgroups.Apply(command.Process.Pid); err != nil { - command.Process.Kill() - return -1, err - } - } - if err := InitializeNetworking(container, command.Process.Pid, w); err != nil { - command.Process.Kill() - return -1, err - } - - // Sync with child - log.Printf("closing sync pipes") - w.Close() - r.Close() if container.Tty { log.Printf("starting copy for tty") @@ -100,15 +56,39 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. 
} defer term.RestoreTerminal(os.Stdin.Fd(), state) } else { - log.Printf("starting copy for std pipes") - go func() { - defer inPipe.Close() - io.Copy(inPipe, stdin) - }() - go io.Copy(stdout, outPipe) - go io.Copy(stderr, errPipe) + if err := startStdCopy(command, stdin, stdout, stderr); err != nil { + command.Process.Kill() + return -1, err + } } + log.Printf("staring init") + if err := command.Start(); err != nil { + return -1, err + } + log.Printf("writing state file") + if err := writePidFile(command); err != nil { + command.Process.Kill() + return -1, err + } + defer deletePidFile() + + // Do this before syncing with child so that no children + // can escape the cgroup + if err := SetupCgroups(container, command.Process.Pid); err != nil { + command.Process.Kill() + return -1, err + } + if err := InitializeNetworking(container, command.Process.Pid, w); err != nil { + command.Process.Kill() + return -1, err + } + + // Sync with child + log.Printf("closing sync pipes") + w.Close() + r.Close() + log.Printf("waiting on process") if err := command.Wait(); err != nil { if _, ok := err.(*exec.ExitError); !ok { @@ -119,6 +99,16 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. 
return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil } +func SetupCgroups(container *libcontainer.Container, nspid int) error { + if container.Cgroups != nil { + log.Printf("setting up cgroups") + if err := container.Cgroups.Apply(nspid); err != nil { + return err + } + } + return nil +} + func InitializeNetworking(container *libcontainer.Container, nspid int, pipe io.Writer) error { if container.Network != nil { log.Printf("creating host network configuration type %s", container.Network.Type) @@ -207,3 +197,29 @@ func CreateCommand(container *libcontainer.Container, console, logFile string, p command.Env = container.Env return command } + +func startStdCopy(command *exec.Cmd, stdin io.Reader, stdout, stderr io.Writer) error { + log.Printf("opening std pipes") + inPipe, err := command.StdinPipe() + if err != nil { + return err + } + outPipe, err := command.StdoutPipe() + if err != nil { + return err + } + errPipe, err := command.StderrPipe() + if err != nil { + return err + } + + log.Printf("starting copy for std pipes") + go func() { + defer inPipe.Close() + io.Copy(inPipe, stdin) + }() + go io.Copy(stdout, outPipe) + go io.Copy(stderr, errPipe) + + return nil +} From 2412656ef54cb4df36df2f8122e1fda24ec8e8a4 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 21 Feb 2014 22:58:30 -0800 Subject: [PATCH 44/81] Add syncpipe for passing context Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/nsinit/exec.go | 27 +++------- pkg/libcontainer/nsinit/init.go | 44 +++++++--------- pkg/libcontainer/nsinit/nsinit/main.go | 6 ++- pkg/libcontainer/nsinit/sync_pipe.go | 73 ++++++++++++++++++++++++++ 4 files changed, 102 insertions(+), 48 deletions(-) create mode 100644 pkg/libcontainer/nsinit/sync_pipe.go diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index 3cbe43ae7a..ec75e9c923 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -3,7 +3,6 @@ 
package nsinit import ( - "encoding/json" "fmt" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/network" @@ -28,11 +27,10 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. // create a pipe so that we can syncronize with the namespaced process and // pass the veth name to the child - r, w, err := os.Pipe() + syncPipe, err := NewSyncPipe() if err != nil { return -1, err } - system.UsetCloseOnExec(r.Fd()) if container.Tty { log.Printf("setting up master and console") @@ -42,8 +40,7 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. } } - command := CreateCommand(container, console, logFile, r.Fd(), args) - + command := CreateCommand(container, console, logFile, syncPipe.child.Fd(), args) if container.Tty { log.Printf("starting copy for tty") go io.Copy(stdout, master) @@ -79,15 +76,14 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. command.Process.Kill() return -1, err } - if err := InitializeNetworking(container, command.Process.Pid, w); err != nil { + if err := InitializeNetworking(container, command.Process.Pid, syncPipe); err != nil { command.Process.Kill() return -1, err } // Sync with child log.Printf("closing sync pipes") - w.Close() - r.Close() + syncPipe.Close() log.Printf("waiting on process") if err := command.Wait(); err != nil { @@ -109,7 +105,7 @@ func SetupCgroups(container *libcontainer.Container, nspid int) error { return nil } -func InitializeNetworking(container *libcontainer.Container, nspid int, pipe io.Writer) error { +func InitializeNetworking(container *libcontainer.Container, nspid int, pipe *SyncPipe) error { if container.Network != nil { log.Printf("creating host network configuration type %s", container.Network.Type) strategy, err := network.GetStrategy(container.Network.Type) @@ -121,24 +117,13 @@ func InitializeNetworking(container *libcontainer.Container, nspid int, pipe io. 
return err } log.Printf("sending %v as network context", networkContext) - if err := SendContext(pipe, networkContext); err != nil { + if err := pipe.SendToChild(networkContext); err != nil { return err } } return nil } -// SendContext writes the veth pair name to the child's stdin then closes the -// pipe so that the child stops waiting for more data -func SendContext(pipe io.Writer, context libcontainer.Context) error { - data, err := json.Marshal(context) - if err != nil { - return err - } - pipe.Write(data) - return nil -} - // SetupWindow gets the parent window size and sets the master // pty to the current size and set the parents mode to RAW func SetupWindow(master, parent *os.File) (*term.State, error) { diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index f530d4a52a..cdedc14769 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -3,14 +3,11 @@ package nsinit import ( - "encoding/json" "fmt" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/capabilities" "github.com/dotcloud/docker/pkg/libcontainer/network" "github.com/dotcloud/docker/pkg/system" - "io" - "io/ioutil" "log" "os" "os/exec" @@ -20,7 +17,7 @@ import ( // Init is the init process that first runs inside a new namespace to setup mounts, users, networking, // and other options required for the new container. 
-func Init(container *libcontainer.Container, uncleanRootfs, console string, pipe io.ReadCloser, args []string) error { +func Init(container *libcontainer.Container, uncleanRootfs, console string, syncPipe *SyncPipe, args []string) error { rootfs, err := resolveRootfs(uncleanRootfs) if err != nil { return err @@ -28,16 +25,18 @@ func Init(container *libcontainer.Container, uncleanRootfs, console string, pipe log.Printf("initializing namespace at %s", rootfs) // We always read this as it is a way to sync with the parent as well - context, err := GetContextFromParent(pipe) + context, err := syncPipe.ReadFromParent() if err != nil { + syncPipe.Close() return err } + syncPipe.Close() + log.Printf("received context from parent %v", context) + if console != "" { log.Printf("setting up console for %s", console) // close pipes so that we can replace it with the pty - os.Stdin.Close() - os.Stdout.Close() - os.Stderr.Close() + closeStdPipes() slave, err := openTerminal(console, syscall.O_RDWR) if err != nil { return fmt.Errorf("open terminal %s", err) @@ -79,18 +78,27 @@ func Init(container *libcontainer.Container, uncleanRootfs, console string, pipe return fmt.Errorf("chdir to %s %s", container.WorkingDir, err) } } + return execArgs(args, container.Env) +} + +func execArgs(args []string, env []string) error { name, err := exec.LookPath(args[0]) if err != nil { return err } - log.Printf("execing %s goodbye", name) - if err := system.Exec(name, args[0:], container.Env); err != nil { + if err := system.Exec(name, args[0:], env); err != nil { return fmt.Errorf("exec %s", err) } panic("unreachable") } +func closeStdPipes() { + os.Stdin.Close() + os.Stdout.Close() + os.Stderr.Close() +} + // resolveRootfs ensures that the current working directory is // not a symlink and returns the absolute path to the rootfs func resolveRootfs(uncleanRootfs string) (string, error) { @@ -153,19 +161,3 @@ func setupNetwork(config *libcontainer.Network, context libcontainer.Context) er } return 
nil } - -func GetContextFromParent(pipe io.ReadCloser) (libcontainer.Context, error) { - defer pipe.Close() - data, err := ioutil.ReadAll(pipe) - if err != nil { - return nil, fmt.Errorf("error reading from stdin %s", err) - } - var context libcontainer.Context - if len(data) > 0 { - if err := json.Unmarshal(data, &context); err != nil { - return nil, err - } - log.Printf("received context %v", context) - } - return context, nil -} diff --git a/pkg/libcontainer/nsinit/nsinit/main.go b/pkg/libcontainer/nsinit/nsinit/main.go index 28d42d4643..2400ab6903 100644 --- a/pkg/libcontainer/nsinit/nsinit/main.go +++ b/pkg/libcontainer/nsinit/nsinit/main.go @@ -74,7 +74,11 @@ func main() { if flag.NArg() < 2 { log.Fatal(ErrWrongArguments) } - if err := nsinit.Init(container, cwd, console, os.NewFile(uintptr(pipeFd), "pipe"), flag.Args()[1:]); err != nil { + syncPipe, err := nsinit.NewSyncPipeFromFd(0, uintptr(pipeFd)) + if err != nil { + log.Fatal(err) + } + if err := nsinit.Init(container, cwd, console, syncPipe, flag.Args()[1:]); err != nil { log.Fatal(err) } default: diff --git a/pkg/libcontainer/nsinit/sync_pipe.go b/pkg/libcontainer/nsinit/sync_pipe.go new file mode 100644 index 0000000000..7b29e98680 --- /dev/null +++ b/pkg/libcontainer/nsinit/sync_pipe.go @@ -0,0 +1,73 @@ +package nsinit + +import ( + "encoding/json" + "fmt" + "github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/pkg/system" + "io/ioutil" + "os" +) + +// SyncPipe allows communication to and from the child processes +// to it's parent and allows the two independent processes to +// syncronize their state. 
+type SyncPipe struct { + parent, child *os.File +} + +func NewSyncPipe() (s *SyncPipe, err error) { + s = &SyncPipe{} + s.child, s.parent, err = os.Pipe() + if err != nil { + return nil, err + } + system.UsetCloseOnExec(s.child.Fd()) + return s, nil +} + +func NewSyncPipeFromFd(parendFd, childFd uintptr) (*SyncPipe, error) { + s := &SyncPipe{} + if parendFd > 0 { + s.parent = os.NewFile(parendFd, "parendPipe") + } else if childFd > 0 { + s.child = os.NewFile(childFd, "childPipe") + } else { + return nil, fmt.Errorf("no valid sync pipe fd specified") + } + return s, nil +} + +func (s *SyncPipe) SendToChild(context libcontainer.Context) error { + data, err := json.Marshal(context) + if err != nil { + return err + } + s.parent.Write(data) + return nil +} + +func (s *SyncPipe) ReadFromParent() (libcontainer.Context, error) { + data, err := ioutil.ReadAll(s.child) + if err != nil { + return nil, fmt.Errorf("error reading from sync pipe %s", err) + } + var context libcontainer.Context + if len(data) > 0 { + if err := json.Unmarshal(data, &context); err != nil { + return nil, err + } + } + return context, nil + +} + +func (s *SyncPipe) Close() error { + if s.parent != nil { + s.parent.Close() + } + if s.child != nil { + s.child.Close() + } + return nil +} From ae423a036e6f884572491b1ff5ef8a626b1592aa Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Sat, 22 Feb 2014 00:29:21 -0800 Subject: [PATCH 45/81] Abstract out diff implementations for importing Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/nsinit/command.go | 34 ++++++++ pkg/libcontainer/nsinit/exec.go | 107 ++++-------------------- pkg/libcontainer/nsinit/nsinit/main.go | 4 +- pkg/libcontainer/nsinit/state.go | 24 ++++++ pkg/libcontainer/nsinit/term.go | 109 +++++++++++++++++++++++++ 5 files changed, 184 insertions(+), 94 deletions(-) create mode 100644 pkg/libcontainer/nsinit/command.go create mode 100644 pkg/libcontainer/nsinit/state.go create mode 100644 
pkg/libcontainer/nsinit/term.go diff --git a/pkg/libcontainer/nsinit/command.go b/pkg/libcontainer/nsinit/command.go new file mode 100644 index 0000000000..b1c5631b4b --- /dev/null +++ b/pkg/libcontainer/nsinit/command.go @@ -0,0 +1,34 @@ +package nsinit + +import ( + "fmt" + "github.com/dotcloud/docker/pkg/libcontainer" + "os" + "os/exec" + "syscall" +) + +type CommandFactory interface { + Create(container *libcontainer.Container, console, logFile string, syncFd uintptr, args []string) *exec.Cmd +} + +type DefaultCommandFactory struct{} + +// Create will return an exec.Cmd with the Cloneflags set to the proper namespaces +// defined on the container's configuration and use the current binary as the init with the +// args provided +func (c *DefaultCommandFactory) Create(container *libcontainer.Container, console, logFile string, pipe uintptr, args []string) *exec.Cmd { + // get our binary name so we can always reexec ourself + name := os.Args[0] + command := exec.Command(name, append([]string{ + "-console", console, + "-pipe", fmt.Sprint(pipe), + "-log", logFile, + "init"}, args...)...) + + command.SysProcAttr = &syscall.SysProcAttr{ + Cloneflags: uintptr(GetNamespaceFlags(container.Namespaces)), + } + command.Env = container.Env + return command +} diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index ec75e9c923..ee83f4f107 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -3,13 +3,9 @@ package nsinit import ( - "fmt" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/network" "github.com/dotcloud/docker/pkg/system" - "github.com/dotcloud/docker/pkg/term" - "io" - "io/ioutil" "log" "os" "os/exec" @@ -18,9 +14,11 @@ import ( // Exec performes setup outside of a namespace so that a container can be // executed. Exec is a high level function for working with container namespaces. 
-func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io.Writer, - master *os.File, logFile string, args []string) (int, error) { +func Exec(container *libcontainer.Container, + factory CommandFactory, state StateWriter, term Terminal, + logFile string, args []string) (int, error) { var ( + master *os.File console string err error ) @@ -38,37 +36,28 @@ func Exec(container *libcontainer.Container, stdin io.Reader, stdout, stderr io. if err != nil { return -1, err } + term.SetMaster(master) } - command := CreateCommand(container, console, logFile, syncPipe.child.Fd(), args) - if container.Tty { - log.Printf("starting copy for tty") - go io.Copy(stdout, master) - go io.Copy(master, stdin) - - state, err := SetupWindow(master, os.Stdin) - if err != nil { - command.Process.Kill() - return -1, err - } - defer term.RestoreTerminal(os.Stdin.Fd(), state) - } else { - if err := startStdCopy(command, stdin, stdout, stderr); err != nil { - command.Process.Kill() - return -1, err - } + command := factory.Create(container, console, logFile, syncPipe.child.Fd(), args) + if err := term.Attach(command); err != nil { + return -1, err } + defer term.Close() log.Printf("staring init") if err := command.Start(); err != nil { return -1, err } log.Printf("writing state file") - if err := writePidFile(command); err != nil { + if err := state.WritePid(command.Process.Pid); err != nil { command.Process.Kill() return -1, err } - defer deletePidFile() + defer func() { + log.Printf("removing state file") + state.DeletePid() + }() // Do this before syncing with child so that no children // can escape the cgroup @@ -124,19 +113,6 @@ func InitializeNetworking(container *libcontainer.Container, nspid int, pipe *Sy return nil } -// SetupWindow gets the parent window size and sets the master -// pty to the current size and set the parents mode to RAW -func SetupWindow(master, parent *os.File) (*term.State, error) { - ws, err := term.GetWinsize(parent.Fd()) - if err != nil { - 
return nil, err - } - if err := term.SetWinsize(master.Fd(), ws); err != nil { - return nil, err - } - return term.SetRawTerminal(parent.Fd()) -} - // CreateMasterAndConsole will open /dev/ptmx on the host and retreive the // pts name for use as the pty slave inside the container func CreateMasterAndConsole() (*os.File, string, error) { @@ -153,58 +129,3 @@ func CreateMasterAndConsole() (*os.File, string, error) { } return master, console, nil } - -// writePidFile writes the namespaced processes pid to .nspid in the rootfs for the container -func writePidFile(command *exec.Cmd) error { - return ioutil.WriteFile(".nspid", []byte(fmt.Sprint(command.Process.Pid)), 0655) -} - -func deletePidFile() error { - log.Printf("removing .nspid file") - return os.Remove(".nspid") -} - -// createCommand will return an exec.Cmd with the Cloneflags set to the proper namespaces -// defined on the container's configuration and use the current binary as the init with the -// args provided -func CreateCommand(container *libcontainer.Container, console, logFile string, pipe uintptr, args []string) *exec.Cmd { - // get our binary name so we can always reexec ourself - name := os.Args[0] - command := exec.Command(name, append([]string{ - "-console", console, - "-pipe", fmt.Sprint(pipe), - "-log", logFile, - "init"}, args...)...) 
- - command.SysProcAttr = &syscall.SysProcAttr{ - Cloneflags: uintptr(GetNamespaceFlags(container.Namespaces)), - } - command.Env = container.Env - return command -} - -func startStdCopy(command *exec.Cmd, stdin io.Reader, stdout, stderr io.Writer) error { - log.Printf("opening std pipes") - inPipe, err := command.StdinPipe() - if err != nil { - return err - } - outPipe, err := command.StdoutPipe() - if err != nil { - return err - } - errPipe, err := command.StderrPipe() - if err != nil { - return err - } - - log.Printf("starting copy for std pipes") - go func() { - defer inPipe.Close() - io.Copy(inPipe, stdin) - }() - go io.Copy(stdout, outPipe) - go io.Copy(stderr, errPipe) - - return nil -} diff --git a/pkg/libcontainer/nsinit/nsinit/main.go b/pkg/libcontainer/nsinit/nsinit/main.go index 2400ab6903..c299412c7b 100644 --- a/pkg/libcontainer/nsinit/nsinit/main.go +++ b/pkg/libcontainer/nsinit/nsinit/main.go @@ -57,8 +57,10 @@ func main() { if nspid > 0 { exitCode, err = nsinit.ExecIn(container, nspid, flag.Args()[1:]) } else { + term := nsinit.NewTerminal(os.Stdin, os.Stdout, os.Stderr, container.Tty) exitCode, err = nsinit.Exec(container, - os.Stdin, os.Stdout, os.Stderr, nil, + &nsinit.DefaultCommandFactory{}, &nsinit.DefaultStateWriter{}, + term, logFile, flag.Args()[1:]) } if err != nil { diff --git a/pkg/libcontainer/nsinit/state.go b/pkg/libcontainer/nsinit/state.go new file mode 100644 index 0000000000..1f0fedd110 --- /dev/null +++ b/pkg/libcontainer/nsinit/state.go @@ -0,0 +1,24 @@ +package nsinit + +import ( + "fmt" + "io/ioutil" + "os" +) + +type StateWriter interface { + WritePid(pid int) error + DeletePid() error +} + +type DefaultStateWriter struct { +} + +// writePidFile writes the namespaced processes pid to .nspid in the rootfs for the container +func (*DefaultStateWriter) WritePid(pid int) error { + return ioutil.WriteFile(".nspid", []byte(fmt.Sprint(pid)), 0655) +} + +func (*DefaultStateWriter) DeletePid() error { + return os.Remove(".nspid") +} 
diff --git a/pkg/libcontainer/nsinit/term.go b/pkg/libcontainer/nsinit/term.go new file mode 100644 index 0000000000..649246891e --- /dev/null +++ b/pkg/libcontainer/nsinit/term.go @@ -0,0 +1,109 @@ +package nsinit + +import ( + "github.com/dotcloud/docker/pkg/term" + "io" + "os" + "os/exec" +) + +type Terminal interface { + io.Closer + SetMaster(*os.File) + Attach(*exec.Cmd) error +} + +func NewTerminal(stdin io.Reader, stdout, stderr io.Writer, tty bool) Terminal { + if tty { + return &TtyTerminal{ + stdin: stdin, + stdout: stdout, + stderr: stderr, + } + } + return &StdTerminal{ + stdin: stdin, + stdout: stdout, + stderr: stderr, + } +} + +type TtyTerminal struct { + stdin io.Reader + stdout, stderr io.Writer + master *os.File + state *term.State +} + +func (t *TtyTerminal) SetMaster(master *os.File) { + t.master = master +} + +func (t *TtyTerminal) Attach(command *exec.Cmd) error { + go io.Copy(t.stdout, t.master) + go io.Copy(t.master, t.stdin) + + state, err := t.setupWindow(t.master, os.Stdin) + if err != nil { + command.Process.Kill() + return err + } + t.state = state + return err +} + +// SetupWindow gets the parent window size and sets the master +// pty to the current size and set the parents mode to RAW +func (t *TtyTerminal) setupWindow(master, parent *os.File) (*term.State, error) { + ws, err := term.GetWinsize(parent.Fd()) + if err != nil { + return nil, err + } + if err := term.SetWinsize(master.Fd(), ws); err != nil { + return nil, err + } + return term.SetRawTerminal(parent.Fd()) +} + +func (t *TtyTerminal) Close() error { + term.RestoreTerminal(os.Stdin.Fd(), t.state) + return t.master.Close() +} + +type StdTerminal struct { + stdin io.Reader + stdout, stderr io.Writer +} + +func (s *StdTerminal) SetMaster(*os.File) { + // no need to set master on non tty +} + +func (s *StdTerminal) Close() error { + return nil +} + +func (s *StdTerminal) Attach(command *exec.Cmd) error { + inPipe, err := command.StdinPipe() + if err != nil { + return err + } + 
outPipe, err := command.StdoutPipe() + if err != nil { + return err + } + errPipe, err := command.StderrPipe() + if err != nil { + return err + } + + go func() { + defer inPipe.Close() + io.Copy(inPipe, s.stdin) + }() + + go io.Copy(s.stdout, outPipe) + go io.Copy(s.stderr, errPipe) + + return nil +} From fac41af25bd5f42269424a788783a4280dd7fc9c Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Sat, 22 Feb 2014 01:21:26 -0800 Subject: [PATCH 46/81] Refactor driver to use Exec function from nsini Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- execdriver/lxc/term.go | 37 ++-- execdriver/namespaces/driver.go | 296 +++++++++---------------------- pkg/libcontainer/nsinit/state.go | 10 +- pkg/libcontainer/nsinit/term.go | 9 + 4 files changed, 113 insertions(+), 239 deletions(-) diff --git a/execdriver/lxc/term.go b/execdriver/lxc/term.go index d772f60972..db58c3181a 100644 --- a/execdriver/lxc/term.go +++ b/execdriver/lxc/term.go @@ -6,6 +6,7 @@ import ( "github.com/kr/pty" "io" "os" + "os/exec" ) func SetTerminal(command *execdriver.Command, pipes *execdriver.Pipes) error { @@ -26,8 +27,8 @@ func SetTerminal(command *execdriver.Command, pipes *execdriver.Pipes) error { } type TtyConsole struct { - master *os.File - slave *os.File + MasterPty *os.File + SlavePty *os.File } func NewTtyConsole(command *execdriver.Command, pipes *execdriver.Pipes) (*TtyConsole, error) { @@ -36,28 +37,28 @@ func NewTtyConsole(command *execdriver.Command, pipes *execdriver.Pipes) (*TtyCo return nil, err } tty := &TtyConsole{ - master: ptyMaster, - slave: ptySlave, + MasterPty: ptyMaster, + SlavePty: ptySlave, } - if err := tty.attach(command, pipes); err != nil { + if err := tty.AttachPipes(&command.Cmd, pipes); err != nil { tty.Close() return nil, err } + command.Console = tty.SlavePty.Name() return tty, nil } func (t *TtyConsole) Master() *os.File { - return t.master + return t.MasterPty } func (t *TtyConsole) Resize(h, w int) error { - return 
term.SetWinsize(t.master.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)}) + return term.SetWinsize(t.MasterPty.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)}) } -func (t *TtyConsole) attach(command *execdriver.Command, pipes *execdriver.Pipes) error { - command.Stdout = t.slave - command.Stderr = t.slave - command.Console = t.slave.Name() +func (t *TtyConsole) AttachPipes(command *exec.Cmd, pipes *execdriver.Pipes) error { + command.Stdout = t.SlavePty + command.Stderr = t.SlavePty go func() { if wb, ok := pipes.Stdout.(interface { @@ -65,24 +66,24 @@ func (t *TtyConsole) attach(command *execdriver.Command, pipes *execdriver.Pipes }); ok { defer wb.CloseWriters() } - io.Copy(pipes.Stdout, t.master) + io.Copy(pipes.Stdout, t.MasterPty) }() if pipes.Stdin != nil { - command.Stdin = t.slave + command.Stdin = t.SlavePty command.SysProcAttr.Setctty = true go func() { defer pipes.Stdin.Close() - io.Copy(t.master, pipes.Stdin) + io.Copy(t.MasterPty, pipes.Stdin) }() } return nil } func (t *TtyConsole) Close() error { - t.slave.Close() - return t.master.Close() + t.SlavePty.Close() + return t.MasterPty.Close() } type StdConsole struct { @@ -91,13 +92,13 @@ type StdConsole struct { func NewStdConsole(command *execdriver.Command, pipes *execdriver.Pipes) (*StdConsole, error) { std := &StdConsole{} - if err := std.attach(command, pipes); err != nil { + if err := std.AttachPipes(&command.Cmd, pipes); err != nil { return nil, err } return std, nil } -func (s *StdConsole) attach(command *execdriver.Command, pipes *execdriver.Pipes) error { +func (s *StdConsole) AttachPipes(command *exec.Cmd, pipes *execdriver.Pipes) error { command.Stdout = pipes.Stdout command.Stderr = pipes.Stderr diff --git a/execdriver/namespaces/driver.go b/execdriver/namespaces/driver.go index e243c64703..a3f095f464 100644 --- a/execdriver/namespaces/driver.go +++ b/execdriver/namespaces/driver.go @@ -5,15 +5,10 @@ import ( "errors" "fmt" "github.com/dotcloud/docker/execdriver" + 
"github.com/dotcloud/docker/execdriver/lxc" "github.com/dotcloud/docker/pkg/libcontainer" - "github.com/dotcloud/docker/pkg/libcontainer/network" "github.com/dotcloud/docker/pkg/libcontainer/nsinit" - "github.com/dotcloud/docker/pkg/libcontainer/utils" - "github.com/dotcloud/docker/pkg/system" - "github.com/dotcloud/docker/pkg/term" - "io" "io/ioutil" - "log" "os" "os/exec" "path/filepath" @@ -44,129 +39,31 @@ func NewDriver() (*driver, error) { } func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) { - container := createContainer(c) + var ( + term nsinit.Terminal + container = createContainer(c) + factory = &dockerCommandFactory{c} + stateWriter = &dockerStateWriter{ + callback: startCallback, + c: c, + dsw: &nsinit.DefaultStateWriter{c.Rootfs}, + } + ) + if c.Tty { + term = &dockerTtyTerm{ + pipes: pipes, + } + } else { + term = &dockerStdTerm{ + pipes: pipes, + } + } + c.Terminal = term if err := writeContainerFile(container, c.Rootfs); err != nil { return -1, err } - - var ( - console string - master *os.File - err error - - inPipe io.WriteCloser - outPipe, errPipe io.ReadCloser - ) - - if container.Tty { - log.Printf("setting up master and console") - master, console, err = createMasterAndConsole() - if err != nil { - return -1, err - } - } - c.Terminal = NewTerm(pipes, master) - - // create a pipe so that we can syncronize with the namespaced process and - // pass the veth name to the child - r, w, err := os.Pipe() - if err != nil { - return -1, err - } - system.UsetCloseOnExec(r.Fd()) - args := append([]string{c.Entrypoint}, c.Arguments...) 
- createCommand(c, container, console, "/nsinit.logs", r.Fd(), args) - command := c - - if !container.Tty { - log.Printf("opening pipes on command") - if inPipe, err = command.StdinPipe(); err != nil { - return -1, err - } - if outPipe, err = command.StdoutPipe(); err != nil { - return -1, err - } - if errPipe, err = command.StderrPipe(); err != nil { - return -1, err - } - } - - log.Printf("staring init") - if err := command.Start(); err != nil { - return -1, err - } - log.Printf("writting state file") - if err := writePidFile(c.Rootfs, command.Process.Pid); err != nil { - command.Process.Kill() - return -1, err - } - defer deletePidFile(c.Rootfs) - - // Do this before syncing with child so that no children - // can escape the cgroup - if container.Cgroups != nil { - log.Printf("setting up cgroups") - if err := container.Cgroups.Apply(command.Process.Pid); err != nil { - command.Process.Kill() - return -1, err - } - } - - if container.Network != nil { - log.Printf("creating veth pair") - vethPair, err := initializeContainerVeth(container.Network.Bridge, container.Network.Mtu, command.Process.Pid) - if err != nil { - return -1, err - } - log.Printf("sending %s as veth pair name", vethPair) - sendVethName(w, vethPair) - } - - // Sync with child - log.Printf("closing sync pipes") - w.Close() - r.Close() - - if container.Tty { - log.Printf("starting copy for tty") - go io.Copy(pipes.Stdout, master) - if pipes.Stdin != nil { - go io.Copy(master, pipes.Stdin) - } - - /* - state, err := setupWindow(master) - if err != nil { - command.Process.Kill() - return -1, err - } - defer term.RestoreTerminal(uintptr(syscall.Stdin), state) - */ - } else { - log.Printf("starting copy for std pipes") - if pipes.Stdin != nil { - go func() { - defer inPipe.Close() - io.Copy(inPipe, pipes.Stdin) - }() - } - go io.Copy(pipes.Stdout, outPipe) - go io.Copy(pipes.Stderr, errPipe) - } - - if startCallback != nil { - startCallback(c) - } - - log.Printf("waiting on process") - if err := 
command.Wait(); err != nil { - if _, ok := err.(*exec.ExitError); !ok { - return -1, err - } - } - log.Printf("process ended") - return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil + return nsinit.Exec(container, factory, stateWriter, term, "/nsinit.log", args) } func (d *driver) Kill(p *execdriver.Command, sig int) error { @@ -207,107 +104,22 @@ func getEnv(key string, env []string) string { return "" } -// sendVethName writes the veth pair name to the child's stdin then closes the -// pipe so that the child stops waiting for more data -func sendVethName(pipe io.Writer, name string) { - fmt.Fprint(pipe, name) -} - -// initializeContainerVeth will create a veth pair and setup the host's -// side of the pair by setting the specified bridge as the master and bringing -// up the interface. -// -// Then will with set the other side of the veth pair into the container's namespaced -// using the pid and returns the veth's interface name to provide to the container to -// finish setting up the interface inside the namespace -func initializeContainerVeth(bridge string, mtu, nspid int) (string, error) { - name1, name2, err := createVethPair() - if err != nil { - return "", err - } - log.Printf("veth pair created %s <> %s", name1, name2) - if err := network.SetInterfaceMaster(name1, bridge); err != nil { - return "", err - } - if err := network.SetMtu(name1, mtu); err != nil { - return "", err - } - if err := network.InterfaceUp(name1); err != nil { - return "", err - } - log.Printf("setting %s inside %d namespace", name2, nspid) - if err := network.SetInterfaceInNamespacePid(name2, nspid); err != nil { - return "", err - } - return name2, nil -} - -func setupWindow(master *os.File) (*term.State, error) { - ws, err := term.GetWinsize(os.Stdin.Fd()) - if err != nil { - return nil, err - } - if err := term.SetWinsize(master.Fd(), ws); err != nil { - return nil, err - } - return term.SetRawTerminal(os.Stdin.Fd()) -} - -// createMasterAndConsole will open 
/dev/ptmx on the host and retreive the -// pts name for use as the pty slave inside the container -func createMasterAndConsole() (*os.File, string, error) { - master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) - if err != nil { - return nil, "", err - } - console, err := system.Ptsname(master) - if err != nil { - return nil, "", err - } - if err := system.Unlockpt(master); err != nil { - return nil, "", err - } - return master, console, nil -} - -// createVethPair will automatically generage two random names for -// the veth pair and ensure that they have been created -func createVethPair() (name1 string, name2 string, err error) { - name1, err = utils.GenerateRandomName("dock", 4) - if err != nil { - return - } - name2, err = utils.GenerateRandomName("dock", 4) - if err != nil { - return - } - if err = network.CreateVethPair(name1, name2); err != nil { - return - } - return -} - -// writePidFile writes the namespaced processes pid to .nspid in the rootfs for the container -func writePidFile(rootfs string, pid int) error { - return ioutil.WriteFile(filepath.Join(rootfs, ".nspid"), []byte(fmt.Sprint(pid)), 0655) -} - -func deletePidFile(rootfs string) error { - return os.Remove(filepath.Join(rootfs, ".nspid")) +type dockerCommandFactory struct { + c *execdriver.Command } // createCommand will return an exec.Cmd with the Cloneflags set to the proper namespaces // defined on the container's configuration and use the current binary as the init with the // args provided -func createCommand(c *execdriver.Command, container *libcontainer.Container, - console, logFile string, pipe uintptr, args []string) { - +func (d *dockerCommandFactory) Create(container *libcontainer.Container, + console, logFile string, syncFd uintptr, args []string) *exec.Cmd { + c := d.c aname, _ := exec.LookPath("nsinit") c.Path = aname c.Args = append([]string{ aname, "-console", console, - "-pipe", fmt.Sprint(pipe), + "-pipe", fmt.Sprint(syncFd), "-log", 
logFile, "init", }, args...) @@ -316,6 +128,26 @@ func createCommand(c *execdriver.Command, container *libcontainer.Container, } c.Env = container.Env c.Dir = c.Rootfs + + return &c.Cmd +} + +type dockerStateWriter struct { + dsw nsinit.StateWriter + c *execdriver.Command + callback execdriver.StartCallback +} + +func (d *dockerStateWriter) WritePid(pid int) error { + err := d.dsw.WritePid(pid) + if d.callback != nil { + d.callback(d.c) + } + return err +} + +func (d *dockerStateWriter) DeletePid() error { + return d.dsw.DeletePid() } func createContainer(c *execdriver.Command) *libcontainer.Container { @@ -334,7 +166,11 @@ func createContainer(c *execdriver.Command) *libcontainer.Container { Mtu: c.Network.Mtu, Address: fmt.Sprintf("%s/%d", c.Network.IPAddress, c.Network.IPPrefixLen), Gateway: c.Network.Gateway, - Bridge: c.Network.Bridge, + Type: "veth", + Context: libcontainer.Context{ + "prefix": "dock", + "bridge": c.Network.Bridge, + }, } } if c.Privileged { @@ -347,3 +183,29 @@ func createContainer(c *execdriver.Command) *libcontainer.Container { } return container } + +type dockerStdTerm struct { + lxc.StdConsole + pipes *execdriver.Pipes +} + +func (d *dockerStdTerm) Attach(cmd *exec.Cmd) error { + return d.AttachPipes(cmd, d.pipes) +} + +func (d *dockerStdTerm) SetMaster(master *os.File) { + // do nothing +} + +type dockerTtyTerm struct { + lxc.TtyConsole + pipes *execdriver.Pipes +} + +func (t *dockerTtyTerm) Attach(cmd *exec.Cmd) error { + return t.AttachPipes(cmd, t.pipes) +} + +func (t *dockerTtyTerm) SetMaster(master *os.File) { + t.MasterPty = master +} diff --git a/pkg/libcontainer/nsinit/state.go b/pkg/libcontainer/nsinit/state.go index 1f0fedd110..2dbaaa5977 100644 --- a/pkg/libcontainer/nsinit/state.go +++ b/pkg/libcontainer/nsinit/state.go @@ -4,6 +4,7 @@ import ( "fmt" "io/ioutil" "os" + "path/filepath" ) type StateWriter interface { @@ -12,13 +13,14 @@ type StateWriter interface { } type DefaultStateWriter struct { + Root string } // 
writePidFile writes the namespaced processes pid to .nspid in the rootfs for the container -func (*DefaultStateWriter) WritePid(pid int) error { - return ioutil.WriteFile(".nspid", []byte(fmt.Sprint(pid)), 0655) +func (d *DefaultStateWriter) WritePid(pid int) error { + return ioutil.WriteFile(filepath.Join(d.Root, ".nspid"), []byte(fmt.Sprint(pid)), 0655) } -func (*DefaultStateWriter) DeletePid() error { - return os.Remove(".nspid") +func (d *DefaultStateWriter) DeletePid() error { + return os.Remove(filepath.Join(d.Root, ".nspid")) } diff --git a/pkg/libcontainer/nsinit/term.go b/pkg/libcontainer/nsinit/term.go index 649246891e..58dccab2b8 100644 --- a/pkg/libcontainer/nsinit/term.go +++ b/pkg/libcontainer/nsinit/term.go @@ -11,6 +11,7 @@ type Terminal interface { io.Closer SetMaster(*os.File) Attach(*exec.Cmd) error + Resize(h, w int) error } func NewTerminal(stdin io.Reader, stdout, stderr io.Writer, tty bool) Terminal { @@ -35,6 +36,10 @@ type TtyTerminal struct { state *term.State } +func (t *TtyTerminal) Resize(h, w int) error { + return term.SetWinsize(t.master.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)}) +} + func (t *TtyTerminal) SetMaster(master *os.File) { t.master = master } @@ -83,6 +88,10 @@ func (s *StdTerminal) Close() error { return nil } +func (s *StdTerminal) Resize(h, w int) error { + return nil +} + func (s *StdTerminal) Attach(command *exec.Cmd) error { inPipe, err := command.StdinPipe() if err != nil { From 172260a49be6c3516edc6869d58957e844f9c69b Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Sat, 22 Feb 2014 01:28:59 -0800 Subject: [PATCH 47/81] Fix tty copy for driver Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- execdriver/namespaces/driver.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/execdriver/namespaces/driver.go b/execdriver/namespaces/driver.go index a3f095f464..efbb09d131 100644 --- a/execdriver/namespaces/driver.go +++ b/execdriver/namespaces/driver.go @@ 
-8,6 +8,7 @@ import ( "github.com/dotcloud/docker/execdriver/lxc" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/nsinit" + "io" "io/ioutil" "os" "os/exec" @@ -203,7 +204,11 @@ type dockerTtyTerm struct { } func (t *dockerTtyTerm) Attach(cmd *exec.Cmd) error { - return t.AttachPipes(cmd, t.pipes) + go io.Copy(t.pipes.Stdout, t.MasterPty) + if t.pipes.Stdin != nil { + go io.Copy(t.MasterPty, t.pipes.Stdin) + } + return nil } func (t *dockerTtyTerm) SetMaster(master *os.File) { From a08e78a78c4b548919515fa7910db56990ef44dc Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 24 Feb 2014 10:46:20 -0800 Subject: [PATCH 48/81] Look for cpu subsystem instead of memory Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/cgroups/cgroups.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/cgroups/cgroups.go b/pkg/cgroups/cgroups.go index 96002f0af9..e260d67661 100644 --- a/pkg/cgroups/cgroups.go +++ b/pkg/cgroups/cgroups.go @@ -132,7 +132,7 @@ func (c *Cgroup) Apply(pid int) error { // http://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups/ // // we can pick any subsystem to find the root - cgroupRoot, err := FindCgroupMountpoint("memory") + cgroupRoot, err := FindCgroupMountpoint("cpu") if err != nil { return err } From 8f20058307cb30c9697b8dbee1c7bf2b1e0ba766 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 24 Feb 2014 12:21:13 -0800 Subject: [PATCH 49/81] Compile nsinit into docker for use with dockerinit Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- docker/docker.go | 2 +- execdriver/driver.go | 2 ++ execdriver/namespaces/driver.go | 33 ++++++++++++++++++++++++++++----- sysinit/sysinit.go | 4 ++++ 4 files changed, 35 insertions(+), 6 deletions(-) diff --git a/docker/docker.go b/docker/docker.go index 9e93b71767..6c6ad66abf 100644 --- a/docker/docker.go +++ b/docker/docker.go @@ -17,7 +17,7 @@ import ( ) func main() { - if selfPath := 
utils.SelfPath(); selfPath == "/sbin/init" || selfPath == "/.dockerinit" { + if selfPath := utils.SelfPath(); selfPath == "/sbin/init" || strings.Contains(selfPath, "/.dockerinit") { // Running in init mode sysinit.SysInit() return diff --git a/execdriver/driver.go b/execdriver/driver.go index a6d865caf3..8b5dd5ccc7 100644 --- a/execdriver/driver.go +++ b/execdriver/driver.go @@ -51,6 +51,8 @@ type InitArgs struct { Args []string Mtu int Driver string + Console string + Pipe int } // Driver specific information based on diff --git a/execdriver/namespaces/driver.go b/execdriver/namespaces/driver.go index efbb09d131..5657b8cc46 100644 --- a/execdriver/namespaces/driver.go +++ b/execdriver/namespaces/driver.go @@ -28,6 +28,28 @@ var ( func init() { execdriver.RegisterInitFunc(DriverName, func(args *execdriver.InitArgs) error { + var container *libcontainer.Container + f, err := os.Open("container.json") + if err != nil { + return err + } + if err := json.NewDecoder(f).Decode(&container); err != nil { + f.Close() + return err + } + f.Close() + + cwd, err := os.Getwd() + if err != nil { + return err + } + syncPipe, err := nsinit.NewSyncPipeFromFd(0, uintptr(args.Pipe)) + if err != nil { + return err + } + if err := nsinit.Init(container, cwd, args.Console, syncPipe, args.Args); err != nil { + return err + } return nil }) } @@ -115,14 +137,15 @@ type dockerCommandFactory struct { func (d *dockerCommandFactory) Create(container *libcontainer.Container, console, logFile string, syncFd uintptr, args []string) *exec.Cmd { c := d.c - aname, _ := exec.LookPath("nsinit") - c.Path = aname + // we need to join the rootfs because nsinit will setup the rootfs and chroot + initPath := filepath.Join(c.Rootfs, c.InitPath) + + c.Path = initPath c.Args = append([]string{ - aname, + initPath, + "-driver", DriverName, "-console", console, "-pipe", fmt.Sprint(syncFd), - "-log", logFile, - "init", }, args...) 
c.SysProcAttr = &syscall.SysProcAttr{ Cloneflags: uintptr(nsinit.GetNamespaceFlags(container.Namespaces)), diff --git a/sysinit/sysinit.go b/sysinit/sysinit.go index b02cf027aa..e10c0baad0 100644 --- a/sysinit/sysinit.go +++ b/sysinit/sysinit.go @@ -53,6 +53,8 @@ func SysInit() { privileged = flag.Bool("privileged", false, "privileged mode") mtu = flag.Int("mtu", 1500, "interface mtu") driver = flag.String("driver", "", "exec driver") + pipe = flag.Int("pipe", 0, "sync pipe fd") + console = flag.String("console", "", "console (pty slave) path") ) flag.Parse() @@ -79,6 +81,8 @@ func SysInit() { Args: flag.Args(), Mtu: *mtu, Driver: *driver, + Console: *console, + Pipe: *pipe, } if err := executeProgram(args); err != nil { From 01f9815b55742654b2f35d13c3aba6a9e48634c7 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 24 Feb 2014 13:40:17 -0800 Subject: [PATCH 50/81] Fix tests with dockerinit lookup path Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- docker/docker.go | 2 +- execdriver/namespaces/driver.go | 3 ++- integration/runtime_test.go | 2 +- pkg/libcontainer/nsinit/nsinit/main.go | 4 +++- sysinit/sysinit.go | 24 +++++++++++++++++++++++- 5 files changed, 30 insertions(+), 5 deletions(-) diff --git a/docker/docker.go b/docker/docker.go index 6c6ad66abf..66c4dbf02a 100644 --- a/docker/docker.go +++ b/docker/docker.go @@ -17,7 +17,7 @@ import ( ) func main() { - if selfPath := utils.SelfPath(); selfPath == "/sbin/init" || strings.Contains(selfPath, "/.dockerinit") { + if selfPath := utils.SelfPath(); selfPath == "/sbin/init" || strings.Contains(selfPath, ".dockerinit") { // Running in init mode sysinit.SysInit() return diff --git a/execdriver/namespaces/driver.go b/execdriver/namespaces/driver.go index 5657b8cc46..9f153bd3eb 100644 --- a/execdriver/namespaces/driver.go +++ b/execdriver/namespaces/driver.go @@ -86,7 +86,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba return -1, err } args := 
append([]string{c.Entrypoint}, c.Arguments...) - return nsinit.Exec(container, factory, stateWriter, term, "/nsinit.log", args) + return nsinit.Exec(container, factory, stateWriter, term, "", args) } func (d *driver) Kill(p *execdriver.Command, sig int) error { @@ -146,6 +146,7 @@ func (d *dockerCommandFactory) Create(container *libcontainer.Container, "-driver", DriverName, "-console", console, "-pipe", fmt.Sprint(syncFd), + "-log", logFile, }, args...) c.SysProcAttr = &syscall.SysProcAttr{ Cloneflags: uintptr(nsinit.GetNamespaceFlags(container.Namespaces)), diff --git a/integration/runtime_test.go b/integration/runtime_test.go index ca2119ce1f..522e910562 100644 --- a/integration/runtime_test.go +++ b/integration/runtime_test.go @@ -85,7 +85,7 @@ func init() { os.Setenv("TEST", "1") // Hack to run sys init during unit testing - if selfPath := utils.SelfPath(); selfPath == "/sbin/init" || selfPath == "/.dockerinit" { + if selfPath := utils.SelfPath(); selfPath == "/sbin/init" || strings.Contains(selfPath, ".dockerinit") { sysinit.SysInit() return } diff --git a/pkg/libcontainer/nsinit/nsinit/main.go b/pkg/libcontainer/nsinit/nsinit/main.go index c299412c7b..786c9c1ea6 100644 --- a/pkg/libcontainer/nsinit/nsinit/main.go +++ b/pkg/libcontainer/nsinit/nsinit/main.go @@ -24,7 +24,7 @@ var ( ErrWrongArguments = errors.New("Wrong argument count") ) -func init() { +func registerFlags() { flag.StringVar(&console, "console", "", "console (pty slave) path") flag.StringVar(&logFile, "log", "none", "log options (none, stderr, or a file path)") flag.IntVar(&pipeFd, "pipe", 0, "sync pipe fd") @@ -33,6 +33,8 @@ func init() { } func main() { + registerFlags() + if flag.NArg() < 1 { log.Fatal(ErrWrongArguments) } diff --git a/sysinit/sysinit.go b/sysinit/sysinit.go index e10c0baad0..6d604fcccc 100644 --- a/sysinit/sysinit.go +++ b/sysinit/sysinit.go @@ -7,6 +7,7 @@ import ( "github.com/dotcloud/docker/execdriver" _ "github.com/dotcloud/docker/execdriver/chroot" _ 
"github.com/dotcloud/docker/execdriver/lxc" + "io" "io/ioutil" "log" "os" @@ -55,9 +56,14 @@ func SysInit() { driver = flag.String("driver", "", "exec driver") pipe = flag.Int("pipe", 0, "sync pipe fd") console = flag.String("console", "", "console (pty slave) path") + logFile = flag.String("log", "", "log file path") ) flag.Parse() + if err := setupLogging(*logFile); err != nil { + log.Fatalf("setup logging %s", err) + } + // Get env var env []string content, err := ioutil.ReadFile("/.dockerenv") @@ -67,7 +73,6 @@ func SysInit() { if err := json.Unmarshal(content, &env); err != nil { log.Fatalf("Unable to unmarshal environment variables: %v", err) } - // Propagate the plugin-specific container env variable env = append(env, "container="+os.Getenv("container")) @@ -89,3 +94,20 @@ func SysInit() { log.Fatal(err) } } + +func setupLogging(logFile string) (err error) { + var writer io.Writer + switch logFile { + case "stderr": + writer = os.Stderr + case "none", "": + writer = ioutil.Discard + default: + writer, err = os.OpenFile(logFile, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0755) + if err != nil { + return err + } + } + log.SetOutput(writer) + return nil +} From 1c79b747bb10a389249aba90ad217ca0128afb74 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 24 Feb 2014 13:52:56 -0800 Subject: [PATCH 51/81] Honor user passed on container in nsinit Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/nsinit/init.go | 34 ++++++++++++++++++++++++--------- pkg/system/calls_linux.go | 8 ++++++++ 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index cdedc14769..23303cd704 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -8,6 +8,7 @@ import ( "github.com/dotcloud/docker/pkg/libcontainer/capabilities" "github.com/dotcloud/docker/pkg/libcontainer/network" "github.com/dotcloud/docker/pkg/system" + 
"github.com/dotcloud/docker/pkg/user" "log" "os" "os/exec" @@ -110,15 +111,30 @@ func resolveRootfs(uncleanRootfs string) (string, error) { } func setupUser(container *libcontainer.Container) error { - // TODO: honor user passed on container - if err := system.Setgroups(nil); err != nil { - return err - } - if err := system.Setresgid(0, 0, 0); err != nil { - return err - } - if err := system.Setresuid(0, 0, 0); err != nil { - return err + if container.User != "" { + uid, gid, suppGids, err := user.GetUserGroupSupplementary(container.User, syscall.Getuid(), syscall.Getgid()) + if err != nil { + return err + } + if err := system.Setgroups(suppGids); err != nil { + return err + } + if err := system.Setgid(gid); err != nil { + return err + } + if err := system.Setuid(uid); err != nil { + return err + } + } else { + if err := system.Setgroups(nil); err != nil { + return err + } + if err := system.Setresgid(0, 0, 0); err != nil { + return err + } + if err := system.Setresuid(0, 0, 0); err != nil { + return err + } } return nil } diff --git a/pkg/system/calls_linux.go b/pkg/system/calls_linux.go index 42afa349c2..0bf42e3c71 100644 --- a/pkg/system/calls_linux.go +++ b/pkg/system/calls_linux.go @@ -71,6 +71,14 @@ func Setresuid(ruid, euid, suid int) error { return syscall.Setresuid(ruid, euid, suid) } +func Setgid(gid int) error { + return syscall.Setgid(gid) +} + +func Setuid(uid int) error { + return syscall.Setuid(uid) +} + func Sethostname(name string) error { return syscall.Sethostname([]byte(name)) } From cfd188e9251f5047e4fd677fe8f2921ae28b8bcc Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 24 Feb 2014 14:11:09 -0800 Subject: [PATCH 52/81] Add info for driver Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- execdriver/namespaces/driver.go | 25 ++++++++++++++++++++++--- integration/container_test.go | 2 +- runtime.go | 2 +- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/execdriver/namespaces/driver.go 
b/execdriver/namespaces/driver.go index 9f153bd3eb..7dbea4c5c8 100644 --- a/execdriver/namespaces/driver.go +++ b/execdriver/namespaces/driver.go @@ -55,10 +55,26 @@ func init() { } type driver struct { + root string } -func NewDriver() (*driver, error) { - return &driver{}, nil +type info struct { + ID string + driver *driver +} + +func (i *info) IsRunning() bool { + p := filepath.Join(i.driver.root, "containers", i.ID, "rootfs", ".nspid") + if _, err := os.Stat(p); err == nil { + return true + } + return false +} + +func NewDriver(root string) (*driver, error) { + return &driver{ + root: root, + }, nil } func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) { @@ -98,7 +114,10 @@ func (d *driver) Restore(c *execdriver.Command) error { } func (d *driver) Info(id string) execdriver.Info { - return nil + return &info{ + ID: id, + driver: d, + } } func (d *driver) Name() string { diff --git a/integration/container_test.go b/integration/container_test.go index b961e1d147..ea0283260a 100644 --- a/integration/container_test.go +++ b/integration/container_test.go @@ -1044,7 +1044,7 @@ func TestEnv(t *testing.T) { goodEnv := []string{ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "HOME=/", - "container=lxc", + "container=docker", "HOSTNAME=" + utils.TruncateID(container.ID), "FALSE=true", "TRUE=false", diff --git a/runtime.go b/runtime.go index 9f16d6213b..739cc7ee56 100644 --- a/runtime.go +++ b/runtime.go @@ -704,7 +704,7 @@ func NewRuntimeFromDirectory(config *DaemonConfig, eng *engine.Engine) (*Runtime sysInfo := sysinfo.New(false) - ed, err := namespaces.NewDriver() + ed, err := namespaces.NewDriver(config.Root) if err != nil { return nil, err } From a6e5e1851197ccc262c61190d5904dd410293466 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 24 Feb 2014 14:35:27 -0800 Subject: [PATCH 53/81] Fix kill signals and rootfs path for pid Docker-DCO-1.1-Signed-off-by: Michael 
Crosby (github: crosbymichael) --- execdriver/namespaces/driver.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/execdriver/namespaces/driver.go b/execdriver/namespaces/driver.go index 7dbea4c5c8..6cb2e5b522 100644 --- a/execdriver/namespaces/driver.go +++ b/execdriver/namespaces/driver.go @@ -64,7 +64,7 @@ type info struct { } func (i *info) IsRunning() bool { - p := filepath.Join(i.driver.root, "containers", i.ID, "rootfs", ".nspid") + p := filepath.Join(i.driver.root, "containers", i.ID, "root", ".nspid") if _, err := os.Stat(p); err == nil { return true } @@ -106,7 +106,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba } func (d *driver) Kill(p *execdriver.Command, sig int) error { - return p.Process.Kill() + return syscall.Kill(p.Process.Pid, syscall.Signal(sig)) } func (d *driver) Restore(c *execdriver.Command) error { From a76407ac61dd57429a1350f840c323f45a97b27f Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 24 Feb 2014 15:47:23 -0800 Subject: [PATCH 54/81] Cgroups allow devices for privileged containers Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- execdriver/namespaces/driver.go | 38 ++++++++++++++++++++++++++++++++- integration/container_test.go | 4 ++-- pkg/libcontainer/nsinit/init.go | 9 +------- 3 files changed, 40 insertions(+), 11 deletions(-) diff --git a/execdriver/namespaces/driver.go b/execdriver/namespaces/driver.go index 6cb2e5b522..cd5bb9f35e 100644 --- a/execdriver/namespaces/driver.go +++ b/execdriver/namespaces/driver.go @@ -6,6 +6,7 @@ import ( "fmt" "github.com/dotcloud/docker/execdriver" "github.com/dotcloud/docker/execdriver/lxc" + "github.com/dotcloud/docker/pkg/cgroups" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/nsinit" "io" @@ -13,6 +14,7 @@ import ( "os" "os/exec" "path/filepath" + "strconv" "strings" "syscall" ) @@ -125,7 +127,40 @@ func (d *driver) Name() string { } func (d *driver) 
GetPidsForContainer(id string) ([]int, error) { - return nil, ErrNotSupported + pids := []int{} + + subsystem := "cpu" + cgroupRoot, err := cgroups.FindCgroupMountpoint(subsystem) + if err != nil { + return pids, err + } + cgroupRoot = filepath.Dir(cgroupRoot) + + cgroupDir, err := cgroups.GetThisCgroupDir(subsystem) + if err != nil { + return pids, err + } + + filename := filepath.Join(cgroupRoot, cgroupDir, id, "tasks") + if _, err := os.Stat(filename); os.IsNotExist(err) { + filename = filepath.Join(cgroupRoot, cgroupDir, "docker", id, "tasks") + } + + output, err := ioutil.ReadFile(filename) + if err != nil { + return pids, err + } + for _, p := range strings.Split(string(output), "\n") { + if len(p) == 0 { + continue + } + pid, err := strconv.Atoi(p) + if err != nil { + return pids, fmt.Errorf("Invalid pid '%s': %s", p, err) + } + pids = append(pids, pid) + } + return pids, nil } func writeContainerFile(container *libcontainer.Container, rootfs string) error { @@ -219,6 +254,7 @@ func createContainer(c *execdriver.Command) *libcontainer.Container { } if c.Privileged { container.Capabilities = nil + container.Cgroups.DeviceAccess = true } if c.Resources != nil { container.Cgroups.CpuShares = c.Resources.CpuShares diff --git a/integration/container_test.go b/integration/container_test.go index ea0283260a..8e4314f456 100644 --- a/integration/container_test.go +++ b/integration/container_test.go @@ -1581,8 +1581,8 @@ func TestPrivilegedCanMknod(t *testing.T) { eng := NewTestEngine(t) runtime := mkRuntimeFromEngine(eng, t) defer runtime.Nuke() - if output, _ := runContainer(eng, runtime, []string{"-privileged", "_", "sh", "-c", "mknod /tmp/sda b 8 0 && echo ok"}, t); output != "ok\n" { - t.Fatal("Could not mknod into privileged container") + if output, err := runContainer(eng, runtime, []string{"-privileged", "_", "sh", "-c", "mknod /tmp/sda b 8 0 && echo ok"}, t); output != "ok\n" { + t.Fatalf("Could not mknod into privileged container %s %v", output, err) } } 
diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index 23303cd704..d6d7dc3347 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -9,7 +9,6 @@ import ( "github.com/dotcloud/docker/pkg/libcontainer/network" "github.com/dotcloud/docker/pkg/system" "github.com/dotcloud/docker/pkg/user" - "log" "os" "os/exec" "path/filepath" @@ -23,7 +22,6 @@ func Init(container *libcontainer.Container, uncleanRootfs, console string, sync if err != nil { return err } - log.Printf("initializing namespace at %s", rootfs) // We always read this as it is a way to sync with the parent as well context, err := syncPipe.ReadFromParent() @@ -32,10 +30,8 @@ func Init(container *libcontainer.Container, uncleanRootfs, console string, sync return err } syncPipe.Close() - log.Printf("received context from parent %v", context) if console != "" { - log.Printf("setting up console for %s", console) // close pipes so that we can replace it with the pty closeStdPipes() slave, err := openTerminal(console, syscall.O_RDWR) @@ -66,11 +62,9 @@ func Init(container *libcontainer.Container, uncleanRootfs, console string, sync if err := system.Sethostname(container.Hostname); err != nil { return fmt.Errorf("sethostname %s", err) } - log.Printf("dropping capabilities") if err := capabilities.DropCapabilities(container); err != nil { return fmt.Errorf("drop capabilities %s", err) } - log.Printf("setting user in namespace") if err := setupUser(container); err != nil { return fmt.Errorf("setup user %s", err) } @@ -87,7 +81,6 @@ func execArgs(args []string, env []string) error { if err != nil { return err } - log.Printf("execing %s goodbye", name) if err := system.Exec(name, args[0:], env); err != nil { return fmt.Errorf("exec %s", err) } @@ -111,7 +104,7 @@ func resolveRootfs(uncleanRootfs string) (string, error) { } func setupUser(container *libcontainer.Container) error { - if container.User != "" { + if container.User != "" && container.User != "root" 
{ uid, gid, suppGids, err := user.GetUserGroupSupplementary(container.User, syscall.Getuid(), syscall.Getgid()) if err != nil { return err From 9bf6cb2692a9b14cac559b4b9c9403e7f6f13ecd Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 24 Feb 2014 16:26:06 -0800 Subject: [PATCH 55/81] Fix get pids for nsinit Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- execdriver/namespaces/default_template.go | 2 +- execdriver/namespaces/driver.go | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/execdriver/namespaces/default_template.go b/execdriver/namespaces/default_template.go index 79b6ac1c11..f84544adc9 100644 --- a/execdriver/namespaces/default_template.go +++ b/execdriver/namespaces/default_template.go @@ -34,7 +34,7 @@ func getDefaultTemplate() *libcontainer.Container { libcontainer.CLONE_NEWUTS, }, Cgroups: &cgroups.Cgroup{ - Name: "docker", + Parent: "docker", DeviceAccess: false, }, } diff --git a/execdriver/namespaces/driver.go b/execdriver/namespaces/driver.go index cd5bb9f35e..8143b39d90 100644 --- a/execdriver/namespaces/driver.go +++ b/execdriver/namespaces/driver.go @@ -129,13 +129,11 @@ func (d *driver) Name() string { func (d *driver) GetPidsForContainer(id string) ([]int, error) { pids := []int{} - subsystem := "cpu" + subsystem := "devices" cgroupRoot, err := cgroups.FindCgroupMountpoint(subsystem) if err != nil { return pids, err } - cgroupRoot = filepath.Dir(cgroupRoot) - cgroupDir, err := cgroups.GetThisCgroupDir(subsystem) if err != nil { return pids, err @@ -252,6 +250,7 @@ func createContainer(c *execdriver.Command) *libcontainer.Container { }, } } + container.Cgroups.Name = c.ID if c.Privileged { container.Capabilities = nil container.Cgroups.DeviceAccess = true From 9f03fd76b578f2d9d00b0a1bd76b776e20a7d681 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 24 Feb 2014 16:35:11 -0800 Subject: [PATCH 56/81] Fix restore container by nspid Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: 
crosbymichael) --- execdriver/namespaces/driver.go | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/execdriver/namespaces/driver.go b/execdriver/namespaces/driver.go index 8143b39d90..d948594059 100644 --- a/execdriver/namespaces/driver.go +++ b/execdriver/namespaces/driver.go @@ -112,7 +112,24 @@ func (d *driver) Kill(p *execdriver.Command, sig int) error { } func (d *driver) Restore(c *execdriver.Command) error { - return ErrNotSupported + var ( + nspid int + p = filepath.Join(d.root, "containers", c.ID, "root", ".nspid") + ) + f, err := os.Open(p) + if err != nil { + return err + } + defer f.Close() + if _, err := fmt.Fscanf(f, "%d", &nspid); err != nil { + return err + } + proc, err := os.FindProcess(nspid) + if err != nil { + return err + } + _, err = proc.Wait() + return err } func (d *driver) Info(id string) execdriver.Info { From 77f68f74c7e6fc977fb2681c7ee60ac652be983e Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 24 Feb 2014 16:42:30 -0800 Subject: [PATCH 57/81] Rename namespace driver to docker Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- execdriver/{namespaces => docker}/default_template.go | 2 +- execdriver/{namespaces => docker}/driver.go | 2 +- execdriver/{namespaces => docker}/term.go | 2 +- runtime.go | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) rename execdriver/{namespaces => docker}/default_template.go (98%) rename execdriver/{namespaces => docker}/driver.go (99%) rename execdriver/{namespaces => docker}/term.go (96%) diff --git a/execdriver/namespaces/default_template.go b/execdriver/docker/default_template.go similarity index 98% rename from execdriver/namespaces/default_template.go rename to execdriver/docker/default_template.go index f84544adc9..4d6dd954c0 100644 --- a/execdriver/namespaces/default_template.go +++ b/execdriver/docker/default_template.go @@ -1,4 +1,4 @@ -package namespaces +package docker import ( "github.com/dotcloud/docker/pkg/cgroups" 
diff --git a/execdriver/namespaces/driver.go b/execdriver/docker/driver.go similarity index 99% rename from execdriver/namespaces/driver.go rename to execdriver/docker/driver.go index d948594059..2cd03dd16c 100644 --- a/execdriver/namespaces/driver.go +++ b/execdriver/docker/driver.go @@ -1,4 +1,4 @@ -package namespaces +package docker import ( "encoding/json" diff --git a/execdriver/namespaces/term.go b/execdriver/docker/term.go similarity index 96% rename from execdriver/namespaces/term.go rename to execdriver/docker/term.go index 682c6a27b1..6705fc49e4 100644 --- a/execdriver/namespaces/term.go +++ b/execdriver/docker/term.go @@ -1,4 +1,4 @@ -package namespaces +package docker import ( "github.com/dotcloud/docker/execdriver" diff --git a/runtime.go b/runtime.go index 739cc7ee56..1924687451 100644 --- a/runtime.go +++ b/runtime.go @@ -7,8 +7,8 @@ import ( "github.com/dotcloud/docker/dockerversion" "github.com/dotcloud/docker/engine" "github.com/dotcloud/docker/execdriver" + "github.com/dotcloud/docker/execdriver/docker" _ "github.com/dotcloud/docker/execdriver/lxc" - "github.com/dotcloud/docker/execdriver/namespaces" "github.com/dotcloud/docker/graphdriver" "github.com/dotcloud/docker/graphdriver/aufs" _ "github.com/dotcloud/docker/graphdriver/btrfs" @@ -704,7 +704,7 @@ func NewRuntimeFromDirectory(config *DaemonConfig, eng *engine.Engine) (*Runtime sysInfo := sysinfo.New(false) - ed, err := namespaces.NewDriver(config.Root) + ed, err := docker.NewDriver(config.Root) if err != nil { return nil, err } From e84e344b1f887f06852953bc75912fc24a64d08a Mon Sep 17 00:00:00 2001 From: "Guillaume J. Charmes" Date: Mon, 24 Feb 2014 17:02:46 -0800 Subject: [PATCH 58/81] Use the dockerenv file from the current dir instead of root Docker-DCO-1.1-Signed-off-by: Guillaume J. 
Charmes (github: creack) --- sysinit/sysinit.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sysinit/sysinit.go b/sysinit/sysinit.go index 6d604fcccc..67962e30c6 100644 --- a/sysinit/sysinit.go +++ b/sysinit/sysinit.go @@ -66,7 +66,7 @@ func SysInit() { // Get env var env []string - content, err := ioutil.ReadFile("/.dockerenv") + content, err := ioutil.ReadFile(".dockerenv") if err != nil { log.Fatalf("Unable to load environment variables: %v", err) } From 9cb4573d33607bc32e7db19981b3e9d5eaf449a0 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 24 Feb 2014 18:38:24 -0800 Subject: [PATCH 59/81] Improve logging for nsinit Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- execdriver/docker/driver.go | 8 +++-- pkg/libcontainer/network/veth.go | 4 --- pkg/libcontainer/nsinit/exec.go | 43 +++++++++++++------------- pkg/libcontainer/nsinit/execin.go | 2 +- pkg/libcontainer/nsinit/init.go | 2 +- pkg/libcontainer/nsinit/nsinit.go | 29 +++++++++++++++++ pkg/libcontainer/nsinit/nsinit/main.go | 36 +++++++++++---------- 7 files changed, 78 insertions(+), 46 deletions(-) create mode 100644 pkg/libcontainer/nsinit/nsinit.go diff --git a/execdriver/docker/driver.go b/execdriver/docker/driver.go index 2cd03dd16c..7b6d3474af 100644 --- a/execdriver/docker/driver.go +++ b/execdriver/docker/driver.go @@ -11,6 +11,7 @@ import ( "github.com/dotcloud/docker/pkg/libcontainer/nsinit" "io" "io/ioutil" + "log" "os" "os/exec" "path/filepath" @@ -26,6 +27,7 @@ const ( var ( ErrNotSupported = errors.New("not supported") + noOpLog = log.New(ioutil.Discard, "[nsinit] ", log.LstdFlags) ) func init() { @@ -49,7 +51,8 @@ func init() { if err != nil { return err } - if err := nsinit.Init(container, cwd, args.Console, syncPipe, args.Args); err != nil { + ns := nsinit.NewNsInit(noOpLog, "", &nsinit.DefaultCommandFactory{}, &nsinit.DefaultStateWriter{}) + if err := ns.Init(container, cwd, args.Console, syncPipe, args.Args); err != nil { return err } 
return nil @@ -90,6 +93,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba dsw: &nsinit.DefaultStateWriter{c.Rootfs}, } ) + ns := nsinit.NewNsInit(noOpLog, "", factory, stateWriter) if c.Tty { term = &dockerTtyTerm{ pipes: pipes, @@ -104,7 +108,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba return -1, err } args := append([]string{c.Entrypoint}, c.Arguments...) - return nsinit.Exec(container, factory, stateWriter, term, "", args) + return ns.Exec(container, term, args) } func (d *driver) Kill(p *execdriver.Command, sig int) error { diff --git a/pkg/libcontainer/network/veth.go b/pkg/libcontainer/network/veth.go index 61fec5500c..321c68eca4 100644 --- a/pkg/libcontainer/network/veth.go +++ b/pkg/libcontainer/network/veth.go @@ -4,14 +4,12 @@ import ( "fmt" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/utils" - "log" ) type Veth struct { } func (v *Veth) Create(n *libcontainer.Network, nspid int) (libcontainer.Context, error) { - log.Printf("creating veth network") var ( bridge string prefix string @@ -31,7 +29,6 @@ func (v *Veth) Create(n *libcontainer.Network, nspid int) (libcontainer.Context, "vethHost": name1, "vethChild": name2, } - log.Printf("veth pair created %s <> %s", name1, name2) if err := SetInterfaceMaster(name1, bridge); err != nil { return context, err } @@ -41,7 +38,6 @@ func (v *Veth) Create(n *libcontainer.Network, nspid int) (libcontainer.Context, if err := InterfaceUp(name1); err != nil { return context, err } - log.Printf("setting %s inside %d namespace", name2, nspid) if err := SetInterfaceInNamespacePid(name2, nspid); err != nil { return context, err } diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index ee83f4f107..c4073235ef 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -6,7 +6,6 @@ import ( "github.com/dotcloud/docker/pkg/libcontainer" 
"github.com/dotcloud/docker/pkg/libcontainer/network" "github.com/dotcloud/docker/pkg/system" - "log" "os" "os/exec" "syscall" @@ -14,9 +13,7 @@ import ( // Exec performes setup outside of a namespace so that a container can be // executed. Exec is a high level function for working with container namespaces. -func Exec(container *libcontainer.Container, - factory CommandFactory, state StateWriter, term Terminal, - logFile string, args []string) (int, error) { +func (ns *linuxNs) Exec(container *libcontainer.Container, term Terminal, args []string) (int, error) { var ( master *os.File console string @@ -31,7 +28,7 @@ func Exec(container *libcontainer.Container, } if container.Tty { - log.Printf("setting up master and console") + ns.logger.Printf("setting up master and console") master, console, err = CreateMasterAndConsole() if err != nil { return -1, err @@ -39,54 +36,56 @@ func Exec(container *libcontainer.Container, term.SetMaster(master) } - command := factory.Create(container, console, logFile, syncPipe.child.Fd(), args) + command := ns.commandFactory.Create(container, console, ns.logFile, syncPipe.child.Fd(), args) if err := term.Attach(command); err != nil { return -1, err } defer term.Close() - log.Printf("staring init") + ns.logger.Printf("staring init") if err := command.Start(); err != nil { return -1, err } - log.Printf("writing state file") - if err := state.WritePid(command.Process.Pid); err != nil { + ns.logger.Printf("writing state file") + if err := ns.stateWriter.WritePid(command.Process.Pid); err != nil { command.Process.Kill() return -1, err } defer func() { - log.Printf("removing state file") - state.DeletePid() + ns.logger.Printf("removing state file") + ns.stateWriter.DeletePid() }() // Do this before syncing with child so that no children // can escape the cgroup - if err := SetupCgroups(container, command.Process.Pid); err != nil { + if err := ns.SetupCgroups(container, command.Process.Pid); err != nil { command.Process.Kill() return -1, err 
} - if err := InitializeNetworking(container, command.Process.Pid, syncPipe); err != nil { + if err := ns.InitializeNetworking(container, command.Process.Pid, syncPipe); err != nil { command.Process.Kill() return -1, err } // Sync with child - log.Printf("closing sync pipes") + ns.logger.Printf("closing sync pipes") syncPipe.Close() - log.Printf("waiting on process") + ns.logger.Printf("waiting on process") if err := command.Wait(); err != nil { if _, ok := err.(*exec.ExitError); !ok { return -1, err } } - log.Printf("process ended") - return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil + + exitCode := command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus() + ns.logger.Printf("process ended with exit code %d", exitCode) + return exitCode, nil } -func SetupCgroups(container *libcontainer.Container, nspid int) error { +func (ns *linuxNs) SetupCgroups(container *libcontainer.Container, nspid int) error { if container.Cgroups != nil { - log.Printf("setting up cgroups") + ns.logger.Printf("setting up cgroups") if err := container.Cgroups.Apply(nspid); err != nil { return err } @@ -94,9 +93,9 @@ func SetupCgroups(container *libcontainer.Container, nspid int) error { return nil } -func InitializeNetworking(container *libcontainer.Container, nspid int, pipe *SyncPipe) error { +func (ns *linuxNs) InitializeNetworking(container *libcontainer.Container, nspid int, pipe *SyncPipe) error { if container.Network != nil { - log.Printf("creating host network configuration type %s", container.Network.Type) + ns.logger.Printf("creating host network configuration type %s", container.Network.Type) strategy, err := network.GetStrategy(container.Network.Type) if err != nil { return err @@ -105,7 +104,7 @@ func InitializeNetworking(container *libcontainer.Container, nspid int, pipe *Sy if err != nil { return err } - log.Printf("sending %v as network context", networkContext) + ns.logger.Printf("sending %v as network context", networkContext) if err := 
pipe.SendToChild(networkContext); err != nil { return err } diff --git a/pkg/libcontainer/nsinit/execin.go b/pkg/libcontainer/nsinit/execin.go index 85a89905c1..9c33f69b4d 100644 --- a/pkg/libcontainer/nsinit/execin.go +++ b/pkg/libcontainer/nsinit/execin.go @@ -12,7 +12,7 @@ import ( ) // ExecIn uses an existing pid and joins the pid's namespaces with the new command. -func ExecIn(container *libcontainer.Container, nspid int, args []string) (int, error) { +func (ns *linuxNs) ExecIn(container *libcontainer.Container, nspid int, args []string) (int, error) { for _, ns := range container.Namespaces { if err := system.Unshare(namespaceMap[ns]); err != nil { return -1, err diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index d6d7dc3347..5e33169b65 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -17,7 +17,7 @@ import ( // Init is the init process that first runs inside a new namespace to setup mounts, users, networking, // and other options required for the new container. 
-func Init(container *libcontainer.Container, uncleanRootfs, console string, syncPipe *SyncPipe, args []string) error { +func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, console string, syncPipe *SyncPipe, args []string) error { rootfs, err := resolveRootfs(uncleanRootfs) if err != nil { return err diff --git a/pkg/libcontainer/nsinit/nsinit.go b/pkg/libcontainer/nsinit/nsinit.go new file mode 100644 index 0000000000..599461e434 --- /dev/null +++ b/pkg/libcontainer/nsinit/nsinit.go @@ -0,0 +1,29 @@ +package nsinit + +import ( + "github.com/dotcloud/docker/pkg/libcontainer" + "log" +) + +type NsInit interface { + Exec(container *libcontainer.Container, term Terminal, args []string) (int, error) + ExecIn(container *libcontainer.Container, nspid int, args []string) (int, error) + Init(container *libcontainer.Container, uncleanRootfs, console string, syncPipe *SyncPipe, args []string) error +} + +type linuxNs struct { + root string + logFile string + logger *log.Logger + commandFactory CommandFactory + stateWriter StateWriter +} + +func NewNsInit(logger *log.Logger, logFile string, command CommandFactory, state StateWriter) NsInit { + return &linuxNs{ + logger: logger, + commandFactory: command, + stateWriter: state, + logFile: logFile, + } +} diff --git a/pkg/libcontainer/nsinit/nsinit/main.go b/pkg/libcontainer/nsinit/nsinit/main.go index 786c9c1ea6..c25037fa8b 100644 --- a/pkg/libcontainer/nsinit/nsinit/main.go +++ b/pkg/libcontainer/nsinit/nsinit/main.go @@ -42,13 +42,13 @@ func main() { if err != nil { log.Fatal(err) } - if err := setupLogging(); err != nil { + ns, err := newNsInit() + if err != nil { log.Fatal(err) } + switch flag.Arg(0) { case "exec": // this is executed outside of the namespace in the cwd - log.SetPrefix("[nsinit exec] ") - var exitCode int nspid, err := readPid() if err != nil { @@ -57,20 +57,16 @@ func main() { } } if nspid > 0 { - exitCode, err = nsinit.ExecIn(container, nspid, flag.Args()[1:]) + exitCode, err = 
ns.ExecIn(container, nspid, flag.Args()[1:]) } else { term := nsinit.NewTerminal(os.Stdin, os.Stdout, os.Stderr, container.Tty) - exitCode, err = nsinit.Exec(container, - &nsinit.DefaultCommandFactory{}, &nsinit.DefaultStateWriter{}, - term, - logFile, flag.Args()[1:]) + exitCode, err = ns.Exec(container, term, flag.Args()[1:]) } if err != nil { log.Fatal(err) } os.Exit(exitCode) case "init": // this is executed inside of the namespace to setup the container - log.SetPrefix("[nsinit init] ") cwd, err := os.Getwd() if err != nil { log.Fatal(err) @@ -82,7 +78,7 @@ func main() { if err != nil { log.Fatal(err) } - if err := nsinit.Init(container, cwd, console, syncPipe, flag.Args()[1:]); err != nil { + if err := ns.Init(container, cwd, console, syncPipe, flag.Args()[1:]); err != nil { log.Fatal(err) } default: @@ -116,19 +112,27 @@ func readPid() (int, error) { return pid, nil } -func setupLogging() (err error) { +func newNsInit() (nsinit.NsInit, error) { + logger, err := setupLogging() + if err != nil { + return nil, err + } + return nsinit.NewNsInit(logger, logFile, &nsinit.DefaultCommandFactory{}, &nsinit.DefaultStateWriter{}), nil +} + +func setupLogging() (logger *log.Logger, err error) { var writer io.Writer + switch logFile { case "stderr": writer = os.Stderr case "none", "": writer = ioutil.Discard default: - writer, err = os.OpenFile(logFile, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0755) - if err != nil { - return err + if writer, err = os.OpenFile(logFile, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0755); err != nil { + return } } - log.SetOutput(writer) - return nil + logger = log.New(writer, "", log.LstdFlags) + return } From ca537a63a8a21eb4d5d31144c934ca18846bb856 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 24 Feb 2014 18:52:48 -0800 Subject: [PATCH 60/81] Remove chroot driver, it's not needed Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- execdriver/chroot/driver.go | 101 ------------------------------------ sysinit/sysinit.go | 
1 - 2 files changed, 102 deletions(-) delete mode 100644 execdriver/chroot/driver.go diff --git a/execdriver/chroot/driver.go b/execdriver/chroot/driver.go deleted file mode 100644 index dfec680d84..0000000000 --- a/execdriver/chroot/driver.go +++ /dev/null @@ -1,101 +0,0 @@ -package chroot - -import ( - "fmt" - "github.com/dotcloud/docker/execdriver" - "github.com/dotcloud/docker/pkg/mount" - "os" - "os/exec" - "syscall" -) - -const ( - DriverName = "chroot" - Version = "0.1" -) - -func init() { - execdriver.RegisterInitFunc(DriverName, func(args *execdriver.InitArgs) error { - if err := mount.ForceMount("proc", "proc", "proc", ""); err != nil { - return err - } - defer mount.ForceUnmount("proc") - cmd := exec.Command(args.Args[0], args.Args[1:]...) - - cmd.Stderr = os.Stderr - cmd.Stdout = os.Stdout - cmd.Stdin = os.Stdin - - return cmd.Run() - }) -} - -type driver struct { -} - -func NewDriver() (*driver, error) { - return &driver{}, nil -} - -func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) { - params := []string{ - "chroot", - c.Rootfs, - "/.dockerinit", - "-driver", - DriverName, - } - params = append(params, c.Entrypoint) - params = append(params, c.Arguments...) - - var ( - name = params[0] - arg = params[1:] - ) - aname, err := exec.LookPath(name) - if err != nil { - aname = name - } - c.Path = aname - c.Args = append([]string{name}, arg...) 
- - if err := c.Start(); err != nil { - return -1, err - } - - if startCallback != nil { - startCallback(c) - } - - err = c.Wait() - return getExitCode(c), err -} - -/// Return the exit code of the process -// if the process has not exited -1 will be returned -func getExitCode(c *execdriver.Command) int { - if c.ProcessState == nil { - return -1 - } - return c.ProcessState.Sys().(syscall.WaitStatus).ExitStatus() -} - -func (d *driver) Kill(p *execdriver.Command, sig int) error { - return p.Process.Kill() -} - -func (d *driver) Restore(c *execdriver.Command) error { - panic("Not Implemented") -} - -func (d *driver) Info(id string) execdriver.Info { - panic("Not implemented") -} - -func (d *driver) Name() string { - return fmt.Sprintf("%s-%s", DriverName, Version) -} - -func (d *driver) GetPidsForContainer(id string) ([]int, error) { - return nil, fmt.Errorf("Not supported") -} diff --git a/sysinit/sysinit.go b/sysinit/sysinit.go index 67962e30c6..056f033295 100644 --- a/sysinit/sysinit.go +++ b/sysinit/sysinit.go @@ -5,7 +5,6 @@ import ( "flag" "fmt" "github.com/dotcloud/docker/execdriver" - _ "github.com/dotcloud/docker/execdriver/chroot" _ "github.com/dotcloud/docker/execdriver/lxc" "io" "io/ioutil" From d59c05a37c3e964cdeef818afc2e7c3a3e8d0cf1 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 24 Feb 2014 20:41:09 -0800 Subject: [PATCH 61/81] Fix exec driver flag, rename new driver to 'native' Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- docker/docker.go | 2 +- .../{docker => native}/default_template.go | 2 +- execdriver/{docker => native}/driver.go | 2 +- execdriver/{docker => native}/term.go | 2 +- runtime.go | 21 ++++++++++--------- sysinit/sysinit.go | 1 + 6 files changed, 16 insertions(+), 14 deletions(-) rename execdriver/{docker => native}/default_template.go (98%) rename execdriver/{docker => native}/driver.go (99%) rename execdriver/{docker => native}/term.go (97%) diff --git a/docker/docker.go b/docker/docker.go index 
17449210c7..a2d83bfa86 100644 --- a/docker/docker.go +++ b/docker/docker.go @@ -39,7 +39,7 @@ func main() { flDefaultIp = flag.String([]string{"#ip", "-ip"}, "0.0.0.0", "Default IP address to use when binding container ports") flInterContainerComm = flag.Bool([]string{"#icc", "-icc"}, true, "Enable inter-container communication") flGraphDriver = flag.String([]string{"s", "-storage-driver"}, "", "Force the docker runtime to use a specific storage driver") - flExecDriver = flag.String([]string{"e", "-exec-driver"}, "", "Force the docker runtime to use a specific exec driver") + flExecDriver = flag.String([]string{"e", "-exec-driver"}, "native", "Force the docker runtime to use a specific exec driver") flHosts = opts.NewListOpts(api.ValidateHost) flMtu = flag.Int([]string{"#mtu", "-mtu"}, 0, "Set the containers network MTU; if no value is provided: default to the default route MTU or 1500 if no default route is available") ) diff --git a/execdriver/docker/default_template.go b/execdriver/native/default_template.go similarity index 98% rename from execdriver/docker/default_template.go rename to execdriver/native/default_template.go index 4d6dd954c0..f202fd2529 100644 --- a/execdriver/docker/default_template.go +++ b/execdriver/native/default_template.go @@ -1,4 +1,4 @@ -package docker +package native import ( "github.com/dotcloud/docker/pkg/cgroups" diff --git a/execdriver/docker/driver.go b/execdriver/native/driver.go similarity index 99% rename from execdriver/docker/driver.go rename to execdriver/native/driver.go index 7b6d3474af..59cbe67402 100644 --- a/execdriver/docker/driver.go +++ b/execdriver/native/driver.go @@ -1,4 +1,4 @@ -package docker +package native import ( "encoding/json" diff --git a/execdriver/docker/term.go b/execdriver/native/term.go similarity index 97% rename from execdriver/docker/term.go rename to execdriver/native/term.go index 6705fc49e4..b32a80e4df 100644 --- a/execdriver/docker/term.go +++ b/execdriver/native/term.go @@ -1,4 +1,4 @@ 
-package docker +package native import ( "github.com/dotcloud/docker/execdriver" diff --git a/runtime.go b/runtime.go index 6dc4476b87..1003c35d5d 100644 --- a/runtime.go +++ b/runtime.go @@ -7,8 +7,8 @@ import ( "github.com/dotcloud/docker/dockerversion" "github.com/dotcloud/docker/engine" "github.com/dotcloud/docker/execdriver" - "github.com/dotcloud/docker/execdriver/docker" "github.com/dotcloud/docker/execdriver/lxc" + "github.com/dotcloud/docker/execdriver/native" "github.com/dotcloud/docker/graphdriver" "github.com/dotcloud/docker/graphdriver/aufs" _ "github.com/dotcloud/docker/graphdriver/btrfs" @@ -702,17 +702,18 @@ func NewRuntimeFromDirectory(config *DaemonConfig, eng *engine.Engine) (*Runtime sysInitPath = localCopy } - sysInfo := sysinfo.New(false) + var ( + ed execdriver.Driver + sysInfo = sysinfo.New(false) + ) - var ed execdriver.Driver - utils.Debugf("execDriver: provided %s", config.ExecDriver) - if config.ExecDriver == "chroot" && false { - // chroot is presently a noop driver https://github.com/dotcloud/docker/pull/4189#issuecomment-35330655 - ed, err = chroot.NewDriver() - utils.Debugf("execDriver: using chroot") - } else { + switch config.ExecDriver { + case "lxc": ed, err = lxc.NewDriver(config.Root, sysInfo.AppArmor) - utils.Debugf("execDriver: using lxc") + case "native": + ed, err = native.NewDriver(config.Root) + default: + return nil, fmt.Errorf("unknow exec driver %s", config.ExecDriver) } if err != nil { return nil, err diff --git a/sysinit/sysinit.go b/sysinit/sysinit.go index 056f033295..cd74e0c88e 100644 --- a/sysinit/sysinit.go +++ b/sysinit/sysinit.go @@ -6,6 +6,7 @@ import ( "fmt" "github.com/dotcloud/docker/execdriver" _ "github.com/dotcloud/docker/execdriver/lxc" + _ "github.com/dotcloud/docker/execdriver/native" "io" "io/ioutil" "log" From f8453cd0499a51f5d3ffd2c2a6012972aef7f69f Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 24 Feb 2014 21:11:52 -0800 Subject: [PATCH 62/81] Refactor and improve libcontainer and 
driver Remove logging for now because it is complicating things Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- execdriver/native/driver.go | 27 +++++------ pkg/libcontainer/network/veth.go | 3 ++ pkg/libcontainer/nsinit/command.go | 8 ++-- pkg/libcontainer/nsinit/exec.go | 35 ++------------ pkg/libcontainer/nsinit/execin.go | 6 +-- pkg/libcontainer/nsinit/init.go | 64 +++++++------------------- pkg/libcontainer/nsinit/nsinit.go | 9 ++-- pkg/libcontainer/nsinit/nsinit/main.go | 26 +---------- pkg/libcontainer/nsinit/state.go | 2 + pkg/libcontainer/utils/utils.go | 11 +++++ pkg/system/calls_linux.go | 9 ++++ pkg/system/pty_linux.go | 27 +++++++++++ 12 files changed, 96 insertions(+), 131 deletions(-) diff --git a/execdriver/native/driver.go b/execdriver/native/driver.go index 59cbe67402..e0ed5685e4 100644 --- a/execdriver/native/driver.go +++ b/execdriver/native/driver.go @@ -11,7 +11,6 @@ import ( "github.com/dotcloud/docker/pkg/libcontainer/nsinit" "io" "io/ioutil" - "log" "os" "os/exec" "path/filepath" @@ -27,7 +26,6 @@ const ( var ( ErrNotSupported = errors.New("not supported") - noOpLog = log.New(ioutil.Discard, "[nsinit] ", log.LstdFlags) ) func init() { @@ -51,7 +49,7 @@ func init() { if err != nil { return err } - ns := nsinit.NewNsInit(noOpLog, "", &nsinit.DefaultCommandFactory{}, &nsinit.DefaultStateWriter{}) + ns := nsinit.NewNsInit(&nsinit.DefaultCommandFactory{}, &nsinit.DefaultStateWriter{}) if err := ns.Init(container, cwd, args.Console, syncPipe, args.Args); err != nil { return err } @@ -93,7 +91,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba dsw: &nsinit.DefaultStateWriter{c.Rootfs}, } ) - ns := nsinit.NewNsInit(noOpLog, "", factory, stateWriter) + ns := nsinit.NewNsInit(factory, stateWriter) if c.Tty { term = &dockerTtyTerm{ pipes: pipes, @@ -147,6 +145,8 @@ func (d *driver) Name() string { return fmt.Sprintf("%s-%s", DriverName, Version) } +// TODO: this can be improved with our 
driver +// there has to be a better way to do this func (d *driver) GetPidsForContainer(id string) ([]int, error) { pids := []int{} @@ -207,27 +207,24 @@ type dockerCommandFactory struct { // createCommand will return an exec.Cmd with the Cloneflags set to the proper namespaces // defined on the container's configuration and use the current binary as the init with the // args provided -func (d *dockerCommandFactory) Create(container *libcontainer.Container, - console, logFile string, syncFd uintptr, args []string) *exec.Cmd { - c := d.c +func (d *dockerCommandFactory) Create(container *libcontainer.Container, console string, syncFd uintptr, args []string) *exec.Cmd { // we need to join the rootfs because nsinit will setup the rootfs and chroot - initPath := filepath.Join(c.Rootfs, c.InitPath) + initPath := filepath.Join(d.c.Rootfs, d.c.InitPath) - c.Path = initPath - c.Args = append([]string{ + d.c.Path = initPath + d.c.Args = append([]string{ initPath, "-driver", DriverName, "-console", console, "-pipe", fmt.Sprint(syncFd), - "-log", logFile, }, args...) 
- c.SysProcAttr = &syscall.SysProcAttr{ + d.c.SysProcAttr = &syscall.SysProcAttr{ Cloneflags: uintptr(nsinit.GetNamespaceFlags(container.Namespaces)), } - c.Env = container.Env - c.Dir = c.Rootfs + d.c.Env = container.Env + d.c.Dir = d.c.Rootfs - return &c.Cmd + return &d.c.Cmd } type dockerStateWriter struct { diff --git a/pkg/libcontainer/network/veth.go b/pkg/libcontainer/network/veth.go index 321c68eca4..49e63f0779 100644 --- a/pkg/libcontainer/network/veth.go +++ b/pkg/libcontainer/network/veth.go @@ -6,6 +6,9 @@ import ( "github.com/dotcloud/docker/pkg/libcontainer/utils" ) +// Veth is a network strategy that uses a bridge and creates +// a veth pair, one that stays outside on the host and the other +// is placed inside the container's namespace type Veth struct { } diff --git a/pkg/libcontainer/nsinit/command.go b/pkg/libcontainer/nsinit/command.go index b1c5631b4b..5eb378ac23 100644 --- a/pkg/libcontainer/nsinit/command.go +++ b/pkg/libcontainer/nsinit/command.go @@ -8,8 +8,11 @@ import ( "syscall" ) +// CommandFactory takes the container's configuration and options passed by the +// parent processes and creates an *exec.Cmd that will be used to fork/exec the +// namespaced init process type CommandFactory interface { - Create(container *libcontainer.Container, console, logFile string, syncFd uintptr, args []string) *exec.Cmd + Create(container *libcontainer.Container, console string, syncFd uintptr, args []string) *exec.Cmd } type DefaultCommandFactory struct{} @@ -17,13 +20,12 @@ type DefaultCommandFactory struct{} // Create will return an exec.Cmd with the Cloneflags set to the proper namespaces // defined on the container's configuration and use the current binary as the init with the // args provided -func (c *DefaultCommandFactory) Create(container *libcontainer.Container, console, logFile string, pipe uintptr, args []string) *exec.Cmd { +func (c *DefaultCommandFactory) Create(container *libcontainer.Container, console string, pipe uintptr, args 
[]string) *exec.Cmd { // get our binary name so we can always reexec ourself name := os.Args[0] command := exec.Command(name, append([]string{ "-console", console, "-pipe", fmt.Sprint(pipe), - "-log", logFile, "init"}, args...)...) command.SysProcAttr = &syscall.SysProcAttr{ diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index c4073235ef..b13326ba7e 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -28,31 +28,27 @@ func (ns *linuxNs) Exec(container *libcontainer.Container, term Terminal, args [ } if container.Tty { - ns.logger.Printf("setting up master and console") - master, console, err = CreateMasterAndConsole() + master, console, err = system.CreateMasterAndConsole() if err != nil { return -1, err } term.SetMaster(master) } - command := ns.commandFactory.Create(container, console, ns.logFile, syncPipe.child.Fd(), args) + command := ns.commandFactory.Create(container, console, syncPipe.child.Fd(), args) if err := term.Attach(command); err != nil { return -1, err } defer term.Close() - ns.logger.Printf("staring init") if err := command.Start(); err != nil { return -1, err } - ns.logger.Printf("writing state file") if err := ns.stateWriter.WritePid(command.Process.Pid); err != nil { command.Process.Kill() return -1, err } defer func() { - ns.logger.Printf("removing state file") ns.stateWriter.DeletePid() }() @@ -68,24 +64,18 @@ func (ns *linuxNs) Exec(container *libcontainer.Container, term Terminal, args [ } // Sync with child - ns.logger.Printf("closing sync pipes") syncPipe.Close() - ns.logger.Printf("waiting on process") if err := command.Wait(); err != nil { if _, ok := err.(*exec.ExitError); !ok { return -1, err } } - - exitCode := command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus() - ns.logger.Printf("process ended with exit code %d", exitCode) - return exitCode, nil + return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil } func (ns *linuxNs) 
SetupCgroups(container *libcontainer.Container, nspid int) error { if container.Cgroups != nil { - ns.logger.Printf("setting up cgroups") if err := container.Cgroups.Apply(nspid); err != nil { return err } @@ -95,7 +85,6 @@ func (ns *linuxNs) SetupCgroups(container *libcontainer.Container, nspid int) er func (ns *linuxNs) InitializeNetworking(container *libcontainer.Container, nspid int, pipe *SyncPipe) error { if container.Network != nil { - ns.logger.Printf("creating host network configuration type %s", container.Network.Type) strategy, err := network.GetStrategy(container.Network.Type) if err != nil { return err @@ -104,27 +93,9 @@ func (ns *linuxNs) InitializeNetworking(container *libcontainer.Container, nspid if err != nil { return err } - ns.logger.Printf("sending %v as network context", networkContext) if err := pipe.SendToChild(networkContext); err != nil { return err } } return nil } - -// CreateMasterAndConsole will open /dev/ptmx on the host and retreive the -// pts name for use as the pty slave inside the container -func CreateMasterAndConsole() (*os.File, string, error) { - master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) - if err != nil { - return nil, "", err - } - console, err := system.Ptsname(master) - if err != nil { - return nil, "", err - } - if err := system.Unlockpt(master); err != nil { - return nil, "", err - } - return master, console, nil -} diff --git a/pkg/libcontainer/nsinit/execin.go b/pkg/libcontainer/nsinit/execin.go index 9c33f69b4d..463196c7c0 100644 --- a/pkg/libcontainer/nsinit/execin.go +++ b/pkg/libcontainer/nsinit/execin.go @@ -18,7 +18,7 @@ func (ns *linuxNs) ExecIn(container *libcontainer.Container, nspid int, args []s return -1, err } } - fds, err := getNsFds(nspid, container) + fds, err := ns.getNsFds(nspid, container) closeFds := func() { for _, f := range fds { system.Closefd(f) @@ -75,13 +75,13 @@ dropAndExec: if err := capabilities.DropCapabilities(container); err != nil { 
return -1, fmt.Errorf("drop capabilities %s", err) } - if err := system.Exec(args[0], args[0:], container.Env); err != nil { + if err := system.Execv(args[0], args[0:], container.Env); err != nil { return -1, err } panic("unreachable") } -func getNsFds(pid int, container *libcontainer.Container) ([]uintptr, error) { +func (ns *linuxNs) getNsFds(pid int, container *libcontainer.Container) ([]uintptr, error) { fds := make([]uintptr, len(container.Namespaces)) for i, ns := range container.Namespaces { f, err := os.OpenFile(filepath.Join("/proc/", strconv.Itoa(pid), "ns", namespaceFileMap[ns]), os.O_RDONLY, 0) diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index 5e33169b65..1229560b5e 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -7,18 +7,17 @@ import ( "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/capabilities" "github.com/dotcloud/docker/pkg/libcontainer/network" + "github.com/dotcloud/docker/pkg/libcontainer/utils" "github.com/dotcloud/docker/pkg/system" "github.com/dotcloud/docker/pkg/user" "os" - "os/exec" - "path/filepath" "syscall" ) // Init is the init process that first runs inside a new namespace to setup mounts, users, networking, // and other options required for the new container. 
func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, console string, syncPipe *SyncPipe, args []string) error { - rootfs, err := resolveRootfs(uncleanRootfs) + rootfs, err := utils.ResolveRootfs(uncleanRootfs) if err != nil { return err } @@ -34,7 +33,7 @@ func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consol if console != "" { // close pipes so that we can replace it with the pty closeStdPipes() - slave, err := openTerminal(console, syscall.O_RDWR) + slave, err := system.OpenTerminal(console, syscall.O_RDWR) if err != nil { return fmt.Errorf("open terminal %s", err) } @@ -50,6 +49,7 @@ func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consol return fmt.Errorf("setctty %s", err) } } + if err := system.ParentDeathSignal(); err != nil { return fmt.Errorf("parent deth signal %s", err) } @@ -73,18 +73,7 @@ func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consol return fmt.Errorf("chdir to %s %s", container.WorkingDir, err) } } - return execArgs(args, container.Env) -} - -func execArgs(args []string, env []string) error { - name, err := exec.LookPath(args[0]) - if err != nil { - return err - } - if err := system.Exec(name, args[0:], env); err != nil { - return fmt.Errorf("exec %s", err) - } - panic("unreachable") + return system.Execv(args[0], args[0:], container.Env) } func closeStdPipes() { @@ -93,18 +82,19 @@ func closeStdPipes() { os.Stderr.Close() } -// resolveRootfs ensures that the current working directory is -// not a symlink and returns the absolute path to the rootfs -func resolveRootfs(uncleanRootfs string) (string, error) { - rootfs, err := filepath.Abs(uncleanRootfs) - if err != nil { - return "", err - } - return filepath.EvalSymlinks(rootfs) -} - func setupUser(container *libcontainer.Container) error { - if container.User != "" && container.User != "root" { + switch container.User { + case "root", "": + if err := system.Setgroups(nil); err != nil { + return 
err + } + if err := system.Setresgid(0, 0, 0); err != nil { + return err + } + if err := system.Setresuid(0, 0, 0); err != nil { + return err + } + default: uid, gid, suppGids, err := user.GetUserGroupSupplementary(container.User, syscall.Getuid(), syscall.Getgid()) if err != nil { return err @@ -118,16 +108,6 @@ func setupUser(container *libcontainer.Container) error { if err := system.Setuid(uid); err != nil { return err } - } else { - if err := system.Setgroups(nil); err != nil { - return err - } - if err := system.Setresgid(0, 0, 0); err != nil { - return err - } - if err := system.Setresuid(0, 0, 0); err != nil { - return err - } } return nil } @@ -147,16 +127,6 @@ func dupSlave(slave *os.File) error { return nil } -// openTerminal is a clone of os.OpenFile without the O_CLOEXEC -// used to open the pty slave inside the container namespace -func openTerminal(name string, flag int) (*os.File, error) { - r, e := syscall.Open(name, flag, 0) - if e != nil { - return nil, &os.PathError{"open", name, e} - } - return os.NewFile(uintptr(r), name), nil -} - // setupVethNetwork uses the Network config if it is not nil to initialize // the new veth interface inside the container for use by changing the name to eth0 // setting the MTU and IP address along with the default gateway diff --git a/pkg/libcontainer/nsinit/nsinit.go b/pkg/libcontainer/nsinit/nsinit.go index 599461e434..f09a130aa2 100644 --- a/pkg/libcontainer/nsinit/nsinit.go +++ b/pkg/libcontainer/nsinit/nsinit.go @@ -2,9 +2,10 @@ package nsinit import ( "github.com/dotcloud/docker/pkg/libcontainer" - "log" ) +// NsInit is an interface with the public facing methods to provide high level +// exec operations on a container type NsInit interface { Exec(container *libcontainer.Container, term Terminal, args []string) (int, error) ExecIn(container *libcontainer.Container, nspid int, args []string) (int, error) @@ -13,17 +14,13 @@ type NsInit interface { type linuxNs struct { root string - logFile string - logger 
*log.Logger commandFactory CommandFactory stateWriter StateWriter } -func NewNsInit(logger *log.Logger, logFile string, command CommandFactory, state StateWriter) NsInit { +func NewNsInit(command CommandFactory, state StateWriter) NsInit { return &linuxNs{ - logger: logger, commandFactory: command, stateWriter: state, - logFile: logFile, } } diff --git a/pkg/libcontainer/nsinit/nsinit/main.go b/pkg/libcontainer/nsinit/nsinit/main.go index c25037fa8b..e385e7fb70 100644 --- a/pkg/libcontainer/nsinit/nsinit/main.go +++ b/pkg/libcontainer/nsinit/nsinit/main.go @@ -6,7 +6,6 @@ import ( "flag" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/nsinit" - "io" "io/ioutil" "log" "os" @@ -16,7 +15,6 @@ import ( var ( console string pipeFd int - logFile string ) var ( @@ -26,7 +24,6 @@ var ( func registerFlags() { flag.StringVar(&console, "console", "", "console (pty slave) path") - flag.StringVar(&logFile, "log", "none", "log options (none, stderr, or a file path)") flag.IntVar(&pipeFd, "pipe", 0, "sync pipe fd") flag.Parse() @@ -113,26 +110,5 @@ func readPid() (int, error) { } func newNsInit() (nsinit.NsInit, error) { - logger, err := setupLogging() - if err != nil { - return nil, err - } - return nsinit.NewNsInit(logger, logFile, &nsinit.DefaultCommandFactory{}, &nsinit.DefaultStateWriter{}), nil -} - -func setupLogging() (logger *log.Logger, err error) { - var writer io.Writer - - switch logFile { - case "stderr": - writer = os.Stderr - case "none", "": - writer = ioutil.Discard - default: - if writer, err = os.OpenFile(logFile, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0755); err != nil { - return - } - } - logger = log.New(writer, "", log.LstdFlags) - return + return nsinit.NewNsInit(&nsinit.DefaultCommandFactory{}, &nsinit.DefaultStateWriter{}), nil } diff --git a/pkg/libcontainer/nsinit/state.go b/pkg/libcontainer/nsinit/state.go index 2dbaaa5977..5c719e1c54 100644 --- a/pkg/libcontainer/nsinit/state.go +++ 
b/pkg/libcontainer/nsinit/state.go @@ -7,6 +7,8 @@ import ( "path/filepath" ) +// StateWriter handles writing and deleting the pid file +// on disk type StateWriter interface { WritePid(pid int) error DeletePid() error diff --git a/pkg/libcontainer/utils/utils.go b/pkg/libcontainer/utils/utils.go index 5050997ffd..0d919bc43d 100644 --- a/pkg/libcontainer/utils/utils.go +++ b/pkg/libcontainer/utils/utils.go @@ -4,6 +4,7 @@ import ( "crypto/rand" "encoding/hex" "io" + "path/filepath" ) // GenerateRandomName returns a new name joined with a prefix. This size @@ -15,3 +16,13 @@ func GenerateRandomName(prefix string, size int) (string, error) { } return prefix + hex.EncodeToString(id)[:size], nil } + +// ResolveRootfs ensures that the current working directory is +// not a symlink and returns the absolute path to the rootfs +func ResolveRootfs(uncleanRootfs string) (string, error) { + rootfs, err := filepath.Abs(uncleanRootfs) + if err != nil { + return "", err + } + return filepath.EvalSymlinks(rootfs) +} diff --git a/pkg/system/calls_linux.go b/pkg/system/calls_linux.go index 0bf42e3c71..b7a8f140ba 100644 --- a/pkg/system/calls_linux.go +++ b/pkg/system/calls_linux.go @@ -1,6 +1,7 @@ package system import ( + "os/exec" "syscall" ) @@ -16,6 +17,14 @@ func Exec(cmd string, args []string, env []string) error { return syscall.Exec(cmd, args, env) } +func Execv(cmd string, args []string, env []string) error { + name, err := exec.LookPath(cmd) + if err != nil { + return err + } + return Exec(name, args, env) +} + func Fork() (int, error) { syscall.ForkLock.Lock() pid, _, err := syscall.Syscall(syscall.SYS_FORK, 0, 0, 0) diff --git a/pkg/system/pty_linux.go b/pkg/system/pty_linux.go index b281b719fb..ca588d8ce9 100644 --- a/pkg/system/pty_linux.go +++ b/pkg/system/pty_linux.go @@ -24,8 +24,35 @@ func Ptsname(f *os.File) (string, error) { return fmt.Sprintf("/dev/pts/%d", n), nil } +// CreateMasterAndConsole will open /dev/ptmx on the host and retreive the +// pts name for 
use as the pty slave inside the container +func CreateMasterAndConsole() (*os.File, string, error) { + master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) + if err != nil { + return nil, "", err + } + console, err := Ptsname(master) + if err != nil { + return nil, "", err + } + if err := Unlockpt(master); err != nil { + return nil, "", err + } + return master, console, nil +} + // OpenPtmx opens /dev/ptmx, i.e. the PTY master. func OpenPtmx() (*os.File, error) { // O_NOCTTY and O_CLOEXEC are not present in os package so we use the syscall's one for all. return os.OpenFile("/dev/ptmx", syscall.O_RDONLY|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) } + +// OpenTerminal is a clone of os.OpenFile without the O_CLOEXEC +// used to open the pty slave inside the container namespace +func OpenTerminal(name string, flag int) (*os.File, error) { + r, e := syscall.Open(name, flag, 0) + if e != nil { + return nil, &os.PathError{"open", name, e} + } + return os.NewFile(uintptr(r), name), nil +} From 8db740a38e333158e613bc5b3a7acc2605131581 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 24 Feb 2014 21:21:35 -0800 Subject: [PATCH 63/81] Move types around in native driver Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- execdriver/native/driver.go | 55 ++++--------------------------------- execdriver/native/info.go | 22 +++++++++++++++ execdriver/native/term.go | 37 ++++++++++++++++++------- 3 files changed, 55 insertions(+), 59 deletions(-) create mode 100644 execdriver/native/info.go diff --git a/execdriver/native/driver.go b/execdriver/native/driver.go index e0ed5685e4..31b5ae290c 100644 --- a/execdriver/native/driver.go +++ b/execdriver/native/driver.go @@ -5,11 +5,9 @@ import ( "errors" "fmt" "github.com/dotcloud/docker/execdriver" - "github.com/dotcloud/docker/execdriver/lxc" "github.com/dotcloud/docker/pkg/cgroups" "github.com/dotcloud/docker/pkg/libcontainer" 
"github.com/dotcloud/docker/pkg/libcontainer/nsinit" - "io" "io/ioutil" "os" "os/exec" @@ -20,7 +18,7 @@ import ( ) const ( - DriverName = "namespaces" + DriverName = "native" Version = "0.1" ) @@ -30,7 +28,10 @@ var ( func init() { execdriver.RegisterInitFunc(DriverName, func(args *execdriver.InitArgs) error { - var container *libcontainer.Container + var ( + container *libcontainer.Container + ns = nsinit.NewNsInit(&nsinit.DefaultCommandFactory{}, &nsinit.DefaultStateWriter{}) + ) f, err := os.Open("container.json") if err != nil { return err @@ -49,7 +50,6 @@ func init() { if err != nil { return err } - ns := nsinit.NewNsInit(&nsinit.DefaultCommandFactory{}, &nsinit.DefaultStateWriter{}) if err := ns.Init(container, cwd, args.Console, syncPipe, args.Args); err != nil { return err } @@ -61,19 +61,6 @@ type driver struct { root string } -type info struct { - ID string - driver *driver -} - -func (i *info) IsRunning() bool { - p := filepath.Join(i.driver.root, "containers", i.ID, "root", ".nspid") - if _, err := os.Stat(p); err == nil { - return true - } - return false -} - func NewDriver(root string) (*driver, error) { return &driver{ root: root, @@ -90,8 +77,8 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba c: c, dsw: &nsinit.DefaultStateWriter{c.Rootfs}, } + ns = nsinit.NewNsInit(factory, stateWriter) ) - ns := nsinit.NewNsInit(factory, stateWriter) if c.Tty { term = &dockerTtyTerm{ pipes: pipes, @@ -280,33 +267,3 @@ func createContainer(c *execdriver.Command) *libcontainer.Container { } return container } - -type dockerStdTerm struct { - lxc.StdConsole - pipes *execdriver.Pipes -} - -func (d *dockerStdTerm) Attach(cmd *exec.Cmd) error { - return d.AttachPipes(cmd, d.pipes) -} - -func (d *dockerStdTerm) SetMaster(master *os.File) { - // do nothing -} - -type dockerTtyTerm struct { - lxc.TtyConsole - pipes *execdriver.Pipes -} - -func (t *dockerTtyTerm) Attach(cmd *exec.Cmd) error { - go io.Copy(t.pipes.Stdout, t.MasterPty) - 
if t.pipes.Stdin != nil { - go io.Copy(t.MasterPty, t.pipes.Stdin) - } - return nil -} - -func (t *dockerTtyTerm) SetMaster(master *os.File) { - t.MasterPty = master -} diff --git a/execdriver/native/info.go b/execdriver/native/info.go new file mode 100644 index 0000000000..a2ab7f2a0a --- /dev/null +++ b/execdriver/native/info.go @@ -0,0 +1,22 @@ +package native + +import ( + "os" + "path/filepath" +) + +type info struct { + ID string + driver *driver +} + +// IsRunning is determined by looking for the +// .nspid file for a container. If the file exists then the +// container is currently running +func (i *info) IsRunning() bool { + p := filepath.Join(i.driver.root, "containers", i.ID, "root", ".nspid") + if _, err := os.Stat(p); err == nil { + return true + } + return false +} diff --git a/execdriver/native/term.go b/execdriver/native/term.go index b32a80e4df..9b8e813aab 100644 --- a/execdriver/native/term.go +++ b/execdriver/native/term.go @@ -1,26 +1,43 @@ +/* + These types are wrappers around the libcontainer Terminal interface so that + we can resuse the docker implementations where possible. 
+*/ package native import ( "github.com/dotcloud/docker/execdriver" - "github.com/dotcloud/docker/pkg/term" + "github.com/dotcloud/docker/execdriver/lxc" + "io" "os" + "os/exec" ) -type NsinitTerm struct { - master *os.File +type dockerStdTerm struct { + lxc.StdConsole + pipes *execdriver.Pipes } -func NewTerm(pipes *execdriver.Pipes, master *os.File) *NsinitTerm { - return &NsinitTerm{master} +func (d *dockerStdTerm) Attach(cmd *exec.Cmd) error { + return d.AttachPipes(cmd, d.pipes) } -func (t *NsinitTerm) Close() error { - return t.master.Close() +func (d *dockerStdTerm) SetMaster(master *os.File) { + // do nothing } -func (t *NsinitTerm) Resize(h, w int) error { - if t.master != nil { - return term.SetWinsize(t.master.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)}) +type dockerTtyTerm struct { + lxc.TtyConsole + pipes *execdriver.Pipes +} + +func (t *dockerTtyTerm) Attach(cmd *exec.Cmd) error { + go io.Copy(t.pipes.Stdout, t.MasterPty) + if t.pipes.Stdin != nil { + go io.Copy(t.MasterPty, t.pipes.Stdin) } return nil } + +func (t *dockerTtyTerm) SetMaster(master *os.File) { + t.MasterPty = master +} From 431d510cae85bc1265c861028dd9751ae95088b2 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 24 Feb 2014 21:51:00 -0800 Subject: [PATCH 64/81] Remove container env var from libcontainer Update tests to use native driver Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- execdriver/native/default_template.go | 38 +++++++++++++++++++++++++++ execdriver/native/driver.go | 36 ------------------------- integration/container_test.go | 1 - integration/utils_test.go | 1 + runtime.go | 2 +- 5 files changed, 40 insertions(+), 38 deletions(-) diff --git a/execdriver/native/default_template.go b/execdriver/native/default_template.go index f202fd2529..7426e678e8 100644 --- a/execdriver/native/default_template.go +++ b/execdriver/native/default_template.go @@ -1,10 +1,48 @@ package native import ( + "fmt" + 
"github.com/dotcloud/docker/execdriver" "github.com/dotcloud/docker/pkg/cgroups" "github.com/dotcloud/docker/pkg/libcontainer" ) +// createContainer populates and configrues the container type with the +// data provided by the execdriver.Command +func createContainer(c *execdriver.Command) *libcontainer.Container { + container := getDefaultTemplate() + + container.Hostname = getEnv("HOSTNAME", c.Env) + container.Tty = c.Tty + container.User = c.User + container.WorkingDir = c.WorkingDir + container.Env = c.Env + + if c.Network != nil { + container.Network = &libcontainer.Network{ + Mtu: c.Network.Mtu, + Address: fmt.Sprintf("%s/%d", c.Network.IPAddress, c.Network.IPPrefixLen), + Gateway: c.Network.Gateway, + Type: "veth", + Context: libcontainer.Context{ + "prefix": "dock", + "bridge": c.Network.Bridge, + }, + } + } + container.Cgroups.Name = c.ID + if c.Privileged { + container.Capabilities = nil + container.Cgroups.DeviceAccess = true + } + if c.Resources != nil { + container.Cgroups.CpuShares = c.Resources.CpuShares + container.Cgroups.Memory = c.Resources.Memory + container.Cgroups.MemorySwap = c.Resources.MemorySwap + } + return container +} + // getDefaultTemplate returns the docker default for // the libcontainer configuration file func getDefaultTemplate() *libcontainer.Container { diff --git a/execdriver/native/driver.go b/execdriver/native/driver.go index 31b5ae290c..a10125c120 100644 --- a/execdriver/native/driver.go +++ b/execdriver/native/driver.go @@ -231,39 +231,3 @@ func (d *dockerStateWriter) WritePid(pid int) error { func (d *dockerStateWriter) DeletePid() error { return d.dsw.DeletePid() } - -func createContainer(c *execdriver.Command) *libcontainer.Container { - container := getDefaultTemplate() - - container.Hostname = getEnv("HOSTNAME", c.Env) - container.Tty = c.Tty - container.User = c.User - container.WorkingDir = c.WorkingDir - container.Env = c.Env - - container.Env = append(container.Env, "container=docker") - - if c.Network != nil { - 
container.Network = &libcontainer.Network{ - Mtu: c.Network.Mtu, - Address: fmt.Sprintf("%s/%d", c.Network.IPAddress, c.Network.IPPrefixLen), - Gateway: c.Network.Gateway, - Type: "veth", - Context: libcontainer.Context{ - "prefix": "dock", - "bridge": c.Network.Bridge, - }, - } - } - container.Cgroups.Name = c.ID - if c.Privileged { - container.Capabilities = nil - container.Cgroups.DeviceAccess = true - } - if c.Resources != nil { - container.Cgroups.CpuShares = c.Resources.CpuShares - container.Cgroups.Memory = c.Resources.Memory - container.Cgroups.MemorySwap = c.Resources.MemorySwap - } - return container -} diff --git a/integration/container_test.go b/integration/container_test.go index 8e4314f456..4efb95a2a1 100644 --- a/integration/container_test.go +++ b/integration/container_test.go @@ -1044,7 +1044,6 @@ func TestEnv(t *testing.T) { goodEnv := []string{ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "HOME=/", - "container=docker", "HOSTNAME=" + utils.TruncateID(container.ID), "FALSE=true", "TRUE=false", diff --git a/integration/utils_test.go b/integration/utils_test.go index fc66e8bcbd..2d8a3e709e 100644 --- a/integration/utils_test.go +++ b/integration/utils_test.go @@ -190,6 +190,7 @@ func newTestEngine(t utils.Fataler, autorestart bool, root string) *engine.Engin job := eng.Job("initserver") job.Setenv("Root", root) job.SetenvBool("AutoRestart", autorestart) + job.Setenv("ExecDriver", "native") // TestGetEnabledCors and TestOptionsRoute require EnableCors=true job.SetenvBool("EnableCors", true) if err := job.Run(); err != nil { diff --git a/runtime.go b/runtime.go index 1003c35d5d..1a95ad6270 100644 --- a/runtime.go +++ b/runtime.go @@ -713,7 +713,7 @@ func NewRuntimeFromDirectory(config *DaemonConfig, eng *engine.Engine) (*Runtime case "native": ed, err = native.NewDriver(config.Root) default: - return nil, fmt.Errorf("unknow exec driver %s", config.ExecDriver) + return nil, fmt.Errorf("unknown exec driver %s", config.ExecDriver) 
} if err != nil { return nil, err From 91bf120c51dec3bae98a1974929e2ae8107340c0 Mon Sep 17 00:00:00 2001 From: "Guillaume J. Charmes" Date: Mon, 24 Feb 2014 21:52:29 -0800 Subject: [PATCH 65/81] Better capability/namespace management Docker-DCO-1.1-Signed-off-by: Guillaume J. Charmes (github: creack) --- execdriver/native/default_template.go | 41 ++--- pkg/libcontainer/capabilities/capabilities.go | 20 +-- pkg/libcontainer/nsinit/execin.go | 7 +- pkg/libcontainer/nsinit/ns_linux.go | 24 +-- pkg/libcontainer/types.go | 155 +++++++++++++----- 5 files changed, 140 insertions(+), 107 deletions(-) diff --git a/execdriver/native/default_template.go b/execdriver/native/default_template.go index 7426e678e8..cc464631cd 100644 --- a/execdriver/native/default_template.go +++ b/execdriver/native/default_template.go @@ -48,28 +48,29 @@ func createContainer(c *execdriver.Command) *libcontainer.Container { func getDefaultTemplate() *libcontainer.Container { return &libcontainer.Container{ Capabilities: libcontainer.Capabilities{ - libcontainer.CAP_SETPCAP, - libcontainer.CAP_SYS_MODULE, - libcontainer.CAP_SYS_RAWIO, - libcontainer.CAP_SYS_PACCT, - libcontainer.CAP_SYS_ADMIN, - libcontainer.CAP_SYS_NICE, - libcontainer.CAP_SYS_RESOURCE, - libcontainer.CAP_SYS_TIME, - libcontainer.CAP_SYS_TTY_CONFIG, - libcontainer.CAP_MKNOD, - libcontainer.CAP_AUDIT_WRITE, - libcontainer.CAP_AUDIT_CONTROL, - libcontainer.CAP_MAC_ADMIN, - libcontainer.CAP_MAC_OVERRIDE, - libcontainer.CAP_NET_ADMIN, + libcontainer.GetCapability("SETPCAP"), + libcontainer.GetCapability("SYS_MODULE"), + libcontainer.GetCapability("SYS_RAWIO"), + libcontainer.GetCapability("SYS_PACCT"), + libcontainer.GetCapability("SYS_ADMIN"), + libcontainer.GetCapability("SYS_NICE"), + libcontainer.GetCapability("SYS_RESOURCE"), + libcontainer.GetCapability("SYS_TIME"), + libcontainer.GetCapability("SYS_TTY_CONFIG"), + libcontainer.GetCapability("MKNOD"), + libcontainer.GetCapability("AUDIT_WRITE"), + 
libcontainer.GetCapability("AUDIT_CONTROL"), + libcontainer.GetCapability("MAC_OVERRIDE"), + libcontainer.GetCapability("MAC_ADMIN"), + libcontainer.GetCapability("NET_ADMIN"), }, Namespaces: libcontainer.Namespaces{ - libcontainer.CLONE_NEWIPC, - libcontainer.CLONE_NEWNET, - libcontainer.CLONE_NEWNS, - libcontainer.CLONE_NEWPID, - libcontainer.CLONE_NEWUTS, + libcontainer.GetNamespace("NEWNS"), + libcontainer.GetNamespace("NEWUTS"), + libcontainer.GetNamespace("NEWIPC"), + libcontainer.GetNamespace("NEWUSER"), + libcontainer.GetNamespace("NEWPID"), + libcontainer.GetNamespace("NEWNET"), }, Cgroups: &cgroups.Cgroup{ Parent: "docker", diff --git a/pkg/libcontainer/capabilities/capabilities.go b/pkg/libcontainer/capabilities/capabilities.go index 65fd455c26..3c6d752496 100644 --- a/pkg/libcontainer/capabilities/capabilities.go +++ b/pkg/libcontainer/capabilities/capabilities.go @@ -6,24 +6,6 @@ import ( "os" ) -var capMap = map[libcontainer.Capability]capability.Cap{ - libcontainer.CAP_SETPCAP: capability.CAP_SETPCAP, - libcontainer.CAP_SYS_MODULE: capability.CAP_SYS_MODULE, - libcontainer.CAP_SYS_RAWIO: capability.CAP_SYS_RAWIO, - libcontainer.CAP_SYS_PACCT: capability.CAP_SYS_PACCT, - libcontainer.CAP_SYS_ADMIN: capability.CAP_SYS_ADMIN, - libcontainer.CAP_SYS_NICE: capability.CAP_SYS_NICE, - libcontainer.CAP_SYS_RESOURCE: capability.CAP_SYS_RESOURCE, - libcontainer.CAP_SYS_TIME: capability.CAP_SYS_TIME, - libcontainer.CAP_SYS_TTY_CONFIG: capability.CAP_SYS_TTY_CONFIG, - libcontainer.CAP_MKNOD: capability.CAP_MKNOD, - libcontainer.CAP_AUDIT_WRITE: capability.CAP_AUDIT_WRITE, - libcontainer.CAP_AUDIT_CONTROL: capability.CAP_AUDIT_CONTROL, - libcontainer.CAP_MAC_OVERRIDE: capability.CAP_MAC_OVERRIDE, - libcontainer.CAP_MAC_ADMIN: capability.CAP_MAC_ADMIN, - libcontainer.CAP_NET_ADMIN: capability.CAP_NET_ADMIN, -} - // DropCapabilities drops capabilities for the current process based // on the container's configuration. 
func DropCapabilities(container *libcontainer.Container) error { @@ -45,7 +27,7 @@ func DropCapabilities(container *libcontainer.Container) error { func getCapabilities(container *libcontainer.Container) []capability.Cap { drop := []capability.Cap{} for _, c := range container.Capabilities { - drop = append(drop, capMap[c]) + drop = append(drop, c.Value) } return drop } diff --git a/pkg/libcontainer/nsinit/execin.go b/pkg/libcontainer/nsinit/execin.go index 463196c7c0..306250cf9b 100644 --- a/pkg/libcontainer/nsinit/execin.go +++ b/pkg/libcontainer/nsinit/execin.go @@ -14,7 +14,7 @@ import ( // ExecIn uses an existing pid and joins the pid's namespaces with the new command. func (ns *linuxNs) ExecIn(container *libcontainer.Container, nspid int, args []string) (int, error) { for _, ns := range container.Namespaces { - if err := system.Unshare(namespaceMap[ns]); err != nil { + if err := system.Unshare(ns.Value); err != nil { return -1, err } } @@ -42,8 +42,7 @@ func (ns *linuxNs) ExecIn(container *libcontainer.Container, nspid int, args []s // if the container has a new pid and mount namespace we need to // remount proc and sys to pick up the changes - if container.Namespaces.Contains(libcontainer.CLONE_NEWNS) && - container.Namespaces.Contains(libcontainer.CLONE_NEWPID) { + if container.Namespaces.Contains("CLONE_NEWNS") && container.Namespaces.Contains("CLONE_NEWPID") { pid, err := system.Fork() if err != nil { return -1, err @@ -84,7 +83,7 @@ dropAndExec: func (ns *linuxNs) getNsFds(pid int, container *libcontainer.Container) ([]uintptr, error) { fds := make([]uintptr, len(container.Namespaces)) for i, ns := range container.Namespaces { - f, err := os.OpenFile(filepath.Join("/proc/", strconv.Itoa(pid), "ns", namespaceFileMap[ns]), os.O_RDONLY, 0) + f, err := os.OpenFile(filepath.Join("/proc/", strconv.Itoa(pid), "ns", ns.File), os.O_RDONLY, 0) if err != nil { return fds, err } diff --git a/pkg/libcontainer/nsinit/ns_linux.go b/pkg/libcontainer/nsinit/ns_linux.go 
index 58af24798f..ab6322e75c 100644 --- a/pkg/libcontainer/nsinit/ns_linux.go +++ b/pkg/libcontainer/nsinit/ns_linux.go @@ -2,35 +2,13 @@ package nsinit import ( "github.com/dotcloud/docker/pkg/libcontainer" - "syscall" ) -var namespaceMap = map[libcontainer.Namespace]int{ - libcontainer.CLONE_NEWNS: syscall.CLONE_NEWNS, - libcontainer.CLONE_NEWUTS: syscall.CLONE_NEWUTS, - libcontainer.CLONE_NEWIPC: syscall.CLONE_NEWIPC, - libcontainer.CLONE_NEWUSER: syscall.CLONE_NEWUSER, - libcontainer.CLONE_NEWPID: syscall.CLONE_NEWPID, - libcontainer.CLONE_NEWNET: syscall.CLONE_NEWNET, -} - -// namespaceFileMap is used to convert the libcontainer types -// into the names of the files located in /proc//ns/* for -// each namespace -var namespaceFileMap = map[libcontainer.Namespace]string{ - libcontainer.CLONE_NEWNS: "mnt", - libcontainer.CLONE_NEWUTS: "uts", - libcontainer.CLONE_NEWIPC: "ipc", - libcontainer.CLONE_NEWUSER: "user", - libcontainer.CLONE_NEWPID: "pid", - libcontainer.CLONE_NEWNET: "net", -} - // getNamespaceFlags parses the container's Namespaces options to set the correct // flags on clone, unshare, and setns func GetNamespaceFlags(namespaces libcontainer.Namespaces) (flag int) { for _, ns := range namespaces { - flag |= namespaceMap[ns] + flag |= ns.Value } return flag } diff --git a/pkg/libcontainer/types.go b/pkg/libcontainer/types.go index bb54ff5130..cb64db1f29 100644 --- a/pkg/libcontainer/types.go +++ b/pkg/libcontainer/types.go @@ -1,58 +1,131 @@ package libcontainer -// These constants are defined as string types so that -// it is clear when adding the configuration in config files -// instead of using ints or other types -const ( - CAP_SETPCAP Capability = "SETPCAP" - CAP_SYS_MODULE Capability = "SYS_MODULE" - CAP_SYS_RAWIO Capability = "SYS_RAWIO" - CAP_SYS_PACCT Capability = "SYS_PACCT" - CAP_SYS_ADMIN Capability = "SYS_ADMIN" - CAP_SYS_NICE Capability = "SYS_NICE" - CAP_SYS_RESOURCE Capability = "SYS_RESOURCE" - CAP_SYS_TIME Capability = "SYS_TIME" - 
CAP_SYS_TTY_CONFIG Capability = "SYS_TTY_CONFIG" - CAP_MKNOD Capability = "MKNOD" - CAP_AUDIT_WRITE Capability = "AUDIT_WRITE" - CAP_AUDIT_CONTROL Capability = "AUDIT_CONTROL" - CAP_MAC_OVERRIDE Capability = "MAC_OVERRIDE" - CAP_MAC_ADMIN Capability = "MAC_ADMIN" - CAP_NET_ADMIN Capability = "NET_ADMIN" +import ( + "encoding/json" + "errors" + "github.com/syndtr/gocapability/capability" + "os" + "syscall" +) - CLONE_NEWNS Namespace = "NEWNS" // mount - CLONE_NEWUTS Namespace = "NEWUTS" // utsname - CLONE_NEWIPC Namespace = "NEWIPC" // ipc - CLONE_NEWUSER Namespace = "NEWUSER" // user - CLONE_NEWPID Namespace = "NEWPID" // pid - CLONE_NEWNET Namespace = "NEWNET" // network +var ( + ErrUnkownNamespace error = errors.New("Unkown namespace") +) + +// namespaceList is used to convert the libcontainer types +// into the names of the files located in /proc//ns/* for +// each namespace +var ( + namespaceList = Namespaces{ + {Key: "NEWNS", Value: syscall.CLONE_NEWNS, File: "mnt"}, + {Key: "NEWUTS", Value: syscall.CLONE_NEWUTS, File: "uts"}, + {Key: "NEWIPC", Value: syscall.CLONE_NEWIPC, File: "ipc"}, + {Key: "NEWUSER", Value: syscall.CLONE_NEWUSER, File: "user"}, + {Key: "NEWPID", Value: syscall.CLONE_NEWPID, File: "pid"}, + {Key: "NEWNET", Value: syscall.CLONE_NEWNET, File: "net"}, + } + capabilityList = Capabilities{ + {Key: "SETPCAP", Value: capability.CAP_SETPCAP}, + {Key: "SYS_MODULE", Value: capability.CAP_SYS_MODULE}, + {Key: "SYS_RAWIO", Value: capability.CAP_SYS_RAWIO}, + {Key: "SYS_PACCT", Value: capability.CAP_SYS_PACCT}, + {Key: "SYS_ADMIN", Value: capability.CAP_SYS_ADMIN}, + {Key: "SYS_NICE", Value: capability.CAP_SYS_NICE}, + {Key: "SYS_RESOURCE", Value: capability.CAP_SYS_RESOURCE}, + {Key: "SYS_TIME", Value: capability.CAP_SYS_TIME}, + {Key: "SYS_TTY_CONFIG", Value: capability.CAP_SYS_TTY_CONFIG}, + {Key: "MKNOD", Value: capability.CAP_MKNOD}, + {Key: "AUDIT_WRITE", Value: capability.CAP_AUDIT_WRITE}, + {Key: "AUDIT_CONTROL", Value: 
capability.CAP_AUDIT_CONTROL}, + {Key: "MAC_OVERRIDE", Value: capability.CAP_MAC_OVERRIDE}, + {Key: "MAC_ADMIN", Value: capability.CAP_MAC_ADMIN}, + {Key: "NET_ADMIN", Value: capability.CAP_NET_ADMIN}, + } ) type ( - Namespace string - Namespaces []Namespace - Capability string - Capabilities []Capability + Namespace struct { + Key string + Value int + File string + } + Namespaces []*Namespace ) +func (ns *Namespace) MarshalJSON() ([]byte, error) { + return json.Marshal(ns.Key) +} + +func (ns *Namespace) UnmarshalJSON(src []byte) error { + var nsName string + if err := json.Unmarshal(src, &nsName); err != nil { + return err + } + ret := GetNamespace(nsName) + if ret == nil { + return ErrUnkownNamespace + } + *ns = *ret + return nil +} + +func GetNamespace(key string) *Namespace { + for _, ns := range namespaceList { + if ns.Key == key { + return ns + } + } + if os.Getenv("DEBUG") != "" { + panic("Unreachable: Namespace not found") + } + return nil +} + // Contains returns true if the specified Namespace is // in the slice -func (n Namespaces) Contains(ns Namespace) bool { - for _, nns := range n { - if nns == ns { - return true +func (n Namespaces) Contains(ns string) bool { + return GetNamespace(ns) != nil +} + +type ( + Capability struct { + Key string + Value capability.Cap + } + Capabilities []*Capability +) + +func (ns *Capability) MarshalJSON() ([]byte, error) { + return json.Marshal(ns.Key) +} + +func (ns *Capability) UnmarshalJSON(src []byte) error { + var capName string + if err := json.Unmarshal(src, &capName); err != nil { + return err + } + ret := GetCapability(capName) + if ret == nil { + return ErrUnkownNamespace + } + *ns = *ret + return nil +} + +func GetCapability(key string) *Capability { + for _, capp := range capabilityList { + if capp.Key == key { + return capp } } - return false + if os.Getenv("DEBUG") != "" { + panic("Unreachable: Namespace not found") + } + return nil } // Contains returns true if the specified Capability is // in the slice 
-func (c Capabilities) Contains(capp Capability) bool { - for _, cc := range c { - if cc == capp { - return true - } - } - return false +func (c Capabilities) Contains(capp string) bool { + return GetCapability(capp) != nil } From a64ebabdfaca66709d664cb87a35d689e35cfd0d Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 24 Feb 2014 21:54:37 -0800 Subject: [PATCH 66/81] Remove setup logging from sysinit Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- sysinit/sysinit.go | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/sysinit/sysinit.go b/sysinit/sysinit.go index cd74e0c88e..3f2b0b6066 100644 --- a/sysinit/sysinit.go +++ b/sysinit/sysinit.go @@ -7,7 +7,6 @@ import ( "github.com/dotcloud/docker/execdriver" _ "github.com/dotcloud/docker/execdriver/lxc" _ "github.com/dotcloud/docker/execdriver/native" - "io" "io/ioutil" "log" "os" @@ -56,14 +55,9 @@ func SysInit() { driver = flag.String("driver", "", "exec driver") pipe = flag.Int("pipe", 0, "sync pipe fd") console = flag.String("console", "", "console (pty slave) path") - logFile = flag.String("log", "", "log file path") ) flag.Parse() - if err := setupLogging(*logFile); err != nil { - log.Fatalf("setup logging %s", err) - } - // Get env var env []string content, err := ioutil.ReadFile(".dockerenv") @@ -94,20 +88,3 @@ func SysInit() { log.Fatal(err) } } - -func setupLogging(logFile string) (err error) { - var writer io.Writer - switch logFile { - case "stderr": - writer = os.Stderr - case "none", "": - writer = ioutil.Discard - default: - writer, err = os.OpenFile(logFile, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0755) - if err != nil { - return err - } - } - log.SetOutput(writer) - return nil -} From de083400b8d7c2074d71a30a92e4f3c8bcd8bad8 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Tue, 25 Feb 2014 10:54:41 -0800 Subject: [PATCH 67/81] Address initial feedback from pr Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- docker/docker.go | 
2 +- execdriver/native/driver.go | 10 ++++------ integration/runtime_test.go | 2 +- pkg/system/setns_linux.go | 21 ++++++++++++++++++++- pkg/system/setns_linux_amd64.go | 8 -------- 5 files changed, 26 insertions(+), 17 deletions(-) delete mode 100644 pkg/system/setns_linux_amd64.go diff --git a/docker/docker.go b/docker/docker.go index a2d83bfa86..3ea3d63027 100644 --- a/docker/docker.go +++ b/docker/docker.go @@ -17,7 +17,7 @@ import ( ) func main() { - if selfPath := utils.SelfPath(); selfPath == "/sbin/init" || strings.Contains(selfPath, ".dockerinit") { + if selfPath := utils.SelfPath(); strings.Contains(selfPath, ".dockerinit") { // Running in init mode sysinit.SysInit() return diff --git a/execdriver/native/driver.go b/execdriver/native/driver.go index a10125c120..16e5ea1b49 100644 --- a/execdriver/native/driver.go +++ b/execdriver/native/driver.go @@ -2,7 +2,6 @@ package native import ( "encoding/json" - "errors" "fmt" "github.com/dotcloud/docker/execdriver" "github.com/dotcloud/docker/pkg/cgroups" @@ -22,10 +21,6 @@ const ( Version = "0.1" ) -var ( - ErrNotSupported = errors.New("not supported") -) - func init() { execdriver.RegisterInitFunc(DriverName, func(args *execdriver.InitArgs) error { var ( @@ -109,10 +104,13 @@ func (d *driver) Restore(c *execdriver.Command) error { if err != nil { return err } - defer f.Close() if _, err := fmt.Fscanf(f, "%d", &nspid); err != nil { + f.Close() return err } + f.Close() + defer os.Remove(p) + proc, err := os.FindProcess(nspid) if err != nil { return err diff --git a/integration/runtime_test.go b/integration/runtime_test.go index 060980f4e1..6003c89b51 100644 --- a/integration/runtime_test.go +++ b/integration/runtime_test.go @@ -85,7 +85,7 @@ func init() { os.Setenv("TEST", "1") // Hack to run sys init during unit testing - if selfPath := utils.SelfPath(); selfPath == "/sbin/init" || strings.Contains(selfPath, ".dockerinit") { + if selfPath := utils.SelfPath(); strings.Contains(selfPath, ".dockerinit") { 
sysinit.SysInit() return } diff --git a/pkg/system/setns_linux.go b/pkg/system/setns_linux.go index be6f3edb30..07b1c93b4a 100644 --- a/pkg/system/setns_linux.go +++ b/pkg/system/setns_linux.go @@ -1,11 +1,30 @@ package system import ( + "errors" + "fmt" + "runtime" "syscall" ) +var ( + ErrNotSupportedPlatform = errors.New("platform and architecture is not supported") +) + +// Via http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7b21fddd087678a70ad64afc0f632e0f1071b092 +// +// We need different setns values for the different platforms and arch +// We are declaring the macro here because the SETNS syscall does not exist in th stdlib +var setNsMap = map[string]uintptr{ + "linux/amd64": 308, +} + func Setns(fd uintptr, flags uintptr) error { - _, _, err := syscall.RawSyscall(SYS_SETNS, fd, flags, 0) + ns, exists := setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)] + if !exists { + return ErrNotSupportedPlatform + } + _, _, err := syscall.RawSyscall(ns, fd, flags, 0) if err != 0 { return err } diff --git a/pkg/system/setns_linux_amd64.go b/pkg/system/setns_linux_amd64.go deleted file mode 100644 index 4e306253d9..0000000000 --- a/pkg/system/setns_linux_amd64.go +++ /dev/null @@ -1,8 +0,0 @@ -// +build linux,amd64 - -package system - -// Via http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7b21fddd087678a70ad64afc0f632e0f1071b092 -const ( - SYS_SETNS = 308 -) From 96e33a7646b3669632f48ed1071aeb61b8016be1 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Tue, 25 Feb 2014 12:41:31 -0800 Subject: [PATCH 68/81] Move container.json and pid file into a root specific driver dir Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- execdriver/driver.go | 1 + execdriver/native/driver.go | 38 +++++++++++++++++--------- execdriver/native/info.go | 3 +- pkg/libcontainer/nsinit/execin.go | 2 +- pkg/libcontainer/nsinit/nsinit/main.go | 12 ++++---- pkg/libcontainer/nsinit/state.go | 6 ++-- runtime.go | 2 
+- sysinit/sysinit.go | 2 ++ 8 files changed, 41 insertions(+), 25 deletions(-) diff --git a/execdriver/driver.go b/execdriver/driver.go index 8b5dd5ccc7..d64c08fa6c 100644 --- a/execdriver/driver.go +++ b/execdriver/driver.go @@ -53,6 +53,7 @@ type InitArgs struct { Driver string Console string Pipe int + Root string } // Driver specific information based on diff --git a/execdriver/native/driver.go b/execdriver/native/driver.go index 16e5ea1b49..6236950476 100644 --- a/execdriver/native/driver.go +++ b/execdriver/native/driver.go @@ -25,9 +25,9 @@ func init() { execdriver.RegisterInitFunc(DriverName, func(args *execdriver.InitArgs) error { var ( container *libcontainer.Container - ns = nsinit.NewNsInit(&nsinit.DefaultCommandFactory{}, &nsinit.DefaultStateWriter{}) + ns = nsinit.NewNsInit(&nsinit.DefaultCommandFactory{}, &nsinit.DefaultStateWriter{args.Root}) ) - f, err := os.Open("container.json") + f, err := os.Open(filepath.Join(args.Root, "container.json")) if err != nil { return err } @@ -57,6 +57,9 @@ type driver struct { } func NewDriver(root string) (*driver, error) { + if err := os.MkdirAll(root, 0655); err != nil { + return nil, err + } return &driver{ root: root, }, nil @@ -66,14 +69,18 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba var ( term nsinit.Terminal container = createContainer(c) - factory = &dockerCommandFactory{c} + factory = &dockerCommandFactory{c: c, driver: d} stateWriter = &dockerStateWriter{ callback: startCallback, c: c, - dsw: &nsinit.DefaultStateWriter{c.Rootfs}, + dsw: &nsinit.DefaultStateWriter{filepath.Join(d.root, c.ID)}, } - ns = nsinit.NewNsInit(factory, stateWriter) + ns = nsinit.NewNsInit(factory, stateWriter) + args = append([]string{c.Entrypoint}, c.Arguments...) 
) + if err := d.createContainerRoot(c.ID); err != nil { + return -1, err + } if c.Tty { term = &dockerTtyTerm{ pipes: pipes, @@ -84,10 +91,9 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba } } c.Terminal = term - if err := writeContainerFile(container, c.Rootfs); err != nil { + if err := d.writeContainerFile(container, c.ID); err != nil { return -1, err } - args := append([]string{c.Entrypoint}, c.Arguments...) return ns.Exec(container, term, args) } @@ -98,9 +104,9 @@ func (d *driver) Kill(p *execdriver.Command, sig int) error { func (d *driver) Restore(c *execdriver.Command) error { var ( nspid int - p = filepath.Join(d.root, "containers", c.ID, "root", ".nspid") + path = filepath.Join(d.root, c.ID, "pid") ) - f, err := os.Open(p) + f, err := os.Open(path) if err != nil { return err } @@ -109,7 +115,7 @@ func (d *driver) Restore(c *execdriver.Command) error { return err } f.Close() - defer os.Remove(p) + defer os.Remove(path) proc, err := os.FindProcess(nspid) if err != nil { @@ -167,12 +173,16 @@ func (d *driver) GetPidsForContainer(id string) ([]int, error) { return pids, nil } -func writeContainerFile(container *libcontainer.Container, rootfs string) error { +func (d *driver) writeContainerFile(container *libcontainer.Container, id string) error { data, err := json.Marshal(container) if err != nil { return err } - return ioutil.WriteFile(filepath.Join(rootfs, "container.json"), data, 0755) + return ioutil.WriteFile(filepath.Join(d.root, id, "container.json"), data, 0655) +} + +func (d *driver) createContainerRoot(id string) error { + return os.MkdirAll(filepath.Join(d.root, id), 0655) } func getEnv(key string, env []string) string { @@ -186,7 +196,8 @@ func getEnv(key string, env []string) string { } type dockerCommandFactory struct { - c *execdriver.Command + c *execdriver.Command + driver *driver } // createCommand will return an exec.Cmd with the Cloneflags set to the proper namespaces @@ -202,6 +213,7 @@ func (d 
*dockerCommandFactory) Create(container *libcontainer.Container, console "-driver", DriverName, "-console", console, "-pipe", fmt.Sprint(syncFd), + "-root", filepath.Join(d.driver.root, d.c.ID), }, args...) d.c.SysProcAttr = &syscall.SysProcAttr{ Cloneflags: uintptr(nsinit.GetNamespaceFlags(container.Namespaces)), diff --git a/execdriver/native/info.go b/execdriver/native/info.go index a2ab7f2a0a..5223feee83 100644 --- a/execdriver/native/info.go +++ b/execdriver/native/info.go @@ -14,8 +14,7 @@ type info struct { // .nspid file for a container. If the file exists then the // container is currently running func (i *info) IsRunning() bool { - p := filepath.Join(i.driver.root, "containers", i.ID, "root", ".nspid") - if _, err := os.Stat(p); err == nil { + if _, err := os.Stat(filepath.Join(i.driver.root, i.ID, "pid")); err == nil { return true } return false diff --git a/pkg/libcontainer/nsinit/execin.go b/pkg/libcontainer/nsinit/execin.go index 306250cf9b..253fbdcea4 100644 --- a/pkg/libcontainer/nsinit/execin.go +++ b/pkg/libcontainer/nsinit/execin.go @@ -42,7 +42,7 @@ func (ns *linuxNs) ExecIn(container *libcontainer.Container, nspid int, args []s // if the container has a new pid and mount namespace we need to // remount proc and sys to pick up the changes - if container.Namespaces.Contains("CLONE_NEWNS") && container.Namespaces.Contains("CLONE_NEWPID") { + if container.Namespaces.Contains("NEWNS") && container.Namespaces.Contains("NEWPID") { pid, err := system.Fork() if err != nil { return -1, err diff --git a/pkg/libcontainer/nsinit/nsinit/main.go b/pkg/libcontainer/nsinit/nsinit/main.go index e385e7fb70..e6b020b74b 100644 --- a/pkg/libcontainer/nsinit/nsinit/main.go +++ b/pkg/libcontainer/nsinit/nsinit/main.go @@ -9,12 +9,13 @@ import ( "io/ioutil" "log" "os" + "path/filepath" "strconv" ) var ( - console string - pipeFd int + root, console string + pipeFd int ) var ( @@ -25,6 +26,7 @@ var ( func registerFlags() { flag.StringVar(&console, "console", "", 
"console (pty slave) path") flag.IntVar(&pipeFd, "pipe", 0, "sync pipe fd") + flag.StringVar(&root, "root", ".", "root for storing configuration data") flag.Parse() } @@ -84,7 +86,7 @@ func main() { } func loadContainer() (*libcontainer.Container, error) { - f, err := os.Open("container.json") + f, err := os.Open(filepath.Join(root, "container.json")) if err != nil { return nil, err } @@ -98,7 +100,7 @@ func loadContainer() (*libcontainer.Container, error) { } func readPid() (int, error) { - data, err := ioutil.ReadFile(".nspid") + data, err := ioutil.ReadFile(filepath.Join(root, "pid")) if err != nil { return -1, err } @@ -110,5 +112,5 @@ func readPid() (int, error) { } func newNsInit() (nsinit.NsInit, error) { - return nsinit.NewNsInit(&nsinit.DefaultCommandFactory{}, &nsinit.DefaultStateWriter{}), nil + return nsinit.NewNsInit(&nsinit.DefaultCommandFactory{}, &nsinit.DefaultStateWriter{root}), nil } diff --git a/pkg/libcontainer/nsinit/state.go b/pkg/libcontainer/nsinit/state.go index 5c719e1c54..af38008c03 100644 --- a/pkg/libcontainer/nsinit/state.go +++ b/pkg/libcontainer/nsinit/state.go @@ -18,11 +18,11 @@ type DefaultStateWriter struct { Root string } -// writePidFile writes the namespaced processes pid to .nspid in the rootfs for the container +// writePidFile writes the namespaced processes pid to pid in the rootfs for the container func (d *DefaultStateWriter) WritePid(pid int) error { - return ioutil.WriteFile(filepath.Join(d.Root, ".nspid"), []byte(fmt.Sprint(pid)), 0655) + return ioutil.WriteFile(filepath.Join(d.Root, "pid"), []byte(fmt.Sprint(pid)), 0655) } func (d *DefaultStateWriter) DeletePid() error { - return os.Remove(filepath.Join(d.Root, ".nspid")) + return os.Remove(filepath.Join(d.Root, "pid")) } diff --git a/runtime.go b/runtime.go index 1a95ad6270..c062578a31 100644 --- a/runtime.go +++ b/runtime.go @@ -711,7 +711,7 @@ func NewRuntimeFromDirectory(config *DaemonConfig, eng *engine.Engine) (*Runtime case "lxc": ed, err = 
lxc.NewDriver(config.Root, sysInfo.AppArmor) case "native": - ed, err = native.NewDriver(config.Root) + ed, err = native.NewDriver(path.Join(config.Root, "native")) default: return nil, fmt.Errorf("unknown exec driver %s", config.ExecDriver) } diff --git a/sysinit/sysinit.go b/sysinit/sysinit.go index 3f2b0b6066..c84c05982c 100644 --- a/sysinit/sysinit.go +++ b/sysinit/sysinit.go @@ -55,6 +55,7 @@ func SysInit() { driver = flag.String("driver", "", "exec driver") pipe = flag.Int("pipe", 0, "sync pipe fd") console = flag.String("console", "", "console (pty slave) path") + root = flag.String("root", ".", "root path for configuration files") ) flag.Parse() @@ -82,6 +83,7 @@ func SysInit() { Driver: *driver, Console: *console, Pipe: *pipe, + Root: *root, } if err := executeProgram(args); err != nil { From 93ed15075c43d521f05f4b8f96264efb7fe174e4 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Tue, 25 Feb 2014 15:19:13 -0800 Subject: [PATCH 69/81] Fix cross compile for make cross Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- execdriver/native/driver.go | 9 ++++--- pkg/libcontainer/nsinit/command.go | 25 ++++++++++++------ pkg/libcontainer/nsinit/execin.go | 2 ++ pkg/libcontainer/nsinit/ns_linux.go | 14 ----------- pkg/libcontainer/nsinit/nsinit/main.go | 12 +++------ pkg/libcontainer/nsinit/unsupported.go | 19 ++++++++++++++ pkg/libcontainer/types.go | 35 ++++++++++++++------------ pkg/libcontainer/types_linux.go | 16 ++++++++++++ pkg/system/calls_linux.go | 7 ++++++ pkg/system/errors.go | 9 +++++++ pkg/system/setns_linux.go | 5 ---- pkg/system/unsupported.go | 15 +++++++++++ runtime.go | 5 +++- 13 files changed, 117 insertions(+), 56 deletions(-) delete mode 100644 pkg/libcontainer/nsinit/ns_linux.go create mode 100644 pkg/libcontainer/nsinit/unsupported.go create mode 100644 pkg/libcontainer/types_linux.go create mode 100644 pkg/system/errors.go create mode 100644 pkg/system/unsupported.go diff --git a/execdriver/native/driver.go 
b/execdriver/native/driver.go index 6236950476..0603b92400 100644 --- a/execdriver/native/driver.go +++ b/execdriver/native/driver.go @@ -7,6 +7,7 @@ import ( "github.com/dotcloud/docker/pkg/cgroups" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/nsinit" + "github.com/dotcloud/docker/pkg/system" "io/ioutil" "os" "os/exec" @@ -215,9 +216,11 @@ func (d *dockerCommandFactory) Create(container *libcontainer.Container, console "-pipe", fmt.Sprint(syncFd), "-root", filepath.Join(d.driver.root, d.c.ID), }, args...) - d.c.SysProcAttr = &syscall.SysProcAttr{ - Cloneflags: uintptr(nsinit.GetNamespaceFlags(container.Namespaces)), - } + + // set this to nil so that when we set the clone flags anything else is reset + d.c.SysProcAttr = nil + system.SetCloneFlags(&d.c.Cmd, uintptr(nsinit.GetNamespaceFlags(container.Namespaces))) + d.c.Env = container.Env d.c.Dir = d.c.Rootfs diff --git a/pkg/libcontainer/nsinit/command.go b/pkg/libcontainer/nsinit/command.go index 5eb378ac23..8ddf1e7e71 100644 --- a/pkg/libcontainer/nsinit/command.go +++ b/pkg/libcontainer/nsinit/command.go @@ -3,9 +3,9 @@ package nsinit import ( "fmt" "github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/pkg/system" "os" "os/exec" - "syscall" ) // CommandFactory takes the container's configuration and options passed by the @@ -15,22 +15,31 @@ type CommandFactory interface { Create(container *libcontainer.Container, console string, syncFd uintptr, args []string) *exec.Cmd } -type DefaultCommandFactory struct{} +type DefaultCommandFactory struct { + Root string +} // Create will return an exec.Cmd with the Cloneflags set to the proper namespaces // defined on the container's configuration and use the current binary as the init with the // args provided func (c *DefaultCommandFactory) Create(container *libcontainer.Container, console string, pipe uintptr, args []string) *exec.Cmd { - // get our binary name so we can always reexec ourself - name := 
os.Args[0] - command := exec.Command(name, append([]string{ + // get our binary name from arg0 so we can always reexec ourself + command := exec.Command(os.Args[0], append([]string{ "-console", console, "-pipe", fmt.Sprint(pipe), + "-root", c.Root, "init"}, args...)...) - command.SysProcAttr = &syscall.SysProcAttr{ - Cloneflags: uintptr(GetNamespaceFlags(container.Namespaces)), - } + system.SetCloneFlags(command, uintptr(GetNamespaceFlags(container.Namespaces))) command.Env = container.Env return command } + +// GetNamespaceFlags parses the container's Namespaces options to set the correct +// flags on clone, unshare, and setns +func GetNamespaceFlags(namespaces libcontainer.Namespaces) (flag int) { + for _, ns := range namespaces { + flag |= ns.Value + } + return flag +} diff --git a/pkg/libcontainer/nsinit/execin.go b/pkg/libcontainer/nsinit/execin.go index 253fbdcea4..55f7b9695b 100644 --- a/pkg/libcontainer/nsinit/execin.go +++ b/pkg/libcontainer/nsinit/execin.go @@ -1,3 +1,5 @@ +// +build linux + package nsinit import ( diff --git a/pkg/libcontainer/nsinit/ns_linux.go b/pkg/libcontainer/nsinit/ns_linux.go deleted file mode 100644 index ab6322e75c..0000000000 --- a/pkg/libcontainer/nsinit/ns_linux.go +++ /dev/null @@ -1,14 +0,0 @@ -package nsinit - -import ( - "github.com/dotcloud/docker/pkg/libcontainer" -) - -// getNamespaceFlags parses the container's Namespaces options to set the correct -// flags on clone, unshare, and setns -func GetNamespaceFlags(namespaces libcontainer.Namespaces) (flag int) { - for _, ns := range namespaces { - flag |= ns.Value - } - return flag -} diff --git a/pkg/libcontainer/nsinit/nsinit/main.go b/pkg/libcontainer/nsinit/nsinit/main.go index e6b020b74b..61921c59a3 100644 --- a/pkg/libcontainer/nsinit/nsinit/main.go +++ b/pkg/libcontainer/nsinit/nsinit/main.go @@ -2,7 +2,6 @@ package main import ( "encoding/json" - "errors" "flag" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/nsinit" @@ 
-18,11 +17,6 @@ var ( pipeFd int ) -var ( - ErrUnsupported = errors.New("Unsupported method") - ErrWrongArguments = errors.New("Wrong argument count") -) - func registerFlags() { flag.StringVar(&console, "console", "", "console (pty slave) path") flag.IntVar(&pipeFd, "pipe", 0, "sync pipe fd") @@ -35,7 +29,7 @@ func main() { registerFlags() if flag.NArg() < 1 { - log.Fatal(ErrWrongArguments) + log.Fatalf("wrong number of argments %d", flag.NArg()) } container, err := loadContainer() if err != nil { @@ -71,7 +65,7 @@ func main() { log.Fatal(err) } if flag.NArg() < 2 { - log.Fatal(ErrWrongArguments) + log.Fatalf("wrong number of argments %d", flag.NArg()) } syncPipe, err := nsinit.NewSyncPipeFromFd(0, uintptr(pipeFd)) if err != nil { @@ -112,5 +106,5 @@ func readPid() (int, error) { } func newNsInit() (nsinit.NsInit, error) { - return nsinit.NewNsInit(&nsinit.DefaultCommandFactory{}, &nsinit.DefaultStateWriter{root}), nil + return nsinit.NewNsInit(&nsinit.DefaultCommandFactory{root}, &nsinit.DefaultStateWriter{root}), nil } diff --git a/pkg/libcontainer/nsinit/unsupported.go b/pkg/libcontainer/nsinit/unsupported.go new file mode 100644 index 0000000000..2412223d28 --- /dev/null +++ b/pkg/libcontainer/nsinit/unsupported.go @@ -0,0 +1,19 @@ +// +build !linux + +package nsinit + +import ( + "github.com/dotcloud/docker/pkg/libcontainer" +) + +func (ns *linuxNs) Exec(container *libcontainer.Container, term Terminal, args []string) (int, error) { + return -1, libcontainer.ErrUnsupported +} + +func (ns *linuxNs) ExecIn(container *libcontainer.Container, nspid int, args []string) (int, error) { + return -1, libcontainer.ErrUnsupported +} + +func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, console string, syncPipe *SyncPipe, args []string) error { + return libcontainer.ErrUnsupported +} diff --git a/pkg/libcontainer/types.go b/pkg/libcontainer/types.go index cb64db1f29..8c28530140 100644 --- a/pkg/libcontainer/types.go +++ b/pkg/libcontainer/types.go 
@@ -5,25 +5,20 @@ import ( "errors" "github.com/syndtr/gocapability/capability" "os" - "syscall" ) var ( - ErrUnkownNamespace error = errors.New("Unkown namespace") + ErrUnkownNamespace = errors.New("Unknown namespace") + ErrUnkownCapability = errors.New("Unknown capability") + ErrUnsupported = errors.New("Unsupported method") ) // namespaceList is used to convert the libcontainer types // into the names of the files located in /proc//ns/* for // each namespace var ( - namespaceList = Namespaces{ - {Key: "NEWNS", Value: syscall.CLONE_NEWNS, File: "mnt"}, - {Key: "NEWUTS", Value: syscall.CLONE_NEWUTS, File: "uts"}, - {Key: "NEWIPC", Value: syscall.CLONE_NEWIPC, File: "ipc"}, - {Key: "NEWUSER", Value: syscall.CLONE_NEWUSER, File: "user"}, - {Key: "NEWPID", Value: syscall.CLONE_NEWPID, File: "pid"}, - {Key: "NEWNET", Value: syscall.CLONE_NEWNET, File: "net"}, - } + namespaceList = Namespaces{} + capabilityList = Capabilities{ {Key: "SETPCAP", Value: capability.CAP_SETPCAP}, {Key: "SYS_MODULE", Value: capability.CAP_SYS_MODULE}, @@ -52,6 +47,10 @@ type ( Namespaces []*Namespace ) +func (ns *Namespace) String() string { + return ns.Key +} + func (ns *Namespace) MarshalJSON() ([]byte, error) { return json.Marshal(ns.Key) } @@ -95,20 +94,24 @@ type ( Capabilities []*Capability ) -func (ns *Capability) MarshalJSON() ([]byte, error) { - return json.Marshal(ns.Key) +func (c *Capability) String() string { + return c.Key } -func (ns *Capability) UnmarshalJSON(src []byte) error { +func (c *Capability) MarshalJSON() ([]byte, error) { + return json.Marshal(c.Key) +} + +func (c *Capability) UnmarshalJSON(src []byte) error { var capName string if err := json.Unmarshal(src, &capName); err != nil { return err } ret := GetCapability(capName) if ret == nil { - return ErrUnkownNamespace + return ErrUnkownCapability } - *ns = *ret + *c = *ret return nil } @@ -119,7 +122,7 @@ func GetCapability(key string) *Capability { } } if os.Getenv("DEBUG") != "" { - panic("Unreachable: Namespace not 
found") + panic("Unreachable: Capability not found") } return nil } diff --git a/pkg/libcontainer/types_linux.go b/pkg/libcontainer/types_linux.go new file mode 100644 index 0000000000..c14531df20 --- /dev/null +++ b/pkg/libcontainer/types_linux.go @@ -0,0 +1,16 @@ +package libcontainer + +import ( + "syscall" +) + +func init() { + namespaceList = Namespaces{ + {Key: "NEWNS", Value: syscall.CLONE_NEWNS, File: "mnt"}, + {Key: "NEWUTS", Value: syscall.CLONE_NEWUTS, File: "uts"}, + {Key: "NEWIPC", Value: syscall.CLONE_NEWIPC, File: "ipc"}, + {Key: "NEWUSER", Value: syscall.CLONE_NEWUSER, File: "user"}, + {Key: "NEWPID", Value: syscall.CLONE_NEWPID, File: "pid"}, + {Key: "NEWNET", Value: syscall.CLONE_NEWNET, File: "net"}, + } +} diff --git a/pkg/system/calls_linux.go b/pkg/system/calls_linux.go index b7a8f140ba..bf667c535b 100644 --- a/pkg/system/calls_linux.go +++ b/pkg/system/calls_linux.go @@ -136,3 +136,10 @@ func Mkfifo(name string, mode uint32) error { func Umask(mask int) int { return syscall.Umask(mask) } + +func SetCloneFlags(cmd *exec.Cmd, flag uintptr) { + if cmd.SysProcAttr == nil { + cmd.SysProcAttr = &syscall.SysProcAttr{} + } + cmd.SysProcAttr.Cloneflags = flag +} diff --git a/pkg/system/errors.go b/pkg/system/errors.go new file mode 100644 index 0000000000..63045186fe --- /dev/null +++ b/pkg/system/errors.go @@ -0,0 +1,9 @@ +package system + +import ( + "errors" +) + +var ( + ErrNotSupportedPlatform = errors.New("platform and architecture is not supported") +) diff --git a/pkg/system/setns_linux.go b/pkg/system/setns_linux.go index 07b1c93b4a..2b6f9e77ec 100644 --- a/pkg/system/setns_linux.go +++ b/pkg/system/setns_linux.go @@ -1,16 +1,11 @@ package system import ( - "errors" "fmt" "runtime" "syscall" ) -var ( - ErrNotSupportedPlatform = errors.New("platform and architecture is not supported") -) - // Via http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7b21fddd087678a70ad64afc0f632e0f1071b092 // // We need different setns 
values for the different platforms and arch diff --git a/pkg/system/unsupported.go b/pkg/system/unsupported.go new file mode 100644 index 0000000000..eb3ec7ee92 --- /dev/null +++ b/pkg/system/unsupported.go @@ -0,0 +1,15 @@ +// +build !linux + +package system + +import ( + "os/exec" +) + +func SetCloneFlags(cmd *exec.Cmd, flag uintptr) { + +} + +func UsetCloseOnExec(fd uintptr) error { + return ErrNotSupportedPlatform +} diff --git a/runtime.go b/runtime.go index c062578a31..0096f184e1 100644 --- a/runtime.go +++ b/runtime.go @@ -709,9 +709,12 @@ func NewRuntimeFromDirectory(config *DaemonConfig, eng *engine.Engine) (*Runtime switch config.ExecDriver { case "lxc": + // we want to five the lxc driver the full docker root because it needs + // to access and write config and template files in /var/lib/docker/containers/* + // to be backwards compatible ed, err = lxc.NewDriver(config.Root, sysInfo.AppArmor) case "native": - ed, err = native.NewDriver(path.Join(config.Root, "native")) + ed, err = native.NewDriver(path.Join(config.Root, "execdriver", "native")) default: return nil, fmt.Errorf("unknown exec driver %s", config.ExecDriver) } From 5c67d2e6346d493e2b8b0469d453a64d41c22cb2 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Tue, 25 Feb 2014 16:27:07 -0800 Subject: [PATCH 70/81] Ensure that the container's dir is remove from native driver on stop Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- execdriver/native/driver.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/execdriver/native/driver.go b/execdriver/native/driver.go index 0603b92400..1460b09e87 100644 --- a/execdriver/native/driver.go +++ b/execdriver/native/driver.go @@ -82,6 +82,8 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba if err := d.createContainerRoot(c.ID); err != nil { return -1, err } + defer d.removeContainerRoot(c.ID) + if c.Tty { term = &dockerTtyTerm{ pipes: pipes, @@ -186,6 +188,10 @@ func (d *driver) 
createContainerRoot(id string) error { return os.MkdirAll(filepath.Join(d.root, id), 0655) } +func (d *driver) removeContainerRoot(id string) error { + return os.RemoveAll(filepath.Join(d.root, id)) +} + func getEnv(key string, env []string) string { for _, pair := range env { parts := strings.Split(pair, "=") From bfdf07ac98e1a425892b787cf224109d5925a798 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Tue, 25 Feb 2014 17:13:00 -0800 Subject: [PATCH 71/81] Return error for lxc-conf when using native driver Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- execdriver/native/driver.go | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/execdriver/native/driver.go b/execdriver/native/driver.go index 1460b09e87..ba7e8a719f 100644 --- a/execdriver/native/driver.go +++ b/execdriver/native/driver.go @@ -67,6 +67,9 @@ func NewDriver(root string) (*driver, error) { } func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) { + if err := d.validateCommand(c); err != nil { + return -1, err + } var ( term nsinit.Terminal container = createContainer(c) @@ -192,6 +195,17 @@ func (d *driver) removeContainerRoot(id string) error { return os.RemoveAll(filepath.Join(d.root, id)) } +func (d *driver) validateCommand(c *execdriver.Command) error { + // we need to check the Config of the command to make sure that we + // do not have any of the lxc-conf variables + for _, conf := range c.Config { + if strings.Contains(conf, "lxc") { + return fmt.Errorf("%s is not supported by the native driver", conf) + } + } + return nil +} + func getEnv(key string, env []string) string { for _, pair := range env { parts := strings.Split(pair, "=") From 6016126c71272f7943458bbb8392dfd1f5877269 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Tue, 25 Feb 2014 19:45:57 -0800 Subject: [PATCH 72/81] Fix cgroups swap issue when it is not supported Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: 
crosbymichael) --- pkg/cgroups/cgroups.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/cgroups/cgroups.go b/pkg/cgroups/cgroups.go index e260d67661..b40e1a31fa 100644 --- a/pkg/cgroups/cgroups.go +++ b/pkg/cgroups/cgroups.go @@ -223,8 +223,10 @@ func (c *Cgroup) setupMemory(cgroupRoot string, pid int) (err error) { return err } } - if c.MemorySwap != 0 { - if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(c.MemorySwap, 10)); err != nil { + // By default, MemorySwap is set to twice the size of RAM. + // If you want to omit MemorySwap, set it to `-1'. + if c.MemorySwap != -1 { + if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(c.Memory*2, 10)); err != nil { return err } } From 70820b69ec2b82ab150af9b8829e37843f67f75a Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 26 Feb 2014 14:19:39 -0800 Subject: [PATCH 73/81] Make network a slice to support multiple types Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- execdriver/native/default_template.go | 19 +++++++++++-------- pkg/libcontainer/README.md | 21 +++++++++++---------- pkg/libcontainer/container.go | 2 +- pkg/libcontainer/container.json | 21 +++++++++++---------- pkg/libcontainer/network/strategy.go | 2 +- pkg/libcontainer/network/veth.go | 26 ++++++++++++-------------- pkg/libcontainer/nsinit/exec.go | 13 +++++-------- pkg/libcontainer/nsinit/init.go | 6 +++--- 8 files changed, 55 insertions(+), 55 deletions(-) diff --git a/execdriver/native/default_template.go b/execdriver/native/default_template.go index 4e1ec7057a..102967a489 100644 --- a/execdriver/native/default_template.go +++ b/execdriver/native/default_template.go @@ -19,17 +19,20 @@ func createContainer(c *execdriver.Command) *libcontainer.Container { container.Env = c.Env if c.Network != nil { - container.Network = &libcontainer.Network{ - Mtu: c.Network.Mtu, - Address: fmt.Sprintf("%s/%d", c.Network.IPAddress, c.Network.IPPrefixLen), - 
Gateway: c.Network.Gateway, - Type: "veth", - Context: libcontainer.Context{ - "prefix": "dock", - "bridge": c.Network.Bridge, + container.Networks = []*libcontainer.Network{ + { + Mtu: c.Network.Mtu, + Address: fmt.Sprintf("%s/%d", c.Network.IPAddress, c.Network.IPPrefixLen), + Gateway: c.Network.Gateway, + Type: "veth", + Context: libcontainer.Context{ + "prefix": "dock", + "bridge": c.Network.Bridge, + }, }, } } + container.Cgroups.Name = c.ID if c.Privileged { container.Capabilities = nil diff --git a/pkg/libcontainer/README.md b/pkg/libcontainer/README.md index 36553af5bc..4c8da8e925 100644 --- a/pkg/libcontainer/README.md +++ b/pkg/libcontainer/README.md @@ -48,16 +48,17 @@ Sample `container.json` file: "MAC_ADMIN", "NET_ADMIN" ], - "network": { - "type": "veth", - "context": { - "bridge": "docker0", - "prefix": "dock" - }, - "address": "172.17.0.100/16", - "gateway": "172.17.42.1", - "mtu": 1500 - }, + "networks": [{ + "type": "veth", + "context": { + "bridge": "docker0", + "prefix": "dock" + }, + "address": "172.17.0.100/16", + "gateway": "172.17.42.1", + "mtu": 1500 + } + ], "cgroups": { "name": "docker-koye", "parent": "docker", diff --git a/pkg/libcontainer/container.go b/pkg/libcontainer/container.go index 4a47977334..12a3d7ba8e 100644 --- a/pkg/libcontainer/container.go +++ b/pkg/libcontainer/container.go @@ -19,7 +19,7 @@ type Container struct { Tty bool `json:"tty,omitempty"` // setup a proper tty or not Namespaces Namespaces `json:"namespaces,omitempty"` // namespaces to apply Capabilities Capabilities `json:"capabilities,omitempty"` // capabilities to drop - Network *Network `json:"network,omitempty"` // nil for host's network stack + Networks []*Network `json:"networks,omitempty"` // nil for host's network stack Cgroups *cgroups.Cgroup `json:"cgroups,omitempty"` } diff --git a/pkg/libcontainer/container.json b/pkg/libcontainer/container.json index c2b21f8609..83e407467c 100644 --- a/pkg/libcontainer/container.json +++ 
b/pkg/libcontainer/container.json @@ -31,16 +31,17 @@ "MAC_ADMIN", "NET_ADMIN" ], - "network": { - "type": "veth", - "context": { - "bridge": "docker0", - "prefix": "dock" - }, - "address": "172.17.0.100/16", - "gateway": "172.17.42.1", - "mtu": 1500 - }, + "networks": [{ + "type": "veth", + "context": { + "bridge": "docker0", + "prefix": "dock" + }, + "address": "172.17.0.100/16", + "gateway": "172.17.42.1", + "mtu": 1500 + } + ], "cgroups": { "name": "docker-koye", "parent": "docker", diff --git a/pkg/libcontainer/network/strategy.go b/pkg/libcontainer/network/strategy.go index 8ecc11a24d..a2f4f8f073 100644 --- a/pkg/libcontainer/network/strategy.go +++ b/pkg/libcontainer/network/strategy.go @@ -16,7 +16,7 @@ var strategies = map[string]NetworkStrategy{ // NetworkStrategy represends a specific network configuration for // a containers networking stack type NetworkStrategy interface { - Create(*libcontainer.Network, int) (libcontainer.Context, error) + Create(*libcontainer.Network, int, libcontainer.Context) error Initialize(*libcontainer.Network, libcontainer.Context) error } diff --git a/pkg/libcontainer/network/veth.go b/pkg/libcontainer/network/veth.go index 49e63f0779..3ab1b2393b 100644 --- a/pkg/libcontainer/network/veth.go +++ b/pkg/libcontainer/network/veth.go @@ -12,39 +12,37 @@ import ( type Veth struct { } -func (v *Veth) Create(n *libcontainer.Network, nspid int) (libcontainer.Context, error) { +func (v *Veth) Create(n *libcontainer.Network, nspid int, context libcontainer.Context) error { var ( bridge string prefix string exists bool ) if bridge, exists = n.Context["bridge"]; !exists { - return nil, fmt.Errorf("bridge does not exist in network context") + return fmt.Errorf("bridge does not exist in network context") } if prefix, exists = n.Context["prefix"]; !exists { - return nil, fmt.Errorf("veth prefix does not exist in network context") + return fmt.Errorf("veth prefix does not exist in network context") } name1, name2, err := 
createVethPair(prefix) if err != nil { - return nil, err - } - context := libcontainer.Context{ - "vethHost": name1, - "vethChild": name2, + return err } + context["veth-host"] = name1 + context["veth-child"] = name2 if err := SetInterfaceMaster(name1, bridge); err != nil { - return context, err + return err } if err := SetMtu(name1, n.Mtu); err != nil { - return context, err + return err } if err := InterfaceUp(name1); err != nil { - return context, err + return err } if err := SetInterfaceInNamespacePid(name2, nspid); err != nil { - return context, err + return err } - return context, nil + return nil } func (v *Veth) Initialize(config *libcontainer.Network, context libcontainer.Context) error { @@ -52,7 +50,7 @@ func (v *Veth) Initialize(config *libcontainer.Network, context libcontainer.Con vethChild string exists bool ) - if vethChild, exists = context["vethChild"]; !exists { + if vethChild, exists = context["veth-child"]; !exists { return fmt.Errorf("vethChild does not exist in network context") } if err := InterfaceDown(vethChild); err != nil { diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index b13326ba7e..f7a9c17d7f 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -84,18 +84,15 @@ func (ns *linuxNs) SetupCgroups(container *libcontainer.Container, nspid int) er } func (ns *linuxNs) InitializeNetworking(container *libcontainer.Container, nspid int, pipe *SyncPipe) error { - if container.Network != nil { - strategy, err := network.GetStrategy(container.Network.Type) + context := libcontainer.Context{} + for _, config := range container.Networks { + strategy, err := network.GetStrategy(config.Type) if err != nil { return err } - networkContext, err := strategy.Create(container.Network, nspid) - if err != nil { - return err - } - if err := pipe.SendToChild(networkContext); err != nil { + if err := strategy.Create(config, nspid, context); err != nil { return err } } - return nil + return 
pipe.SendToChild(context) } diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index 1229560b5e..cfc5058559 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -56,7 +56,7 @@ func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consol if err := setupNewMountNamespace(rootfs, console, container.ReadonlyFs); err != nil { return fmt.Errorf("setup mount namespace %s", err) } - if err := setupNetwork(container.Network, context); err != nil { + if err := setupNetwork(container, context); err != nil { return fmt.Errorf("setup networking %s", err) } if err := system.Sethostname(container.Hostname); err != nil { @@ -130,8 +130,8 @@ func dupSlave(slave *os.File) error { // setupVethNetwork uses the Network config if it is not nil to initialize // the new veth interface inside the container for use by changing the name to eth0 // setting the MTU and IP address along with the default gateway -func setupNetwork(config *libcontainer.Network, context libcontainer.Context) error { - if config != nil { +func setupNetwork(container *libcontainer.Container, context libcontainer.Context) error { + for _, config := range container.Networks { strategy, err := network.GetStrategy(config.Type) if err != nil { return err From 7cd224594733e5fa0560cb912e3cf2dcef168370 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 26 Feb 2014 17:21:09 -0800 Subject: [PATCH 74/81] Ensure that loopback devices are mounted inside the conatiner Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/nsinit/mount.go | 53 +++++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 7 deletions(-) diff --git a/pkg/libcontainer/nsinit/mount.go b/pkg/libcontainer/nsinit/mount.go index a73e97e375..55c2655ab0 100644 --- a/pkg/libcontainer/nsinit/mount.go +++ b/pkg/libcontainer/nsinit/mount.go @@ -37,6 +37,9 @@ func setupNewMountNamespace(rootfs, console string, readonly bool) error { if 
err := copyDevNodes(rootfs); err != nil { return fmt.Errorf("copy dev nodes %s", err) } + if err := setupLoopbackDevices(rootfs); err != nil { + return fmt.Errorf("setup loopback devices %s", err) + } if err := setupDev(rootfs); err != nil { return err } @@ -76,21 +79,57 @@ func copyDevNodes(rootfs string) error { "urandom", "tty", } { - stat, err := os.Stat(filepath.Join("/dev", node)) + if err := copyDevNode(rootfs, node); err != nil { + return err + } + } + return nil +} + +func setupLoopbackDevices(rootfs string) error { + for i := 0; ; i++ { + var ( + device = fmt.Sprintf("loop%d", i) + source = filepath.Join("/dev", device) + dest = filepath.Join(rootfs, "dev", device) + ) + + if _, err := os.Stat(source); err != nil { + if !os.IsNotExist(err) { + return err + } + return nil + } + if _, err := os.Stat(dest); err == nil { + os.Remove(dest) + } + f, err := os.Create(dest) if err != nil { return err } - var ( - dest = filepath.Join(rootfs, "dev", node) - st = stat.Sys().(*syscall.Stat_t) - ) - if err := system.Mknod(dest, st.Mode, int(st.Rdev)); err != nil && !os.IsExist(err) { - return fmt.Errorf("copy %s %s", node, err) + f.Close() + if err := system.Mount(source, dest, "none", syscall.MS_BIND, ""); err != nil { + return err } } return nil } +func copyDevNode(rootfs, node string) error { + stat, err := os.Stat(filepath.Join("/dev", node)) + if err != nil { + return err + } + var ( + dest = filepath.Join(rootfs, "dev", node) + st = stat.Sys().(*syscall.Stat_t) + ) + if err := system.Mknod(dest, st.Mode, int(st.Rdev)); err != nil && !os.IsExist(err) { + return fmt.Errorf("copy %s %s", node, err) + } + return nil +} + // setupDev symlinks the current processes pipes into the // appropriate destination on the containers rootfs func setupDev(rootfs string) error { From fb08b8b221a9a722910d63db678ffb5a8f91b517 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 26 Feb 2014 19:19:14 -0800 Subject: [PATCH 75/81] Code review updates Docker-DCO-1.1-Signed-off-by: 
Michael Crosby (github: crosbymichael) --- execdriver/native/default_template.go | 2 +- execdriver/native/driver.go | 2 +- execdriver/native/info.go | 2 +- pkg/libcontainer/README.md | 12 +++++++----- pkg/libcontainer/network/strategy.go | 4 ++-- pkg/libcontainer/nsinit/exec.go | 4 +--- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/execdriver/native/default_template.go b/execdriver/native/default_template.go index 102967a489..91fd646c8e 100644 --- a/execdriver/native/default_template.go +++ b/execdriver/native/default_template.go @@ -7,7 +7,7 @@ import ( "github.com/dotcloud/docker/pkg/libcontainer" ) -// createContainer populates and configrues the container type with the +// createContainer populates and configures the container type with the // data provided by the execdriver.Command func createContainer(c *execdriver.Command) *libcontainer.Container { container := getDefaultTemplate() diff --git a/execdriver/native/driver.go b/execdriver/native/driver.go index ba7e8a719f..dc1c903f01 100644 --- a/execdriver/native/driver.go +++ b/execdriver/native/driver.go @@ -58,7 +58,7 @@ type driver struct { } func NewDriver(root string) (*driver, error) { - if err := os.MkdirAll(root, 0655); err != nil { + if err := os.MkdirAll(root, 0700); err != nil { return nil, err } return &driver{ diff --git a/execdriver/native/info.go b/execdriver/native/info.go index 5223feee83..aef2f85c6b 100644 --- a/execdriver/native/info.go +++ b/execdriver/native/info.go @@ -11,7 +11,7 @@ type info struct { } // IsRunning is determined by looking for the -// .nspid file for a container. If the file exists then the +// pid file for a container. 
If the file exists then the // container is currently running func (i *info) IsRunning() bool { if _, err := os.Stat(filepath.Join(i.driver.root, i.ID, "pid")); err == nil { diff --git a/pkg/libcontainer/README.md b/pkg/libcontainer/README.md index 4c8da8e925..b81401cd09 100644 --- a/pkg/libcontainer/README.md +++ b/pkg/libcontainer/README.md @@ -9,9 +9,9 @@ for using linux namespaces with no external dependencies. libcontainer provides #### container A container is a self contained directory that is able to run one or more processes inside without affecting the host system. The directory is usually a full system tree. Inside the directory -a `container.json` file just be placed with the runtime configuration for how the process -should be contained and run. Environment, networking, and different capabilities for the -process are specified in this file. +a `container.json` file is placed with the runtime configuration for how the processes +should be contained and ran. Environment, networking, and different capabilities for the +process are specified in this file. The configuration is used for each process executed inside the container. Sample `container.json` file: ```json @@ -67,10 +67,12 @@ Sample `container.json` file: } ``` -Using this configuration and the current directory holding the rootfs for a process to live, one can se libcontainer to exec the container. Running the life of the namespace a `.nspid` file -is written to the current directory with the pid of the namespace'd process to the external word. A client can use this pid to wait, kill, or perform other operation with the container. If a user tries to run an new process inside an existing container with a live namespace with namespace will be joined by the new process. +Using this configuration and the current directory holding the rootfs for a process to live, one can use libcontainer to exec the container. 
Running the life of the namespace a `pid` file +is written to the current directory with the pid of the namespace'd process to the external world. A client can use this pid to wait, kill, or perform other operation with the container. If a user tries to run an new process inside an existing container with a live namespace with namespace will be joined by the new process. +You may also specify an alternate root to to place the `container.json` file is read and where the `pid` file will be saved. + #### nsinit `nsinit` is a cli application used as the reference implementation of libcontainer. It is able to diff --git a/pkg/libcontainer/network/strategy.go b/pkg/libcontainer/network/strategy.go index a2f4f8f073..234fcc0aa2 100644 --- a/pkg/libcontainer/network/strategy.go +++ b/pkg/libcontainer/network/strategy.go @@ -13,8 +13,8 @@ var strategies = map[string]NetworkStrategy{ "veth": &Veth{}, } -// NetworkStrategy represends a specific network configuration for -// a containers networking stack +// NetworkStrategy represents a specific network configuration for +// a container's networking stack type NetworkStrategy interface { Create(*libcontainer.Network, int, libcontainer.Context) error Initialize(*libcontainer.Network, libcontainer.Context) error diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index f7a9c17d7f..f1a4e2477a 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -48,9 +48,7 @@ func (ns *linuxNs) Exec(container *libcontainer.Container, term Terminal, args [ command.Process.Kill() return -1, err } - defer func() { - ns.stateWriter.DeletePid() - }() + defer ns.stateWriter.DeletePid() // Do this before syncing with child so that no children // can escape the cgroup From a115ce797b80a14d268fcd96521cf05d1e1074c1 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Thu, 27 Feb 2014 08:28:02 -0800 Subject: [PATCH 76/81] Ensure that the container dir is remove on restore 
Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- execdriver/native/driver.go | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/execdriver/native/driver.go b/execdriver/native/driver.go index dc1c903f01..f2d7ed8ae2 100644 --- a/execdriver/native/driver.go +++ b/execdriver/native/driver.go @@ -108,20 +108,18 @@ func (d *driver) Kill(p *execdriver.Command, sig int) error { } func (d *driver) Restore(c *execdriver.Command) error { - var ( - nspid int - path = filepath.Join(d.root, c.ID, "pid") - ) - f, err := os.Open(path) + var nspid int + f, err := os.Open(filepath.Join(d.root, c.ID, "pid")) if err != nil { return err } + defer d.removeContainerRoot(c.ID) + if _, err := fmt.Fscanf(f, "%d", &nspid); err != nil { f.Close() return err } f.Close() - defer os.Remove(path) proc, err := os.FindProcess(nspid) if err != nil { From fdeea90fc806d8d2cccdc76a6ecb214dd03093ec Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Thu, 27 Feb 2014 09:28:26 -0800 Subject: [PATCH 77/81] Allow child process to live if daemon dies Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- execdriver/native/driver.go | 21 ++++++++++++++++----- pkg/libcontainer/nsinit/init.go | 8 +++++--- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/execdriver/native/driver.go b/execdriver/native/driver.go index f2d7ed8ae2..ff96510262 100644 --- a/execdriver/native/driver.go +++ b/execdriver/native/driver.go @@ -15,6 +15,7 @@ import ( "strconv" "strings" "syscall" + "time" ) const ( @@ -121,12 +122,22 @@ func (d *driver) Restore(c *execdriver.Command) error { } f.Close() - proc, err := os.FindProcess(nspid) - if err != nil { - return err + if _, err := os.FindProcess(nspid); err != nil { + return fmt.Errorf("finding existing pid %d %s", nspid, err) } - _, err = proc.Wait() - return err + c.Process = &os.Process{ + Pid: nspid, + } + + for _ = range time.Tick(500 * time.Millisecond) { + if err := syscall.Kill(nspid, 0); err != 
nil { + if strings.Contains(err.Error(), "no such process") { + return nil + } + return fmt.Errorf("signal error %s", err) + } + } + return nil } func (d *driver) Info(id string) execdriver.Info { diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index cfc5058559..cc481e2cc8 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -50,9 +50,11 @@ func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consol } } - if err := system.ParentDeathSignal(); err != nil { - return fmt.Errorf("parent deth signal %s", err) - } + /* + if err := system.ParentDeathSignal(); err != nil { + return fmt.Errorf("parent death signal %s", err) + } + */ if err := setupNewMountNamespace(rootfs, console, container.ReadonlyFs); err != nil { return fmt.Errorf("setup mount namespace %s", err) } From 44c3b7133218a66f44cb51b7563b7c0016583eda Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Thu, 27 Feb 2014 11:40:25 -0800 Subject: [PATCH 78/81] Ensure that ticker does not leak Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- execdriver/native/driver.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/execdriver/native/driver.go b/execdriver/native/driver.go index ff96510262..88253a5940 100644 --- a/execdriver/native/driver.go +++ b/execdriver/native/driver.go @@ -128,8 +128,10 @@ func (d *driver) Restore(c *execdriver.Command) error { c.Process = &os.Process{ Pid: nspid, } + ticker := time.NewTicker(500 * time.Millisecond) + defer ticker.Stop() - for _ = range time.Tick(500 * time.Millisecond) { + for _ = range ticker.C { if err := syscall.Kill(nspid, 0); err != nil { if strings.Contains(err.Error(), "no such process") { return nil From 2f35f8e2a88a378d7ff8eacf5346f9711a59489a Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 3 Mar 2014 11:31:37 -0800 Subject: [PATCH 79/81] Update readme to remove .nspid Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: 
crosbymichael) --- pkg/libcontainer/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/libcontainer/README.md b/pkg/libcontainer/README.md index b81401cd09..103253150d 100644 --- a/pkg/libcontainer/README.md +++ b/pkg/libcontainer/README.md @@ -87,4 +87,4 @@ nsinit exec /bin/bash If you wish to spawn another process inside the container while your current bash session is running just run the exact same command again to get another bash shell or change the command. If the original process dies, PID 1, all other processes spawned inside the container will also be killed and the namespace will be removed. -You can identify if a process is running in a container by looking to see if `.nspid` is in the root of the directory. +You can identify if a process is running in a container by looking to see if `pid` is in the root of the directory. From 5465fdf00f3ece165cbd3bb680dcc571e81510dd Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 3 Mar 2014 12:15:47 -0800 Subject: [PATCH 80/81] Factor out finalize namespace Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/nsinit/execin.go | 5 ++--- pkg/libcontainer/nsinit/init.go | 29 +++++++++++++++++++---------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/pkg/libcontainer/nsinit/execin.go b/pkg/libcontainer/nsinit/execin.go index 55f7b9695b..488fe0e248 100644 --- a/pkg/libcontainer/nsinit/execin.go +++ b/pkg/libcontainer/nsinit/execin.go @@ -5,7 +5,6 @@ package nsinit import ( "fmt" "github.com/dotcloud/docker/pkg/libcontainer" - "github.com/dotcloud/docker/pkg/libcontainer/capabilities" "github.com/dotcloud/docker/pkg/system" "os" "path/filepath" @@ -73,8 +72,8 @@ func (ns *linuxNs) ExecIn(container *libcontainer.Container, nspid int, args []s os.Exit(state.Sys().(syscall.WaitStatus).ExitStatus()) } dropAndExec: - if err := capabilities.DropCapabilities(container); err != nil { - return -1, fmt.Errorf("drop capabilities %s", err) + if 
err := finalizeNamespace(container); err != nil { + return -1, err } if err := system.Execv(args[0], args[0:], container.Env); err != nil { return -1, err diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index cc481e2cc8..565030f252 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -64,16 +64,8 @@ func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consol if err := system.Sethostname(container.Hostname); err != nil { return fmt.Errorf("sethostname %s", err) } - if err := capabilities.DropCapabilities(container); err != nil { - return fmt.Errorf("drop capabilities %s", err) - } - if err := setupUser(container); err != nil { - return fmt.Errorf("setup user %s", err) - } - if container.WorkingDir != "" { - if err := system.Chdir(container.WorkingDir); err != nil { - return fmt.Errorf("chdir to %s %s", container.WorkingDir, err) - } + if err := finalizeNamespace(container); err != nil { + return fmt.Errorf("finalize namespace %s", err) } return system.Execv(args[0], args[0:], container.Env) } @@ -142,3 +134,20 @@ func setupNetwork(container *libcontainer.Container, context libcontainer.Contex } return nil } + +// finalizeNamespace drops the caps and sets the correct user +// and working dir before execing the command inside the namespace +func finalizeNamespace(container *libcontainer.Container) error { + if err := capabilities.DropCapabilities(container); err != nil { + return fmt.Errorf("drop capabilities %s", err) + } + if err := setupUser(container); err != nil { + return fmt.Errorf("setup user %s", err) + } + if container.WorkingDir != "" { + if err := system.Chdir(container.WorkingDir); err != nil { + return fmt.Errorf("chdir to %s %s", container.WorkingDir, err) + } + } + return nil +} From 2e71adac9f2935abaf17741a440497e7e31388e2 Mon Sep 17 00:00:00 2001 From: Sven Dowideit Date: Thu, 27 Feb 2014 23:36:19 -0800 Subject: [PATCH 81/81] very minor spelling 
Docker-DCO-1.1-Signed-off-by: Sven Dowideit (github: SvenDowideit) --- pkg/libcontainer/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/libcontainer/README.md b/pkg/libcontainer/README.md index 103253150d..d6e4dedd63 100644 --- a/pkg/libcontainer/README.md +++ b/pkg/libcontainer/README.md @@ -7,7 +7,7 @@ for using linux namespaces with no external dependencies. libcontainer provides #### container -A container is a self contained directory that is able to run one or more processes inside without +A container is a self contained directory that is able to run one or more processes without affecting the host system. The directory is usually a full system tree. Inside the directory a `container.json` file is placed with the runtime configuration for how the processes should be contained and ran. Environment, networking, and different capabilities for the @@ -67,11 +67,11 @@ Sample `container.json` file: } ``` -Using this configuration and the current directory holding the rootfs for a process to live, one can use libcontainer to exec the container. Running the life of the namespace a `pid` file -is written to the current directory with the pid of the namespace'd process to the external world. A client can use this pid to wait, kill, or perform other operation with the container. If a user tries to run an new process inside an existing container with a live namespace with namespace will be joined by the new process. +Using this configuration and the current directory holding the rootfs for a process, one can use libcontainer to exec the container. During the life of the namespace, a `pid` file +is written to the current directory with the pid of the namespaced process to the external world. A client can use this pid to wait, kill, or perform other operations with the container. If a user tries to run a new process inside an existing container with a live namespace the namespace will be joined by the new process. 
-You may also specify an alternate root to to place the `container.json` file is read and where the `pid` file will be saved. +You may also specify an alternate root directory where the `container.json` file is read and where the `pid` file will be saved. #### nsinit @@ -79,7 +79,7 @@ You may also specify an alternate root to to place the `container.json` file is spawn or join new containers giving the current directory. To use `nsinit` cd into a linux rootfs and copy a `container.json` file into the directory with your specified configuration. -To execution `/bin/bash` in the current directory as a container just run: +To execute `/bin/bash` in the current directory as a container just run: ```bash nsinit exec /bin/bash ```