1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00

Update libcontainer to 1597c68f7b941fd97881155d7f077852e2914e7b

This commit contains changes for docker:
* user.GetGroupFile to user.GetGroupPath docker/libcontainer#301
* Add systemd support for OOM docker/libcontainer#307
* Support for custom namespaces docker/libcontainer#279, docker/libcontainer#312
* Fixes #9699 docker/libcontainer#308

Signed-off-by: Alexander Morozov <lk4d4@docker.com>
This commit is contained in:
Alexander Morozov 2014-12-23 12:10:03 -08:00
parent 043e33c2c1
commit 50905a6d6c
43 changed files with 517 additions and 362 deletions

View file

@ -1399,7 +1399,7 @@ func serveFd(addr string, job *engine.Job) error {
}
func lookupGidByName(nameOrGid string) (int, error) {
groupFile, err := user.GetGroupFile()
groupFile, err := user.GetGroupPath()
if err != nil {
return -1, err
}

View file

@ -82,7 +82,7 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, e
func (d *driver) createNetwork(container *libcontainer.Config, c *execdriver.Command) error {
if c.Network.HostNetworking {
container.Namespaces["NEWNET"] = false
container.Namespaces.Remove(libcontainer.NEWNET)
return nil
}
@ -119,10 +119,7 @@ func (d *driver) createNetwork(container *libcontainer.Config, c *execdriver.Com
cmd := active.cmd
nspath := filepath.Join("/proc", fmt.Sprint(cmd.Process.Pid), "ns", "net")
container.Networks = append(container.Networks, &libcontainer.Network{
Type: "netns",
NsPath: nspath,
})
container.Namespaces.Add(libcontainer.NEWNET, nspath)
}
return nil
@ -130,7 +127,7 @@ func (d *driver) createNetwork(container *libcontainer.Config, c *execdriver.Com
func (d *driver) createIpc(container *libcontainer.Config, c *execdriver.Command) error {
if c.Ipc.HostIpc {
container.Namespaces["NEWIPC"] = false
container.Namespaces.Remove(libcontainer.NEWIPC)
return nil
}
@ -144,7 +141,7 @@ func (d *driver) createIpc(container *libcontainer.Config, c *execdriver.Command
}
cmd := active.cmd
container.IpcNsPath = filepath.Join("/proc", fmt.Sprint(cmd.Process.Pid), "ns", "ipc")
container.Namespaces.Add(libcontainer.NEWIPC, filepath.Join("/proc", fmt.Sprint(cmd.Process.Pid), "ns", "ipc"))
}
return nil

View file

@ -61,10 +61,6 @@ func NewDriver(root, initPath string) (*driver, error) {
}, nil
}
func (d *driver) notifyOnOOM(config *libcontainer.Config) (<-chan struct{}, error) {
return fs.NotifyOnOOM(config.Cgroups)
}
type execOutput struct {
exitCode int
err error
@ -152,11 +148,16 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
}
oomKill := false
oomKillNotification, err := d.notifyOnOOM(container)
state, err := libcontainer.GetState(filepath.Join(d.root, c.ID))
if err == nil {
_, oomKill = <-oomKillNotification
oomKillNotification, err := libcontainer.NotifyOnOOM(state)
if err == nil {
_, oomKill = <-oomKillNotification
} else {
log.Warnf("WARNING: Your kernel does not support OOM notifications: %s", err)
}
} else {
log.Warnf("WARNING: Your kernel does not support OOM notifications: %s", err)
log.Warnf("Failed to get container state, oom notify will not work: %s", err)
}
// wait for the container to exit.
execOutput := <-execOutputChan

View file

@ -25,12 +25,12 @@ func New() *libcontainer.Config {
"KILL",
"AUDIT_WRITE",
},
Namespaces: map[string]bool{
"NEWNS": true,
"NEWUTS": true,
"NEWIPC": true,
"NEWPID": true,
"NEWNET": true,
Namespaces: libcontainer.Namespaces{
{Type: "NEWNS"},
{Type: "NEWUTS"},
{Type: "NEWIPC"},
{Type: "NEWPID"},
{Type: "NEWNET"},
},
Cgroups: &cgroups.Cgroup{
Parent: "docker",

View file

@ -66,7 +66,7 @@ if [ "$1" = '--go' ]; then
mv tmp-tar src/code.google.com/p/go/src/pkg/archive/tar
fi
clone git github.com/docker/libcontainer 53eca435e63db58b06cf796d3a9326db5fd42253
clone git github.com/docker/libcontainer 1597c68f7b941fd97881155d7f077852e2914e7b
# see src/github.com/docker/libcontainer/update-vendor.sh which is the "source of truth" for libcontainer deps (just like this file)
rm -rf src/github.com/docker/libcontainer/vendor
eval "$(grep '^clone ' src/github.com/docker/libcontainer/update-vendor.sh | grep -v 'github.com/codegangsta/cli')"

View file

@ -1,13 +1,13 @@
all:
docker build -t docker/libcontainer .
docker build -t dockercore/libcontainer .
test:
# we need NET_ADMIN for the netlink tests and SYS_ADMIN for mounting
docker run --rm -it --privileged docker/libcontainer
docker run --rm -it --privileged dockercore/libcontainer
sh:
docker run --rm -it --privileged -w /busybox docker/libcontainer nsinit exec sh
docker run --rm -it --privileged -w /busybox dockercore/libcontainer nsinit exec sh
GO_PACKAGES = $(shell find . -not \( -wholename ./vendor -prune -o -wholename ./.git -prune \) -name '*.go' -print0 | xargs -0n1 dirname | sort -u)

View file

@ -318,4 +318,29 @@ a container.
| Resume | Resume all processes inside the container if paused |
| Exec | Execute a new process inside of the container ( requires setns ) |
### Execute a new process inside of a running container.
User can execute a new process inside of a running container. Any binaries to be
executed must be accessible within the container's rootfs.
The started process will run inside the container's rootfs. Any changes
made by the process to the container's filesystem will persist after the
process finished executing.
The started process will join all the container's existing namespaces. When the
container is paused, the process will also be paused and will resume when
the container is unpaused. The started process will only run when the container's
primary process (PID 1) is running, and will not be restarted when the container
is restarted.
#### Planned additions
The started process will have its own cgroups nested inside the container's
cgroups. This is used for process tracking and optionally resource allocation
handling for the new process. Freezer cgroup is required, the rest of the cgroups
are optional. The process executor must place its pid inside the correct
cgroups before starting the process. This is done so that no child processes or
threads can escape the cgroups.
When the process is stopped, the process executor will try (in a best-effort way)
to stop all its children and remove the sub-cgroups.

View file

@ -50,6 +50,7 @@ type Cgroup struct {
CpuQuota int64 `json:"cpu_quota,omitempty"` // CPU hardcap limit (in usecs). Allowed cpu time in a given period.
CpuPeriod int64 `json:"cpu_period,omitempty"` // CPU period to be used for hardcapping (in usecs). 0 to use system default.
CpusetCpus string `json:"cpuset_cpus,omitempty"` // CPU to use
CpusetMems string `json:"cpuset_mems,omitempty"` // MEM to use
Freezer FreezerState `json:"freezer,omitempty"` // set the freeze value for the process
Slice string `json:"slice,omitempty"` // Parent slice to use for systemd
}

View file

@ -18,7 +18,7 @@ func (s *CpusetGroup) Set(d *data) error {
if err != nil {
return err
}
return s.SetDir(dir, d.c.CpusetCpus, d.pid)
return s.SetDir(dir, d.c.CpusetCpus, d.c.CpusetMems, d.pid)
}
func (s *CpusetGroup) Remove(d *data) error {
@ -29,7 +29,7 @@ func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error {
return nil
}
func (s *CpusetGroup) SetDir(dir, value string, pid int) error {
func (s *CpusetGroup) SetDir(dir, cpus string, mems string, pid int) error {
if err := s.ensureParent(dir); err != nil {
return err
}
@ -40,10 +40,15 @@ func (s *CpusetGroup) SetDir(dir, value string, pid int) error {
return err
}
// If we don't use --cpuset, the default cpuset.cpus is set in
// s.ensureParent, otherwise, use the value we set
if value != "" {
if err := writeFile(dir, "cpuset.cpus", value); err != nil {
// If we don't use --cpuset-xxx, the default value inherit from parent cgroup
// is set in s.ensureParent, otherwise, use the value we set
if cpus != "" {
if err := writeFile(dir, "cpuset.cpus", cpus); err != nil {
return err
}
}
if mems != "" {
if err := writeFile(dir, "cpuset.mems", mems); err != nil {
return err
}
}

View file

@ -38,12 +38,17 @@ func (s *MemoryGroup) Set(d *data) error {
}
}
// By default, MemorySwap is set to twice the size of RAM.
// If you want to omit MemorySwap, set it to `-1'.
if d.c.MemorySwap != -1 {
// If you want to omit MemorySwap, set it to '-1'.
if d.c.MemorySwap == 0 {
if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(d.c.Memory*2, 10)); err != nil {
return err
}
}
if d.c.MemorySwap > 0 {
if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(d.c.MemorySwap, 10)); err != nil {
return err
}
}
}
return nil
}

View file

@ -90,4 +90,8 @@ func expectMemoryStatEquals(t *testing.T, expected, actual cgroups.MemoryStats)
t.Fail()
}
}
if expected.Failcnt != actual.Failcnt {
log.Printf("Expected memory failcnt %d but found %d\n", expected.Failcnt, actual.Failcnt)
t.Fail()
}
}

View file

@ -313,5 +313,5 @@ func joinCpuset(c *cgroups.Cgroup, pid int) error {
s := &fs.CpusetGroup{}
return s.SetDir(path, c.CpusetCpus, pid)
return s.SetDir(path, c.CpusetCpus, c.CpusetMems, pid)
}

View file

@ -9,6 +9,7 @@ import (
"path/filepath"
"strconv"
"strings"
"time"
"github.com/docker/docker/pkg/mount"
)
@ -193,13 +194,30 @@ func EnterPid(cgroupPaths map[string]string, pid int) error {
}
// RemovePaths iterates over the provided paths removing them.
// If an error is encountered the removal proceeds and the first error is
// returned to ensure a partial removal is not possible.
// We trying to remove all paths five times with increasing delay between tries.
// If after all there are not removed cgroups - appropriate error will be
// returned.
func RemovePaths(paths map[string]string) (err error) {
for _, path := range paths {
if rerr := os.RemoveAll(path); err == nil {
err = rerr
delay := 10 * time.Millisecond
for i := 0; i < 5; i++ {
if i != 0 {
time.Sleep(delay)
delay *= 2
}
for s, p := range paths {
os.RemoveAll(p)
// TODO: here probably should be logging
_, err := os.Stat(p)
// We need this strange way of checking cgroups existence because
// RemoveAll almost always returns error, even on already removed
// cgroups
if os.IsNotExist(err) {
delete(paths, s)
}
}
if len(paths) == 0 {
return nil
}
}
return err
return fmt.Errorf("Failed to remove paths: %s", paths)
}

View file

@ -10,6 +10,57 @@ type MountConfig mount.MountConfig
type Network network.Network
type NamespaceType string
const (
NEWNET NamespaceType = "NEWNET"
NEWPID NamespaceType = "NEWPID"
NEWNS NamespaceType = "NEWNS"
NEWUTS NamespaceType = "NEWUTS"
NEWIPC NamespaceType = "NEWIPC"
NEWUSER NamespaceType = "NEWUSER"
)
// Namespace defines configuration for each namespace. It specifies an
// alternate path that is able to be joined via setns.
type Namespace struct {
Type NamespaceType `json:"type"`
Path string `json:"path,omitempty"`
}
type Namespaces []Namespace
func (n Namespaces) Remove(t NamespaceType) bool {
i := n.index(t)
if i == -1 {
return false
}
n = append(n[:i], n[i+1:]...)
return true
}
func (n Namespaces) Add(t NamespaceType, path string) {
i := n.index(t)
if i == -1 {
n = append(n, Namespace{Type: t, Path: path})
return
}
n[i].Path = path
}
func (n Namespaces) index(t NamespaceType) int {
for i, ns := range n {
if ns.Type == t {
return i
}
}
return -1
}
func (n Namespaces) Contains(t NamespaceType) bool {
return n.index(t) != -1
}
// Config defines configuration options for executing a process inside a contained environment.
type Config struct {
// Mount specific options.
@ -38,7 +89,7 @@ type Config struct {
// Namespaces specifies the container's namespaces that it should setup when cloning the init process
// If a namespace is not provided that namespace is shared from the container's parent process
Namespaces map[string]bool `json:"namespaces,omitempty"`
Namespaces Namespaces `json:"namespaces,omitempty"`
// Capabilities specify the capabilities to keep when executing the process inside the container
// All capbilities not specified will be dropped from the processes capability mask
@ -47,9 +98,6 @@ type Config struct {
// Networks specifies the container's network setup to be created
Networks []*Network `json:"networks,omitempty"`
// Ipc specifies the container's ipc setup to be created
IpcNsPath string `json:"ipc,omitempty"`
// Routes can be specified to create entries in the route table as the container is started
Routes []*Route `json:"routes,omitempty"`

View file

@ -64,12 +64,12 @@ func TestConfigJsonFormat(t *testing.T) {
t.Fail()
}
if !container.Namespaces["NEWNET"] {
if !container.Namespaces.Contains(NEWNET) {
t.Log("namespaces should contain NEWNET")
t.Fail()
}
if container.Namespaces["NEWUSER"] {
if container.Namespaces.Contains(NEWUSER) {
t.Log("namespaces should not contain NEWUSER")
t.Fail()
}

View file

@ -4,6 +4,8 @@ import (
"os"
"strings"
"testing"
"github.com/docker/libcontainer"
)
func TestExecPS(t *testing.T) {
@ -55,7 +57,6 @@ func TestIPCPrivate(t *testing.T) {
}
config := newTemplateConfig(rootfs)
config.Namespaces["NEWIPC"] = true
buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc")
if err != nil {
t.Fatal(err)
@ -87,7 +88,7 @@ func TestIPCHost(t *testing.T) {
}
config := newTemplateConfig(rootfs)
config.Namespaces["NEWIPC"] = false
config.Namespaces.Remove(libcontainer.NEWIPC)
buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc")
if err != nil {
t.Fatal(err)
@ -119,8 +120,7 @@ func TestIPCJoinPath(t *testing.T) {
}
config := newTemplateConfig(rootfs)
config.Namespaces["NEWIPC"] = false
config.IpcNsPath = "/proc/1/ns/ipc"
config.Namespaces.Add(libcontainer.NEWIPC, "/proc/1/ns/ipc")
buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc")
if err != nil {
@ -148,8 +148,7 @@ func TestIPCBadPath(t *testing.T) {
defer remove(rootfs)
config := newTemplateConfig(rootfs)
config.Namespaces["NEWIPC"] = false
config.IpcNsPath = "/proc/1/ns/ipcc"
config.Namespaces.Add(libcontainer.NEWIPC, "/proc/1/ns/ipcc")
_, _, err = runContainer(config, "", "true")
if err == nil {

View file

@ -0,0 +1,140 @@
package integration
import (
"os"
"os/exec"
"strings"
"sync"
"testing"
"github.com/docker/libcontainer"
"github.com/docker/libcontainer/namespaces"
)
func TestExecIn(t *testing.T) {
if testing.Short() {
return
}
rootfs, err := newRootFs()
if err != nil {
t.Fatal(err)
}
defer remove(rootfs)
config := newTemplateConfig(rootfs)
if err := writeConfig(config); err != nil {
t.Fatalf("failed to write config %s", err)
}
containerCmd, statePath, containerErr := startLongRunningContainer(config)
defer func() {
// kill the container
if containerCmd.Process != nil {
containerCmd.Process.Kill()
}
if err := <-containerErr; err != nil {
t.Fatal(err)
}
}()
// start the exec process
state, err := libcontainer.GetState(statePath)
if err != nil {
t.Fatalf("failed to get state %s", err)
}
buffers := newStdBuffers()
execErr := make(chan error, 1)
go func() {
_, err := namespaces.ExecIn(config, state, []string{"ps"},
os.Args[0], "exec", buffers.Stdin, buffers.Stdout, buffers.Stderr,
"", nil)
execErr <- err
}()
if err := <-execErr; err != nil {
t.Fatalf("exec finished with error %s", err)
}
out := buffers.Stdout.String()
if !strings.Contains(out, "sleep 10") || !strings.Contains(out, "ps") {
t.Fatalf("unexpected running process, output %q", out)
}
}
func TestExecInRlimit(t *testing.T) {
if testing.Short() {
return
}
rootfs, err := newRootFs()
if err != nil {
t.Fatal(err)
}
defer remove(rootfs)
config := newTemplateConfig(rootfs)
if err := writeConfig(config); err != nil {
t.Fatalf("failed to write config %s", err)
}
containerCmd, statePath, containerErr := startLongRunningContainer(config)
defer func() {
// kill the container
if containerCmd.Process != nil {
containerCmd.Process.Kill()
}
if err := <-containerErr; err != nil {
t.Fatal(err)
}
}()
// start the exec process
state, err := libcontainer.GetState(statePath)
if err != nil {
t.Fatalf("failed to get state %s", err)
}
buffers := newStdBuffers()
execErr := make(chan error, 1)
go func() {
_, err := namespaces.ExecIn(config, state, []string{"/bin/sh", "-c", "ulimit -n"},
os.Args[0], "exec", buffers.Stdin, buffers.Stdout, buffers.Stderr,
"", nil)
execErr <- err
}()
if err := <-execErr; err != nil {
t.Fatalf("exec finished with error %s", err)
}
out := buffers.Stdout.String()
if limit := strings.TrimSpace(out); limit != "1024" {
t.Fatalf("expected rlimit to be 1024, got %s", limit)
}
}
// start a long-running container so we have time to inspect execin processes
func startLongRunningContainer(config *libcontainer.Config) (*exec.Cmd, string, chan error) {
containerErr := make(chan error, 1)
containerCmd := &exec.Cmd{}
var statePath string
createCmd := func(container *libcontainer.Config, console, dataPath, init string,
pipe *os.File, args []string) *exec.Cmd {
containerCmd = namespaces.DefaultCreateCommand(container, console, dataPath, init, pipe, args)
statePath = dataPath
return containerCmd
}
var containerStart sync.WaitGroup
containerStart.Add(1)
go func() {
buffers := newStdBuffers()
_, err := namespaces.Exec(config,
buffers.Stdin, buffers.Stdout, buffers.Stderr,
"", config.RootFs, []string{"sleep", "10"},
createCmd, containerStart.Done)
containerErr <- err
}()
containerStart.Wait()
return containerCmd, statePath, containerErr
}

View file

@ -1,33 +1,76 @@
package integration
import (
"encoding/json"
"log"
"os"
"runtime"
"github.com/docker/libcontainer"
"github.com/docker/libcontainer/namespaces"
_ "github.com/docker/libcontainer/namespaces/nsenter"
)
// init runs the libcontainer initialization code because of the busybox style needs
// to work around the go runtime and the issues with forking
func init() {
if len(os.Args) < 2 || os.Args[1] != "init" {
if len(os.Args) < 2 {
return
}
runtime.LockOSThread()
// handle init
if len(os.Args) >= 2 && os.Args[1] == "init" {
runtime.LockOSThread()
container, err := loadConfig()
if err != nil {
log.Fatal(err)
container, err := loadConfig()
if err != nil {
log.Fatal(err)
}
rootfs, err := os.Getwd()
if err != nil {
log.Fatal(err)
}
if err := namespaces.Init(container, rootfs, "", os.NewFile(3, "pipe"), os.Args[3:]); err != nil {
log.Fatalf("unable to initialize for container: %s", err)
}
os.Exit(1)
}
rootfs, err := os.Getwd()
if err != nil {
log.Fatal(err)
}
// handle execin
if len(os.Args) >= 2 && os.Args[0] == "nsenter-exec" {
runtime.LockOSThread()
if err := namespaces.Init(container, rootfs, "", os.NewFile(3, "pipe"), os.Args[3:]); err != nil {
log.Fatalf("unable to initialize for container: %s", err)
// User args are passed after '--' in the command line.
userArgs := findUserArgs()
config, err := loadConfigFromFd()
if err != nil {
log.Fatalf("docker-exec: unable to receive config from sync pipe: %s", err)
}
if err := namespaces.FinalizeSetns(config, userArgs); err != nil {
log.Fatalf("docker-exec: failed to exec: %s", err)
}
os.Exit(1)
}
os.Exit(1)
}
func findUserArgs() []string {
for i, a := range os.Args {
if a == "--" {
return os.Args[i+1:]
}
}
return []string{}
}
// loadConfigFromFd loads a container's config from the sync pipe that is provided by
// fd 3 when running a process
func loadConfigFromFd() (*libcontainer.Config, error) {
var config *libcontainer.Config
if err := json.NewDecoder(os.NewFile(3, "child")).Decode(&config); err != nil {
return nil, err
}
return config, nil
}

View file

@ -32,12 +32,12 @@ func newTemplateConfig(rootfs string) *libcontainer.Config {
"KILL",
"AUDIT_WRITE",
},
Namespaces: map[string]bool{
"NEWNS": true,
"NEWUTS": true,
"NEWIPC": true,
"NEWPID": true,
"NEWNET": true,
Namespaces: libcontainer.Namespaces{
{Type: libcontainer.NEWNS},
{Type: libcontainer.NEWUTS},
{Type: libcontainer.NEWIPC},
{Type: libcontainer.NEWPID},
{Type: libcontainer.NEWNET},
},
Cgroups: &cgroups.Cgroup{
Parent: "integration",

View file

@ -1,29 +0,0 @@
package ipc
import (
"fmt"
"os"
"syscall"
"github.com/docker/libcontainer/system"
)
// Join the IPC Namespace of specified ipc path if it exists.
// If the path does not exist then you are not joining a container.
func Initialize(nsPath string) error {
if nsPath == "" {
return nil
}
f, err := os.OpenFile(nsPath, os.O_RDONLY, 0)
if err != nil {
return fmt.Errorf("failed get IPC namespace fd: %v", err)
}
err = system.Setns(f.Fd(), syscall.CLONE_NEWIPC)
f.Close()
if err != nil {
return fmt.Errorf("failed to setns current IPC namespace: %v", err)
}
return nil
}

View file

@ -97,6 +97,10 @@ func FinalizeSetns(container *libcontainer.Config, args []string) error {
return err
}
if err := setupRlimits(container); err != nil {
return fmt.Errorf("setup rlimits %s", err)
}
if err := FinalizeNamespace(container); err != nil {
return err
}

View file

@ -13,7 +13,6 @@ import (
"github.com/docker/libcontainer"
"github.com/docker/libcontainer/apparmor"
"github.com/docker/libcontainer/console"
"github.com/docker/libcontainer/ipc"
"github.com/docker/libcontainer/label"
"github.com/docker/libcontainer/mount"
"github.com/docker/libcontainer/netlink"
@ -65,7 +64,10 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, pip
if err := json.NewDecoder(pipe).Decode(&networkState); err != nil {
return err
}
// join any namespaces via a path to the namespace fd if provided
if err := joinExistingNamespaces(container.Namespaces); err != nil {
return err
}
if consolePath != "" {
if err := console.OpenAndDup(consolePath); err != nil {
return err
@ -79,9 +81,7 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, pip
return fmt.Errorf("setctty %s", err)
}
}
if err := ipc.Initialize(container.IpcNsPath); err != nil {
return fmt.Errorf("setup IPC %s", err)
}
if err := setupNetwork(container, networkState); err != nil {
return fmt.Errorf("setup networking %s", err)
}
@ -178,17 +178,17 @@ func SetupUser(u string) error {
Home: "/",
}
passwdFile, err := user.GetPasswdFile()
passwdPath, err := user.GetPasswdPath()
if err != nil {
return err
}
groupFile, err := user.GetGroupFile()
groupPath, err := user.GetGroupPath()
if err != nil {
return err
}
execUser, err := user.GetExecUserFile(u, &defaultExecUser, passwdFile, groupFile)
execUser, err := user.GetExecUserPath(u, &defaultExecUser, passwdPath, groupPath)
if err != nil {
return fmt.Errorf("get supplementary groups %s", err)
}
@ -308,3 +308,22 @@ func LoadContainerEnvironment(container *libcontainer.Config) error {
}
return nil
}
// joinExistingNamespaces gets all the namespace paths specified for the container and
// does a setns on the namespace fd so that the current process joins the namespace.
func joinExistingNamespaces(namespaces []libcontainer.Namespace) error {
for _, ns := range namespaces {
if ns.Path != "" {
f, err := os.OpenFile(ns.Path, os.O_RDONLY, 0)
if err != nil {
return err
}
err = system.Setns(f.Fd(), uintptr(namespaceInfo[ns.Type]))
f.Close()
if err != nil {
return err
}
}
}
return nil
}

View file

@ -15,6 +15,10 @@
#include <unistd.h>
#include <getopt.h>
#ifndef PR_SET_CHILD_SUBREAPER
#define PR_SET_CHILD_SUBREAPER 36
#endif
static const kBufSize = 256;
static const char *kNsEnter = "nsenter";
@ -32,8 +36,8 @@ void get_args(int *argc, char ***argv)
contents_size += kBufSize;
contents = (char *)realloc(contents, contents_size);
bytes_read =
read(fd, contents + contents_offset,
contents_size - contents_offset);
read(fd, contents + contents_offset,
contents_size - contents_offset);
contents_offset += bytes_read;
}
while (bytes_read > 0);
@ -90,16 +94,17 @@ void nsenter()
}
if (prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0) == -1) {
fprintf(stderr, "nsenter: failed to set child subreaper: %s", strerror(errno));
exit(1);
}
fprintf(stderr, "nsenter: failed to set child subreaper: %s",
strerror(errno));
exit(1);
}
static const struct option longopts[] = {
{"nspid", required_argument, NULL, 'n'},
{"console", required_argument, NULL, 't'},
{NULL, 0, NULL, 0}
};
pid_t init_pid = -1;
char *init_pid_str = NULL;
char *console = NULL;

View file

@ -1,50 +0,0 @@
package namespaces
import "errors"
type (
Namespace struct {
Key string `json:"key,omitempty"`
Value int `json:"value,omitempty"`
File string `json:"file,omitempty"`
}
Namespaces []*Namespace
)
// namespaceList is used to convert the libcontainer types
// into the names of the files located in /proc/<pid>/ns/* for
// each namespace
var (
namespaceList = Namespaces{}
ErrUnkownNamespace = errors.New("Unknown namespace")
ErrUnsupported = errors.New("Unsupported method")
)
func (ns *Namespace) String() string {
return ns.Key
}
func GetNamespace(key string) *Namespace {
for _, ns := range namespaceList {
if ns.Key == key {
cpy := *ns
return &cpy
}
}
return nil
}
// Contains returns true if the specified Namespace is
// in the slice
func (n Namespaces) Contains(ns string) bool {
return n.Get(ns) != nil
}
func (n Namespaces) Get(ns string) *Namespace {
for _, nsp := range n {
if nsp != nil && nsp.Key == ns {
return nsp
}
}
return nil
}

View file

@ -1,16 +0,0 @@
package namespaces
import (
"syscall"
)
func init() {
namespaceList = Namespaces{
{Key: "NEWNS", Value: syscall.CLONE_NEWNS, File: "mnt"},
{Key: "NEWUTS", Value: syscall.CLONE_NEWUTS, File: "uts"},
{Key: "NEWIPC", Value: syscall.CLONE_NEWIPC, File: "ipc"},
{Key: "NEWUSER", Value: syscall.CLONE_NEWUSER, File: "user"},
{Key: "NEWPID", Value: syscall.CLONE_NEWPID, File: "pid"},
{Key: "NEWNET", Value: syscall.CLONE_NEWNET, File: "net"},
}
}

View file

@ -1,30 +0,0 @@
package namespaces
import (
"testing"
)
func TestNamespacesContains(t *testing.T) {
ns := Namespaces{
GetNamespace("NEWPID"),
GetNamespace("NEWNS"),
GetNamespace("NEWUTS"),
}
if ns.Contains("NEWNET") {
t.Fatal("namespaces should not contain NEWNET")
}
if !ns.Contains("NEWPID") {
t.Fatal("namespaces should contain NEWPID but does not")
}
withNil := Namespaces{
GetNamespace("UNDEFINED"), // this element will be nil
GetNamespace("NEWPID"),
}
if !withNil.Contains("NEWPID") {
t.Fatal("namespaces should contain NEWPID but does not")
}
}

View file

@ -5,6 +5,8 @@ package namespaces
import (
"os"
"syscall"
"github.com/docker/libcontainer"
)
type initError struct {
@ -15,6 +17,15 @@ func (i initError) Error() string {
return i.Message
}
var namespaceInfo = map[libcontainer.NamespaceType]int{
libcontainer.NEWNET: syscall.CLONE_NEWNET,
libcontainer.NEWNS: syscall.CLONE_NEWNS,
libcontainer.NEWUSER: syscall.CLONE_NEWUSER,
libcontainer.NEWIPC: syscall.CLONE_NEWIPC,
libcontainer.NEWUTS: syscall.CLONE_NEWUTS,
libcontainer.NEWPID: syscall.CLONE_NEWPID,
}
// New returns a newly initialized Pipe for communication between processes
func newInitPipe() (parent *os.File, child *os.File, err error) {
fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
@ -26,13 +37,9 @@ func newInitPipe() (parent *os.File, child *os.File, err error) {
// GetNamespaceFlags parses the container's Namespaces options to set the correct
// flags on clone, unshare, and setns
func GetNamespaceFlags(namespaces map[string]bool) (flag int) {
for key, enabled := range namespaces {
if enabled {
if ns := GetNamespace(key); ns != nil {
flag |= ns.Value
}
}
func GetNamespaceFlags(namespaces libcontainer.Namespaces) (flag int) {
for _, v := range namespaces {
flag |= namespaceInfo[v.Type]
}
return flag
}

View file

@ -522,11 +522,10 @@ func NetworkSetMacAddress(iface *net.Interface, macaddr string) error {
var (
MULTICAST byte = 0x1
LOCALOUI byte = 0x2
)
if hwaddr[0]&0x1 == MULTICAST || hwaddr[0]&0x2 != LOCALOUI {
return fmt.Errorf("Incorrect Local MAC Address specified: %s", macaddr)
if hwaddr[0]&0x1 == MULTICAST {
return fmt.Errorf("Multicast MAC Address is not supported: %s", macaddr)
}
wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)

View file

@ -1,39 +0,0 @@
// +build linux
package network
import (
"fmt"
"os"
"syscall"
"github.com/docker/libcontainer/system"
)
// crosbymichael: could make a network strategy that instead of returning veth pair names it returns a pid to an existing network namespace
type NetNS struct {
}
func (v *NetNS) Create(n *Network, nspid int, networkState *NetworkState) error {
networkState.NsPath = n.NsPath
return nil
}
func (v *NetNS) Initialize(config *Network, networkState *NetworkState) error {
if networkState.NsPath == "" {
return fmt.Errorf("nspath does is not specified in NetworkState")
}
f, err := os.OpenFile(networkState.NsPath, os.O_RDONLY, 0)
if err != nil {
return fmt.Errorf("failed get network namespace fd: %v", err)
}
if err := system.Setns(f.Fd(), syscall.CLONE_NEWNET); err != nil {
f.Close()
return fmt.Errorf("failed to setns current network namespace: %v", err)
}
f.Close()
return nil
}

View file

@ -88,6 +88,18 @@ func SetInterfaceIp(name string, rawIp string) error {
return netlink.NetworkLinkAddIp(iface, ip, ipNet)
}
func DeleteInterfaceIp(name string, rawIp string) error {
iface, err := net.InterfaceByName(name)
if err != nil {
return err
}
ip, ipNet, err := net.ParseCIDR(rawIp)
if err != nil {
return err
}
return netlink.NetworkLinkDelIp(iface, ip, ipNet)
}
func SetMtu(name string, mtu int) error {
iface, err := net.InterfaceByName(name)
if err != nil {

View file

@ -13,7 +13,6 @@ var (
var strategies = map[string]NetworkStrategy{
"veth": &Veth{},
"loopback": &Loopback{},
"netns": &NetNS{},
}
// NetworkStrategy represents a specific network configuration for

View file

@ -8,9 +8,6 @@ type Network struct {
// Type sets the networks type, commonly veth and loopback
Type string `json:"type,omitempty"`
// Path to network namespace
NsPath string `json:"ns_path,omitempty"`
// The bridge to use.
Bridge string `json:"bridge,omitempty"`
@ -50,6 +47,4 @@ type NetworkState struct {
VethHost string `json:"veth_host,omitempty"`
// The name of the veth interface created inside the container for the child.
VethChild string `json:"veth_child,omitempty"`
// Net namespace path.
NsPath string `json:"ns_path,omitempty"`
}

View file

@ -1,33 +1,29 @@
// +build linux
package fs
package libcontainer
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"syscall"
"github.com/docker/libcontainer/cgroups"
)
// NotifyOnOOM sends signals on the returned channel when the cgroup reaches
// its memory limit. The channel is closed when the cgroup is removed.
func NotifyOnOOM(c *cgroups.Cgroup) (<-chan struct{}, error) {
d, err := getCgroupData(c, 0)
const oomCgroupName = "memory"
// NotifyOnOOM returns channel on which you can expect event about OOM,
// if process died without OOM this channel will be closed.
// s is current *libcontainer.State for container.
func NotifyOnOOM(s *State) (<-chan struct{}, error) {
dir := s.CgroupPaths[oomCgroupName]
if dir == "" {
return nil, fmt.Errorf("There is no path for %q in state", oomCgroupName)
}
oomControl, err := os.Open(filepath.Join(dir, "memory.oom_control"))
if err != nil {
return nil, err
}
return notifyOnOOM(d)
}
func notifyOnOOM(d *data) (<-chan struct{}, error) {
dir, err := d.path("memory")
if err != nil {
return nil, err
}
fd, _, syserr := syscall.RawSyscall(syscall.SYS_EVENTFD2, 0, syscall.FD_CLOEXEC, 0)
if syserr != 0 {
return nil, syserr
@ -35,48 +31,32 @@ func notifyOnOOM(d *data) (<-chan struct{}, error) {
eventfd := os.NewFile(fd, "eventfd")
oomControl, err := os.Open(filepath.Join(dir, "memory.oom_control"))
if err != nil {
eventfd.Close()
return nil, err
}
var (
eventControlPath = filepath.Join(dir, "cgroup.event_control")
data = fmt.Sprintf("%d %d", eventfd.Fd(), oomControl.Fd())
)
if err := writeFile(dir, "cgroup.event_control", data); err != nil {
eventControlPath := filepath.Join(dir, "cgroup.event_control")
data := fmt.Sprintf("%d %d", eventfd.Fd(), oomControl.Fd())
if err := ioutil.WriteFile(eventControlPath, []byte(data), 0700); err != nil {
eventfd.Close()
oomControl.Close()
return nil, err
}
ch := make(chan struct{})
go func() {
defer func() {
close(ch)
eventfd.Close()
oomControl.Close()
}()
buf := make([]byte, 8)
for {
if _, err := eventfd.Read(buf); err != nil {
return
}
// When a cgroup is destroyed, an event is sent to eventfd.
// So if the control path is gone, return instead of notifying.
if _, err := os.Lstat(eventControlPath); os.IsNotExist(err) {
return
}
ch <- struct{}{}
}
}()
return ch, nil
}

View file

@ -1,38 +1,48 @@
// +build linux
package fs
package libcontainer
import (
"encoding/binary"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"syscall"
"testing"
"time"
)
func TestNotifyOnOOM(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.oom_control": "",
"cgroup.event_control": "",
})
memoryPath, err := ioutil.TempDir("", "testnotifyoom-")
if err != nil {
t.Fatal(err)
}
oomPath := filepath.Join(memoryPath, "memory.oom_control")
eventPath := filepath.Join(memoryPath, "cgroup.event_control")
if err := ioutil.WriteFile(oomPath, []byte{}, 0700); err != nil {
t.Fatal(err)
}
if err := ioutil.WriteFile(eventPath, []byte{}, 0700); err != nil {
t.Fatal(err)
}
var eventFd, oomControlFd int
ooms, err := notifyOnOOM(helper.CgroupData)
st := &State{
CgroupPaths: map[string]string{
"memory": memoryPath,
},
}
ooms, err := NotifyOnOOM(st)
if err != nil {
t.Fatal("expected no error, got:", err)
}
memoryPath, _ := helper.CgroupData.path("memory")
data, err := readFile(memoryPath, "cgroup.event_control")
data, err := ioutil.ReadFile(eventPath)
if err != nil {
t.Fatal("couldn't read event control file:", err)
}
if _, err := fmt.Sscanf(data, "%d %d", &eventFd, &oomControlFd); err != nil {
if _, err := fmt.Sscanf(string(data), "%d %d", &eventFd, &oomControlFd); err != nil {
t.Fatalf("invalid control data %q: %s", data, err)
}
@ -62,7 +72,9 @@ func TestNotifyOnOOM(t *testing.T) {
// simulate what happens when a cgroup is destroyed by cleaning up and then
// writing to the eventfd.
helper.cleanup()
if err := os.RemoveAll(memoryPath); err != nil {
t.Fatal(err)
}
if _, err := syscall.Write(efd, buf); err != nil {
t.Fatal("unable to write to eventfd:", err)
}

View file

@ -176,13 +176,13 @@
"TERM=xterm"
],
"hostname": "koye",
"namespaces": {
"NEWIPC": true,
"NEWNET": true,
"NEWNS": true,
"NEWPID": true,
"NEWUTS": true
},
"namespaces": [
{"type":"NEWIPC"},
{"type": "NEWNET"},
{"type": "NEWNS"},
{"type": "NEWPID"},
{"type": "NEWUTS"}
],
"networks": [
{
"address": "127.0.0.1/0",

View file

@ -175,13 +175,13 @@
"TERM=xterm"
],
"hostname": "koye",
"namespaces": {
"NEWIPC": true,
"NEWNET": true,
"NEWNS": true,
"NEWPID": true,
"NEWUTS": true
},
"namespaces": [
{"type": "NEWIPC"},
{"type": "NEWNET"},
{"type": "NEWNS"},
{"type": "NEWPID"},
{"type": "NEWUTS"}
],
"networks": [
{
"address": "127.0.0.1/0",

View file

@ -181,13 +181,13 @@
"TERM=xterm"
],
"hostname": "koye",
"namespaces": {
"NEWIPC": true,
"NEWNET": true,
"NEWNS": true,
"NEWPID": true,
"NEWUTS": true
},
"namespaces": [
{"type": "NEWIPC"},
{"type": "NEWNET"},
{"type": "NEWNS"},
{"type": "NEWPID"},
{"type": "NEWUTS"}
],
"networks": [
{
"address": "127.0.0.1/0",

View file

@ -175,13 +175,13 @@
"TERM=xterm"
],
"hostname": "koye",
"namespaces": {
"NEWIPC": true,
"NEWNET": true,
"NEWNS": true,
"NEWPID": true,
"NEWUTS": true
},
"namespaces": [
{"type": "NEWIPC"},
{"type": "NEWNET"},
{"type": "NEWNS"},
{"type": "NEWPID"},
{"type": "NEWUTS"}
],
"networks": [
{
"address": "127.0.0.1/0",

View file

@ -177,13 +177,13 @@
"TERM=xterm"
],
"hostname": "koye",
"namespaces": {
"NEWIPC": true,
"NEWNET": true,
"NEWNS": true,
"NEWPID": true,
"NEWUTS": true
},
"namespaces": [
{"type": "NEWIPC"},
{"type": "NEWNET"},
{"type": "NEWNS"},
{"type": "NEWPID"},
{"type": "NEWUTS"}
],
"networks": [
{
"address": "127.0.0.1/0",

View file

@ -1 +1,2 @@
Tianon Gravi <admwiggin@gmail.com> (@tianon)
Aleksa Sarai <cyphar@cyphar.com> (@cyphar)

View file

@ -9,22 +9,22 @@ import (
// Unix-specific path to the passwd and group formatted files.
const (
unixPasswdFile = "/etc/passwd"
unixGroupFile = "/etc/group"
unixPasswdPath = "/etc/passwd"
unixGroupPath = "/etc/group"
)
func GetPasswdFile() (string, error) {
return unixPasswdFile, nil
func GetPasswdPath() (string, error) {
return unixPasswdPath, nil
}
func GetPasswd() (io.ReadCloser, error) {
return os.Open(unixPasswdFile)
return os.Open(unixPasswdPath)
}
func GetGroupFile() (string, error) {
return unixGroupFile, nil
func GetGroupPath() (string, error) {
return unixGroupPath, nil
}
func GetGroup() (io.ReadCloser, error) {
return os.Open(unixGroupFile)
return os.Open(unixGroupPath)
}

View file

@ -4,7 +4,7 @@ package user
import "io"
func GetPasswdFile() (string, error) {
func GetPasswdPath() (string, error) {
return "", ErrUnsupported
}
@ -12,7 +12,7 @@ func GetPasswd() (io.ReadCloser, error) {
return nil, ErrUnsupported
}
func GetGroupFile() (string, error) {
func GetGroupPath() (string, error) {
return "", ErrUnsupported
}

View file

@ -197,11 +197,11 @@ type ExecUser struct {
Home string
}
// GetExecUserFile is a wrapper for GetExecUser. It reads data from each of the
// GetExecUserPath is a wrapper for GetExecUser. It reads data from each of the
// given file paths and uses that data as the arguments to GetExecUser. If the
// files cannot be opened for any reason, the error is ignored and a nil
// io.Reader is passed instead.
func GetExecUserFile(userSpec string, defaults *ExecUser, passwdPath, groupPath string) (*ExecUser, error) {
func GetExecUserPath(userSpec string, defaults *ExecUser, passwdPath, groupPath string) (*ExecUser, error) {
passwd, err := os.Open(passwdPath)
if err != nil {
passwd = nil