diff --git a/daemon/container.go b/daemon/container.go index 2ac8316137..bf93787ebf 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -233,6 +233,18 @@ func populateCommand(c *Container, env []string) error { return fmt.Errorf("invalid network mode: %s", c.hostConfig.NetworkMode) } + ipc := &execdriver.Ipc{} + + if c.hostConfig.IpcMode.IsContainer() { + ic, err := c.getIpcContainer() + if err != nil { + return err + } + ipc.ContainerID = ic.ID + } else { + ipc.HostIpc = c.hostConfig.IpcMode.IsHost() + } + // Build lists of devices allowed and created within the container. userSpecifiedDevices := make([]*devices.Device, len(c.hostConfig.Devices)) for i, deviceMapping := range c.hostConfig.Devices { @@ -274,6 +286,7 @@ func populateCommand(c *Container, env []string) error { InitPath: "/.dockerinit", WorkingDir: c.Config.WorkingDir, Network: en, + Ipc: ipc, Resources: resources, AllowedDevices: allowedDevices, AutoCreatedDevices: autoCreatedDevices, @@ -1250,10 +1263,25 @@ func (container *Container) GetMountLabel() string { return container.MountLabel } +func (container *Container) getIpcContainer() (*Container, error) { + containerID := container.hostConfig.IpcMode.Container() + c := container.daemon.Get(containerID) + if c == nil { + return nil, fmt.Errorf("no such container to join IPC: %s", containerID) + } + if !c.IsRunning() { + return nil, fmt.Errorf("cannot join IPC of a non running container: %s", containerID) + } + return c, nil +} + func (container *Container) getNetworkedContainer() (*Container, error) { parts := strings.SplitN(string(container.hostConfig.NetworkMode), ":", 2) switch parts[0] { case "container": + if len(parts) != 2 { + return nil, fmt.Errorf("no container specified to join network") + } nc := container.daemon.Get(parts[1]) if nc == nil { return nil, fmt.Errorf("no such container to join network: %s", parts[1]) diff --git a/daemon/create.go b/daemon/create.go index e72b0ef206..3a71a8ac7e 100644 --- a/daemon/create.go +++ 
b/daemon/create.go @@ -1,10 +1,13 @@ package daemon import ( + "fmt" + "github.com/docker/docker/engine" "github.com/docker/docker/graph" "github.com/docker/docker/pkg/parsers" "github.com/docker/docker/runconfig" + "github.com/docker/libcontainer/label" ) func (daemon *Daemon) ContainerCreate(job *engine.Job) engine.Status { @@ -80,6 +83,12 @@ func (daemon *Daemon) Create(config *runconfig.Config, hostConfig *runconfig.Hos if warnings, err = daemon.mergeAndVerifyConfig(config, img); err != nil { return nil, nil, err } + if hostConfig != nil && config.SecurityOpt == nil { + config.SecurityOpt, err = daemon.GenerateSecurityOpt(hostConfig.IpcMode) + if err != nil { + return nil, nil, err + } + } if container, err = daemon.newContainer(name, config, img); err != nil { return nil, nil, err } @@ -99,3 +108,20 @@ func (daemon *Daemon) Create(config *runconfig.Config, hostConfig *runconfig.Hos } return container, warnings, nil } +func (daemon *Daemon) GenerateSecurityOpt(ipcMode runconfig.IpcMode) ([]string, error) { + if ipcMode.IsHost() { + return label.DisableSecOpt(), nil + } + if ipcContainer := ipcMode.Container(); ipcContainer != "" { + c := daemon.Get(ipcContainer) + if c == nil { + return nil, fmt.Errorf("no such container to join IPC: %s", ipcContainer) + } + if !c.IsRunning() { + return nil, fmt.Errorf("cannot join IPC of a non running container: %s", ipcContainer) + } + + return label.DupSecOpt(c.ProcessLabel), nil + } + return nil, nil +} diff --git a/daemon/execdriver/driver.go b/daemon/execdriver/driver.go index c3ec559c02..411265814d 100644 --- a/daemon/execdriver/driver.go +++ b/daemon/execdriver/driver.go @@ -71,6 +71,12 @@ type Network struct { HostNetworking bool `json:"host_networking"` } +// IPC settings of the container +type Ipc struct { + ContainerID string `json:"container_id"` // id of the container to join ipc. 
+ HostIpc bool `json:"host_ipc"` +} + type NetworkInterface struct { Gateway string `json:"gateway"` IPAddress string `json:"ip"` @@ -115,6 +121,7 @@ type Command struct { WorkingDir string `json:"working_dir"` ConfigPath string `json:"config_path"` // this should be able to be removed when the lxc template is moved into the driver Network *Network `json:"network"` + Ipc *Ipc `json:"ipc"` Resources *Resources `json:"resources"` Mounts []Mount `json:"mounts"` AllowedDevices []*devices.Device `json:"allowed_devices"` diff --git a/daemon/execdriver/native/create.go b/daemon/execdriver/native/create.go index 492247e492..de103eca8a 100644 --- a/daemon/execdriver/native/create.go +++ b/daemon/execdriver/native/create.go @@ -36,6 +36,10 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, e container.MountConfig.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != "" container.RestrictSys = true + if err := d.createIpc(container, c); err != nil { + return nil, err + } + if err := d.createNetwork(container, c); err != nil { return nil, err } @@ -124,6 +128,28 @@ func (d *driver) createNetwork(container *libcontainer.Config, c *execdriver.Com return nil } +func (d *driver) createIpc(container *libcontainer.Config, c *execdriver.Command) error { + if c.Ipc.HostIpc { + container.Namespaces["NEWIPC"] = false + return nil + } + + if c.Ipc.ContainerID != "" { + d.Lock() + active := d.activeContainers[c.Ipc.ContainerID] + d.Unlock() + + if active == nil || active.cmd.Process == nil { + return fmt.Errorf("%s is not a valid running container to join", c.Ipc.ContainerID) + } + cmd := active.cmd + + container.IpcNsPath = filepath.Join("/proc", fmt.Sprint(cmd.Process.Pid), "ns", "ipc") + } + + return nil +} + func (d *driver) setPrivileged(container *libcontainer.Config) (err error) { container.Capabilities = capabilities.GetAllCapabilities() container.Cgroups.AllowAllDevices = true diff --git a/docs/man/docker-run.1.md b/docs/man/docker-run.1.md index 
ff3dac17b0..0aa4cad3fe 100644 --- a/docs/man/docker-run.1.md +++ b/docs/man/docker-run.1.md @@ -23,6 +23,7 @@ docker-run - Run a command in a new container [**--expose**[=*[]*]] [**-h**|**--hostname**[=*HOSTNAME*]] [**-i**|**--interactive**[=*false*]] +[**--ipc**[=*[]*]] [**--security-opt**[=*[]*]] [**--link**[=*[]*]] [**--lxc-conf**[=*[]*]] @@ -142,6 +143,12 @@ ENTRYPOINT. **-i**, **--interactive**=*true*|*false* When set to true, keep stdin open even if not attached. The default is false. +**--ipc**=[] + Set the IPC mode for the container + **container**:<*name*|*id*>: reuses another container's IPC stack + **host**: use the host's IPC stack inside the container. + Note: the host mode gives the container full access to local IPC and is therefore considered insecure. + **--security-opt**=*secdriver*:*name*:*value* "label:user:USER" : Set the label user for the container "label:role:ROLE" : Set the label role for the container @@ -183,10 +190,11 @@ and foreground Docker containers. **--net**="bridge" Set the Network mode for the container - 'bridge': creates a new network stack for the container on the docker bridge - 'none': no networking for this container - 'container:': reuses another container network stack - 'host': use the host network stack inside the container. Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure. + **bridge**: creates a new network stack for the container on the docker bridge + **none**: no networking for this container + **container**:<*name*|*id*>: reuses another container's network stack + **host**: use the host network stack inside the container. + Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure. 
**--mac-address**=*macaddress* Set the MAC address for the container's Ethernet device: @@ -310,6 +318,71 @@ you’d like to connect instead, as in: # docker run -a stdin -a stdout -i -t fedora /bin/bash +## Sharing IPC between containers + +Using shm_server.c available here: http://www.cs.cf.ac.uk/Dave/C/node27.html + +Testing `--ipc=host` mode: + +The host shows a shared memory segment with 7 pids attached, which happens to be from httpd: + +``` + $ sudo ipcs -m + + ------ Shared Memory Segments -------- + key shmid owner perms bytes nattch status + 0x01128e25 0 root 600 1000 7 +``` + +Now run a regular container, and it correctly does NOT see the shared memory segment from the host: + +``` + $ sudo docker run -it shm ipcs -m + + ------ Shared Memory Segments -------- + key shmid owner perms bytes nattch status +``` + +Run a container with the new `--ipc=host` option, and it now sees the shared memory segment from the host httpd: + +``` + $ sudo docker run -it --ipc=host shm ipcs -m + + ------ Shared Memory Segments -------- + key shmid owner perms bytes nattch status + 0x01128e25 0 root 600 1000 7 +``` +Testing `--ipc=container:CONTAINERID` mode: + +Start a container with a program to create a shared memory segment: +``` + sudo docker run -it shm bash + $ sudo shm/shm_server & + $ sudo ipcs -m + + ------ Shared Memory Segments -------- + key shmid owner perms bytes nattch status + 0x0000162e 0 root 666 27 1 +``` +Create a 2nd container; it correctly shows no shared memory segment from the 1st container: +``` + $ sudo docker run shm ipcs -m + + ------ Shared Memory Segments -------- + key shmid owner perms bytes nattch status +``` + +Create a 3rd container using the new `--ipc=container:CONTAINERID` option; it now shows the shared memory segment from the first: + +``` + $ sudo docker run -it --ipc=container:ed735b2264ac shm ipcs -m + $ sudo ipcs -m + + ------ Shared Memory Segments -------- + key shmid owner perms bytes nattch status + 0x0000162e 0 root 666 27 1 +``` + ## Linking 
Containers The link feature allows multiple containers to communicate with each other. For diff --git a/docs/sources/reference/run.md b/docs/sources/reference/run.md index 1abb7d0575..31029e2a11 100644 --- a/docs/sources/reference/run.md +++ b/docs/sources/reference/run.md @@ -50,6 +50,7 @@ following options. - [Container Identification](#container-identification) - [Name (--name)](#name-name) - [PID Equivalent](#pid-equivalent) + - [IPC Settings](#ipc-settings) - [Network Settings](#network-settings) - [Clean Up (--rm)](#clean-up-rm) - [Runtime Constraints on CPU and Memory](#runtime-constraints-on-cpu-and-memory) @@ -131,6 +132,22 @@ While not strictly a means of identifying a container, you can specify a version image you'd like to run the container with by adding `image[:tag]` to the command. For example, `docker run ubuntu:14.04`. +## IPC Settings + --ipc="" : Set the IPC mode for the container, + 'container:<name|id>': reuses another container's IPC namespace + 'host': use the host's IPC namespace inside the container +By default, all containers have the IPC namespace enabled. + +The IPC (POSIX/SysV IPC) namespace provides separation of named shared memory segments, semaphores and message queues. + +Shared memory segments are used to accelerate inter-process communication at +memory speed, rather than through pipes or through the network stack. Shared +memory is commonly used by databases and custom-built (typically C/OpenMPI, +C++/using boost libraries) high performance applications for scientific +computing and financial services industries. If these types of applications +are broken into multiple containers, you might need to share the IPC mechanisms +of the containers. 
+ ## Network settings --dns=[] : Set custom dns servers for the container diff --git a/integration-cli/docker_cli_run_test.go b/integration-cli/docker_cli_run_test.go index d536c626bb..54949730a1 100644 --- a/integration-cli/docker_cli_run_test.go +++ b/integration-cli/docker_cli_run_test.go @@ -2568,3 +2568,73 @@ func TestRunUnknownCommand(t *testing.T) { logDone("run - Unknown Command") } + +func TestRunModeIpcHost(t *testing.T) { + hostIpc, err := os.Readlink("/proc/1/ns/ipc") + if err != nil { + t.Fatal(err) + } + + cmd := exec.Command(dockerBinary, "run", "--ipc=host", "busybox", "readlink", "/proc/self/ns/ipc") + out2, _, err := runCommandWithOutput(cmd) + if err != nil { + t.Fatal(err, out2) + } + + out2 = strings.Trim(out2, "\n") + if hostIpc != out2 { + t.Fatalf("IPC different with --ipc=host %s != %s\n", hostIpc, out2) + } + + cmd = exec.Command(dockerBinary, "run", "busybox", "readlink", "/proc/self/ns/ipc") + out2, _, err = runCommandWithOutput(cmd) + if err != nil { + t.Fatal(err, out2) + } + + out2 = strings.Trim(out2, "\n") + if hostIpc == out2 { + t.Fatalf("IPC should be different without --ipc=host %s == %s\n", hostIpc, out2) + } + deleteAllContainers() + + logDone("run - ipc host mode") +} + +func TestRunModeIpcContainer(t *testing.T) { + cmd := exec.Command(dockerBinary, "run", "-d", "busybox", "top") + out, _, err := runCommandWithOutput(cmd) + if err != nil { + t.Fatal(err, out) + } + id := strings.TrimSpace(out) + state, err := inspectField(id, "State.Running") + if err != nil { + t.Fatal(err) + } + if state != "true" { + t.Fatal("Container state is 'not running'") + } + pid1, err := inspectField(id, "State.Pid") + if err != nil { + t.Fatal(err) + } + + parentContainerIpc, err := os.Readlink(fmt.Sprintf("/proc/%s/ns/ipc", pid1)) + if err != nil { + t.Fatal(err) + } + cmd = exec.Command(dockerBinary, "run", fmt.Sprintf("--ipc=container:%s", id), "busybox", "readlink", "/proc/self/ns/ipc") + out2, _, err := 
runCommandWithOutput(cmd) + if err != nil { + t.Fatal(err, out2) + } + + out2 = strings.Trim(out2, "\n") + if parentContainerIpc != out2 { + t.Fatalf("IPC different with --ipc=container:%s %s != %s\n", id, parentContainerIpc, out2) + } + deleteAllContainers() + + logDone("run - ipc container mode") +} diff --git a/runconfig/hostconfig.go b/runconfig/hostconfig.go index 5c49522038..01388ad727 100644 --- a/runconfig/hostconfig.go +++ b/runconfig/hostconfig.go @@ -28,6 +28,44 @@ func (n NetworkMode) IsNone() bool { return n == "none" } +type IpcMode string + +// IsPrivate indicates whether the container uses its private ipc stack +func (n IpcMode) IsPrivate() bool { + return !(n.IsHost() || n.IsContainer()) +} + +func (n IpcMode) IsHost() bool { + return n == "host" +} + +func (n IpcMode) IsContainer() bool { + parts := strings.SplitN(string(n), ":", 2) + return len(parts) > 1 && parts[0] == "container" +} + +func (n IpcMode) Valid() bool { + parts := strings.Split(string(n), ":") + switch mode := parts[0]; mode { + case "", "host": + case "container": + if len(parts) != 2 || parts[1] == "" { + return false + } + default: + return false + } + return true +} + +func (n IpcMode) Container() string { + parts := strings.SplitN(string(n), ":", 2) + if len(parts) > 1 && parts[0] == "container" { + return parts[1] + } + return "" +} + type DeviceMapping struct { PathOnHost string PathInContainer string @@ -53,6 +91,7 @@ type HostConfig struct { VolumesFrom []string Devices []DeviceMapping NetworkMode NetworkMode + IpcMode IpcMode CapAdd []string CapDrop []string RestartPolicy RestartPolicy @@ -84,6 +123,7 @@ func ContainerHostConfigFromJob(job *engine.Job) *HostConfig { Privileged: job.GetenvBool("Privileged"), PublishAllPorts: job.GetenvBool("PublishAllPorts"), NetworkMode: NetworkMode(job.Getenv("NetworkMode")), + IpcMode: IpcMode(job.Getenv("IpcMode")), } job.GetenvJson("LxcConf", &hostConfig.LxcConf) diff --git a/runconfig/parse.go b/runconfig/parse.go index c62ab3fdd4..dfc84c1892 
100644 --- a/runconfig/parse.go +++ b/runconfig/parse.go @@ -60,6 +60,7 @@ func Parse(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Config, flCpuset = cmd.String([]string{"-cpuset"}, "", "CPUs in which to allow execution (0-3, 0,1)") flNetMode = cmd.String([]string{"-net"}, "bridge", "Set the Network mode for the container\n'bridge': creates a new network stack for the container on the docker bridge\n'none': no networking for this container\n'container:': reuses another container network stack\n'host': use the host network stack inside the container. Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure.") flMacAddress = cmd.String([]string{"-mac-address"}, "", "Container MAC address (e.g. 92:d0:c6:0a:29:33)") + flIpcMode = cmd.String([]string{"-ipc"}, "", "Default is to create a private IPC namespace (POSIX SysV IPC) for the container\n'container:': reuses another container shared memory, semaphores and message queues\n'host': use the host shared memory,semaphores and message queues inside the container. Note: the host mode gives the container full access to local shared memory and is therefore considered insecure.") flRestartPolicy = cmd.String([]string{"-restart"}, "", "Restart policy to apply when a container exits (no, on-failure[:max-retry], always)") ) @@ -241,6 +242,11 @@ func Parse(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Config, // parse the '-e' and '--env' after, to allow override envVariables = append(envVariables, flEnv.GetAll()...) 
+ ipcMode := IpcMode(*flIpcMode) + if !ipcMode.Valid() { + return nil, nil, cmd, fmt.Errorf("--ipc: invalid IPC mode: %q", *flIpcMode) + } + netMode, err := parseNetMode(*flNetMode) if err != nil { return nil, nil, cmd, fmt.Errorf("--net: invalid net mode: %v", err) @@ -289,6 +295,7 @@ func Parse(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Config, ExtraHosts: flExtraHosts.GetAll(), VolumesFrom: flVolumesFrom.GetAll(), NetworkMode: netMode, + IpcMode: ipcMode, Devices: deviceMappings, CapAdd: flCapAdd.GetAll(), CapDrop: flCapDrop.GetAll(),