Merge pull request #5411 from crosbymichael/lockdown

Update default restrictions for exec drivers
This commit is contained in:
unclejack 2014-04-26 03:27:56 +03:00
commit 44140f7909
22 changed files with 809 additions and 428 deletions

View File

@ -59,9 +59,10 @@ func init() {
}
type driver struct {
root string // root path for the driver to use
apparmor bool
sharedRoot bool
root string // root path for the driver to use
apparmor bool
sharedRoot bool
restrictionPath string
}
func NewDriver(root string, apparmor bool) (*driver, error) {
@ -69,10 +70,15 @@ func NewDriver(root string, apparmor bool) (*driver, error) {
if err := linkLxcStart(root); err != nil {
return nil, err
}
restrictionPath := filepath.Join(root, "empty")
if err := os.MkdirAll(restrictionPath, 0700); err != nil {
return nil, err
}
return &driver{
apparmor: apparmor,
root: root,
sharedRoot: rootIsShared(),
apparmor: apparmor,
root: root,
sharedRoot: rootIsShared(),
restrictionPath: restrictionPath,
}, nil
}
@ -403,14 +409,16 @@ func (d *driver) generateLXCConfig(c *execdriver.Command) (string, error) {
if err := LxcTemplateCompiled.Execute(fo, struct {
*execdriver.Command
AppArmor bool
ProcessLabel string
MountLabel string
AppArmor bool
ProcessLabel string
MountLabel string
RestrictionSource string
}{
Command: c,
AppArmor: d.apparmor,
ProcessLabel: process,
MountLabel: mount,
Command: c,
AppArmor: d.apparmor,
ProcessLabel: process,
MountLabel: mount,
RestrictionSource: d.restrictionPath,
}); err != nil {
return "", err
}

View File

@ -88,7 +88,9 @@ lxc.mount.entry = proc {{escapeFstabSpaces $ROOTFS}}/proc proc nosuid,nodev,noex
# WARNING: sysfs is a known attack vector and should probably be disabled
# if your userspace allows it. eg. see http://bit.ly/T9CkqJ
{{if .Privileged}}
lxc.mount.entry = sysfs {{escapeFstabSpaces $ROOTFS}}/sys sysfs nosuid,nodev,noexec 0 0
{{end}}
{{if .Tty}}
lxc.mount.entry = {{.Console}} {{escapeFstabSpaces $ROOTFS}}/dev/console none bind,rw 0 0
@ -109,8 +111,15 @@ lxc.mount.entry = {{$value.Source}} {{escapeFstabSpaces $ROOTFS}}/{{escapeFstabS
{{if .AppArmor}}
lxc.aa_profile = unconfined
{{else}}
#lxc.aa_profile = unconfined
# not unconfined
{{end}}
{{else}}
# restrict access to proc
lxc.mount.entry = {{.RestrictionSource}} {{escapeFstabSpaces $ROOTFS}}/proc/sys none bind,ro 0 0
lxc.mount.entry = {{.RestrictionSource}} {{escapeFstabSpaces $ROOTFS}}/proc/irq none bind,ro 0 0
lxc.mount.entry = {{.RestrictionSource}} {{escapeFstabSpaces $ROOTFS}}/proc/acpi none bind,ro 0 0
lxc.mount.entry = {{escapeFstabSpaces $ROOTFS}}/dev/null {{escapeFstabSpaces $ROOTFS}}/proc/sysrq-trigger none bind,ro 0 0
lxc.mount.entry = {{escapeFstabSpaces $ROOTFS}}/dev/null {{escapeFstabSpaces $ROOTFS}}/proc/kcore none bind,ro 0 0
{{end}}
# limits

View File

@ -25,6 +25,7 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Container
container.Cgroups.Name = c.ID
// check to see if we are running in ramdisk to disable pivot root
container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
container.Context["restriction_path"] = d.restrictionPath
if err := d.createNetwork(container, c); err != nil {
return nil, err
@ -33,6 +34,8 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Container
if err := d.setPrivileged(container); err != nil {
return nil, err
}
} else {
container.Mounts = append(container.Mounts, libcontainer.Mount{Type: "devtmpfs"})
}
if err := d.setupCgroups(container, c); err != nil {
return nil, err
@ -81,6 +84,11 @@ func (d *driver) setPrivileged(container *libcontainer.Container) error {
c.Enabled = true
}
container.Cgroups.DeviceAccess = true
// add sysfs as a mount for privileged containers
container.Mounts = append(container.Mounts, libcontainer.Mount{Type: "sysfs"})
delete(container.Context, "restriction_path")
if apparmor.IsEnabled() {
container.Context["apparmor_profile"] = "unconfined"
}
@ -99,7 +107,13 @@ func (d *driver) setupCgroups(container *libcontainer.Container, c *execdriver.C
func (d *driver) setupMounts(container *libcontainer.Container, c *execdriver.Command) error {
for _, m := range c.Mounts {
container.Mounts = append(container.Mounts, libcontainer.Mount{m.Source, m.Destination, m.Writable, m.Private})
container.Mounts = append(container.Mounts, libcontainer.Mount{
Type: "bind",
Source: m.Source,
Destination: m.Destination,
Writable: m.Writable,
Private: m.Private,
})
}
return nil
}

View File

@ -23,7 +23,7 @@ import (
const (
DriverName = "native"
Version = "0.1"
Version = "0.2"
BackupApparmorProfilePath = "apparmor/docker.back" // relative to docker root
)
@ -62,6 +62,7 @@ type driver struct {
root string
initPath string
activeContainers map[string]*exec.Cmd
restrictionPath string
}
func NewDriver(root, initPath string) (*driver, error) {
@ -72,8 +73,14 @@ func NewDriver(root, initPath string) (*driver, error) {
if err := apparmor.InstallDefaultProfile(filepath.Join(root, "../..", BackupApparmorProfilePath)); err != nil {
return nil, err
}
restrictionPath := filepath.Join(root, "empty")
if err := os.MkdirAll(restrictionPath, 0700); err != nil {
return nil, err
}
return &driver{
root: root,
restrictionPath: restrictionPath,
initPath: initPath,
activeContainers: make(map[string]*exec.Cmd),
}, nil

View File

@ -665,3 +665,25 @@ func TestUnPrivilegedCannotMount(t *testing.T) {
logDone("run - test un-privileged cannot mount")
}
func TestSysNotAvaliableInNonPrivilegedContainers(t *testing.T) {
cmd := exec.Command(dockerBinary, "run", "busybox", "ls", "/sys/kernel")
if code, err := runCommand(cmd); err == nil || code == 0 {
t.Fatal("sys should not be available in a non privileged container")
}
deleteAllContainers()
logDone("run - sys not avaliable in non privileged container")
}
func TestSysAvaliableInPrivilegedContainers(t *testing.T) {
cmd := exec.Command(dockerBinary, "run", "--privileged", "busybox", "ls", "/sys/kernel")
if code, err := runCommand(cmd); err != nil || code != 0 {
t.Fatalf("sys should be available in privileged container")
}
deleteAllContainers()
logDone("run - sys avaliable in privileged container")
}

View File

@ -16,76 +16,149 @@ process are specified in this file. The configuration is used for each process
Sample `container.json` file:
```json
{
"mounts" : [
{
"type" : "devtmpfs"
}
],
"tty" : true,
"environment" : [
"HOME=/",
"PATH=PATH=$PATH:/bin:/usr/bin:/sbin:/usr/sbin",
"container=docker",
"TERM=xterm-256color"
],
"hostname" : "koye",
"cgroups" : {
"parent" : "docker",
"name" : "docker-koye"
},
"capabilities_mask" : [
{
"value" : 8,
"key" : "SETPCAP",
"enabled" : false
},
{
"enabled" : false,
"value" : 16,
"key" : "SYS_MODULE"
},
{
"value" : 17,
"key" : "SYS_RAWIO",
"enabled" : false
},
{
"key" : "SYS_PACCT",
"value" : 20,
"enabled" : false
},
{
"value" : 21,
"key" : "SYS_ADMIN",
"enabled" : false
},
{
"value" : 23,
"key" : "SYS_NICE",
"enabled" : false
},
{
"value" : 24,
"key" : "SYS_RESOURCE",
"enabled" : false
},
{
"key" : "SYS_TIME",
"value" : 25,
"enabled" : false
},
{
"enabled" : false,
"value" : 26,
"key" : "SYS_TTY_CONFIG"
},
{
"key" : "AUDIT_WRITE",
"value" : 29,
"enabled" : false
},
{
"value" : 30,
"key" : "AUDIT_CONTROL",
"enabled" : false
},
{
"enabled" : false,
"key" : "MAC_OVERRIDE",
"value" : 32
},
{
"enabled" : false,
"key" : "MAC_ADMIN",
"value" : 33
},
{
"key" : "NET_ADMIN",
"value" : 12,
"enabled" : false
},
{
"value" : 27,
"key" : "MKNOD",
"enabled" : true
}
],
"networks" : [
{
"gateway" : "172.17.42.1",
"mtu" : 1500,
"address" : "127.0.0.1/0",
"type" : "loopback",
"gateway" : "localhost"
},
{
"mtu" : 1500,
"address" : "172.17.42.2/16",
"type" : "veth",
"context" : {
"bridge" : "docker0",
"prefix" : "veth"
},
"address" : "172.17.0.2/16",
"type" : "veth",
"mtu" : 1500
}
],
"cgroups" : {
"parent" : "docker",
"name" : "11bb30683fb0bdd57fab4d3a8238877f1e4395a2cfc7320ea359f7a02c1a5620"
},
"tty" : true,
"environment" : [
"HOME=/",
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"HOSTNAME=11bb30683fb0",
"TERM=xterm"
],
"capabilities_mask" : [
"SETPCAP",
"SYS_MODULE",
"SYS_RAWIO",
"SYS_PACCT",
"SYS_ADMIN",
"SYS_NICE",
"SYS_RESOURCE",
"SYS_TIME",
"SYS_TTY_CONFIG",
"MKNOD",
"AUDIT_WRITE",
"AUDIT_CONTROL",
"MAC_OVERRIDE",
"MAC_ADMIN",
"NET_ADMIN"
],
"context" : {
"apparmor_profile" : "docker-default"
},
"mounts" : [
{
"source" : "/var/lib/docker/containers/11bb30683fb0bdd57fab4d3a8238877f1e4395a2cfc7320ea359f7a02c1a5620/resolv.conf",
"writable" : false,
"destination" : "/etc/resolv.conf",
"private" : true
},
{
"source" : "/var/lib/docker/containers/11bb30683fb0bdd57fab4d3a8238877f1e4395a2cfc7320ea359f7a02c1a5620/hostname",
"writable" : false,
"destination" : "/etc/hostname",
"private" : true
},
{
"source" : "/var/lib/docker/containers/11bb30683fb0bdd57fab4d3a8238877f1e4395a2cfc7320ea359f7a02c1a5620/hosts",
"writable" : false,
"destination" : "/etc/hosts",
"private" : true
"gateway" : "172.17.42.1"
}
],
"namespaces" : [
"NEWNS",
"NEWUTS",
"NEWIPC",
"NEWPID",
"NEWNET"
{
"key" : "NEWNS",
"value" : 131072,
"enabled" : true,
"file" : "mnt"
},
{
"key" : "NEWUTS",
"value" : 67108864,
"enabled" : true,
"file" : "uts"
},
{
"enabled" : true,
"file" : "ipc",
"key" : "NEWIPC",
"value" : 134217728
},
{
"file" : "pid",
"enabled" : true,
"value" : 536870912,
"key" : "NEWPID"
},
{
"enabled" : true,
"file" : "net",
"key" : "NEWNET",
"value" : 1073741824
}
]
}
```

View File

@ -0,0 +1,60 @@
// +build linux
package console
import (
"fmt"
"github.com/dotcloud/docker/pkg/label"
"github.com/dotcloud/docker/pkg/system"
"os"
"path/filepath"
"syscall"
)
// Setup initializes the proper /dev/console inside the rootfs path
func Setup(rootfs, consolePath, mountLabel string) error {
oldMask := system.Umask(0000)
defer system.Umask(oldMask)
stat, err := os.Stat(consolePath)
if err != nil {
return fmt.Errorf("stat console %s %s", consolePath, err)
}
var (
st = stat.Sys().(*syscall.Stat_t)
dest = filepath.Join(rootfs, "dev/console")
)
if err := os.Remove(dest); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("remove %s %s", dest, err)
}
if err := os.Chmod(consolePath, 0600); err != nil {
return err
}
if err := os.Chown(consolePath, 0, 0); err != nil {
return err
}
if err := system.Mknod(dest, (st.Mode&^07777)|0600, int(st.Rdev)); err != nil {
return fmt.Errorf("mknod %s %s", dest, err)
}
if err := label.SetFileLabel(consolePath, mountLabel); err != nil {
return fmt.Errorf("set file label %s %s", dest, err)
}
if err := system.Mount(consolePath, dest, "bind", syscall.MS_BIND, ""); err != nil {
return fmt.Errorf("bind %s to %s %s", consolePath, dest, err)
}
return nil
}
func OpenAndDup(consolePath string) error {
slave, err := system.OpenTerminal(consolePath, syscall.O_RDWR)
if err != nil {
return fmt.Errorf("open terminal %s", err)
}
if err := system.Dup2(slave.Fd(), 0); err != nil {
return err
}
if err := system.Dup2(slave.Fd(), 1); err != nil {
return err
}
return system.Dup2(slave.Fd(), 2)
}

View File

@ -23,7 +23,7 @@ type Container struct {
Networks []*Network `json:"networks,omitempty"` // nil for host's network stack
Cgroups *cgroups.Cgroup `json:"cgroups,omitempty"` // cgroups
Context Context `json:"context,omitempty"` // generic context for specific options (apparmor, selinux)
Mounts []Mount `json:"mounts,omitempty"`
Mounts Mounts `json:"mounts,omitempty"`
}
// Network defines configuration for a container's networking stack
@ -37,12 +37,3 @@ type Network struct {
Gateway string `json:"gateway,omitempty"`
Mtu int `json:"mtu,omitempty"`
}
// Bind mounts from the host system to the container
//
type Mount struct {
Source string `json:"source"` // Source path, in the host namespace
Destination string `json:"destination"` // Destination path, in the container
Writable bool `json:"writable"`
Private bool `json:"private"`
}

View File

@ -1,50 +1,146 @@
{
"hostname": "koye",
"tty": true,
"environment": [
"HOME=/",
"PATH=PATH=$PATH:/bin:/usr/bin:/sbin:/usr/sbin",
"container=docker",
"TERM=xterm-256color"
],
"namespaces": [
"NEWIPC",
"NEWNS",
"NEWPID",
"NEWUTS",
"NEWNET"
],
"capabilities_mask": [
"SETPCAP",
"SYS_MODULE",
"SYS_RAWIO",
"SYS_PACCT",
"SYS_ADMIN",
"SYS_NICE",
"SYS_RESOURCE",
"SYS_TIME",
"SYS_TTY_CONFIG",
"MKNOD",
"AUDIT_WRITE",
"AUDIT_CONTROL",
"MAC_OVERRIDE",
"MAC_ADMIN",
"NET_ADMIN"
],
"networks": [{
"type": "veth",
"context": {
"bridge": "docker0",
"prefix": "dock"
},
"address": "172.17.0.100/16",
"gateway": "172.17.42.1",
"mtu": 1500
}
],
"cgroups": {
"name": "docker-koye",
"parent": "docker",
"memory": 5248000
}
"mounts" : [
{
"type" : "devtmpfs"
}
],
"tty" : true,
"environment" : [
"HOME=/",
"PATH=PATH=$PATH:/bin:/usr/bin:/sbin:/usr/sbin",
"container=docker",
"TERM=xterm-256color"
],
"hostname" : "koye",
"cgroups" : {
"parent" : "docker",
"name" : "docker-koye"
},
"capabilities_mask" : [
{
"value" : 8,
"key" : "SETPCAP",
"enabled" : false
},
{
"enabled" : false,
"value" : 16,
"key" : "SYS_MODULE"
},
{
"value" : 17,
"key" : "SYS_RAWIO",
"enabled" : false
},
{
"key" : "SYS_PACCT",
"value" : 20,
"enabled" : false
},
{
"value" : 21,
"key" : "SYS_ADMIN",
"enabled" : false
},
{
"value" : 23,
"key" : "SYS_NICE",
"enabled" : false
},
{
"value" : 24,
"key" : "SYS_RESOURCE",
"enabled" : false
},
{
"key" : "SYS_TIME",
"value" : 25,
"enabled" : false
},
{
"enabled" : false,
"value" : 26,
"key" : "SYS_TTY_CONFIG"
},
{
"key" : "AUDIT_WRITE",
"value" : 29,
"enabled" : false
},
{
"value" : 30,
"key" : "AUDIT_CONTROL",
"enabled" : false
},
{
"enabled" : false,
"key" : "MAC_OVERRIDE",
"value" : 32
},
{
"enabled" : false,
"key" : "MAC_ADMIN",
"value" : 33
},
{
"key" : "NET_ADMIN",
"value" : 12,
"enabled" : false
},
{
"value" : 27,
"key" : "MKNOD",
"enabled" : true
}
],
"networks" : [
{
"mtu" : 1500,
"address" : "127.0.0.1/0",
"type" : "loopback",
"gateway" : "localhost"
},
{
"mtu" : 1500,
"address" : "172.17.42.2/16",
"type" : "veth",
"context" : {
"bridge" : "docker0",
"prefix" : "veth"
},
"gateway" : "172.17.42.1"
}
],
"namespaces" : [
{
"key" : "NEWNS",
"value" : 131072,
"enabled" : true,
"file" : "mnt"
},
{
"key" : "NEWUTS",
"value" : 67108864,
"enabled" : true,
"file" : "uts"
},
{
"enabled" : true,
"file" : "ipc",
"key" : "NEWIPC",
"value" : 134217728
},
{
"file" : "pid",
"enabled" : true,
"value" : 536870912,
"key" : "NEWPID"
},
{
"enabled" : true,
"file" : "net",
"key" : "NEWNET",
"value" : 1073741824
}
]
}

View File

@ -0,0 +1,143 @@
// +build linux
package mount
import (
"fmt"
"github.com/dotcloud/docker/pkg/label"
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/libcontainer/mount/nodes"
"github.com/dotcloud/docker/pkg/libcontainer/security/restrict"
"github.com/dotcloud/docker/pkg/system"
"os"
"path/filepath"
"syscall"
)
// default mount point flags
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
type mount struct {
source string
path string
device string
flags int
data string
}
// InitializeMountNamespace setups up the devices, mount points, and filesystems for use inside a
// new mount namepsace
func InitializeMountNamespace(rootfs, console string, container *libcontainer.Container) error {
var (
err error
flag = syscall.MS_PRIVATE
)
if container.NoPivotRoot {
flag = syscall.MS_SLAVE
}
if err := system.Mount("", "/", "", uintptr(flag|syscall.MS_REC), ""); err != nil {
return fmt.Errorf("mounting / as slave %s", err)
}
if err := system.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
return fmt.Errorf("mouting %s as bind %s", rootfs, err)
}
if err := mountSystem(rootfs, container); err != nil {
return fmt.Errorf("mount system %s", err)
}
if err := setupBindmounts(rootfs, container.Mounts); err != nil {
return fmt.Errorf("bind mounts %s", err)
}
if err := nodes.CopyN(rootfs, nodes.DefaultNodes); err != nil {
return fmt.Errorf("copy dev nodes %s", err)
}
if restrictionPath := container.Context["restriction_path"]; restrictionPath != "" {
if err := restrict.Restrict(rootfs, restrictionPath); err != nil {
return fmt.Errorf("restrict %s", err)
}
}
if err := SetupPtmx(rootfs, console, container.Context["mount_label"]); err != nil {
return err
}
if err := system.Chdir(rootfs); err != nil {
return fmt.Errorf("chdir into %s %s", rootfs, err)
}
if container.NoPivotRoot {
err = MsMoveRoot(rootfs)
} else {
err = PivotRoot(rootfs)
}
if err != nil {
return err
}
if container.ReadonlyFs {
if err := SetReadonly(); err != nil {
return fmt.Errorf("set readonly %s", err)
}
}
system.Umask(0022)
return nil
}
// mountSystem sets up linux specific system mounts like sys, proc, shm, and devpts
// inside the mount namespace
func mountSystem(rootfs string, container *libcontainer.Container) error {
for _, m := range newSystemMounts(rootfs, container.Context["mount_label"], container.Mounts) {
if err := os.MkdirAll(m.path, 0755); err != nil && !os.IsExist(err) {
return fmt.Errorf("mkdirall %s %s", m.path, err)
}
if err := system.Mount(m.source, m.path, m.device, uintptr(m.flags), m.data); err != nil {
return fmt.Errorf("mounting %s into %s %s", m.source, m.path, err)
}
}
return nil
}
func setupBindmounts(rootfs string, bindMounts libcontainer.Mounts) error {
for _, m := range bindMounts.OfType("bind") {
var (
flags = syscall.MS_BIND | syscall.MS_REC
dest = filepath.Join(rootfs, m.Destination)
)
if !m.Writable {
flags = flags | syscall.MS_RDONLY
}
if err := system.Mount(m.Source, dest, "bind", uintptr(flags), ""); err != nil {
return fmt.Errorf("mounting %s into %s %s", m.Source, dest, err)
}
if !m.Writable {
if err := system.Mount(m.Source, dest, "bind", uintptr(flags|syscall.MS_REMOUNT), ""); err != nil {
return fmt.Errorf("remounting %s into %s %s", m.Source, dest, err)
}
}
if m.Private {
if err := system.Mount("", dest, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil {
return fmt.Errorf("mounting %s private %s", dest, err)
}
}
}
return nil
}
// TODO: this is crappy right now and should be cleaned up with a better way of handling system and
// standard bind mounts allowing them to be more dymanic
func newSystemMounts(rootfs, mountLabel string, mounts libcontainer.Mounts) []mount {
systemMounts := []mount{
{source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags},
}
if len(mounts.OfType("devtmpfs")) == 1 {
systemMounts = append(systemMounts, mount{source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: "mode=755"})
}
systemMounts = append(systemMounts,
mount{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)},
mount{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)})
if len(mounts.OfType("sysfs")) == 1 {
systemMounts = append(systemMounts, mount{source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags})
}
return systemMounts
}

View File

@ -0,0 +1,19 @@
// +build linux
package mount
import (
"fmt"
"github.com/dotcloud/docker/pkg/system"
"syscall"
)
func MsMoveRoot(rootfs string) error {
if err := system.Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil {
return fmt.Errorf("mount move %s into / %s", rootfs, err)
}
if err := system.Chroot("."); err != nil {
return fmt.Errorf("chroot . %s", err)
}
return system.Chdir("/")
}

View File

@ -0,0 +1,49 @@
// +build linux
package nodes
import (
"fmt"
"github.com/dotcloud/docker/pkg/system"
"os"
"path/filepath"
"syscall"
)
// Default list of device nodes to copy
var DefaultNodes = []string{
"null",
"zero",
"full",
"random",
"urandom",
"tty",
}
// CopyN copies the device node from the host into the rootfs
func CopyN(rootfs string, nodesToCopy []string) error {
oldMask := system.Umask(0000)
defer system.Umask(oldMask)
for _, node := range nodesToCopy {
if err := Copy(rootfs, node); err != nil {
return err
}
}
return nil
}
func Copy(rootfs, node string) error {
stat, err := os.Stat(filepath.Join("/dev", node))
if err != nil {
return err
}
var (
dest = filepath.Join(rootfs, "dev", node)
st = stat.Sys().(*syscall.Stat_t)
)
if err := system.Mknod(dest, st.Mode, int(st.Rdev)); err != nil && !os.IsExist(err) {
return fmt.Errorf("copy %s %s", node, err)
}
return nil
}

View File

@ -0,0 +1,31 @@
// +build linux
package mount
import (
"fmt"
"github.com/dotcloud/docker/pkg/system"
"io/ioutil"
"os"
"path/filepath"
"syscall"
)
func PivotRoot(rootfs string) error {
pivotDir, err := ioutil.TempDir(rootfs, ".pivot_root")
if err != nil {
return fmt.Errorf("can't create pivot_root dir %s", pivotDir, err)
}
if err := system.Pivotroot(rootfs, pivotDir); err != nil {
return fmt.Errorf("pivot_root %s", err)
}
if err := system.Chdir("/"); err != nil {
return fmt.Errorf("chdir / %s", err)
}
// path to pivot dir now changed, update
pivotDir = filepath.Join("/", filepath.Base(pivotDir))
if err := system.Unmount(pivotDir, syscall.MNT_DETACH); err != nil {
return fmt.Errorf("unmount pivot_root dir %s", err)
}
return os.Remove(pivotDir)
}

View File

@ -0,0 +1,26 @@
// +build linux
package mount
import (
"fmt"
"github.com/dotcloud/docker/pkg/libcontainer/console"
"os"
"path/filepath"
)
func SetupPtmx(rootfs, consolePath, mountLabel string) error {
ptmx := filepath.Join(rootfs, "dev/ptmx")
if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) {
return err
}
if err := os.Symlink("pts/ptmx", ptmx); err != nil {
return fmt.Errorf("symlink dev ptmx %s", err)
}
if consolePath != "" {
if err := console.Setup(rootfs, consolePath, mountLabel); err != nil {
return err
}
}
return nil
}

View File

@ -0,0 +1,12 @@
// +build linux
package mount
import (
"github.com/dotcloud/docker/pkg/system"
"syscall"
)
func SetReadonly() error {
return system.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, "")
}

View File

@ -0,0 +1,31 @@
// +build linux
package mount
import (
"github.com/dotcloud/docker/pkg/system"
"syscall"
)
func RemountProc() error {
if err := system.Unmount("/proc", syscall.MNT_DETACH); err != nil {
return err
}
if err := system.Mount("proc", "/proc", "proc", uintptr(defaultMountFlags), ""); err != nil {
return err
}
return nil
}
func RemountSys() error {
if err := system.Unmount("/sys", syscall.MNT_DETACH); err != nil {
if err != syscall.EINVAL {
return err
}
} else {
if err := system.Mount("sysfs", "/sys", "sysfs", uintptr(defaultMountFlags), ""); err != nil {
return err
}
}
return nil
}

View File

@ -6,6 +6,7 @@ import (
"fmt"
"github.com/dotcloud/docker/pkg/label"
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/libcontainer/mount"
"github.com/dotcloud/docker/pkg/system"
"os"
"path/filepath"
@ -63,10 +64,10 @@ func (ns *linuxNs) ExecIn(container *libcontainer.Container, nspid int, args []s
if err := system.Unshare(syscall.CLONE_NEWNS); err != nil {
return -1, err
}
if err := remountProc(); err != nil {
if err := mount.RemountProc(); err != nil {
return -1, fmt.Errorf("remount proc %s", err)
}
if err := remountSys(); err != nil {
if err := mount.RemountSys(); err != nil {
return -1, fmt.Errorf("remount sys %s", err)
}
goto dropAndExec

View File

@ -11,8 +11,10 @@ import (
"github.com/dotcloud/docker/pkg/apparmor"
"github.com/dotcloud/docker/pkg/label"
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/libcontainer/capabilities"
"github.com/dotcloud/docker/pkg/libcontainer/console"
"github.com/dotcloud/docker/pkg/libcontainer/mount"
"github.com/dotcloud/docker/pkg/libcontainer/network"
"github.com/dotcloud/docker/pkg/libcontainer/security/capabilities"
"github.com/dotcloud/docker/pkg/libcontainer/utils"
"github.com/dotcloud/docker/pkg/system"
"github.com/dotcloud/docker/pkg/user"
@ -20,7 +22,7 @@ import (
// Init is the init process that first runs inside a new namespace to setup mounts, users, networking,
// and other options required for the new container.
func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, console string, syncPipe *SyncPipe, args []string) error {
func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consolePath string, syncPipe *SyncPipe, args []string) error {
rootfs, err := utils.ResolveRootfs(uncleanRootfs)
if err != nil {
return err
@ -36,20 +38,16 @@ func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consol
ns.logger.Println("received context from parent")
syncPipe.Close()
if console != "" {
ns.logger.Printf("setting up %s as console\n", console)
slave, err := system.OpenTerminal(console, syscall.O_RDWR)
if err != nil {
return fmt.Errorf("open terminal %s", err)
}
if err := dupSlave(slave); err != nil {
return fmt.Errorf("dup2 slave %s", err)
if consolePath != "" {
ns.logger.Printf("setting up %s as console\n", consolePath)
if err := console.OpenAndDup(consolePath); err != nil {
return err
}
}
if _, err := system.Setsid(); err != nil {
return fmt.Errorf("setsid %s", err)
}
if console != "" {
if consolePath != "" {
if err := system.Setctty(); err != nil {
return fmt.Errorf("setctty %s", err)
}
@ -60,7 +58,7 @@ func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consol
label.Init()
ns.logger.Println("setup mount namespace")
if err := setupNewMountNamespace(rootfs, container.Mounts, console, container.ReadonlyFs, container.NoPivotRoot, container.Context["mount_label"]); err != nil {
if err := mount.InitializeMountNamespace(rootfs, consolePath, container); err != nil {
return fmt.Errorf("setup mount namespace %s", err)
}
if err := system.Sethostname(container.Hostname); err != nil {
@ -114,21 +112,6 @@ func setupUser(container *libcontainer.Container) error {
return nil
}
// dupSlave dup2 the pty slave's fd into stdout and stdin and ensures that
// the slave's fd is 0, or stdin
func dupSlave(slave *os.File) error {
if err := system.Dup2(slave.Fd(), 0); err != nil {
return err
}
if err := system.Dup2(slave.Fd(), 1); err != nil {
return err
}
if err := system.Dup2(slave.Fd(), 2); err != nil {
return err
}
return nil
}
// setupVethNetwork uses the Network config if it is not nil to initialize
// the new veth interface inside the container for use by changing the name to eth0
// setting the MTU and IP address along with the default gateway

View File

@ -1,265 +0,0 @@
// +build linux
package nsinit
import (
"fmt"
"github.com/dotcloud/docker/pkg/label"
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/system"
"io/ioutil"
"os"
"path/filepath"
"syscall"
)
// default mount point flags
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
// setupNewMountNamespace is used to initialize a new mount namespace for an new
// container in the rootfs that is specified.
//
// There is no need to unmount the new mounts because as soon as the mount namespace
// is no longer in use, the mounts will be removed automatically
func setupNewMountNamespace(rootfs string, bindMounts []libcontainer.Mount, console string, readonly, noPivotRoot bool, mountLabel string) error {
flag := syscall.MS_PRIVATE
if noPivotRoot {
flag = syscall.MS_SLAVE
}
if err := system.Mount("", "/", "", uintptr(flag|syscall.MS_REC), ""); err != nil {
return fmt.Errorf("mounting / as slave %s", err)
}
if err := system.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
return fmt.Errorf("mouting %s as bind %s", rootfs, err)
}
if err := mountSystem(rootfs, mountLabel); err != nil {
return fmt.Errorf("mount system %s", err)
}
for _, m := range bindMounts {
var (
flags = syscall.MS_BIND | syscall.MS_REC
dest = filepath.Join(rootfs, m.Destination)
)
if !m.Writable {
flags = flags | syscall.MS_RDONLY
}
if err := system.Mount(m.Source, dest, "bind", uintptr(flags), ""); err != nil {
return fmt.Errorf("mounting %s into %s %s", m.Source, dest, err)
}
if !m.Writable {
if err := system.Mount(m.Source, dest, "bind", uintptr(flags|syscall.MS_REMOUNT), ""); err != nil {
return fmt.Errorf("remounting %s into %s %s", m.Source, dest, err)
}
}
if m.Private {
if err := system.Mount("", dest, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil {
return fmt.Errorf("mounting %s private %s", dest, err)
}
}
}
if err := copyDevNodes(rootfs); err != nil {
return fmt.Errorf("copy dev nodes %s", err)
}
if err := setupPtmx(rootfs, console, mountLabel); err != nil {
return err
}
if err := system.Chdir(rootfs); err != nil {
return fmt.Errorf("chdir into %s %s", rootfs, err)
}
if noPivotRoot {
if err := rootMsMove(rootfs); err != nil {
return err
}
} else {
if err := rootPivot(rootfs); err != nil {
return err
}
}
if readonly {
if err := system.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, ""); err != nil {
return fmt.Errorf("mounting %s as readonly %s", rootfs, err)
}
}
system.Umask(0022)
return nil
}
// use a pivot root to setup the rootfs
func rootPivot(rootfs string) error {
pivotDir, err := ioutil.TempDir(rootfs, ".pivot_root")
if err != nil {
return fmt.Errorf("can't create pivot_root dir %s", pivotDir, err)
}
if err := system.Pivotroot(rootfs, pivotDir); err != nil {
return fmt.Errorf("pivot_root %s", err)
}
if err := system.Chdir("/"); err != nil {
return fmt.Errorf("chdir / %s", err)
}
// path to pivot dir now changed, update
pivotDir = filepath.Join("/", filepath.Base(pivotDir))
if err := system.Unmount(pivotDir, syscall.MNT_DETACH); err != nil {
return fmt.Errorf("unmount pivot_root dir %s", err)
}
if err := os.Remove(pivotDir); err != nil {
return fmt.Errorf("remove pivot_root dir %s", err)
}
return nil
}
// use MS_MOVE and chroot to setup the rootfs
func rootMsMove(rootfs string) error {
if err := system.Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil {
return fmt.Errorf("mount move %s into / %s", rootfs, err)
}
if err := system.Chroot("."); err != nil {
return fmt.Errorf("chroot . %s", err)
}
if err := system.Chdir("/"); err != nil {
return fmt.Errorf("chdir / %s", err)
}
return nil
}
// copyDevNodes mknods the hosts devices so the new container has access to them
func copyDevNodes(rootfs string) error {
oldMask := system.Umask(0000)
defer system.Umask(oldMask)
for _, node := range []string{
"null",
"zero",
"full",
"random",
"urandom",
"tty",
} {
if err := copyDevNode(rootfs, node); err != nil {
return err
}
}
return nil
}
func copyDevNode(rootfs, node string) error {
stat, err := os.Stat(filepath.Join("/dev", node))
if err != nil {
return err
}
var (
dest = filepath.Join(rootfs, "dev", node)
st = stat.Sys().(*syscall.Stat_t)
)
if err := system.Mknod(dest, st.Mode, int(st.Rdev)); err != nil && !os.IsExist(err) {
return fmt.Errorf("copy %s %s", node, err)
}
return nil
}
// setupConsole ensures that the container has a proper /dev/console setup
func setupConsole(rootfs, console string, mountLabel string) error {
oldMask := system.Umask(0000)
defer system.Umask(oldMask)
stat, err := os.Stat(console)
if err != nil {
return fmt.Errorf("stat console %s %s", console, err)
}
var (
st = stat.Sys().(*syscall.Stat_t)
dest = filepath.Join(rootfs, "dev/console")
)
if err := os.Remove(dest); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("remove %s %s", dest, err)
}
if err := os.Chmod(console, 0600); err != nil {
return err
}
if err := os.Chown(console, 0, 0); err != nil {
return err
}
if err := system.Mknod(dest, (st.Mode&^07777)|0600, int(st.Rdev)); err != nil {
return fmt.Errorf("mknod %s %s", dest, err)
}
if err := label.SetFileLabel(console, mountLabel); err != nil {
return fmt.Errorf("SetFileLabel Failed %s %s", dest, err)
}
if err := system.Mount(console, dest, "bind", syscall.MS_BIND, ""); err != nil {
return fmt.Errorf("bind %s to %s %s", console, dest, err)
}
return nil
}
// mountSystem sets up linux specific system mounts like sys, proc, shm, and devpts
// inside the mount namespace
func mountSystem(rootfs string, mountLabel string) error {
for _, m := range []struct {
source string
path string
device string
flags int
data string
}{
{source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags},
{source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags},
{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)},
{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)},
} {
if err := os.MkdirAll(m.path, 0755); err != nil && !os.IsExist(err) {
return fmt.Errorf("mkdirall %s %s", m.path, err)
}
if err := system.Mount(m.source, m.path, m.device, uintptr(m.flags), m.data); err != nil {
return fmt.Errorf("mounting %s into %s %s", m.source, m.path, err)
}
}
return nil
}
// setupPtmx adds a symlink to pts/ptmx for /dev/ptmx and
// finishes setting up /dev/console
func setupPtmx(rootfs, console string, mountLabel string) error {
ptmx := filepath.Join(rootfs, "dev/ptmx")
if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) {
return err
}
if err := os.Symlink("pts/ptmx", ptmx); err != nil {
return fmt.Errorf("symlink dev ptmx %s", err)
}
if console != "" {
if err := setupConsole(rootfs, console, mountLabel); err != nil {
return err
}
}
return nil
}
// remountProc is used to detach and remount the proc filesystem
// commonly needed with running a new process inside an existing container
func remountProc() error {
if err := system.Unmount("/proc", syscall.MNT_DETACH); err != nil {
return err
}
if err := system.Mount("proc", "/proc", "proc", uintptr(defaultMountFlags), ""); err != nil {
return err
}
return nil
}
func remountSys() error {
if err := system.Unmount("/sys", syscall.MNT_DETACH); err != nil {
if err != syscall.EINVAL {
return err
}
} else {
if err := system.Mount("sysfs", "/sys", "sysfs", uintptr(defaultMountFlags), ""); err != nil {
return err
}
}
return nil
}

View File

@ -0,0 +1,51 @@
package restrict
import (
"fmt"
"os"
"path/filepath"
"syscall"
"github.com/dotcloud/docker/pkg/system"
)
const flags = syscall.MS_BIND | syscall.MS_REC | syscall.MS_RDONLY
var restrictions = map[string]string{
// dirs
"/proc/sys": "",
"/proc/irq": "",
"/proc/acpi": "",
// files
"/proc/sysrq-trigger": "/dev/null",
"/proc/kcore": "/dev/null",
}
// Restrict locks down access to many areas of proc
// by using the asumption that the user does not have mount caps to
// revert the changes made here
func Restrict(rootfs, empty string) error {
for dest, source := range restrictions {
dest = filepath.Join(rootfs, dest)
// we don't have a "/dev/null" for dirs so have the requester pass a dir
// for us to bind mount
switch source {
case "":
source = empty
default:
source = filepath.Join(rootfs, source)
}
if err := system.Mount(source, dest, "bind", flags, ""); err != nil {
if os.IsNotExist(err) {
continue
}
return fmt.Errorf("unable to mount %s over %s %s", source, dest, err)
}
if err := system.Mount("", dest, "bind", flags|syscall.MS_REMOUNT, ""); err != nil {
return fmt.Errorf("unable to mount %s over %s %s", source, dest, err)
}
}
return nil
}

View File

@ -11,6 +11,26 @@ var (
ErrUnsupported = errors.New("Unsupported method")
)
type Mounts []Mount
func (s Mounts) OfType(t string) Mounts {
out := Mounts{}
for _, m := range s {
if m.Type == t {
out = append(out, m)
}
}
return out
}
type Mount struct {
Type string `json:"type,omitempty"`
Source string `json:"source,omitempty"` // Source path, in the host namespace
Destination string `json:"destination,omitempty"` // Destination path, in the container
Writable bool `json:"writable,omitempty"`
Private bool `json:"private,omitempty"`
}
// namespaceList is used to convert the libcontainer types
// into the names of the files located in /proc/<pid>/ns/* for
// each namespace